#ifndef DATASETS_H #define DATASETS_H #include #include #include #include namespace platform { using namespace std; enum fileType_t { CSV, ARFF }; class Dataset { private: string path; string name; fileType_t fileType; string className; int n_samples, n_features; vector features; map> states; bool loaded; bool discretize; torch::Tensor X, y; vector> Xv; vector> Xd; vector yv; void buildTensors(); void load_csv(); void load_arff(); void computeStates(); public: Dataset(string path, string name, string className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; Dataset(Dataset&); string getName(); string getClassName(); vector getFeatures(); map> getStates(); pair>&, vector&> getVectors(); pair>&, vector&> getVectorsDiscretized(); pair getTensors(); int getNFeatures(); int getNSamples(); void load(); const bool inline isLoaded() const { return loaded; }; }; class Datasets { private: string path; fileType_t fileType; map> datasets; bool discretize; void load(); // Loads the list of datasets public: Datasets(string path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; vector getNames(); vector getFeatures(string name); int getNSamples(string name); string getClassName(string name); map> getStates(string name); pair>&, vector&> getVectors(string name); pair>&, vector&> getVectorsDiscretized(string name); pair getTensors(string name); bool isDataset(string name); }; }; #endif