diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 6671995..75b83ef 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -12,8 +12,8 @@ namespace bayesnet { : features(features) , className(className) , classNumStates(classNumStates) + , samples(torch::zeros({ static_cast(vsamples[0].size()), static_cast(vsamples.size() + 1) }, torch::kInt32)) { - samples = torch::zeros({ static_cast(vsamples[0].size()), static_cast(vsamples.size() + 1) }, torch::kInt32); for (int i = 0; i < vsamples.size(); ++i) { samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32)); } @@ -67,6 +67,7 @@ namespace bayesnet { } return matrix; } + // To Interface with Python vector Metrics::conditionalEdgeWeights() { auto matrix = conditionalEdge(); @@ -123,7 +124,6 @@ namespace bayesnet { */ vector> Metrics::maximumSpanningTree(vector features, Tensor& weights, int root) { - auto result = vector>(); auto mst = MST(features, weights, root); return mst.maximumSpanningTree(); } diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index d77c2c8..0064f66 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -4,7 +4,7 @@ namespace bayesnet { using namespace torch; - Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} + Classifier::Classifier(const Network& model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier& Classifier::build(vector& features, string className, map>& states) { dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); @@ -125,7 +125,6 @@ namespace bayesnet { } void Classifier::addNodes() { - auto test = model.getEdges(); // Add all nodes to the network for (auto feature : features) { model.addNode(feature, states[feature].size()); diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index ad56336..a9ae764 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -27,7 +27,7 @@ namespace bayesnet { void checkFitParameters(); virtual void train() = 0; public: - Classifier(Network model); + Classifier(const Network& model); virtual ~Classifier() = default; Classifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) override; diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 8aa2518..dce0d3d 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -148,10 +148,10 @@ namespace bayesnet { } int Ensemble::getNumberOfStates() { - int states = 0; + int nstates = 0; for (auto i = 0; i < n_models; ++i) { - states += models[i]->getNumberOfStates(); + nstates += models[i]->getNumberOfStates(); } - return states; + return nstates; } } \ No newline at end of file diff --git a/src/BayesNet/KDB.h b/src/BayesNet/KDB.h index 9683955..d8045c3 100644 --- a/src/BayesNet/KDB.h +++ b/src/BayesNet/KDB.h @@ -13,7 +13,7 @@ namespace bayesnet { protected: void train() override; public: - KDB(int k, float theta = 0.03); + explicit KDB(int k, float theta = 0.03); vector graph(string name = "KDB") override; }; } diff --git a/src/BayesNet/Mst.cc b/src/BayesNet/Mst.cc index b86812b..36c2fd0 100644 --- a/src/BayesNet/Mst.cc +++ b/src/BayesNet/Mst.cc @@ -7,9 +7,8 @@ namespace bayesnet { using namespace std; - Graph::Graph(int V) + Graph::Graph(int V) : V(V), parent{ vector(V) } { - parent = vector(V); for (int i = 0; i < V; i++) parent[i] = i; G.clear(); @@ -34,10 +33,11 @@ namespace bayesnet { } void Graph::kruskal_algorithm() { - int i, uSt, vEd; + int i; // sort the edges ordered on decreasing weight - sort(G.begin(), G.end(), [](auto& left, auto& right) {return left.first > right.first;}); + sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;}); for (i = 0; i < G.size(); i++) { + int uSt, vEd; uSt = find_set(G[i].second.first); vEd = find_set(G[i].second.second); if (uSt != vEd) { diff --git a/src/BayesNet/Mst.h b/src/BayesNet/Mst.h index 15b0dbb..e257b40 100644 --- a/src/BayesNet/Mst.h +++ b/src/BayesNet/Mst.h @@ -12,7 +12,6 @@ namespace bayesnet { vector features; int root; public: - MST() = default; MST(vector& features, Tensor& weights, int root); vector> maximumSpanningTree(); }; @@ -23,7 +22,7 @@ namespace bayesnet { vector >> T; // vector for mst vector parent; public: - Graph(int V); + explicit Graph(int V); void addEdge(int u, int v, float wt); int find_set(int i); void union_set(int u, int v); diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index eb3ffeb..7c0572a 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -4,11 +4,11 @@ #include "bayesnetUtils.h" namespace bayesnet { Network::Network() : laplaceSmoothing(1), features(vector()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {} - Network::Network(float maxT) : laplaceSmoothing(1), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} - Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} - Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted) + Network::Network(const float maxT) : laplaceSmoothing(1), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} + Network::Network(const float maxT, const int smoothing) : laplaceSmoothing(smoothing), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} + Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.classNumStates), maxThreads(other.maxThreads), fitted(other.fitted) { - for (auto& pair : other.nodes) { + for (const auto& pair : other.nodes) { nodes[pair.first] = std::make_unique(*pair.second); } } @@ -20,7 +20,7 @@ namespace bayesnet { { return samples; } - void Network::addNode(string name, int numStates) + void Network::addNode(const string& name, const int numStates) { if (find(features.begin(), features.end(), name) == features.end()) { features.push_back(name); @@ -37,11 +37,11 @@ namespace bayesnet { { return features; } - int Network::getClassNumStates() + const int Network::getClassNumStates() { return classNumStates; } - int Network::getStates() + const int Network::getStates() { int result = 0; for (auto& node : nodes) { @@ -49,7 +49,7 @@ namespace bayesnet { } return result; } - string Network::getClassName() + const string Network::getClassName() { return className; } @@ -69,7 +69,7 @@ namespace bayesnet { recStack.erase(nodeId); // remove node from recursion stack before function ends return false; } - void Network::addEdge(const string parent, const string child) + void Network::addEdge(const string& parent, const string& child) { if (nodes.find(parent) == nodes.end()) { throw invalid_argument("Parent node " + parent + " does not exist"); @@ -105,8 +105,8 @@ namespace bayesnet { for (int i = 0; i < featureNames.size(); ++i) { auto column = torch::flatten(X.index({ "...", i })); auto k = vector(); - for (auto i = 0; i < X.size(0); ++i) { - k.push_back(column[i].item()); + for (auto z = 0; z < X.size(0); ++z) { + k.push_back(column[z].item()); } dataset[featureNames[i]] = k; } @@ -262,9 +262,7 @@ namespace bayesnet { // Normalize result double sum = accumulate(result.begin(), result.end(), 0.0); - for (double& value : result) { - value /= sum; - } + transform(result.begin(), result.end(), result.begin(), [sum](double x) { return x / sum; }); return result; } vector Network::show() @@ -280,7 +278,7 @@ namespace bayesnet { } return result; } - vector Network::graph(string title) + vector Network::graph(const string& title) { auto output = vector(); auto prefix = "digraph BayesNet {\nlabel=>& getNodes(); vector getFeatures(); - int getStates(); + const int getStates(); vector> getEdges(); - int getClassNumStates(); - string getClassName(); + const int getClassNumStates(); + const string getClassName(); void fit(const vector>&, const vector&, const vector&, const string&); void fit(torch::Tensor&, torch::Tensor&, const vector&, const string&); vector predict(const vector>&); @@ -48,7 +48,7 @@ namespace bayesnet { vector> predict_proba(const vector>&); double score(const vector>&, const vector&); vector show(); - vector graph(string title); // Returns a vector of strings representing the graph in graphviz format + vector graph(const string& title); // Returns a vector of strings representing the graph in graphviz format inline string version() { return "0.1.0"; } }; } diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc index d33fecf..e25f807 100644 --- a/src/BayesNet/Node.cc +++ b/src/BayesNet/Node.cc @@ -88,18 +88,14 @@ namespace bayesnet { { // Get dimensions of the CPT dimensions.push_back(numStates); - for (auto father : getParents()) { - dimensions.push_back(father->getNumStates()); - } + transform(parents.begin(), parents.end(), back_inserter(dimensions), [](Node* parent) { return parent->getNumStates(); }); // Create a tensor of zeros with the dimensions of the CPT cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; // Fill table with counts for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) { torch::List> coordinates; coordinates.push_back(torch::tensor(dataset[name][n_sample])); - for (auto father : getParents()) { - coordinates.push_back(torch::tensor(dataset[father->getName()][n_sample])); - } + transform(parents.begin(), parents.end(), back_inserter(coordinates), [&dataset, &n_sample](Node* parent) { return torch::tensor(dataset[parent->getName()][n_sample]); }); // Increment the count of the corresponding coordinate cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1); } @@ -111,19 +107,15 @@ namespace bayesnet { torch::List> coordinates; // following predetermined order of indices in the cpTable (see Node.h) coordinates.push_back(torch::tensor(evidence[name])); - for (auto parent : getParents()) { - coordinates.push_back(torch::tensor(evidence[parent->getName()])); - } + transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](Node* parent) { return torch::tensor(evidence[parent->getName()]); }); return cpTable.index({ coordinates }).item(); } - vector Node::graph(string className) + vector Node::graph(const string& className) { auto output = vector(); auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : ""; output.push_back(name + " [shape=circle" + suffix + "] \n"); - for (auto& child : children) { - output.push_back(name + " -> " + child->getName()); - } + transform(children.begin(), children.end(), back_inserter(output), [this](Node* child) { return name + " -> " + child->getName() + "\n"; }); return output; } } \ No newline at end of file diff --git a/src/BayesNet/Node.h b/src/BayesNet/Node.h index 5c5932a..16c0386 100644 --- a/src/BayesNet/Node.h +++ b/src/BayesNet/Node.h @@ -30,7 +30,7 @@ namespace bayesnet { int getNumStates() const; void setNumStates(int); unsigned minFill(); - vector graph(string clasName); // Returns a vector of strings representing the graph in graphviz format + vector graph(const string& clasName); // Returns a vector of strings representing the graph in graphviz format float getFactorValue(map&); }; } diff --git a/src/BayesNet/SPODE.h b/src/BayesNet/SPODE.h index 668bbca..ac1fd52 100644 --- a/src/BayesNet/SPODE.h +++ b/src/BayesNet/SPODE.h @@ -8,7 +8,7 @@ namespace bayesnet { protected: void train() override; public: - SPODE(int root); + explicit SPODE(int root); vector graph(string name = "SPODE") override; }; } diff --git a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index 9c8dfff..51f0c1b 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -18,7 +18,7 @@ namespace bayesnet { auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset); mi.push_back({ i, mi_value }); } - sort(mi.begin(), mi.end(), [](auto& left, auto& right) {return left.second < right.second;}); + sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); auto root = mi[mi.size() - 1].first; // 2. Compute mutual information between each feature and the class auto weights = metrics.conditionalEdge(); diff --git a/src/Platform/CrossValidation.cc b/src/Platform/CrossValidation.cc index b27788b..f19e19f 100644 --- a/src/Platform/CrossValidation.cc +++ b/src/Platform/CrossValidation.cc @@ -5,11 +5,11 @@ namespace platform { using json = nlohmann::json; using namespace std::chrono; - CrossValidation::CrossValidation(string modelName, bool stratified, int nfolds, vector randomSeeds, platform::Datasets& datasets) : modelName(modelName), stratified(stratified), nfolds(nfolds), randomSeeds(randomSeeds), datasets(datasets) + CrossValidation::CrossValidation(const string& modelName, bool stratified, int nfolds, const vector& randomSeeds, platform::Datasets& datasets) : modelName(modelName), stratified(stratified), nfolds(nfolds), randomSeeds(randomSeeds), datasets(datasets) { } - Result CrossValidation::crossValidate(string fileName) + Result CrossValidation::crossValidate(const string& fileName) { auto [Xt, y] = datasets.getTensors(fileName); auto states = datasets.getStates(fileName); diff --git a/src/Platform/CrossValidation.h b/src/Platform/CrossValidation.h index ff084d7..43a19db 100644 --- a/src/Platform/CrossValidation.h +++ b/src/Platform/CrossValidation.h @@ -17,9 +17,9 @@ namespace platform { vector randomSeeds; platform::Datasets& datasets; public: - CrossValidation(string modelName, bool stratified, int nfolds, vector randomSeeds, platform::Datasets& datasets); + CrossValidation(const string& modelName, bool stratified, int nfolds, const vector& randomSeeds, platform::Datasets& datasets); ~CrossValidation() = default; - Result crossValidate(string fileName); + Result crossValidate(const string& fileName); }; } #endif // !CROSSVALIDATION_H \ No newline at end of file diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 0c09c59..1287533 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -2,7 +2,7 @@ #include "platformUtils.h" #include "ArffFiles.h" namespace platform { - vector split(string text, char delimiter) + vector split(const string& text, char delimiter) { vector result; stringstream ss(text); @@ -14,9 +14,9 @@ namespace platform { } void Datasets::load() { - string line; ifstream catalog(path + "/all.txt"); if (catalog.is_open()) { + string line; while (getline(catalog, line)) { vector tokens = split(line, ','); string name = tokens[0]; @@ -31,12 +31,10 @@ namespace platform { vector Datasets::getNames() { vector result; - for (auto& d : datasets) { - result.push_back(d.first); - } + transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); return result; } - vector Datasets::getFeatures(string name) + vector Datasets::getFeatures(const string& name) { if (datasets[name]->isLoaded()) { return datasets[name]->getFeatures(); @@ -44,7 +42,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - map> Datasets::getStates(string name) + map> Datasets::getStates(const string& name) { if (datasets[name]->isLoaded()) { return datasets[name]->getStates(); @@ -52,7 +50,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - string Datasets::getClassName(string name) + string Datasets::getClassName(const string& name) { if (datasets[name]->isLoaded()) { return datasets[name]->getClassName(); @@ -60,7 +58,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - int Datasets::getNSamples(string name) + int Datasets::getNSamples(const string& name) { if (datasets[name]->isLoaded()) { return datasets[name]->getNSamples(); @@ -68,49 +66,32 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Datasets::getVectors(string name) + pair>&, vector&> Datasets::getVectors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectors(); } - pair>&, vector&> Datasets::getVectorsDiscretized(string name) + pair>&, vector&> Datasets::getVectorsDiscretized(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectorsDiscretized(); } - pair Datasets::getTensors(string name) + pair Datasets::getTensors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getTensors(); } - bool Datasets::isDataset(string name) + bool Datasets::isDataset(const string& name) { return datasets.find(name) != datasets.end(); } - Dataset::Dataset(Dataset& dataset) - { - path = dataset.path; - name = dataset.name; - className = dataset.className; - n_samples = dataset.n_samples; - n_features = dataset.n_features; - features = dataset.features; - states = dataset.states; - loaded = dataset.loaded; - discretize = dataset.discretize; - X = dataset.X; - y = dataset.y; - Xv = dataset.Xv; - Xd = dataset.Xd; - yv = dataset.yv; - fileType = dataset.fileType; - } + Dataset::Dataset(Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) {} string Dataset::getName() { return name; @@ -178,9 +159,9 @@ namespace platform { } void Dataset::load_csv() { - string line; ifstream file(path + "/" + name + ".csv"); if (file.is_open()) { + string line; getline(file, line); vector tokens = split(line, ','); features = vector(tokens.begin(), tokens.end() - 1); @@ -218,9 +199,8 @@ namespace platform { yv = arff.getY(); // Get className & Features className = arff.getClassName(); - for (auto feature : arff.getAttributes()) { - features.push_back(feature.first); - } + auto attributes = arff.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& f) { return f.first; }); } void Dataset::load() { diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h index 1d178b1..57c9c5f 100644 --- a/src/Platform/Datasets.h +++ b/src/Platform/Datasets.h @@ -27,8 +27,8 @@ namespace platform { void load_arff(); void computeStates(); public: - Dataset(string path, string name, string className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; - Dataset(Dataset&); + Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType), n_samples(0), n_features(0) {}; + explicit Dataset(Dataset&); string getName(); string getClassName(); vector getFeatures(); @@ -49,18 +49,18 @@ namespace platform { bool discretize; void load(); // Loads the list of datasets public: - Datasets(string path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; + explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; vector getNames(); - vector getFeatures(string name); - int getNSamples(string name); - string getClassName(string name); - map> getStates(string name); - pair>&, vector&> getVectors(string name); - pair>&, vector&> getVectorsDiscretized(string name); - pair getTensors(string name); - bool isDataset(string name); + vector getFeatures(const string& name); + int getNSamples(const string& name); + string getClassName(const string& name); + map> getStates(const string& name); + pair>&, vector&> getVectors(const string& name); + pair>&, vector&> getVectorsDiscretized(const string& name); + pair getTensors(const string& name); + bool isDataset(const string& name); }; - vector split(string, char); + vector split(const string&, char); }; #endif \ No newline at end of file diff --git a/src/Platform/DotEnv.h b/src/Platform/DotEnv.h index 4c3783f..189a79e 100644 --- a/src/Platform/DotEnv.h +++ b/src/Platform/DotEnv.h @@ -60,9 +60,7 @@ public: seeds_str = trim(seeds_str); seeds_str = seeds_str.substr(1, seeds_str.size() - 2); auto seeds_str_split = split(seeds_str, ','); - for (auto seed_str : seeds_str_split) { - seeds.push_back(stoi(seed_str)); - } + transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const auto& s) { return stoi(s); }); return seeds; } }; diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index b86fa03..2846e08 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -46,7 +46,7 @@ namespace platform { result["seeds"] = random_seeds; result["duration"] = duration; result["results"] = json::array(); - for (auto& r : results) { + for (const auto& r : results) { json j; j["dataset"] = r.getDataset(); j["hyperparameters"] = r.getHyperparameters(); @@ -76,7 +76,7 @@ namespace platform { } return result; } - void Experiment::save(string path) + void Experiment::save(const string& path) { json data = build_json(); ofstream file(path + "/" + get_file_name()); diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index a9cadfb..9d1e5d2 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -24,34 +24,34 @@ namespace platform { }; class Result { private: - string dataset, hyperparameters, model_version; - int samples, features, classes; - double score_train, score_test, score_train_std, score_test_std, train_time, train_time_std, test_time, test_time_std; - vector scores_train, scores_test, times_train, times_test; - float nodes, leaves, depth; + string dataset = "", hyperparameters = "", model_version = ""; + int samples{ 0 }, features{ 0 }, classes{ 0 }; + double score_train{ 0 }, score_test = 0, score_train_std = 0, score_test_std = 0, train_time = 0, train_time_std = 0, test_time = 0, test_time_std = 0; + vector scores_train{}, scores_test{}, times_train{}, times_test{}; + float nodes{ 0 }, leaves{ 0 }, depth{ 0 }; public: Result() = default; - Result& setDataset(string dataset) { this->dataset = dataset; return *this; } - Result& setHyperparameters(string hyperparameters) { this->hyperparameters = hyperparameters; return *this; } - Result& setSamples(int samples) { this->samples = samples; return *this; } - Result& setFeatures(int features) { this->features = features; return *this; } - Result& setClasses(int classes) { this->classes = classes; return *this; } - Result& setScoreTrain(double score) { this->score_train = score; return *this; } - Result& setScoreTest(double score) { this->score_test = score; return *this; } - Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; } - Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; } - Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; } - Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; } - Result& setTestTime(double test_time) { this->test_time = test_time; return *this; } - Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; } - Result& setNodes(float nodes) { this->nodes = nodes; return *this; } - Result& setLeaves(float leaves) { this->leaves = leaves; return *this; } - Result& setDepth(float depth) { this->depth = depth; return *this; } - Result& setModelVersion(string model_version) { this->model_version = model_version; return *this; } - Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; } - Result& addScoreTest(double score) { scores_test.push_back(score); return *this; } - Result& addTimeTrain(double time) { times_train.push_back(time); return *this; } - Result& addTimeTest(double time) { times_test.push_back(time); return *this; } + Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; } + Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } + Result& setSamples(const int samples) { this->samples = samples; return *this; } + Result& setFeatures(const int features) { this->features = features; return *this; } + Result& setClasses(const int classes) { this->classes = classes; return *this; } + Result& setScoreTrain(const double score) { this->score_train = score; return *this; } + Result& setScoreTest(const double score) { this->score_test = score; return *this; } + Result& setScoreTrainStd(const double score_std) { this->score_train_std = score_std; return *this; } + Result& setScoreTestStd(const double score_std) { this->score_test_std = score_std; return *this; } + Result& setTrainTime(const double train_time) { this->train_time = train_time; return *this; } + Result& setTrainTimeStd(const double train_time_std) { this->train_time_std = train_time_std; return *this; } + Result& setTestTime(const double test_time) { this->test_time = test_time; return *this; } + Result& setTestTimeStd(const double test_time_std) { this->test_time_std = test_time_std; return *this; } + Result& setNodes(const float nodes) { this->nodes = nodes; return *this; } + Result& setLeaves(const float leaves) { this->leaves = leaves; return *this; } + Result& setDepth(const float depth) { this->depth = depth; return *this; } + Result& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; } + Result& addScoreTrain(const double score) { scores_train.push_back(score); return *this; } + Result& addScoreTest(const double score) { scores_test.push_back(score); return *this; } + Result& addTimeTrain(const double time) { times_train.push_back(time); return *this; } + Result& addTimeTest(const double time) { times_test.push_back(time); return *this; } const float get_score_train() const { return score_train; } float get_score_test() { return score_test; } const string& getDataset() const { return dataset; } @@ -78,30 +78,30 @@ namespace platform { }; class Experiment { private: - string title, model, platform, score_name, model_version, language_version, language; - bool discretized, stratified; + string title{""}, model{""}, platform{""}, score_name{""}, model_version{""}, language_version{""}, language{""}; + bool discretized{false}, stratified{false}; vector results; vector random_seeds; - int nfolds; - float duration; + int nfolds{0}; + float duration{0}; json build_json(); public: Experiment() = default; - Experiment& setTitle(string title) { this->title = title; return *this; } - Experiment& setModel(string model) { this->model = model; return *this; } - Experiment& setPlatform(string platform) { this->platform = platform; return *this; } - Experiment& setScoreName(string score_name) { this->score_name = score_name; return *this; } - Experiment& setModelVersion(string model_version) { this->model_version = model_version; return *this; } - Experiment& setLanguage(string language) { this->language = language; return *this; } - Experiment& setLanguageVersion(string language_version) { this->language_version = language_version; return *this; } - Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } - Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } - Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } + Experiment& setTitle(const string& title) { this->title = title; return *this; } + Experiment& setModel(const string& model) { this->model = model; return *this; } + Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; } + Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; } + Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; } + Experiment& setLanguage(const string& language) { this->language = language; return *this; } + Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; } + Experiment& setDiscretized(const bool discretized) { this->discretized = discretized; return *this; } + Experiment& setStratified(const bool stratified) { this->stratified = stratified; return *this; } + Experiment& setNFolds(const int nfolds) { this->nfolds = nfolds; return *this; } Experiment& addResult(Result result) { results.push_back(result); return *this; } - Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; } - Experiment& setDuration(float duration) { this->duration = duration; return *this; } + Experiment& addRandomSeed(const int random_seed) { random_seeds.push_back(random_seed); return *this; } + Experiment& setDuration(const float duration) { this->duration = duration; return *this; } string get_file_name(); - void save(string path); + void save(const string& path); void show(); }; } diff --git a/src/Platform/Folding.cc b/src/Platform/Folding.cc index ec7c4b5..1a8091e 100644 --- a/src/Platform/Folding.cc +++ b/src/Platform/Folding.cc @@ -7,9 +7,8 @@ Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) random_seed = default_random_engine(seed == -1 ? rd() : seed); srand(seed == -1 ? time(0) : seed); } -KFold::KFold(int k, int n, int seed) : Fold(k, n, seed) +KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector()) { - indices = vector(n); iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 shuffle(indices.begin(), indices.end(), random_seed); } diff --git a/src/Platform/Folding.h b/src/Platform/Folding.h index d7736d0..eaf0c4b 100644 --- a/src/Platform/Folding.h +++ b/src/Platform/Folding.h @@ -22,7 +22,7 @@ private: vector indices; public: KFold(int k, int n, int seed = -1); - pair, vector> getFold(int nFold); + pair, vector> getFold(int nFold) override; }; class StratifiedKFold : public Fold { private: @@ -32,6 +32,6 @@ private: public: StratifiedKFold(int k, const vector& y, int seed = -1); StratifiedKFold(int k, torch::Tensor& y, int seed = -1); - pair, vector> getFold(int nFold); + pair, vector> getFold(int nFold) override; }; #endif \ No newline at end of file diff --git a/src/Platform/main.cc b/src/Platform/main.cc index 25984d6..381c168 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -50,22 +50,17 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) }}); auto seed_values = env.getSeeds(); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); - bool class_last, discretize_dataset, stratified; - int n_folds; - vector seeds; - string model_name, file_name, path, complete_file_name, title; try { program.parse_args(argc, argv); - file_name = program.get("dataset"); - path = program.get("path"); - model_name = program.get("model"); - discretize_dataset = program.get("discretize"); - stratified = program.get("stratified"); - n_folds = program.get("folds"); - seeds = program.get>("seeds"); - complete_file_name = path + file_name + ".arff"; - class_last = false;//datasets[file_name]; - title = program.get("title"); + auto file_name = program.get("dataset"); + auto path = program.get("path"); + auto model_name = program.get("model"); + auto discretize_dataset = program.get("discretize"); + auto stratified = program.get("stratified"); + auto n_folds = program.get("folds"); + auto seeds = program.get>("seeds"); + auto complete_file_name = path + file_name + ".arff"; + auto title = program.get("title"); if (title == "" && file_name == "") { throw runtime_error("title is mandatory if dataset is not provided"); } @@ -81,7 +76,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) int main(int argc, char** argv) { auto program = manageArguments(argc, argv); - auto env = DotEnv(); bool saveResults = false; auto file_name = program.get("dataset"); auto path = program.get("path"); @@ -124,10 +118,8 @@ int main(int argc, char** argv) for (auto fileName : filesToProcess) { cout << "- " << setw(20) << left << fileName << " " << right << flush; auto [X, y] = datasets.getTensors(fileName); - auto states = datasets.getStates(fileName); auto features = datasets.getFeatures(fileName); auto samples = datasets.getNSamples(fileName); - auto className = datasets.getClassName(fileName); cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; auto result = validation.crossValidate(fileName); result.setDataset(fileName); diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc index ea8fad3..d999353 100644 --- a/src/Platform/platformUtils.cc +++ b/src/Platform/platformUtils.cc @@ -38,7 +38,7 @@ bool file_exists(const std::string& name) } } -tuple, string, map>> loadDataset(string path, string name, bool class_last, bool discretize_dataset) +tuple, string, map>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset) { auto handler = ArffFiles(); handler.load(path + static_cast(name) + ".arff", class_last); @@ -48,9 +48,8 @@ tuple, string, map>> loadData // Get className & Features auto className = handler.getClassName(); vector features; - for (auto feature : handler.getAttributes()) { - features.push_back(feature.first); - } + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& f) { return f.first; }); Tensor Xd; auto states = map>(); if (discretize_dataset) { @@ -72,7 +71,7 @@ tuple, string, map>> loadData return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; } -tuple>, vector, vector, string, map>> loadFile(string name) +tuple>, vector, vector, string, map>> loadFile(const string& name) { auto handler = ArffFiles(); handler.load(PATH + static_cast(name) + ".arff"); @@ -82,9 +81,8 @@ tuple>, vector, vector, string, map features; - for (auto feature : handler.getAttributes()) { - features.push_back(feature.first); - } + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& f) { return f.first; }); // Discretize Dataset vector Xd; map maxes; diff --git a/src/Platform/platformUtils.h b/src/Platform/platformUtils.h index abc69bd..8d9a75f 100644 --- a/src/Platform/platformUtils.h +++ b/src/Platform/platformUtils.h @@ -13,8 +13,8 @@ const string PATH = "../../data/"; bool file_exists(const std::string& name); pair, map> discretize(vector& X, mdlp::labels_t& y, vector features); vector discretizeDataset(vector& X, mdlp::labels_t& y); -pair>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector& features, string className); -tuple>, vector, vector, string, map>> loadFile(string name); -tuple, string, map>> loadDataset(string path, string name, bool class_last, bool discretize_dataset); +// pair>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector& features, const string& className); +tuple>, vector, vector, string, map>> loadFile(const string& name); +tuple, string, map>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset); map> get_states(vector& features, string className, map& maxes); #endif //PLATFORM_UTILS_H