diff --git a/.vscode/launch.json b/.vscode/launch.json index ba01ca6..cade330 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,15 +25,35 @@ "program": "${workspaceFolder}/build/src/Platform/main", "args": [ "-m", - "SPODELd", + "BoostAODE", "-p", "/Users/rmontanana/Code/discretizbench/datasets", + "--discretize", "--stratified", "-d", "iris" ], "cwd": "/Users/rmontanana/Code/discretizbench", }, + { + "type": "lldb", + "request": "launch", + "name": "manage", + "program": "${workspaceFolder}/build/src/Platform/manage", + "args": [ + "-n", + "20" + ], + "cwd": "/Users/rmontanana/Code/discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "list", + "program": "${workspaceFolder}/build/src/Platform/list", + "args": [], + "cwd": "/Users/rmontanana/Code/discretizbench", + }, { "name": "Build & debug active file", "type": "cppdbg", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5d92a8f..45cc63d 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -32,6 +32,29 @@ ], "group": "build", "detail": "Task generated by Debugger." + }, + { + "type": "cppbuild", + "label": "C/C++: g++ build active file", + "command": "/usr/bin/g++", + "args": [ + "-fdiagnostics-color=always", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "detail": "Task generated by Debugger." } ] } \ No newline at end of file diff --git a/Makefile b/Makefile index 9806dc3..b883892 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png build: ## Build the main and BayesNetSample - cmake --build build -t main -t BayesNetSample -j 32 + cmake --build build -t main -t BayesNetSample -t manage -t list -j 32 clean: ## Clean the debug info @echo ">>> Cleaning Debug BayesNet ..."; @@ -35,7 +35,7 @@ release: ## Build a Release version of the project @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ - cmake --build build -t main -t BayesNetSample -j 32; + cmake --build build -t main -t BayesNetSample -t manage -t list -j 32; @echo ">>> Done"; test: ## Run tests diff --git a/lib/Files/CMakeLists.txt b/lib/Files/CMakeLists.txt index 5e3412f..fce5b8f 100644 --- a/lib/Files/CMakeLists.txt +++ b/lib/Files/CMakeLists.txt @@ -1,2 +1 @@ -add_library(ArffFiles ArffFiles.cc) -#target_link_libraries(BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file +add_library(ArffFiles ArffFiles.cc) \ No newline at end of file diff --git a/sample/sample.cc b/sample/sample.cc index 7da318d..1045c2f 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -141,43 +141,58 @@ int main(int argc, char** argv) /* * Begin Processing */ - auto handler = ArffFiles(); - handler.load(complete_file_name, class_last); - // Get Dataset X, y - vector& X = handler.getX(); - mdlp::labels_t& y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), - [](const pair& item) { return item.first; }); - // Discretize Dataset - auto [Xd, maxes] = discretize(X, y, features); - maxes[className] = *max_element(y.begin(), y.end()) + 1; - map> states; - for (auto feature : features) { - states[feature] = vector(maxes[feature]); - } - states[className] = vector(maxes[className]); - auto clf = platform::Models::instance()->create(model_name); - clf->fit(Xd, y, features, className, states); - if (dump_cpt) { - cout << "--- CPT Tables ---" << endl; - clf->dump_cpt(); - } - auto lines = clf->show(); - for (auto line : lines) { - cout << line << endl; - } - cout << "--- Topological Order ---" << endl; - auto order = clf->topological_order(); - for (auto name : order) { - cout << name << ", "; - } - cout << "end." << endl; - auto score = clf->score(Xd, y); - cout << "Score: " << score << endl; + auto ypred = torch::tensor({ 1,2,3,2,2,3,4,5,2,1 }); + auto y = torch::tensor({ 0,0,0,0,2,3,4,0,0,0 }); + auto weights = torch::ones({ 10 }, kDouble); + auto mask = ypred == y; + cout << "ypred:" << ypred << endl; + cout << "y:" << y << endl; + cout << "weights:" << weights << endl; + cout << "mask:" << mask << endl; + double value_to_add = 0.5; + weights += mask.to(torch::kDouble) * value_to_add; + cout << "New weights:" << weights << endl; + auto masked_weights = weights * mask.to(weights.dtype()); + double sum_of_weights = masked_weights.sum().item(); + cout << "Sum of weights: " << sum_of_weights << endl; + //weights.index_put_({ mask }, weights + 10); + // auto handler = ArffFiles(); + // handler.load(complete_file_name, class_last); + // // Get Dataset X, y + // vector& X = handler.getX(); + // mdlp::labels_t& y = handler.getY(); + // // Get className & Features + // auto className = handler.getClassName(); + // vector features; + // auto attributes = handler.getAttributes(); + // transform(attributes.begin(), attributes.end(), back_inserter(features), + // [](const pair& item) { return item.first; }); + // // Discretize Dataset + // auto [Xd, maxes] = discretize(X, y, features); + // maxes[className] = *max_element(y.begin(), y.end()) + 1; + // map> states; + // for (auto feature : features) { + // states[feature] = vector(maxes[feature]); + // } + // states[className] = vector(maxes[className]); + // auto clf = platform::Models::instance()->create(model_name); + // clf->fit(Xd, y, features, className, states); + // if (dump_cpt) { + // cout << "--- CPT Tables ---" << endl; + // clf->dump_cpt(); + // } + // auto lines = clf->show(); + // for (auto line : lines) { + // cout << line << endl; + // } + // cout << "--- Topological Order ---" << endl; + // auto order = clf->topological_order(); + // for (auto name : order) { + // cout << name << ", "; + // } + // cout << "end." << endl; + // auto score = clf->score(Xd, y); + // cout << "Score: " << score << endl; // auto graph = clf->graph(); // auto dot_file = model_name + "_" + file_name; // ofstream file(dot_file + ".dot"); diff --git a/src/BayesNet/AODE.cc b/src/BayesNet/AODE.cc index 7e6a95f..6db843e 100644 --- a/src/BayesNet/AODE.cc +++ b/src/BayesNet/AODE.cc @@ -2,12 +2,14 @@ namespace bayesnet { AODE::AODE() : Ensemble() {} - void AODE::buildModel() + void AODE::buildModel(const torch::Tensor& weights) { models.clear(); for (int i = 0; i < features.size(); ++i) { models.push_back(std::make_unique(i)); } + n_models = models.size(); + significanceModels = vector(n_models, 1.0); } vector AODE::graph(const string& title) const { diff --git a/src/BayesNet/AODE.h b/src/BayesNet/AODE.h index 3d58851..00965f6 100644 --- a/src/BayesNet/AODE.h +++ b/src/BayesNet/AODE.h @@ -5,7 +5,7 @@ namespace bayesnet { class AODE : public Ensemble { protected: - void buildModel() override; + void buildModel(const torch::Tensor& weights) override; public: AODE(); virtual ~AODE() {}; diff --git a/src/BayesNet/AODELd.cc b/src/BayesNet/AODELd.cc index 9f36ed2..cc842be 100644 --- a/src/BayesNet/AODELd.cc +++ b/src/BayesNet/AODELd.cc @@ -19,7 +19,7 @@ namespace bayesnet { return *this; } - void AODELd::buildModel() + void AODELd::buildModel(const torch::Tensor& weights) { models.clear(); for (int i = 0; i < features.size(); ++i) { @@ -27,7 +27,7 @@ namespace bayesnet { } n_models = models.size(); } - void AODELd::trainModel() + void AODELd::trainModel(const torch::Tensor& weights) { for (const auto& model : models) { model->fit(Xf, y, features, className, states); diff --git a/src/BayesNet/AODELd.h b/src/BayesNet/AODELd.h index 14be0c4..aa67247 100644 --- a/src/BayesNet/AODELd.h +++ b/src/BayesNet/AODELd.h @@ -8,8 +8,8 @@ namespace bayesnet { using namespace std; class AODELd : public Ensemble, public Proposal { protected: - void trainModel() override; - void buildModel() override; + void trainModel(const torch::Tensor& weights) override; + void buildModel(const torch::Tensor& weights) override; public: AODELd(); AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, vector& features_, string className_, map>& states_) override; diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index ff202e1..5f1cbaa 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -6,13 +6,14 @@ namespace bayesnet { using namespace std; class BaseClassifier { protected: - virtual void trainModel() = 0; + virtual void trainModel(const torch::Tensor& weights) = 0; public: // X is nxm vector, y is nx1 vector virtual BaseClassifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) = 0; // X is nxm tensor, y is nx1 tensor virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, vector& features, string className, map>& states) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, vector& features, string className, map>& states, const torch::Tensor& weights) = 0; virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; vector virtual predict(vector>& X) = 0; diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 8952ead..2c08836 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -21,6 +21,31 @@ namespace bayesnet { } samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); } + vector Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k) + { + auto n = samples.size(0) - 1; + if (k == 0) { + k = n; + } + // compute scores + scoresKBest.reserve(n); + auto label = samples.index({ -1, "..." }); + for (int i = 0; i < n; ++i) { + scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights)); + featuresKBest.push_back(i); + } + // sort & reduce scores and features + sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) + { return scoresKBest[i] > scoresKBest[j]; }); + sort(scoresKBest.begin(), scoresKBest.end(), std::greater()); + featuresKBest.resize(k); + scoresKBest.resize(k); + return featuresKBest; + } + vector Metrics::getScoresKBest() const + { + return scoresKBest; + } vector> Metrics::doCombinations(const vector& source) { vector> result; @@ -32,17 +57,18 @@ namespace bayesnet { } return result; } - torch::Tensor Metrics::conditionalEdge() + torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) { auto result = vector(); auto source = vector(features); source.push_back(className); auto combinations = doCombinations(source); + double totalWeight = weights.sum().item(); // Compute class prior - auto margin = torch::zeros({ classNumStates }); + auto margin = torch::zeros({ classNumStates }, torch::kFloat); for (int value = 0; value < classNumStates; ++value) { auto mask = samples.index({ -1, "..." }) == value; - margin[value] = mask.sum().item() / samples.size(1); + margin[value] = mask.sum().item() / samples.size(1); } for (auto [first, second] : combinations) { int index_first = find(features.begin(), features.end(), first) - features.begin(); @@ -52,8 +78,9 @@ namespace bayesnet { auto mask = samples.index({ -1, "..." }) == value; auto first_dataset = samples.index({ index_first, mask }); auto second_dataset = samples.index({ index_second, mask }); - auto mi = mutualInformation(first_dataset, second_dataset); - auto pb = margin[value].item(); + auto weights_dataset = weights.index({ mask }); + auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset); + auto pb = margin[value].item(); accumulated += pb * mi; } result.push_back(accumulated); @@ -70,31 +97,32 @@ namespace bayesnet { return matrix; } // To use in Python - vector Metrics::conditionalEdgeWeights() + vector Metrics::conditionalEdgeWeights(vector& weights_) { - auto matrix = conditionalEdge(); + const torch::Tensor weights = torch::tensor(weights_); + auto matrix = conditionalEdge(weights); std::vector v(matrix.data_ptr(), matrix.data_ptr() + matrix.numel()); return v; } - double Metrics::entropy(const torch::Tensor& feature) + double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights) { - torch::Tensor counts = feature.bincount(); - int totalWeight = counts.sum().item(); + torch::Tensor counts = feature.bincount(weights); + double totalWeight = counts.sum().item(); torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; torch::Tensor logProbs = torch::log(probs); torch::Tensor entropy = -probs * logProbs; return entropy.nansum().item(); } // H(Y|X) = sum_{x in X} p(x) H(Y|X=x) - double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature) + double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) { int numSamples = firstFeature.sizes()[0]; - torch::Tensor featureCounts = secondFeature.bincount(); + torch::Tensor featureCounts = secondFeature.bincount(weights); unordered_map> jointCounts; double totalWeight = 0; for (auto i = 0; i < numSamples; i++) { - jointCounts[secondFeature[i].item()][firstFeature[i].item()] += 1; - totalWeight += 1; + jointCounts[secondFeature[i].item()][firstFeature[i].item()] += weights[i].item(); + totalWeight += weights[i].item(); } if (totalWeight == 0) return 0; @@ -115,9 +143,9 @@ namespace bayesnet { return entropyValue; } // I(X;Y) = H(Y) - H(Y|X) - double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature) + double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) { - return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature); + return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights); } /* Compute the maximum spanning tree considering the weights as distances diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 2a2fff3..70d33e9 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -12,16 +12,20 @@ namespace bayesnet { vector features; string className; int classNumStates = 0; + vector scoresKBest; + vector featuresKBest; // sorted indices of the features + double entropy(const Tensor& feature, const Tensor& weights); + double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); + vector> doCombinations(const vector&); public: Metrics() = default; - Metrics(const Tensor&, const vector&, const string&, const int); - Metrics(const vector>&, const vector&, const vector&, const string&, const int); - double entropy(const Tensor&); - double conditionalEntropy(const Tensor&, const Tensor&); - double mutualInformation(const Tensor&, const Tensor&); - vector conditionalEdgeWeights(); // To use in Python - Tensor conditionalEdge(); - vector> doCombinations(const vector&); + Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates); + Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates); + vector SelectKBestWeighted(const torch::Tensor& weights, unsigned k = 0); + vector getScoresKBest() const; + double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); + vector conditionalEdgeWeights(vector& weights); // To use in Python + Tensor conditionalEdge(const torch::Tensor& weights); vector> maximumSpanningTree(const vector& features, const Tensor& weights, const int root); }; } diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc new file mode 100644 index 0000000..eb8da07 --- /dev/null +++ b/src/BayesNet/BoostAODE.cc @@ -0,0 +1,82 @@ +#include "BoostAODE.h" +#include "BayesMetrics.h" + +namespace bayesnet { + BoostAODE::BoostAODE() : Ensemble() {} + void BoostAODE::buildModel(const torch::Tensor& weights) + { + // Models shall be built in trainModel + } + void BoostAODE::trainModel(const torch::Tensor& weights) + { + models.clear(); + n_models = 0; + int max_models = .1 * n > 10 ? .1 * n : n; + Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); + auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }); + auto y_ = dataset.index({ -1, "..." }); + bool exitCondition = false; + bool repeatSparent = false; + vector featuresUsed; + // Step 0: Set the finish condition + // if not repeatSparent a finish condition is run out of features + // n_models == max_models + int numClasses = states[className].size(); + while (!exitCondition) { + // Step 1: Build ranking with mutual information + auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted + auto feature = featureSelection[0]; + unique_ptr model; + if (!repeatSparent) { + if (n_models == 0) { + models.resize(n); // Resize for n==nfeatures SPODEs + significanceModels.resize(n); + } + bool found = false; + for (int i = 0; i < featureSelection.size(); ++i) { + if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) { + continue; + } + found = true; + feature = i; + featuresUsed.push_back(feature); + n_models++; + break; + } + if (!found) { + exitCondition = true; + continue; + } + } + model = std::make_unique(feature); + model->fit(dataset, features, className, states, weights_); + auto ypred = model->predict(X_); + // Step 3.1: Compute the classifier amout of say + auto mask_wrong = ypred != y_; + auto masked_weights = weights_ * mask_wrong.to(weights_.dtype()); + double wrongWeights = masked_weights.sum().item(); + double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights); + // Step 3.2: Update weights for next classifier + // Step 3.2.1: Update weights of wrong samples + weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_; + // Step 3.3: Normalise the weights + double totalWeights = torch::sum(weights_).item(); + weights_ = weights_ / totalWeights; + // Step 3.4: Store classifier and its accuracy to weigh its future vote + if (!repeatSparent) { + models[feature] = std::move(model); + significanceModels[feature] = significance; + } else { + models.push_back(std::move(model)); + significanceModels.push_back(significance); + n_models++; + } + exitCondition = n_models == max_models; + } + weights.copy_(weights_); + } + vector BoostAODE::graph(const string& title) const + { + return Ensemble::graph(title); + } +} \ No newline at end of file diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h new file mode 100644 index 0000000..b14c7c6 --- /dev/null +++ b/src/BayesNet/BoostAODE.h @@ -0,0 +1,16 @@ +#ifndef BOOSTAODE_H +#define BOOSTAODE_H +#include "Ensemble.h" +#include "SPODE.h" +namespace bayesnet { + class BoostAODE : public Ensemble { + protected: + void buildModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights) override; + public: + BoostAODE(); + virtual ~BoostAODE() {}; + vector graph(const string& title = "BoostAODE") const override; + }; +} +#endif \ No newline at end of file diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index a2b9126..435511c 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -3,5 +3,6 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc - KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) -target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}") \ No newline at end of file + KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc + Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) +target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index b3317f4..ff25657 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -5,7 +5,7 @@ namespace bayesnet { using namespace torch; Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} - Classifier& Classifier::build(vector& features, string className, map>& states) + Classifier& Classifier::build(vector& features, string className, map>& states, const torch::Tensor& weights) { this->features = features; this->className = className; @@ -16,12 +16,11 @@ namespace bayesnet { auto n_classes = states[className].size(); metrics = Metrics(dataset, features, className, n_classes); model.initialize(); - buildModel(); - trainModel(); + buildModel(weights); + trainModel(weights); fitted = true; return *this; } - void Classifier::buildDataset(Tensor& ytmp) { try { @@ -35,16 +34,17 @@ namespace bayesnet { exit(1); } } - void Classifier::trainModel() + void Classifier::trainModel(const torch::Tensor& weights) { - model.fit(dataset, features, className, states); + model.fit(dataset, weights, features, className, states); } // X is nxm where n is the number of features and m the number of samples Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) { dataset = X; buildDataset(y); - return build(features, className, states); + const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); + return build(features, className, states, weights); } // X is nxm where n is the number of features and m the number of samples Classifier& Classifier::fit(vector>& X, vector& y, vector& features, string className, map>& states) @@ -55,12 +55,19 @@ namespace bayesnet { } auto ytmp = torch::tensor(y, kInt32); buildDataset(ytmp); - return build(features, className, states); + const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); + return build(features, className, states, weights); } Classifier& Classifier::fit(torch::Tensor& dataset, vector& features, string className, map>& states) { this->dataset = dataset; - return build(features, className, states); + const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); + return build(features, className, states, weights); + } + Classifier& Classifier::fit(torch::Tensor& dataset, vector& features, string className, map>& states, const torch::Tensor& weights) + { + this->dataset = dataset; + return build(features, className, states, weights); } void Classifier::checkFitParameters() { diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index 2e736a3..0c2940b 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -11,25 +11,26 @@ namespace bayesnet { class Classifier : public BaseClassifier { private: void buildDataset(torch::Tensor& y); - Classifier& build(vector& features, string className, map>& states); + Classifier& build(vector& features, string className, map>& states, const torch::Tensor& weights); protected: bool fitted; - Network model; int m, n; // m: number of samples, n: number of features - Tensor dataset; // (n+1)xm tensor + Network model; Metrics metrics; vector features; string className; map> states; + Tensor dataset; // (n+1)xm tensor void checkFitParameters(); - virtual void buildModel() = 0; - void trainModel() override; + virtual void buildModel(const torch::Tensor& weights) = 0; + void trainModel(const torch::Tensor& weights) override; public: Classifier(Network model); virtual ~Classifier() = default; Classifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector& features, string className, map>& states) override; Classifier& fit(torch::Tensor& dataset, vector& features, string className, map>& states) override; + Classifier& fit(torch::Tensor& dataset, vector& features, string className, map>& states, const torch::Tensor& weights) override; void addNodes(); int getNumberOfNodes() const override; int getNumberOfEdges() const override; diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 34c6894..33a11a2 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -5,7 +5,7 @@ namespace bayesnet { Ensemble::Ensemble() : Classifier(Network()) {} - void Ensemble::trainModel() + void Ensemble::trainModel(const torch::Tensor& weights) { n_models = models.size(); for (auto i = 0; i < n_models; ++i) { @@ -18,9 +18,9 @@ namespace bayesnet { auto y_pred_ = y_pred.accessor(); vector y_pred_final; for (int i = 0; i < y_pred.size(0); ++i) { - vector votes(y_pred.size(1), 0); + vector votes(y_pred.size(1), 0); for (int j = 0; j < y_pred.size(1); ++j) { - votes[y_pred_[i][j]] += 1; + votes[y_pred_[i][j]] += significanceModels[j]; } // argsort in descending order auto indices = argsort(votes); diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index f0d750b..58a1d63 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -14,7 +14,8 @@ namespace bayesnet { protected: unsigned n_models; vector> models; - void trainModel() override; + vector significanceModels; + void trainModel(const torch::Tensor& weights) override; vector voting(Tensor& y_pred); public: Ensemble(); diff --git a/src/BayesNet/KDB.cc b/src/BayesNet/KDB.cc index 74566b0..cfbbca1 100644 --- a/src/BayesNet/KDB.cc +++ b/src/BayesNet/KDB.cc @@ -4,7 +4,7 @@ namespace bayesnet { using namespace torch; KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} - void KDB::buildModel() + void KDB::buildModel(const torch::Tensor& weights) { /* 1. For each feature Xi, compute mutual information, I(X;C), @@ -29,13 +29,13 @@ namespace bayesnet { // where C is the class. addNodes(); const Tensor& y = dataset.index({ -1, "..." }); - vector mi; + vector mi; for (auto i = 0; i < features.size(); i++) { Tensor firstFeature = dataset.index({ i, "..." }); - mi.push_back(metrics.mutualInformation(firstFeature, y)); + mi.push_back(metrics.mutualInformation(firstFeature, y, weights)); } // 2. Compute class conditional mutual information I(Xi;XjIC), f or each - auto conditionalEdgeWeights = metrics.conditionalEdge(); + auto conditionalEdgeWeights = metrics.conditionalEdge(weights); // 3. Let the used variable list, S, be empty. vector S; // 4. Let the DAG network being constructed, BN, begin with a single diff --git a/src/BayesNet/KDB.h b/src/BayesNet/KDB.h index e7af8c5..b997cdd 100644 --- a/src/BayesNet/KDB.h +++ b/src/BayesNet/KDB.h @@ -1,5 +1,6 @@ #ifndef KDB_H #define KDB_H +#include #include "Classifier.h" #include "bayesnetUtils.h" namespace bayesnet { @@ -11,7 +12,7 @@ namespace bayesnet { float theta; void add_m_edges(int idx, vector& S, Tensor& weights); protected: - void buildModel() override; + void buildModel(const torch::Tensor& weights) override; public: explicit KDB(int k, float theta = 0.03); virtual ~KDB() {}; diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index 8a4106c..5753eb8 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -5,7 +5,6 @@ namespace bayesnet { Network::Network() : features(vector()), className(""), classNumStates(0), fitted(false) {} Network::Network(float maxT) : features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} - Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. getmaxThreads()), fitted(other.fitted) { @@ -104,8 +103,11 @@ namespace bayesnet { { return nodes; } - void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector& featureNames, const string& className, const map>& states) + void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector& featureNames, const string& className, const map>& states, const torch::Tensor& weights) { + if (weights.size(0) != n_samples) { + throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit"); + } if (n_samples != n_samples_y) { throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")"); } @@ -136,28 +138,29 @@ namespace bayesnet { classNumStates = nodes[className]->getNumStates(); } // X comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states) { - checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states); + checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); this->className = className; Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); samples = torch::cat({ X , ytmp }, 0); for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." }); } - completeFit(states); + completeFit(states, weights); } - void Network::fit(const torch::Tensor& samples, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states) { - checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states); + checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; this->samples = samples; - completeFit(states); + completeFit(states, weights); } // input_data comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const vector>& input_data, const vector& labels, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const vector>& input_data, const vector& labels, const vector& weights_, const vector& featureNames, const string& className, const map>& states) { - checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states); + const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); + checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); this->className = className; // Build tensor of samples (nxm) (n+1 because of the class) samples = torch::zeros({ static_cast(input_data.size() + 1), static_cast(input_data[0].size()) }, torch::kInt32); @@ -165,11 +168,12 @@ namespace bayesnet { samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); - completeFit(states); + completeFit(states, weights); } - void Network::completeFit(const map>& states) + void Network::completeFit(const map>& states, const torch::Tensor& weights) { setStates(states); + laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation int maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); if (maxThreadsRunning < 1) { maxThreadsRunning = 1; @@ -182,7 +186,7 @@ namespace bayesnet { while (nextNodeIndex < nodes.size()) { unique_lock lock(mtx); cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; }); - threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() { + threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() { while (true) { unique_lock lock(mtx); if (nextNodeIndex >= nodes.size()) { @@ -191,7 +195,7 @@ namespace bayesnet { auto& pair = *std::next(nodes.begin(), nextNodeIndex); ++nextNodeIndex; lock.unlock(); - pair.second->computeCPT(samples, features, laplaceSmoothing); + pair.second->computeCPT(samples, features, laplaceSmoothing, weights); lock.lock(); nodes[pair.first] = std::move(pair.second); lock.unlock(); @@ -343,7 +347,7 @@ namespace bayesnet { } // Normalize result double sum = accumulate(result.begin(), result.end(), 0.0); - transform(result.begin(), result.end(), result.begin(), [sum](double& value) { return value / sum; }); + transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); return result; } vector Network::show() const @@ -431,6 +435,7 @@ namespace bayesnet { { for (auto& node : nodes) { cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl; + cout << node.second->getCPT() << endl; } } } diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index d8db620..a26e790 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -13,19 +13,18 @@ namespace bayesnet { int classNumStates; vector features; // Including classname string className; - int laplaceSmoothing = 1; + double laplaceSmoothing; torch::Tensor samples; // nxm tensor used to fit the model bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); vector predict_sample(const vector&); vector predict_sample(const torch::Tensor&); vector exactInference(map&); double computeFactor(map&); - void completeFit(const map>&); - void checkFitData(int n_features, int n_samples, int n_samples_y, const vector& featureNames, const string& className, const map>&); + void completeFit(const map>& states, const torch::Tensor& weights); + void checkFitData(int n_features, int n_samples, int n_samples_y, const vector& featureNames, const string& className, const map>& states, const torch::Tensor& weights); void setStates(const map>&); public: Network(); - explicit Network(float, int); explicit Network(float); explicit Network(Network&); torch::Tensor& getSamples(); @@ -39,9 +38,9 @@ namespace bayesnet { int getNumEdges() const; int getClassNumStates() const; string getClassName() const; - void fit(const vector>&, const vector&, const vector&, const string&, const map>&); - void fit(const torch::Tensor&, const torch::Tensor&, const vector&, const string&, const map>&); - void fit(const torch::Tensor&, const vector&, const string&, const map>&); + void fit(const vector>& input_data, const vector& labels, const vector& weights, const vector& featureNames, const string& className, const map>& states); + void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states); + void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states); vector predict(const vector>&); // Return mx1 vector of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc index 6669819..04d2ed2 100644 --- a/src/BayesNet/Node.cc +++ b/src/BayesNet/Node.cc @@ -84,7 +84,7 @@ namespace bayesnet { } return result; } - void Node::computeCPT(const torch::Tensor& dataset, const vector& features, const int laplaceSmoothing) + void Node::computeCPT(const torch::Tensor& dataset, const vector& features, const double laplaceSmoothing, const torch::Tensor& weights) { dimensions.clear(); // Get dimensions of the CPT @@ -111,7 +111,7 @@ namespace bayesnet { coordinates.push_back(dataset.index({ parent_index, n_sample })); } // Increment the count of the corresponding coordinate - cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1); + cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item()); } // Normalize the counts cpTable = cpTable / cpTable.sum(0); diff --git a/src/BayesNet/Node.h b/src/BayesNet/Node.h index f4eb320..6758c5c 100644 --- a/src/BayesNet/Node.h +++ b/src/BayesNet/Node.h @@ -26,7 +26,7 @@ namespace bayesnet { vector& getParents(); vector& getChildren(); torch::Tensor& getCPT(); - void computeCPT(const torch::Tensor&, const vector&, const int); + void computeCPT(const torch::Tensor& dataset, const vector& features, const double laplaceSmoothing, const torch::Tensor& weights); int getNumStates() const; void setNumStates(int); unsigned minFill(); diff --git a/src/BayesNet/Proposal.cc b/src/BayesNet/Proposal.cc index eef0088..c410289 100644 --- a/src/BayesNet/Proposal.cc +++ b/src/BayesNet/Proposal.cc @@ -65,7 +65,8 @@ namespace bayesnet { //Update new states of the feature/node states[pFeatures[index]] = xStates; } - model.fit(pDataset, pFeatures, pClassName, states); + const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); + model.fit(pDataset, weights, pFeatures, pClassName, states); } return states; } diff --git a/src/BayesNet/SPODE.cc b/src/BayesNet/SPODE.cc index a90e5ef..83c9231 100644 --- a/src/BayesNet/SPODE.cc +++ b/src/BayesNet/SPODE.cc @@ -4,7 +4,7 @@ namespace bayesnet { SPODE::SPODE(int root) : Classifier(Network()), root(root) {} - void SPODE::buildModel() + void SPODE::buildModel(const torch::Tensor& weights) { // 0. Add all nodes to the model addNodes(); diff --git a/src/BayesNet/SPODE.h b/src/BayesNet/SPODE.h index f9b6af0..0a78830 100644 --- a/src/BayesNet/SPODE.h +++ b/src/BayesNet/SPODE.h @@ -7,7 +7,7 @@ namespace bayesnet { private: int root; protected: - void buildModel() override; + void buildModel(const torch::Tensor& weights) override; public: explicit SPODE(int root); virtual ~SPODE() {}; diff --git a/src/BayesNet/SPODELd.cc b/src/BayesNet/SPODELd.cc index 8a38160..2711c86 100644 --- a/src/BayesNet/SPODELd.cc +++ b/src/BayesNet/SPODELd.cc @@ -21,7 +21,6 @@ namespace bayesnet { SPODELd& SPODELd::fit(torch::Tensor& dataset, vector& features_, string className_, map>& states_) { Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); - cout << "Xf " << Xf.sizes() << " dtype: " << Xf.dtype() << endl; y = dataset.index({ -1, "..." }).clone(); // This first part should go in a Classifier method called fit_local_discretization o fit_float... features = features_; diff --git a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index 7b3e3a6..f0728be 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -5,7 +5,7 @@ namespace bayesnet { TAN::TAN() : Classifier(Network()) {} - void TAN::buildModel() + void TAN::buildModel(const torch::Tensor& weights) { // 0. Add all nodes to the model addNodes(); @@ -15,15 +15,15 @@ namespace bayesnet { Tensor class_dataset = dataset.index({ -1, "..." }); for (int i = 0; i < static_cast(features.size()); ++i) { Tensor feature_dataset = dataset.index({ i, "..." }); - auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset); + auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights); mi.push_back({ i, mi_value }); } sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); auto root = mi[mi.size() - 1].first; // 2. Compute mutual information between each feature and the class - auto weights = metrics.conditionalEdge(); + auto weights_matrix = metrics.conditionalEdge(weights); // 3. Compute the maximum spanning tree - auto mst = metrics.maximumSpanningTree(features, weights, root); + auto mst = metrics.maximumSpanningTree(features, weights_matrix, root); // 4. Add edges from the maximum spanning tree to the model for (auto i = 0; i < mst.size(); ++i) { auto [from, to] = mst[i]; diff --git a/src/BayesNet/TAN.h b/src/BayesNet/TAN.h index 4c1c5f5..91b5109 100644 --- a/src/BayesNet/TAN.h +++ b/src/BayesNet/TAN.h @@ -7,7 +7,7 @@ namespace bayesnet { class TAN : public Classifier { private: protected: - void buildModel() override; + void buildModel(const torch::Tensor& weights) override; public: TAN(); virtual ~TAN() {}; diff --git a/src/BayesNet/bayesnetUtils.cc b/src/BayesNet/bayesnetUtils.cc index 8b69006..480034b 100644 --- a/src/BayesNet/bayesnetUtils.cc +++ b/src/BayesNet/bayesnetUtils.cc @@ -4,7 +4,7 @@ namespace bayesnet { using namespace std; using namespace torch; // Return the indices in descending order - vector argsort(vector& nums) + vector argsort(vector& nums) { int n = nums.size(); vector indices(n); diff --git a/src/BayesNet/bayesnetUtils.h b/src/BayesNet/bayesnetUtils.h index adfa8d7..b5811f7 100644 --- a/src/BayesNet/bayesnetUtils.h +++ b/src/BayesNet/bayesnetUtils.h @@ -5,7 +5,7 @@ namespace bayesnet { using namespace std; using namespace torch; - vector argsort(vector& nums); + vector argsort(vector& nums); vector> tensorToVector(Tensor& tensor); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/src/Platform/BestResult.h b/src/Platform/BestResult.h new file mode 100644 index 0000000..8b3f1cb --- /dev/null +++ b/src/Platform/BestResult.h @@ -0,0 +1,10 @@ +#ifndef BESTRESULT_H +#define BESTRESULT_H +#include +class BestResult { +public: + static std::string title() { return "STree_default (linear-ovo)"; } + static double score() { return 22.109799; } + static std::string scoreName() { return "accuracy"; } +}; +#endif \ No newline at end of file diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 3b13abc..78c6615 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -5,4 +5,8 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc) -target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file +add_executable(manage manage.cc Results.cc Report.cc) +add_executable(list list.cc platformUtils Datasets.cc) +target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") +target_link_libraries(manage "${TORCH_LIBRARIES}") +target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/Colors.h b/src/Platform/Colors.h new file mode 100644 index 0000000..7ab2e08 --- /dev/null +++ b/src/Platform/Colors.h @@ -0,0 +1,14 @@ +#ifndef COLORS_H +#define COLORS_H +class Colors { +public: + static std::string MAGENTA() { return "\033[1;35m"; } + static std::string BLUE() { return "\033[1;34m"; } + static std::string CYAN() { return "\033[1;36m"; } + static std::string GREEN() { return "\033[1;32m"; } + static std::string YELLOW() { return "\033[1;33m"; } + static std::string RED() { return "\033[1;31m"; } + static std::string WHITE() { return "\033[1;37m"; } + static std::string RESET() { return "\033[0m"; } +}; +#endif // COLORS_H \ No newline at end of file diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 6756148..b187be8 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -24,75 +24,110 @@ namespace platform { transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); return result; } - vector Datasets::getFeatures(string name) + vector Datasets::getFeatures(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getFeatures(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getFeatures(); } else { throw invalid_argument("Dataset not loaded."); } } - map> Datasets::getStates(string name) + map> Datasets::getStates(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getStates(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getStates(); } else { throw invalid_argument("Dataset not loaded."); } } - string Datasets::getClassName(string name) + void Datasets::loadDataset(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getClassName(); + if (datasets.at(name)->isLoaded()) { + return; + } else { + datasets.at(name)->load(); + } + } + string Datasets::getClassName(const string& name) const + { + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getClassName(); } else { throw invalid_argument("Dataset not loaded."); } } - int Datasets::getNSamples(string name) + int Datasets::getNSamples(const string& name) const { - if (datasets[name]->isLoaded()) { - return datasets[name]->getNSamples(); + if (datasets.at(name)->isLoaded()) { + return datasets.at(name)->getNSamples(); } else { throw invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Datasets::getVectors(string name) + int Datasets::getNClasses(const string& name) + { + if (datasets.at(name)->isLoaded()) { + auto className = datasets.at(name)->getClassName(); + if (discretize) { + auto states = getStates(name); + return states.at(className).size(); + } + auto [Xv, yv] = getVectors(name); + return *max_element(yv.begin(), yv.end()) + 1; + } else { + throw invalid_argument("Dataset not loaded."); + } + } + vector Datasets::getClassesCounts(const string& name) const + { + if (datasets.at(name)->isLoaded()) { + auto [Xv, yv] = datasets.at(name)->getVectors(); + vector counts(*max_element(yv.begin(), yv.end()) + 1); + for (auto y : yv) { + counts[y]++; + } + return counts; + } else { + throw invalid_argument("Dataset not loaded."); + } + } + pair>&, vector&> Datasets::getVectors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectors(); } - pair>&, vector&> Datasets::getVectorsDiscretized(string name) + pair>&, vector&> Datasets::getVectorsDiscretized(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectorsDiscretized(); } - pair Datasets::getTensors(string name) + pair Datasets::getTensors(const string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getTensors(); } - bool Datasets::isDataset(const string& name) + bool Datasets::isDataset(const string& name) const { return datasets.find(name) != datasets.end(); } Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) { } - string Dataset::getName() + string Dataset::getName() const { return name; } - string Dataset::getClassName() + string Dataset::getClassName() const { return className; } - vector Dataset::getFeatures() + vector Dataset::getFeatures() const { if (loaded) { return features; @@ -100,7 +135,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - int Dataset::getNFeatures() + int Dataset::getNFeatures() const { if (loaded) { return n_features; @@ -108,7 +143,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - int Dataset::getNSamples() + int Dataset::getNSamples() const { if (loaded) { return n_samples; @@ -116,7 +151,7 @@ namespace platform { throw invalid_argument("Dataset not loaded."); } } - map> Dataset::getStates() + map> Dataset::getStates() const { if (loaded) { return states; diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h index 4ccd1f0..a99c86e 100644 --- a/src/Platform/Datasets.h +++ b/src/Platform/Datasets.h @@ -29,15 +29,15 @@ namespace platform { public: Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; explicit Dataset(const Dataset&); - string getName(); - string getClassName(); - vector getFeatures(); - map> getStates(); + string getName() const; + string getClassName() const; + vector getFeatures() const; + map> getStates() const; pair>&, vector&> getVectors(); pair>&, vector&> getVectorsDiscretized(); pair getTensors(); - int getNFeatures(); - int getNSamples(); + int getNFeatures() const; + int getNSamples() const; void load(); const bool inline isLoaded() const { return loaded; }; }; @@ -51,14 +51,17 @@ namespace platform { public: explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; vector getNames(); - vector getFeatures(string name); - int getNSamples(string name); - string getClassName(string name); - map> getStates(string name); - pair>&, vector&> getVectors(string name); - pair>&, vector&> getVectorsDiscretized(string name); - pair getTensors(string name); - bool isDataset(const string& name); + vector getFeatures(const string& name) const; + int getNSamples(const string& name) const; + string getClassName(const string& name) const; + int getNClasses(const string& name); + vector getClassesCounts(const string& name) const; + map> getStates(const string& name) const; + pair>&, vector&> getVectors(const string& name); + pair>&, vector&> getVectorsDiscretized(const string& name); + pair getTensors(const string& name); + bool isDataset(const string& name) const; + void loadDataset(const string& name) const; }; }; diff --git a/src/Platform/Models.h b/src/Platform/Models.h index 0e3184b..6c5d437 100644 --- a/src/Platform/Models.h +++ b/src/Platform/Models.h @@ -10,6 +10,7 @@ #include "KDBLd.h" #include "SPODELd.h" #include "AODELd.h" +#include "BoostAODE.h" namespace platform { class Models { private: diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h new file mode 100644 index 0000000..fdda25a --- /dev/null +++ b/src/Platform/Paths.h @@ -0,0 +1,11 @@ +#ifndef PATHS_H +#define PATHS_H +#include +namespace platform { + class Paths { + public: + static std::string datasets() { return "datasets/"; } + static std::string results() { return "results/"; } + }; +} +#endif \ No newline at end of file diff --git a/src/Platform/Report.cc b/src/Platform/Report.cc index 3693248..5690668 100644 --- a/src/Platform/Report.cc +++ b/src/Platform/Report.cc @@ -1,4 +1,8 @@ +#include +#include #include "Report.h" +#include "BestResult.h" + namespace platform { string headerLine(const string& text) @@ -9,59 +13,103 @@ namespace platform { } string Report::fromVector(const string& key) { - string result = ""; - + stringstream oss; + string sep = ""; + oss << "["; for (auto& item : data[key]) { - result += to_string(item) + ", "; + oss << sep << item.get(); + sep = ", "; } - return "[" + result.substr(0, result.size() - 2) + "]"; + oss << "]"; + return oss.str(); } - string fVector(const json& data) + string fVector(const string& title, const json& data, const int width, const int precision) { - string result = ""; + stringstream oss; + string sep = ""; + oss << title << "["; for (const auto& item : data) { - result += to_string(item) + ", "; + oss << sep << fixed << setw(width) << setprecision(precision) << item.get(); + sep = ", "; } - return "[" + result.substr(0, result.size() - 2) + "]"; + oss << "]"; + return oss.str(); } void Report::show() { header(); body(); + footer(); } + struct separated : numpunct { + char do_decimal_point() const { return ','; } + char do_thousands_sep() const { return '.'; } + string do_grouping() const { return "\03"; } + }; void Report::header() { - cout << string(MAXL, '*') << endl; + locale mylocale(cout.getloc(), new separated); + locale::global(mylocale); + cout.imbue(mylocale); + stringstream oss; + cout << Colors::MAGENTA() << string(MAXL, '*') << endl; cout << headerLine("Report " + data["model"].get() + " ver. " + data["version"].get() + " with " + to_string(data["folds"].get()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get() + " " + data["time"].get()); cout << headerLine(data["title"].get()); cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get() ? "True" : "False")); - cout << headerLine("Execution took " + to_string(data["duration"].get()) + " seconds, " + to_string(data["duration"].get() / 3600) + " hours, on " + data["platform"].get()); + oss << "Execution took " << setprecision(2) << fixed << data["duration"].get() << " seconds, " << data["duration"].get() / 3600 << " hours, on " << data["platform"].get(); + cout << headerLine(oss.str()); cout << headerLine("Score is " + data["score_name"].get()); cout << string(MAXL, '*') << endl; cout << endl; } void Report::body() { - cout << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; - cout << "============================== ====== ===== === ======= ======= ======= =============== ================= ===============" << endl; + cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; + cout << "============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl; + json lastResult; + totalScore = 0; + bool odd = true; for (const auto& r : data["results"]) { - cout << setw(30) << left << r["dataset"].get() << " "; + auto color = odd ? Colors::CYAN() : Colors::BLUE(); + cout << color << setw(30) << left << r["dataset"].get() << " "; cout << setw(6) << right << r["samples"].get() << " "; cout << setw(5) << right << r["features"].get() << " "; cout << setw(3) << right << r["classes"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["nodes"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["leaves"].get() << " "; - cout << setw(7) << setprecision(2) << fixed << r["depth"].get() << " "; - cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get() << " "; - cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get() << " "; - cout << " " << r["hyperparameters"].get(); + cout << setw(9) << setprecision(2) << fixed << r["nodes"].get() << " "; + cout << setw(9) << setprecision(2) << fixed << r["leaves"].get() << " "; + cout << setw(9) << setprecision(2) << fixed << r["depth"].get() << " "; + cout << setw(8) << right << setprecision(6) << fixed << r["score"].get() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get() << " "; + cout << setw(11) << right << setprecision(6) << fixed << r["time"].get() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get() << " "; + try { + cout << r["hyperparameters"].get(); + } + catch (const exception& err) { + cout << r["hyperparameters"]; + } cout << endl; + lastResult = r; + totalScore += r["score"].get(); + odd = !odd; + } + if (data["results"].size() == 1) { cout << string(MAXL, '*') << endl; - cout << headerLine("Train scores: " + fVector(r["scores_train"])); - cout << headerLine("Test scores: " + fVector(r["scores_test"])); - cout << headerLine("Train times: " + fVector(r["times_train"])); - cout << headerLine("Test times: " + fVector(r["times_test"])); + cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); + cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); + cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); + cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); cout << string(MAXL, '*') << endl; } } + void Report::footer() + { + cout << Colors::MAGENTA() << string(MAXL, '*') << endl; + auto score = data["score_name"].get(); + if (score == BestResult::scoreName()) { + stringstream oss; + oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score(); + cout << headerLine(oss.str()); + } + cout << string(MAXL, '*') << endl << Colors::RESET(); + + } } \ No newline at end of file diff --git a/src/Platform/Report.h b/src/Platform/Report.h index c6ea8a1..105785f 100644 --- a/src/Platform/Report.h +++ b/src/Platform/Report.h @@ -3,9 +3,10 @@ #include #include #include +#include "Colors.h" using json = nlohmann::json; -const int MAXL = 121; +const int MAXL = 128; namespace platform { using namespace std; class Report { @@ -16,8 +17,10 @@ namespace platform { private: void header(); void body(); + void footer(); string fromVector(const string& key); json data; + double totalScore; // Total score of all results in a report }; }; #endif \ No newline at end of file diff --git a/src/Platform/Results.cc b/src/Platform/Results.cc new file mode 100644 index 0000000..0bf4070 --- /dev/null +++ b/src/Platform/Results.cc @@ -0,0 +1,239 @@ +#include +#include "platformUtils.h" +#include "Results.h" +#include "Report.h" +#include "BestResult.h" +#include "Colors.h" +namespace platform { + Result::Result(const string& path, const string& filename) + : path(path) + , filename(filename) + { + auto data = load(); + date = data["date"]; + score = 0; + for (const auto& result : data["results"]) { + score += result["score"].get(); + } + scoreName = data["score_name"]; + if (scoreName == BestResult::scoreName()) { + score /= BestResult::score(); + } + title = data["title"]; + duration = data["duration"]; + model = data["model"]; + } + json Result::load() const + { + ifstream resultData(path + "/" + filename); + if (resultData.is_open()) { + json data = json::parse(resultData); + return data; + } + throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); + } + void Results::load() + { + using std::filesystem::directory_iterator; + for (const auto& file : directory_iterator(path)) { + auto filename = file.path().filename().string(); + if (filename.find(".json") != string::npos && filename.find("results_") == 0) { + auto result = Result(path, filename); + bool addResult = true; + if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName()) + addResult = false; + if (addResult) + files.push_back(result); + } + } + } + string Result::to_string() const + { + stringstream oss; + oss << date << " "; + oss << setw(12) << left << model << " "; + oss << setw(11) << left << scoreName << " "; + oss << right << setw(11) << setprecision(7) << fixed << score << " "; + oss << setw(9) << setprecision(3) << fixed << duration << " "; + oss << setw(50) << left << title << " "; + return oss.str(); + } + void Results::show() const + { + cout << Colors::GREEN() << "Results found: " << files.size() << endl; + cout << "-------------------" << endl; + auto i = 0; + cout << " # Date Model Score Name Score Duration Title" << endl; + cout << "=== ========== ============ =========== =========== ========= =============================================================" << endl; + bool odd = true; + for (const auto& result : files) { + auto color = odd ? Colors::BLUE() : Colors::CYAN(); + cout << color << setw(3) << fixed << right << i++ << " "; + cout << result.to_string() << endl; + if (i == max && max != 0) { + break; + } + odd = !odd; + } + } + int Results::getIndex(const string& intent) const + { + string color; + if (intent == "delete") { + color = Colors::RED(); + } else { + color = Colors::YELLOW(); + } + cout << color << "Choose result to " << intent << " (cancel=-1): "; + string line; + getline(cin, line); + int index = stoi(line); + if (index >= -1 && index < static_cast(files.size())) { + return index; + } + cout << "Invalid index" << endl; + return -1; + } + void Results::report(const int index) const + { + cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl; + auto data = files.at(index).load(); + Report report(data); + report.show(); + } + void Results::menu() + { + char option; + int index; + bool finished = false; + string filename, line, options = "qldhsr"; + while (!finished) { + cout << Colors::RESET() << "Choose option (quit='q', list='l', delete='d', hide='h', sort='s', report='r'): "; + getline(cin, line); + if (line.size() == 0) + continue; + if (options.find(line[0]) != string::npos) { + if (line.size() > 1) { + cout << "Invalid option" << endl; + continue; + } + option = line[0]; + } else { + index = stoi(line); + if (index >= 0 && index < files.size()) { + report(index); + } else { + cout << "Invalid option" << endl; + } + continue; + } + switch (option) { + case 'q': + finished = true; + break; + case 'l': + show(); + break; + case 'd': + index = getIndex("delete"); + if (index == -1) + break; + filename = files[index].getFilename(); + cout << "Deleting " << filename << endl; + remove((path + "/" + filename).c_str()); + files.erase(files.begin() + index); + cout << "File: " + filename + " deleted!" << endl; + show(); + break; + case 'h': + index = getIndex("hide"); + if (index == -1) + break; + filename = files[index].getFilename(); + cout << "Hiding " << filename << endl; + rename((path + "/" + filename).c_str(), (path + "/." + filename).c_str()); + files.erase(files.begin() + index); + show(); + menu(); + break; + case 's': + sortList(); + show(); + break; + case 'r': + index = getIndex("report"); + if (index == -1) + break; + report(index); + break; + default: + cout << "Invalid option" << endl; + } + } + } + void Results::sortList() + { + cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): "; + string line; + char option; + getline(cin, line); + if (line.size() == 0) + return; + if (line.size() > 1) { + cout << "Invalid option" << endl; + return; + } + option = line[0]; + switch (option) { + case 'd': + sortDate(); + break; + case 's': + sortScore(); + break; + case 'u': + sortDuration(); + break; + case 'm': + sortModel(); + break; + default: + cout << "Invalid option" << endl; + } + } + void Results::sortDate() + { + sort(files.begin(), files.end(), [](const Result& a, const Result& b) { + return a.getDate() > b.getDate(); + }); + } + void Results::sortModel() + { + sort(files.begin(), files.end(), [](const Result& a, const Result& b) { + return a.getModel() > b.getModel(); + }); + } + void Results::sortDuration() + { + sort(files.begin(), files.end(), [](const Result& a, const Result& b) { + return a.getDuration() > b.getDuration(); + }); + } + void Results::sortScore() + { + sort(files.begin(), files.end(), [](const Result& a, const Result& b) { + return a.getScore() > b.getScore(); + }); + } + void Results::manage() + { + if (files.size() == 0) { + cout << "No results found!" << endl; + exit(0); + } + show(); + menu(); + cout << "Done!" << endl; + } + +} \ No newline at end of file diff --git a/src/Platform/Results.h b/src/Platform/Results.h new file mode 100644 index 0000000..e6b1552 --- /dev/null +++ b/src/Platform/Results.h @@ -0,0 +1,56 @@ +#ifndef RESULTS_H +#define RESULTS_H +#include +#include +#include +#include +namespace platform { + using namespace std; + using json = nlohmann::json; + + class Result { + public: + Result(const string& path, const string& filename); + json load() const; + string to_string() const; + string getFilename() const { return filename; }; + string getDate() const { return date; }; + double getScore() const { return score; }; + string getTitle() const { return title; }; + double getDuration() const { return duration; }; + string getModel() const { return model; }; + string getScoreName() const { return scoreName; }; + private: + string path; + string filename; + string date; + double score; + string title; + double duration; + string model; + string scoreName; + }; + class Results { + public: + Results(const string& path, const int max, const string& model, const string& score) : path(path), max(max), model(model), scoreName(score) { load(); }; + void manage(); + private: + string path; + int max; + string model; + string scoreName; + vector files; + void load(); // Loads the list of results + void show() const; + void report(const int index) const; + int getIndex(const string& intent) const; + void menu(); + void sortList(); + void sortDate(); + void sortScore(); + void sortModel(); + void sortDuration(); + }; +}; + +#endif \ No newline at end of file diff --git a/src/Platform/list.cc b/src/Platform/list.cc new file mode 100644 index 0000000..ed8396d --- /dev/null +++ b/src/Platform/list.cc @@ -0,0 +1,57 @@ +#include +#include +#include "Paths.h" +#include "Colors.h" +#include "Datasets.h" + +using namespace std; +const int BALANCE_LENGTH = 75; + +struct separated : numpunct { + char do_decimal_point() const { return ','; } + char do_thousands_sep() const { return '.'; } + string do_grouping() const { return "\03"; } +}; + +void outputBalance(const string& balance) +{ + auto temp = string(balance); + while (temp.size() > BALANCE_LENGTH - 1) { + auto part = temp.substr(0, BALANCE_LENGTH); + cout << part << endl; + cout << setw(48) << " "; + temp = temp.substr(BALANCE_LENGTH); + } + cout << temp << endl; +} + +int main(int argc, char** argv) +{ + auto data = platform::Datasets(platform::Paths().datasets(), false); + locale mylocale(cout.getloc(), new separated); + locale::global(mylocale); + cout.imbue(mylocale); + cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl; + string balanceBars = string(BALANCE_LENGTH, '='); + cout << "============================== ====== ===== === " << balanceBars << endl; + bool odd = true; + for (const auto& dataset : data.getNames()) { + auto color = odd ? Colors::CYAN() : Colors::BLUE(); + cout << color << setw(30) << left << dataset << " "; + data.loadDataset(dataset); + auto nSamples = data.getNSamples(dataset); + cout << setw(6) << right << nSamples << " "; + cout << setw(5) << right << data.getFeatures(dataset).size() << " "; + cout << setw(3) << right << data.getNClasses(dataset) << " "; + stringstream oss; + string sep = ""; + for (auto number : data.getClassesCounts(dataset)) { + oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; + sep = " / "; + } + outputBalance(oss.str()); + odd = !odd; + } + cout << Colors::RESET() << endl; + return 0; +} diff --git a/src/Platform/main.cc b/src/Platform/main.cc index 24d0a33..6f9ce1c 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -6,20 +6,19 @@ #include "DotEnv.h" #include "Models.h" #include "modelRegister.h" +#include "Paths.h" + using namespace std; -const string PATH_RESULTS = "results"; -const string PATH_DATASETS = "datasets"; argparse::ArgumentParser manageArguments(int argc, char** argv) { auto env = platform::DotEnv(); - argparse::ArgumentParser program("BayesNetSample"); + argparse::ArgumentParser program("main"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("-p", "--path") .help("folder where the data files are located, default") - .default_value(string{ PATH_DATASETS } - ); + .default_value(string{ platform::Paths::datasets() }); program.add_argument("-m", "--model") .help("Model to use " + platform::Models::instance()->toString()) .action([](const std::string& value) { @@ -104,7 +103,7 @@ int main(int argc, char** argv) */ auto env = platform::DotEnv(); auto experiment = platform::Experiment(); - experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0"); + experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); for (auto seed : seeds) { @@ -115,7 +114,7 @@ int main(int argc, char** argv) experiment.go(filesToTest, path); experiment.setDuration(timer.getDuration()); if (saveResults) - experiment.save(PATH_RESULTS); + experiment.save(platform::Paths::results()); else experiment.report(); cout << "Done!" << endl; diff --git a/src/Platform/manage.cc b/src/Platform/manage.cc new file mode 100644 index 0000000..34e66cd --- /dev/null +++ b/src/Platform/manage.cc @@ -0,0 +1,41 @@ +#include +#include +#include "platformUtils.h" +#include "Paths.h" +#include "Results.h" + +using namespace std; + +argparse::ArgumentParser manageArguments(int argc, char** argv) +{ + argparse::ArgumentParser program("manage"); + program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); + program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); + program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); + try { + program.parse_args(argc, argv); + auto number = program.get("number"); + if (number < 0) { + throw runtime_error("Number of results must be greater than or equal to 0"); + } + auto model = program.get("model"); + auto score = program.get("score"); + } + catch (const exception& err) { + cerr << err.what() << endl; + cerr << program; + exit(1); + } + return program; +} + +int main(int argc, char** argv) +{ + auto program = manageArguments(argc, argv); + auto number = program.get("number"); + auto model = program.get("model"); + auto score = program.get("score"); + auto results = platform::Results(platform::Paths::results(), number, model, score); + results.manage(); + return 0; +} diff --git a/src/Platform/modelRegister.h b/src/Platform/modelRegister.h index 6ae9af3..04b48cf 100644 --- a/src/Platform/modelRegister.h +++ b/src/Platform/modelRegister.h @@ -16,4 +16,6 @@ static platform::Registrar registrarA("AODE", [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();}); static platform::Registrar registrarALD("AODELd", [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();}); +static platform::Registrar registrarBA("BoostAODE", + [](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();}); #endif \ No newline at end of file diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc index 6fca9d9..74e97fd 100644 --- a/src/Platform/platformUtils.cc +++ b/src/Platform/platformUtils.cc @@ -1,4 +1,5 @@ #include "platformUtils.h" +#include "Paths.h" using namespace torch; @@ -85,7 +86,7 @@ tuple, string, map>> loadData tuple>, vector, vector, string, map>> loadFile(const string& name) { auto handler = ArffFiles(); - handler.load(PATH + static_cast(name) + ".arff"); + handler.load(platform::Paths::datasets() + static_cast(name) + ".arff"); // Get Dataset X, y vector& X = handler.getX(); mdlp::labels_t& y = handler.getY();