diff --git a/.vscode/launch.json b/.vscode/launch.json
index 0ff1c47..bbf7429 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -27,15 +27,15 @@
                 "-m",
                 "AODE",
                 "-p",
-                "/Users/rmontanana/Code/discretizbench/datasets",
+                "/home/rmontanana/Code/discretizbench/datasets",
                 "--stratified",
                 "-d",
-                "letter",
+                "mfeat-morphological",
                 "--discretize"
                 // "--hyperparameters",
                 // "{\"repeatSparent\": true, \"maxModels\": 12}"
             ],
-            "cwd": "/Users/rmontanana/Code/discretizbench",
+            "cwd": "/home/rmontanana/Code/discretizbench",
         },
         {
             "type": "lldb",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b0f675..0034c2f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.20)
 
 project(BayesNet
-  VERSION 0.1.0
+  VERSION 0.2.0
   DESCRIPTION "Bayesian Network and basic classifiers Library."
   HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
   LANGUAGES CXX
@@ -40,8 +40,7 @@
 if (CODE_COVERAGE)
     enable_testing()
     include(CodeCoverage)
     MESSAGE("Code coverage enabled")
-    set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
-    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
+    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0")
     SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
 endif (CODE_COVERAGE)
diff --git a/data/_TAN_cpp_accuracy__.json b/data/_TAN_cpp_accuracy__.json
deleted file mode 100644
index ec747fa..0000000
--- a/data/_TAN_cpp_accuracy__.json
+++ /dev/null
@@ -1 +0,0 @@
-null
\ No newline at end of file
diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc
index af9ab8c..aca6ea5 100644
--- a/src/BayesNet/Classifier.cc
+++ b/src/BayesNet/Classifier.cc
@@ -13,7 +13,7 @@ namespace bayesnet {
         m = dataset.size(1);
         n = dataset.size(0) - 1;
         checkFitParameters();
-        auto n_classes = states[className].size();
+        auto n_classes = states.at(className).size();
         metrics = Metrics(dataset, features, className, n_classes);
         model.initialize();
         buildModel(weights);
diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc
index eed6a3c..97987d1 100644
--- a/src/BayesNet/Ensemble.cc
+++ b/src/BayesNet/Ensemble.cc
@@ -17,9 +17,13 @@
     {
         auto y_pred_ = y_pred.accessor<int, 2>();
         vector<int> y_pred_final;
+        int numClasses = states.at(className).size();
+        // y_pred is m x n_models with the prediction of every model for each sample
         for (int i = 0; i < y_pred.size(0); ++i) {
-            vector<double> votes(y_pred.size(1), 0);
-            for (int j = 0; j < y_pred.size(1); ++j) {
+            // votes stores, at each index (class value), the significance added by each model
+            // i.e. votes[0] contains how much weight the models' predictions give to class value 0
+            vector<double> votes(numClasses, 0.0);
+            for (int j = 0; j < n_models; ++j) {
                 votes[y_pred_[i][j]] += significanceModels[j];
             }
             // argsort in descending order
@@ -34,22 +38,17 @@
             throw logic_error("Ensemble has not been fitted");
         }
         Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
-        // //Create a threadpool
-        // auto threads{ vector<thread>() };
-        // mutex mtx;
-        // for (auto i = 0; i < n_models; ++i) {
-        //     threads.push_back(thread([&, i]() {
-        //         auto ypredict = models[i]->predict(X);
-        //         lock_guard<mutex> lock(mtx);
-        //         y_pred.index_put_({ "...", i }, ypredict);
-        //         }));
-        //     Hacer voting aquí ? ? ?
-        // }
-        // for (auto& thread : threads) {
-        //     thread.join();
-        // }
+        auto threads{ vector<thread>() };
+        mutex mtx;
         for (auto i = 0; i < n_models; ++i) {
-            y_pred.index_put_({ "...", i }, models[i]->predict(X));
+            threads.push_back(thread([&, i]() {
+                auto ypredict = models[i]->predict(X);
+                lock_guard<mutex> lock(mtx);
+                y_pred.index_put_({ "...", i }, ypredict);
+                }));
+        }
+        for (auto& thread : threads) {
+            thread.join();
         }
         return torch::tensor(voting(y_pred));
     }
diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc
index fb8bac8..da8898c 100644
--- a/src/BayesNet/Network.cc
+++ b/src/BayesNet/Network.cc
@@ -299,25 +299,19 @@
     vector<double> Network::exactInference(map<string, int>& evidence)
     {
         vector<double> result(classNumStates, 0.0);
-        // vector<thread> threads;
-        // mutex mtx;
-        // for (int i = 0; i < classNumStates; ++i) {
-        //     threads.emplace_back([this, &result, &evidence, i, &mtx]() {
-        //         auto completeEvidence = map<string, int>(evidence);
-        //         completeEvidence[getClassName()] = i;
-        //         double factor = computeFactor(completeEvidence);
-        //         lock_guard<mutex> lock(mtx);
-        //         result[i] = factor;
-        //         });
-        //     }
-        // for (auto& thread : threads) {
-        //     thread.join();
-        // }
+        vector<thread> threads;
+        mutex mtx;
         for (int i = 0; i < classNumStates; ++i) {
-            auto completeEvidence = map<string, int>(evidence);
-            completeEvidence[getClassName()] = i;
-            double factor = computeFactor(completeEvidence);
-            result[i] = factor;
+            threads.emplace_back([this, &result, &evidence, i, &mtx]() {
+                auto completeEvidence = map<string, int>(evidence);
+                completeEvidence[getClassName()] = i;
+                double factor = computeFactor(completeEvidence);
+                lock_guard<mutex> lock(mtx);
+                result[i] = factor;
+                });
+        }
+        for (auto& thread : threads) {
+            thread.join();
         }
         // Normalize result
         double sum = accumulate(result.begin(), result.end(), 0.0);
diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc
index 04d2ed2..4f01bec 100644
--- a/src/BayesNet/Node.cc
+++ b/src/BayesNet/Node.cc
@@ -100,7 +100,7 @@
         }
         int name_index = pos - features.begin();
         for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
-            torch::List<torch::optional<torch::Tensor>> coordinates;
+            c10::List<c10::optional<at::Tensor>> coordinates;
             coordinates.push_back(dataset.index({ name_index, n_sample }));
             for (auto parent : parents) {
                 pos = find(features.begin(), features.end(), parent->getName());
@@ -118,10 +118,10 @@
     }
     float Node::getFactorValue(map<string, int>& evidence)
     {
-        torch::List<torch::optional<torch::Tensor>> coordinates;
+        c10::List<c10::optional<at::Tensor>> coordinates;
         // following predetermined order of indices in the cpTable (see Node.h)
-        coordinates.push_back(torch::tensor(evidence[name]));
-        transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); });
+        coordinates.push_back(at::tensor(evidence[name]));
+        transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
         return cpTable.index({ coordinates }).item<float>();
     }
     vector<string> Node::graph(const string& className)
diff --git a/src/BayesNet/Proposal.cc b/src/BayesNet/Proposal.cc
index 09b39c3..6bc819b 100644
--- a/src/BayesNet/Proposal.cc
+++ b/src/BayesNet/Proposal.cc
@@ -53,15 +53,6 @@ namespace bayesnet {
                 auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
                 auto xvf = vector<float>(xvf_ptr, xvf_ptr + Xf.size(1));
                 discretizers[feature]->fit(xvf, yxv);
-                //
-                //
-                //
-                // auto tmp = discretizers[feature]->transform(xvf);
-                // Xv[index] = tmp;
-                // auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
-                // iota(xStates.begin(), xStates.end(), 0);
-                // //Update new states of the feature/node
-                // states[feature] = xStates;
             }
             if (upgrade) {
                 // Discretize again X (only the affected indices) with the new fitted discretizers
diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc
index 2239a46..1e240a0 100644
--- a/src/Platform/Datasets.cc
+++ b/src/Platform/Datasets.cc
@@ -213,10 +213,11 @@
     {
         for (int i = 0; i < features.size(); ++i) {
             states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
-            iota(begin(states[features[i]]), end(states[features[i]]), 0);
+            auto item = states.at(features[i]);
+            iota(begin(item), end(item), 0);
         }
         states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
-        iota(begin(states[className]), end(states[className]), 0);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
     }
     void Dataset::load_arff()
     {
diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc
index 83cb840..0fe6cc2 100644
--- a/src/Platform/Experiment.cc
+++ b/src/Platform/Experiment.cc
@@ -182,6 +182,7 @@ namespace platform {
                clf.reset();
            }
            cout << "end. " << flush;
+            delete fold;
        }
        result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
        result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc
index 74e97fd..f114636 100644
--- a/src/Platform/platformUtils.cc
+++ b/src/Platform/platformUtils.cc
@@ -69,11 +69,12 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
        Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
        for (int i = 0; i < features.size(); ++i) {
            states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-            iota(begin(states[features[i]]), end(states[features[i]]), 0);
+            auto item = states.at(features[i]);
+            iota(begin(item), end(item), 0);
            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
        }
        states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
-        iota(begin(states[className]), end(states[className]), 0);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
    } else {
        Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
        for (int i = 0; i < features.size(); ++i) {
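
Note (outside the patch itself): the Ensemble.cc hunk above weights each model's vote by its significance and keeps the highest-scoring class per sample. The standalone sketch below reproduces that idea with plain standard-library containers so it can be compiled and checked in isolation; the function name weightedVoting, the example class/model counts and the significance values are made up for illustration, and std::max_element stands in for the argsort used in the actual code.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

// y_pred holds one row per sample and one column per model (the class index each
// model predicted). significance holds one weight per model; the votes for a class
// are the sum of the weights of the models that chose it.
std::vector<int> weightedVoting(const std::vector<std::vector<int>>& y_pred,
                                const std::vector<double>& significance,
                                int numClasses)
{
    std::vector<int> y_final;
    y_final.reserve(y_pred.size());
    for (const auto& row : y_pred) {
        std::vector<double> votes(numClasses, 0.0);
        for (std::size_t j = 0; j < row.size(); ++j) {
            votes[row[j]] += significance[j];
        }
        // max_element picks the highest-scoring class (ties go to the lowest index)
        auto best = std::max_element(votes.begin(), votes.end());
        y_final.push_back(static_cast<int>(std::distance(votes.begin(), best)));
    }
    return y_final;
}

int main()
{
    // 3 samples, 3 models, 2 classes; the third model carries more weight.
    std::vector<std::vector<int>> y_pred = { { 0, 0, 1 }, { 1, 1, 0 }, { 0, 1, 1 } };
    std::vector<double> significance = { 1.0, 1.0, 1.5 };
    for (int c : weightedVoting(y_pred, significance, 2)) {
        std::cout << c << " ";  // prints: 0 1 1
    }
    std::cout << std::endl;
    return 0;
}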