diff --git a/.vscode/launch.json b/.vscode/launch.json index 407feb6..bbf7429 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,16 +25,17 @@ "program": "${workspaceFolder}/build/src/Platform/main", "args": [ "-m", - "AODELd", + "AODE", "-p", - "/Users/rmontanana/Code/discretizbench/datasets", + "/home/rmontanana/Code/discretizbench/datasets", "--stratified", "-d", - "wine" + "mfeat-morphological", + "--discretize" // "--hyperparameters", // "{\"repeatSparent\": true, \"maxModels\": 12}" ], - "cwd": "/Users/rmontanana/Code/discretizbench", + "cwd": "/home/rmontanana/Code/discretizbench", }, { "type": "lldb", diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b0f675..0034c2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(BayesNet - VERSION 0.1.0 + VERSION 0.2.0 DESCRIPTION "Bayesian Network and basic classifiers Library." HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" LANGUAGES CXX @@ -40,8 +40,7 @@ if (CODE_COVERAGE) enable_testing() include(CodeCoverage) MESSAGE("Code coverage enabled") - set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") - set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0") SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") endif (CODE_COVERAGE) diff --git a/data/_TAN_cpp_accuracy__.json b/data/_TAN_cpp_accuracy__.json deleted file mode 100644 index ec747fa..0000000 --- a/data/_TAN_cpp_accuracy__.json +++ /dev/null @@ -1 +0,0 @@ -null \ No newline at end of file diff --git a/diagrams/BayesNet.pdf b/diagrams/BayesNet.pdf new file mode 100755 index 0000000..dd478da Binary files /dev/null and b/diagrams/BayesNet.pdf differ diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index 5744f62..8cdb038 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -25,7 +25,7 @@ 
namespace bayesnet { int virtual getNumberOfStates() const = 0; vector virtual show() const = 0; vector virtual graph(const string& title = "") const = 0; - const string inline getVersion() const { return "0.1.0"; }; + const string inline getVersion() const { return "0.2.0"; }; vector virtual topological_order() = 0; void virtual dump_cpt()const = 0; virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index af9ab8c..aca6ea5 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -13,7 +13,7 @@ namespace bayesnet { m = dataset.size(1); n = dataset.size(0) - 1; checkFitParameters(); - auto n_classes = states[className].size(); + auto n_classes = states.at(className).size(); metrics = Metrics(dataset, features, className, n_classes); model.initialize(); buildModel(weights); diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index d7ee5b9..97987d1 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -17,9 +17,13 @@ namespace bayesnet { { auto y_pred_ = y_pred.accessor(); vector y_pred_final; + int numClasses = states.at(className).size(); + // y_pred is m x n_models with the prediction of every model for each sample for (int i = 0; i < y_pred.size(0); ++i) { - vector votes(y_pred.size(1), 0); - for (int j = 0; j < y_pred.size(1); ++j) { + // votes store in each index (value of class) the significance added by each model + // i.e. votes[0] contains how much value has the value 0 of class. 
That value is generated by the models predictions + vector votes(numClasses, 0.0); + for (int j = 0; j < n_models; ++j) { votes[y_pred_[i][j]] += significanceModels[j]; } // argsort in descending order @@ -34,7 +38,6 @@ namespace bayesnet { throw logic_error("Ensemble has not been fitted"); } Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32); - //Create a threadpool auto threads{ vector() }; mutex mtx; for (auto i = 0; i < n_models; ++i) { diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index 15fcacc..da8898c 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -174,42 +174,10 @@ namespace bayesnet { { setStates(states); laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation - int maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); - if (maxThreadsRunning < 1) { - maxThreadsRunning = 1; + for (auto& node : nodes) { /* compute each node's CPT in turn; fitted is set once after the loop */ + node.second->computeCPT(samples, features, laplaceSmoothing, weights); } - vector threads; - mutex mtx; - condition_variable cv; - int activeThreads = 0; - int nextNodeIndex = 0; - while (nextNodeIndex < nodes.size()) { - unique_lock lock(mtx); - cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; }); - threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() { - while (true) { - unique_lock lock(mtx); - if (nextNodeIndex >= nodes.size()) { - break; // No more work remaining - } - auto& pair = *std::next(nodes.begin(), nextNodeIndex); - ++nextNodeIndex; - lock.unlock(); - pair.second->computeCPT(samples, features, laplaceSmoothing, weights); - lock.lock(); - nodes[pair.first] = std::move(pair.second); - lock.unlock(); - } - lock_guard lock(mtx); - --activeThreads; - cv.notify_one(); - }); - ++activeThreads; - } - for (auto& thread : threads) { - thread.join(); - } fitted = true; } torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) { diff 
--git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index a26e790..0bf1b08 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -27,6 +27,7 @@ namespace bayesnet { Network(); explicit Network(float); explicit Network(Network&); + ~Network() = default; torch::Tensor& getSamples(); float getmaxThreads(); void addNode(const string&); @@ -52,7 +53,7 @@ namespace bayesnet { vector graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format void initialize(); void dump_cpt() const; - inline string version() { return "0.1.0"; } + inline string version() { return "0.2.0"; } }; } #endif \ No newline at end of file diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc index 04d2ed2..4f01bec 100644 --- a/src/BayesNet/Node.cc +++ b/src/BayesNet/Node.cc @@ -100,7 +100,7 @@ namespace bayesnet { } int name_index = pos - features.begin(); for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { - torch::List> coordinates; + c10::List> coordinates; coordinates.push_back(dataset.index({ name_index, n_sample })); for (auto parent : parents) { pos = find(features.begin(), features.end(), parent->getName()); @@ -118,10 +118,10 @@ namespace bayesnet { } float Node::getFactorValue(map& evidence) { - torch::List> coordinates; + c10::List> coordinates; // following predetermined order of indices in the cpTable (see Node.h) - coordinates.push_back(torch::tensor(evidence[name])); - transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); }); + coordinates.push_back(at::tensor(evidence[name])); + transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); return cpTable.index({ coordinates }).item(); } vector Node::graph(const string& className) diff --git a/src/BayesNet/Proposal.cc b/src/BayesNet/Proposal.cc index 
09b39c3..6bc819b 100644 --- a/src/BayesNet/Proposal.cc +++ b/src/BayesNet/Proposal.cc @@ -53,15 +53,6 @@ namespace bayesnet { auto xvf_ptr = Xf.index({ index }).data_ptr(); auto xvf = vector(xvf_ptr, xvf_ptr + Xf.size(1)); discretizers[feature]->fit(xvf, yxv); - // - // - // - // auto tmp = discretizers[feature]->transform(xvf); - // Xv[index] = tmp; - // auto xStates = vector(discretizers[pFeatures[index]]->getCutPoints().size() + 1); - // iota(xStates.begin(), xStates.end(), 0); - // //Update new states of the feature/node - // states[feature] = xStates; } if (upgrade) { // Discretize again X (only the affected indices) with the new fitted discretizers diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 2239a46..1e240a0 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -213,10 +213,11 @@ namespace platform { { for (int i = 0; i < features.size(); ++i) { states[features[i]] = vector(*max_element(Xd[i].begin(), Xd[i].end()) + 1); - iota(begin(states[features[i]]), end(states[features[i]]), 0); + auto& item = states.at(features[i]); /* bind by reference so iota fills the stored vector, not a copy */ + iota(begin(item), end(item), 0); } states[className] = vector(*max_element(yv.begin(), yv.end()) + 1); - iota(begin(states[className]), end(states[className]), 0); + iota(begin(states.at(className)), end(states.at(className)), 0); } void Dataset::load_arff() { diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index ac26972..0fe6cc2 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -179,8 +179,10 @@ namespace platform { result.addTimeTrain(train_time[item].item()); result.addTimeTest(test_time[item].item()); item++; + clf.reset(); } cout << "end.
" << flush; + delete fold; } result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); diff --git a/src/Platform/Models.cc b/src/Platform/Models.cc index 1a66156..08b4a45 100644 --- a/src/Platform/Models.cc +++ b/src/Platform/Models.cc @@ -26,7 +26,7 @@ namespace platform { instance = it->second(); - // wrap instance in a shared ptr and return + // wrap instance in a unique ptr and return if (instance != nullptr) - return shared_ptr(instance); + return unique_ptr(instance); else return nullptr; } diff --git a/src/Platform/platformUtils.cc b/src/Platform/platformUtils.cc index 74e97fd..f114636 100644 --- a/src/Platform/platformUtils.cc +++ b/src/Platform/platformUtils.cc @@ -69,11 +69,12 @@ tuple, string, map>> loadData Xd = torch::zeros({ static_cast(Xr[0].size()), static_cast(Xr.size()) }, torch::kInt32); for (int i = 0; i < features.size(); ++i) { states[features[i]] = vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); - iota(begin(states[features[i]]), end(states[features[i]]), 0); + auto& item = states.at(features[i]); /* bind by reference so iota fills the stored vector, not a copy */ + iota(begin(item), end(item), 0); Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32)); } states[className] = vector(*max_element(y.begin(), y.end()) + 1); - iota(begin(states[className]), end(states[className]), 0); + iota(begin(states.at(className)), end(states.at(className)), 0); } else { Xd = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, torch::kFloat32); for (int i = 0; i < features.size(); ++i) {