From 684443a788d9e064bcbd7e75a4133e88b21d4ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 9 Jun 2024 17:19:38 +0200 Subject: [PATCH 01/18] Implement Cestnik & Laplace smoothing --- CMakeLists.txt | 2 +- bayesnet/network/Network.cc | 24 +++++++++++++----------- bayesnet/network/Network.h | 10 +++++++--- bayesnet/network/Node.cc | 4 ++-- bayesnet/network/Node.h | 2 +- lib/mdlp | 2 +- tests/TestBayesModels.cc | 2 +- tests/TestModulesVersions.cc | 2 +- tests/lib/Files | 2 +- 9 files changed, 28 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eed7062..6e35773 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(BayesNet - VERSION 1.0.5.1 + VERSION 1.0.6 DESCRIPTION "Bayesian Network and basic classifiers Library." HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" LANGUAGES CXX diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index e0e2e38..2cc8541 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -7,17 +7,18 @@ #include #include #include +#include #include "Network.h" #include "bayesnet/utils/bayesnetUtils.h" namespace bayesnet { - Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 } + Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } { } - Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 } + Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } { } - Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), + Network::Network(const Network& other) : smoothing(other.smoothing), features(other.features), 
className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples) { if (samples.defined()) @@ -164,14 +165,14 @@ namespace bayesnet { for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." }); } - completeFit(states, weights); + completeFit(states, X.size(0), weights); } void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) { checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; this->samples = samples; - completeFit(states, weights); + completeFit(states, samples.size(1), weights); } // input_data comes in nxm, where n is the number of features and m the number of samples void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states) @@ -185,16 +186,17 @@ namespace bayesnet { samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); - completeFit(states, weights); + completeFit(states, input_data[0].size(), weights); } - void Network::completeFit(const std::map>& states, const torch::Tensor& weights) + void Network::completeFit(const std::map>& states, const int n_samples, const torch::Tensor& weights) { setStates(states); - laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation std::vector threads; for (auto& node : nodes) { - threads.emplace_back([this, &node, &weights]() { - node.second->computeCPT(samples, features, laplaceSmoothing, weights); + threads.emplace_back([this, &node, &weights, n_samples]() { + auto numStates = node.second->getNumStates(); + double smoothing_factor = smoothing == Smoothing_t::CESTNIK ? 
static_cast(n_samples) / numStates : 1.0 / static_cast(n_samples); + node.second->computeCPT(samples, features, smoothing_factor, weights); }); } for (auto& thread : threads) { @@ -337,7 +339,7 @@ namespace bayesnet { thread.join(); } // Normalize result - double sum = accumulate(result.begin(), result.end(), 0.0); + double sum = std::accumulate(result.begin(), result.end(), 0.0); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); return result; } diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index a87d5e1..dd08110 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -12,6 +12,10 @@ #include "Node.h" namespace bayesnet { + enum class Smoothing_t { + LAPLACE, + CESTNIK + }; class Network { public: Network(); @@ -54,15 +58,15 @@ namespace bayesnet { int classNumStates; std::vector features; // Including classname std::string className; - double laplaceSmoothing; + Smoothing_t smoothing; torch::Tensor samples; // n+1xm tensor used to fit the model bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); std::vector predict_sample(const std::vector&); std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); double computeFactor(std::map&); - void completeFit(const std::map>& states, const torch::Tensor& weights); - void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); + void completeFit(const std::map>& states, const int n_samples, const torch::Tensor& weights); + void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void setStates(const std::map>&); }; } diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index cc63b29..fcb1e53 100644 --- 
a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -90,14 +90,14 @@ namespace bayesnet { } return result; } - void Node::computeCPT(const torch::Tensor& dataset, const std::vector& features, const double laplaceSmoothing, const torch::Tensor& weights) + void Node::computeCPT(const torch::Tensor& dataset, const std::vector& features, const double smoothing, const torch::Tensor& weights) { dimensions.clear(); // Get dimensions of the CPT dimensions.push_back(numStates); transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); // Create a tensor of zeros with the dimensions of the CPT - cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; + cpTable = torch::zeros(dimensions, torch::kFloat) + smoothing; // Fill table with counts auto pos = find(features.begin(), features.end(), name); if (pos == features.end()) { diff --git a/bayesnet/network/Node.h b/bayesnet/network/Node.h index e1cfa06..dc21119 100644 --- a/bayesnet/network/Node.h +++ b/bayesnet/network/Node.h @@ -23,7 +23,7 @@ namespace bayesnet { std::vector& getParents(); std::vector& getChildren(); torch::Tensor& getCPT(); - void computeCPT(const torch::Tensor& dataset, const std::vector& features, const double laplaceSmoothing, const torch::Tensor& weights); + void computeCPT(const torch::Tensor& dataset, const std::vector& features, const double smoothing, const torch::Tensor& weights); int getNumStates() const; void setNumStates(int); unsigned minFill(); diff --git a/lib/mdlp b/lib/mdlp index 236d1b2..c4e6c04 160000 --- a/lib/mdlp +++ b/lib/mdlp @@ -1 +1 @@ -Subproject commit 236d1b2f8be185039493fe7fce04a83e02ed72e5 +Subproject commit c4e6c041fe7f769ec24c0a2bd66a5aff482fd630 diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index b5ee426..2d60d5e 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -20,7 +20,7 @@ #include "bayesnet/ensembles/BoostAODE.h" #include "TestUtils.h" 
-const std::string ACTUAL_VERSION = "1.0.5.1"; +const std::string ACTUAL_VERSION = "1.0.6"; TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") { diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index a8b2ce2..5b29178 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,7 +16,7 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "1.1.2" }, + { "mdlp", "1.2.0" }, { "Folding", "1.1.0" }, { "json", "3.11" }, { "ArffFiles", "1.0.0" } diff --git a/tests/lib/Files b/tests/lib/Files index 40ac380..dbefa02 160000 --- a/tests/lib/Files +++ b/tests/lib/Files @@ -1 +1 @@ -Subproject commit 40ac38011a2445e00df8a18048c67abaff16fa59 +Subproject commit dbefa02d9c0ca0f029f77e744cd80cb0150725c8 From 27a3e5a5e0998f28d476595d9d11709fc6e9462f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 10 Jun 2024 15:49:01 +0200 Subject: [PATCH 02/18] Implement 3 types of smoothing --- CHANGELOG.md | 1 + README.md | 2 +- bayesnet/BaseClassifier.h | 4 ++++ bayesnet/classifiers/Classifier.cc | 1 + bayesnet/classifiers/Classifier.h | 1 - bayesnet/ensembles/AODELd.cc | 1 + bayesnet/ensembles/BoostA2DE.cc | 2 ++ bayesnet/ensembles/BoostAODE.cc | 2 ++ bayesnet/ensembles/Ensemble.cc | 1 + bayesnet/network/Network.cc | 26 ++++++++++++++++++++------ bayesnet/network/Network.h | 5 ++++- 11 files changed, 37 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 585439a..9550cd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add tests to check the correct version of the mdlp, folding and json libraries. - Library documentation generated with Doxygen. - Link to documentation in the README.md. +- Three types of smoothing the Bayesian Network OLD_LAPLACE, LAPLACE and CESTNIK. 
### Internal diff --git a/README.md b/README.md index e444046..ba22453 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-97,3%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-97,2%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/BaseClassifier.h b/bayesnet/BaseClassifier.h index f8b4c84..1f80271 100644 --- a/bayesnet/BaseClassifier.h +++ b/bayesnet/BaseClassifier.h @@ -8,10 +8,13 @@ #include #include #include +#include "bayesnet/network/Network.h" + namespace bayesnet { enum status_t { NORMAL, WARNING, ERROR }; class BaseClassifier { public: + void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; } // To call before fit // X is nxm std::vector, y is nx1 std::vector virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) = 0; // X is nxm tensor, y is nx1 tensor @@ -41,5 +44,6 @@ namespace bayesnet { protected: virtual void trainModel(const torch::Tensor& weights) = 0; std::vector validHyperparameters; + Smoothing_t smoothing = Smoothing_t::NONE; }; } \ No newline at end of file diff --git a/bayesnet/classifiers/Classifier.cc b/bayesnet/classifiers/Classifier.cc index 8d7ba15..287e1bf 100644 --- a/bayesnet/classifiers/Classifier.cc +++ b/bayesnet/classifiers/Classifier.cc @@ -22,6 +22,7 @@ namespace bayesnet { 
auto n_classes = states.at(className).size(); metrics = Metrics(dataset, features, className, n_classes); model.initialize(); + model.setSmoothing(smoothing); buildModel(weights); trainModel(weights); fitted = true; diff --git a/bayesnet/classifiers/Classifier.h b/bayesnet/classifiers/Classifier.h index 0349bcd..9be1e3f 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -8,7 +8,6 @@ #define CLASSIFIER_H #include #include "bayesnet/utils/BayesMetrics.h" -#include "bayesnet/network/Network.h" #include "bayesnet/BaseClassifier.h" namespace bayesnet { diff --git a/bayesnet/ensembles/AODELd.cc b/bayesnet/ensembles/AODELd.cc index 28fc793..29d66f8 100644 --- a/bayesnet/ensembles/AODELd.cc +++ b/bayesnet/ensembles/AODELd.cc @@ -37,6 +37,7 @@ namespace bayesnet { void AODELd::trainModel(const torch::Tensor& weights) { for (const auto& model : models) { + model->setSmoothing(smoothing); model->fit(Xf, y, features, className, states); } } diff --git a/bayesnet/ensembles/BoostA2DE.cc b/bayesnet/ensembles/BoostA2DE.cc index 4738358..481bfab 100644 --- a/bayesnet/ensembles/BoostA2DE.cc +++ b/bayesnet/ensembles/BoostA2DE.cc @@ -32,6 +32,7 @@ namespace bayesnet { for (int j = i + 1; j < featuresSelected.size(); j++) { auto parents = { featuresSelected[i], featuresSelected[j] }; std::unique_ptr model = std::make_unique(parents); + model->setSmoothing(smoothing); model->fit(dataset, features, className, states, weights_); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel @@ -96,6 +97,7 @@ namespace bayesnet { pairSelection.erase(pairSelection.begin()); std::unique_ptr model; model = std::make_unique(std::vector({ feature_pair.first, feature_pair.second })); + model->setSmoothing(smoothing); model->fit(dataset, features, className, states, weights_); alpha_t = 0.0; if (!block_update) { diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index 
30137df..f37cf2d 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -22,6 +22,7 @@ namespace bayesnet { std::vector featuresSelected = featureSelection(weights_); for (const int& feature : featuresSelected) { std::unique_ptr model = std::make_unique(feature); + model->setSmoothing(smoothing); model->fit(dataset, features, className, states, weights_); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel @@ -89,6 +90,7 @@ namespace bayesnet { featureSelection.erase(featureSelection.begin()); std::unique_ptr model; model = std::make_unique(feature); + model->setSmoothing(smoothing); model->fit(dataset, features, className, states, weights_); alpha_t = 0.0; if (!block_update) { diff --git a/bayesnet/ensembles/Ensemble.cc b/bayesnet/ensembles/Ensemble.cc index fa2c271..6ef096b 100644 --- a/bayesnet/ensembles/Ensemble.cc +++ b/bayesnet/ensembles/Ensemble.cc @@ -18,6 +18,7 @@ namespace bayesnet { n_models = models.size(); for (auto i = 0; i < n_models; ++i) { // fit with std::vectors + models[i]->setSmoothing(smoothing); models[i]->fit(dataset, features, className, states); } } diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 2cc8541..2cdb3f1 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -165,14 +165,14 @@ namespace bayesnet { for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." 
}); } - completeFit(states, X.size(0), weights); + completeFit(states, weights); } void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) { checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; this->samples = samples; - completeFit(states, samples.size(1), weights); + completeFit(states, weights); } // input_data comes in nxm, where n is the number of features and m the number of samples void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states) @@ -186,16 +186,30 @@ namespace bayesnet { samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); - completeFit(states, input_data[0].size(), weights); + completeFit(states, weights); } - void Network::completeFit(const std::map>& states, const int n_samples, const torch::Tensor& weights) + void Network::completeFit(const std::map>& states, const torch::Tensor& weights) { setStates(states); std::vector threads; + const double n_samples = static_cast(samples.size(1)); for (auto& node : nodes) { threads.emplace_back([this, &node, &weights, n_samples]() { - auto numStates = node.second->getNumStates(); - double smoothing_factor = smoothing == Smoothing_t::CESTNIK ? 
static_cast(n_samples) / numStates : 1.0 / static_cast(n_samples); + double numStates = static_cast(node.second->getNumStates()); + double smoothing_factor = 0.0; + switch (smoothing) { + case Smoothing_t::OLD_LAPLACE: + smoothing_factor = 1.0 / n_samples; + break; + case Smoothing_t::LAPLACE: + smoothing_factor = 1.0; + break; + case Smoothing_t::CESTNIK: + smoothing_factor = n_samples / numStates; + break; + default: + throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast(smoothing))); + } node.second->computeCPT(samples, features, smoothing_factor, weights); }); } diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index dd08110..37177cc 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -13,6 +13,8 @@ namespace bayesnet { enum class Smoothing_t { + NONE = -1, + OLD_LAPLACE = 0, LAPLACE, CESTNIK }; @@ -36,6 +38,7 @@ namespace bayesnet { /* Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. 
*/ + void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; }; void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); @@ -65,7 +68,7 @@ namespace bayesnet { std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); double computeFactor(std::map&); - void completeFit(const std::map>& states, const int n_samples, const torch::Tensor& weights); + void completeFit(const std::map>& states, const torch::Tensor& weights); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void setStates(const std::map>&); }; From b34869cc61c7413fd204e42759efd28f523d0a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 11 Jun 2024 11:40:45 +0200 Subject: [PATCH 03/18] Set smoothing as fit parameter --- README.md | 2 +- bayesnet/BaseClassifier.h | 12 +++---- bayesnet/classifiers/Classifier.cc | 25 +++++++-------- bayesnet/classifiers/Classifier.h | 12 +++---- bayesnet/classifiers/KDBLd.cc | 4 +-- bayesnet/classifiers/KDBLd.h | 2 +- bayesnet/classifiers/Proposal.cc | 2 +- bayesnet/classifiers/SPODELd.cc | 12 +++---- bayesnet/classifiers/SPODELd.h | 8 ++--- bayesnet/classifiers/TANLd.cc | 4 +-- bayesnet/classifiers/TANLd.h | 5 ++- bayesnet/ensembles/AODELd.cc | 9 +++--- bayesnet/ensembles/AODELd.h | 4 +-- bayesnet/ensembles/BoostA2DE.cc | 12 +++---- bayesnet/ensembles/BoostA2DE.h | 4 +-- 
bayesnet/ensembles/BoostAODE.cc | 12 +++---- bayesnet/ensembles/BoostAODE.h | 4 +-- bayesnet/ensembles/Ensemble.cc | 5 ++- bayesnet/ensembles/Ensemble.h | 2 +- bayesnet/network/Network.cc | 22 ++++++------- bayesnet/network/Network.h | 10 +++--- tests/TestA2DE.cc | 8 ++--- tests/TestBayesClassifier.cc | 32 +++++++++---------- tests/TestBayesEnsemble.cc | 14 ++++----- tests/TestBayesModels.cc | 22 ++++++------- tests/TestBayesNetwork.cc | 50 +++++++++++++++--------------- tests/TestBoostA2DE.cc | 22 ++++++------- tests/TestBoostAODE.cc | 22 ++++++------- tests/TestModulesVersions.cc | 2 +- tests/TestUtils.h | 2 ++ 30 files changed, 168 insertions(+), 178 deletions(-) diff --git a/README.md b/README.md index ba22453..f387482 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-97,2%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-96,9%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/BaseClassifier.h b/bayesnet/BaseClassifier.h index 1f80271..81fbe26 100644 --- a/bayesnet/BaseClassifier.h +++ b/bayesnet/BaseClassifier.h @@ -14,13 +14,12 @@ namespace bayesnet { enum status_t { NORMAL, WARNING, ERROR }; class BaseClassifier { public: - void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; } // To call before fit // X is nxm std::vector, y is nx1 std::vector - virtual BaseClassifier& 
fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) = 0; + virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; // X is nxm tensor, y is nx1 tensor - virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) = 0; + virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0; virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; std::vector virtual predict(std::vector>& X) = 0; @@ -42,8 +41,7 @@ namespace bayesnet { virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; std::vector& getValidHyperparameters() { return validHyperparameters; } protected: - virtual void trainModel(const torch::Tensor& weights) = 0; + virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0; std::vector validHyperparameters; - Smoothing_t smoothing = Smoothing_t::NONE; }; } \ No newline at end of file diff --git a/bayesnet/classifiers/Classifier.cc 
b/bayesnet/classifiers/Classifier.cc index 287e1bf..5262401 100644 --- a/bayesnet/classifiers/Classifier.cc +++ b/bayesnet/classifiers/Classifier.cc @@ -11,7 +11,7 @@ namespace bayesnet { Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; - Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) + Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { this->features = features; this->className = className; @@ -22,9 +22,8 @@ namespace bayesnet { auto n_classes = states.at(className).size(); metrics = Metrics(dataset, features, className, n_classes); model.initialize(); - model.setSmoothing(smoothing); buildModel(weights); - trainModel(weights); + trainModel(weights, smoothing); fitted = true; return *this; } @@ -42,20 +41,20 @@ namespace bayesnet { throw std::runtime_error(oss.str()); } } - void Classifier::trainModel(const torch::Tensor& weights) + void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing) { - model.fit(dataset, weights, features, className, states); + model.fit(dataset, weights, features, className, states, smoothing); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { dataset = X; buildDataset(y); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return 
build(features, className, states, weights, smoothing); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { dataset = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kInt32); for (int i = 0; i < X.size(); ++i) { @@ -64,18 +63,18 @@ namespace bayesnet { auto ytmp = torch::tensor(y, torch::kInt32); buildDataset(ytmp); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } - Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { this->dataset = dataset; const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } - Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { this->dataset = dataset; - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } void Classifier::checkFitParameters() { diff --git 
a/bayesnet/classifiers/Classifier.h b/bayesnet/classifiers/Classifier.h index 9be1e3f..4d3ea83 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -15,10 +15,10 @@ namespace bayesnet { public: Classifier(Network model); virtual ~Classifier() = default; - Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override; + Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) override; + Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override; void addNodes(); int getNumberOfNodes() const override; int getNumberOfEdges() const override; @@ -50,10 +50,10 @@ namespace bayesnet { std::vector notes; // Used to store messages occurred during the fit process void checkFitParameters(); virtual void buildModel(const torch::Tensor& weights) = 0; - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void 
buildDataset(torch::Tensor& y); private: - Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights); + Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); }; } #endif diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 9f1647c..a285da1 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -8,7 +8,7 @@ namespace bayesnet { KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} - KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -19,7 +19,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network - KDB::fit(dataset, features, className, states); + KDB::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; } diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h index 9150bba..77b9eec 100644 --- a/bayesnet/classifiers/KDBLd.h +++ b/bayesnet/classifiers/KDBLd.h @@ -15,7 +15,7 @@ namespace bayesnet { public: explicit KDBLd(int k); virtual ~KDBLd() = default; - KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; + KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "KDB") const 
override; torch::Tensor predict(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 2dfadb7..a5d5f12 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -70,7 +70,7 @@ namespace bayesnet { states[pFeatures[index]] = xStates; } const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); - model.fit(pDataset, weights, pFeatures, pClassName, states); + model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::OLD_LAPLACE); } return states; } diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index 98c41ff..d733253 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -8,25 +8,25 @@ namespace bayesnet { SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} - SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); Xf = X_; y = y_; - return commonFit(features_, className_, states_); + return commonFit(features_, className_, states_, smoothing); } - SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { if (!torch::is_floating_point(dataset)) { throw std::runtime_error("Dataset must be a floating point tensor"); } Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); y = dataset.index({ -1, "..." 
}).clone().to(torch::kInt32); - return commonFit(features_, className_, states_); + return commonFit(features_, className_, states_, smoothing); } - SPODELd& SPODELd::commonFit(const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { features = features_; className = className_; @@ -34,7 +34,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network - SPODE::fit(dataset, features, className, states); + SPODE::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; } diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h index f24a030..b92d24c 100644 --- a/bayesnet/classifiers/SPODELd.h +++ b/bayesnet/classifiers/SPODELd.h @@ -14,10 +14,10 @@ namespace bayesnet { public: explicit SPODELd(int root); virtual ~SPODELd() = default; - SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; - SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states) override; - SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states); - std::vector graph(const std::string& name = "SPODE") const override; + SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t 
smoothing); + std::vector graph(const std::string& name = "SPODELd") const override; torch::Tensor predict(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; }; diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index ab86dc4..6e7d443 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -8,7 +8,7 @@ namespace bayesnet { TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} - TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -19,7 +19,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network - TAN::fit(dataset, features, className, states); + TAN::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; diff --git a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h index e6c3c75..d05a9c3 100644 --- a/bayesnet/classifiers/TANLd.h +++ b/bayesnet/classifiers/TANLd.h @@ -15,10 +15,9 @@ namespace bayesnet { public: TANLd(); virtual ~TANLd() = default; - TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; - std::vector graph(const std::string& name = "TAN") const override; + TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + std::vector graph(const std::string& name = "TANLd") const override; torch::Tensor predict(torch::Tensor& X) override; - static inline std::string 
version() { return "0.0.1"; }; }; } #endif // !TANLD_H \ No newline at end of file diff --git a/bayesnet/ensembles/AODELd.cc b/bayesnet/ensembles/AODELd.cc index 29d66f8..1de8218 100644 --- a/bayesnet/ensembles/AODELd.cc +++ b/bayesnet/ensembles/AODELd.cc @@ -10,7 +10,7 @@ namespace bayesnet { AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) { } - AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -21,7 +21,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network - Ensemble::fit(dataset, features, className, states); + Ensemble::fit(dataset, features, className, states, smoothing); return *this; } @@ -34,11 +34,10 @@ namespace bayesnet { n_models = models.size(); significanceModels = std::vector(n_models, 1.0); } - void AODELd::trainModel(const torch::Tensor& weights) + void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { for (const auto& model : models) { - model->setSmoothing(smoothing); - model->fit(Xf, y, features, className, states); + model->fit(Xf, y, features, className, states, smoothing); } } std::vector AODELd::graph(const std::string& name) const diff --git a/bayesnet/ensembles/AODELd.h b/bayesnet/ensembles/AODELd.h index 9c87090..4bf0b63 100644 --- a/bayesnet/ensembles/AODELd.h +++ b/bayesnet/ensembles/AODELd.h @@ -15,10 +15,10 @@ namespace bayesnet { public: AODELd(bool predict_voting = true); virtual ~AODELd() = default; - AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& 
className_, map>& states_) override; + AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "AODELd") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void buildModel(const torch::Tensor& weights) override; }; } diff --git a/bayesnet/ensembles/BoostA2DE.cc b/bayesnet/ensembles/BoostA2DE.cc index 481bfab..236c165 100644 --- a/bayesnet/ensembles/BoostA2DE.cc +++ b/bayesnet/ensembles/BoostA2DE.cc @@ -19,7 +19,7 @@ namespace bayesnet { BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting) { } - std::vector BoostA2DE::initializeModels() + std::vector BoostA2DE::initializeModels(const Smoothing_t smoothing) { torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); std::vector featuresSelected = featureSelection(weights_); @@ -32,8 +32,7 @@ namespace bayesnet { for (int j = i + 1; j < featuresSelected.size(); j++) { auto parents = { featuresSelected[i], featuresSelected[j] }; std::unique_ptr model = std::make_unique(parents); - model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel n_models++; @@ -42,7 +41,7 @@ namespace bayesnet { notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); return featuresSelected; } - void BoostA2DE::trainModel(const torch::Tensor& weights) + void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { // // Logging setup @@ -59,7 +58,7 @@ namespace bayesnet { bool 
finished = false; std::vector featuresUsed; if (selectFeatures) { - featuresUsed = initializeModels(); + featuresUsed = initializeModels(smoothing); auto ypred = predict(X_train); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); // Update significance of the models @@ -97,8 +96,7 @@ namespace bayesnet { pairSelection.erase(pairSelection.begin()); std::unique_ptr model; model = std::make_unique(std::vector({ feature_pair.first, feature_pair.second })); - model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); alpha_t = 0.0; if (!block_update) { auto ypred = model->predict(X_train); diff --git a/bayesnet/ensembles/BoostA2DE.h b/bayesnet/ensembles/BoostA2DE.h index ff56b79..71cfa99 100644 --- a/bayesnet/ensembles/BoostA2DE.h +++ b/bayesnet/ensembles/BoostA2DE.h @@ -17,9 +17,9 @@ namespace bayesnet { virtual ~BoostA2DE() = default; std::vector graph(const std::string& title = "BoostA2DE") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; private: - std::vector initializeModels(); + std::vector initializeModels(const Smoothing_t smoothing); }; } #endif \ No newline at end of file diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index f37cf2d..0638d78 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -16,14 +16,13 @@ namespace bayesnet { BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting) { } - std::vector BoostAODE::initializeModels() + std::vector BoostAODE::initializeModels(const Smoothing_t smoothing) { torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); std::vector featuresSelected = featureSelection(weights_); for (const int& feature : featuresSelected) { std::unique_ptr model = std::make_unique(feature); - 
model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel n_models++; @@ -31,7 +30,7 @@ namespace bayesnet { notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); return featuresSelected; } - void BoostAODE::trainModel(const torch::Tensor& weights) + void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { // // Logging setup @@ -48,7 +47,7 @@ namespace bayesnet { bool finished = false; std::vector featuresUsed; if (selectFeatures) { - featuresUsed = initializeModels(); + featuresUsed = initializeModels(smoothing); auto ypred = predict(X_train); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); // Update significance of the models @@ -90,8 +89,7 @@ namespace bayesnet { featureSelection.erase(featureSelection.begin()); std::unique_ptr model; model = std::make_unique(feature); - model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); alpha_t = 0.0; if (!block_update) { auto ypred = model->predict(X_train); diff --git a/bayesnet/ensembles/BoostAODE.h b/bayesnet/ensembles/BoostAODE.h index e4eb250..bc66ec1 100644 --- a/bayesnet/ensembles/BoostAODE.h +++ b/bayesnet/ensembles/BoostAODE.h @@ -18,9 +18,9 @@ namespace bayesnet { virtual ~BoostAODE() = default; std::vector graph(const std::string& title = "BoostAODE") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; private: - std::vector initializeModels(); + std::vector 
initializeModels(const Smoothing_t smoothing); }; } #endif \ No newline at end of file diff --git a/bayesnet/ensembles/Ensemble.cc b/bayesnet/ensembles/Ensemble.cc index 6ef096b..68f3ee5 100644 --- a/bayesnet/ensembles/Ensemble.cc +++ b/bayesnet/ensembles/Ensemble.cc @@ -13,13 +13,12 @@ namespace bayesnet { }; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; - void Ensemble::trainModel(const torch::Tensor& weights) + void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { n_models = models.size(); for (auto i = 0; i < n_models; ++i) { // fit with std::vectors - models[i]->setSmoothing(smoothing); - models[i]->fit(dataset, features, className, states); + models[i]->fit(dataset, features, className, states, smoothing); } } std::vector Ensemble::compute_arg_max(std::vector>& X) diff --git a/bayesnet/ensembles/Ensemble.h b/bayesnet/ensembles/Ensemble.h index 2c072a8..5172a40 100644 --- a/bayesnet/ensembles/Ensemble.h +++ b/bayesnet/ensembles/Ensemble.h @@ -46,7 +46,7 @@ namespace bayesnet { unsigned n_models; std::vector> models; std::vector significanceModels; - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; bool predict_voting; }; } diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 2cdb3f1..419585b 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -11,14 +11,14 @@ #include "Network.h" #include "bayesnet/utils/bayesnetUtils.h" namespace bayesnet { - Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } + Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 } { } - Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } + Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 } { } - 
Network::Network(const Network& other) : smoothing(other.smoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), + Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples) { if (samples.defined()) @@ -156,7 +156,7 @@ namespace bayesnet { classNumStates = nodes.at(className)->getNumStates(); } // X comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); this->className = className; @@ -165,17 +165,17 @@ namespace bayesnet { for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." 
}); } - completeFit(states, weights); + completeFit(states, weights, smoothing); } - void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; this->samples = samples; - completeFit(states, weights); + completeFit(states, weights, smoothing); } // input_data comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); @@ -186,15 +186,15 @@ namespace bayesnet { samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } samples.index_put_({ -1, "..." 
}, torch::tensor(labels, torch::kInt32)); - completeFit(states, weights); + completeFit(states, weights, smoothing); } - void Network::completeFit(const std::map>& states, const torch::Tensor& weights) + void Network::completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { setStates(states); std::vector threads; const double n_samples = static_cast(samples.size(1)); for (auto& node : nodes) { - threads.emplace_back([this, &node, &weights, n_samples]() { + threads.emplace_back([this, &node, &weights, n_samples, smoothing]() { double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; switch (smoothing) { diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 37177cc..b3417cd 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -38,10 +38,9 @@ namespace bayesnet { /* Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. 
*/ - void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; }; - void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); - void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); - void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); + void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); + void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); + void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); std::vector predict(const std::vector>&); // Return mx1 std::vector of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); @@ -61,14 +60,13 @@ namespace bayesnet { int classNumStates; std::vector features; // Including classname std::string className; - Smoothing_t smoothing; torch::Tensor samples; // n+1xm tensor used to fit the model bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); std::vector predict_sample(const std::vector&); std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); double computeFactor(std::map&); - void completeFit(const std::map>& states, const torch::Tensor& weights); + 
void completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void setStates(const std::map>&); }; diff --git a/tests/TestA2DE.cc b/tests/TestA2DE.cc index 31809a4..60b876f 100644 --- a/tests/TestA2DE.cc +++ b/tests/TestA2DE.cc @@ -16,7 +16,7 @@ TEST_CASE("Fit and Score", "[A2DE]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::A2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon)); REQUIRE(clf.getNumberOfNodes() == 360); REQUIRE(clf.getNumberOfEdges() == 756); @@ -30,18 +30,18 @@ TEST_CASE("Test score with predict_voting", "[A2DE]") {"predict_voting", true}, }; clf.setHyperparameters(hyperparameters); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon)); hyperparameters["predict_voting"] = false; clf.setHyperparameters(hyperparameters); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon)); } TEST_CASE("Test graph", "[A2DE]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::A2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = clf.graph(); REQUIRE(graph.size() == 78); REQUIRE(graph[0] == "digraph BayesNet 
{\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n"); diff --git a/tests/TestBayesClassifier.cc b/tests/TestBayesClassifier.cc index 3eb0a3f..ce5887b 100644 --- a/tests/TestBayesClassifier.cc +++ b/tests/TestBayesClassifier.cc @@ -18,38 +18,38 @@ TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]") auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); raw.yv.pop_back(); - REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::runtime_error); - REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); + REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); } TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); auto yshort = torch::zeros({ 149 }, torch::kInt32); - REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), std::runtime_error); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); } TEST_CASE("Invalid data type", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", false); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, 
raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), "dataset (X, y) must be of type Integer"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer"); } TEST_CASE("Invalid number of features", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0); - REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), "Classifier: X 5 and features 4 must have the same number of features"); + REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features"); } TEST_CASE("Invalid class name", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), "class name not found in states"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states"); } TEST_CASE("Invalid feature name", "[Classifier]") { @@ -57,8 +57,8 @@ TEST_CASE("Invalid feature name", "[Classifier]") auto raw = RawDatasets("iris", true); auto statest 
= raw.states; statest.erase("petallength"); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), "feature [petallength] not found in states"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states"); } TEST_CASE("Invalid hyperparameter", "[Classifier]") { @@ -71,7 +71,7 @@ TEST_CASE("Topological order", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto order = model.topological_order(); REQUIRE(order.size() == 4); REQUIRE(order[0] == "petallength"); @@ -83,7 +83,7 @@ TEST_CASE("Dump_cpt", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto cpt = model.dump_cpt(); REQUIRE(cpt.size() == 1713); } @@ -111,7 +111,7 @@ TEST_CASE("KDB Graph", "[Classifier]") { auto model = bayesnet::KDB(2); auto raw = RawDatasets("iris", true); - model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = model.graph(); REQUIRE(graph.size() == 15); } @@ -119,7 +119,7 @@ TEST_CASE("KDBLd Graph", "[Classifier]") { auto model = bayesnet::KDBLd(2); auto raw = RawDatasets("iris", false); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto graph = 
model.graph(); REQUIRE(graph.size() == 15); } \ No newline at end of file diff --git a/tests/TestBayesEnsemble.cc b/tests/TestBayesEnsemble.cc index bbfe086..da9f839 100644 --- a/tests/TestBayesEnsemble.cc +++ b/tests/TestBayesEnsemble.cc @@ -18,7 +18,7 @@ TEST_CASE("Topological Order", "[Ensemble]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto order = clf.topological_order(); REQUIRE(order.size() == 0); } @@ -26,7 +26,7 @@ TEST_CASE("Dump CPT", "[Ensemble]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto dump = clf.dump_cpt(); REQUIRE(dump == ""); } @@ -34,7 +34,7 @@ TEST_CASE("Number of States", "[Ensemble]") { auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfStates() == 76); } TEST_CASE("Show", "[Ensemble]") @@ -46,7 +46,7 @@ TEST_CASE("Show", "[Ensemble]") {"maxTolerance", 1}, {"convergence", false}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); std::vector expected = { "class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, sepalwidth, petalwidth, ", @@ -78,16 +78,16 @@ TEST_CASE("Graph", "[Ensemble]") { auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = clf.graph(); 
REQUIRE(graph.size() == 56); auto clf2 = bayesnet::AODE(); - clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); graph = clf2.graph(); REQUIRE(graph.size() == 56); raw = RawDatasets("glass", false); auto clf3 = bayesnet::AODELd(); - clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); graph = clf3.graph(); REQUIRE(graph.size() == 261); } diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 2d60d5e..cbc56a9 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -54,7 +54,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") auto clf = models[name]; auto discretize = name.substr(name.length() - 2) != "Ld"; auto raw = RawDatasets(file_name, discretize); - clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf->score(raw.Xt, raw.yt); INFO("Classifier: " << name << " File: " << file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); @@ -81,7 +81,7 @@ TEST_CASE("Models features & Graph", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfStates() == 19); @@ -93,7 +93,7 @@ TEST_CASE("Models features & Graph", "[Models]") { auto clf = bayesnet::TANLd(); auto raw = RawDatasets("iris", false); - clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() 
== 7); REQUIRE(clf.getNumberOfStates() == 19); @@ -106,7 +106,7 @@ TEST_CASE("Get num features & num edges", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 8); } @@ -166,7 +166,7 @@ TEST_CASE("Model predict_proba", "[Models]") SECTION("Test " + model + " predict_proba") { auto clf = models[model]; - clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto y_pred_proba = clf->predict_proba(raw.Xv); auto yt_pred_proba = clf->predict_proba(raw.Xt); auto y_pred = clf->predict(raw.Xv); @@ -203,7 +203,7 @@ TEST_CASE("AODE voting-proba", "[Models]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::AODE(false); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score_proba = clf.score(raw.Xv, raw.yv); auto pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({ @@ -222,9 +222,9 @@ TEST_CASE("SPODELd dataset", "[Models]") auto raw = RawDatasets("iris", false); auto clf = bayesnet::SPODELd(0); // raw.dataset.to(torch::kFloat32); - clf.fit(raw.dataset, raw.features, raw.className, raw.states); + clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xt, raw.yt); - clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto scoret = clf.score(raw.Xt, raw.yt); REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon)); @@ -233,13 +233,13 @@ TEST_CASE("KDB with hyperparameters", "[Models]") { 
auto raw = RawDatasets("glass", true); auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); clf.setHyperparameters({ {"k", 3}, {"theta", 0.7}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto scoret = clf.score(raw.Xv, raw.yv); REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); @@ -248,7 +248,7 @@ TEST_CASE("Incorrect type of data for SPODELd", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::SPODELd(0); - REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states), std::runtime_error); + REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); } TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 680a349..b1d6911 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -115,9 +115,9 @@ TEST_CASE("Test Bayesian Network", "[Network]") REQUIRE(children == children3); } // Fit networks - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); - net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states); - net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing); + net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(net.getStates() == net2.getStates()); REQUIRE(net.getStates() == net3.getStates()); 
REQUIRE(net.getFeatures() == net2.getFeatures()); @@ -194,7 +194,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test predict") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector y_test = { 2, 2, 0, 2, 1 }; auto y_pred = net.predict(test); @@ -203,7 +203,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test predict_proba") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector> y_test = { {0.450237, 0.0866621, 0.463101}, @@ -224,14 +224,14 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test score") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto score = net.score(raw.Xv, raw.yv); REQUIRE(score == Catch::Approx(0.97333333).margin(threshold)); } SECTION("Copy constructor") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto net2 = bayesnet::Network(net); REQUIRE(net.getFeatures() == net2.getFeatures()); REQUIRE(net.getEdges() == net2.getEdges()); @@ -268,7 +268,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") // predict with wrong data auto netx = bayesnet::Network(); buildModel(netx, raw.features, raw.className); - 
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} }; auto test_tensor2 = bayesnet::vectorToTensor(test2, false); REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error); @@ -278,17 +278,17 @@ TEST_CASE("Test Bayesian Network", "[Network]") // fit with wrong data // Weights auto net2 = bayesnet::Network(); - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states), std::invalid_argument); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit"; - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states), invalid_weights); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights); // X & y std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states), invalid_labels); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels); // Features std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, 
std::vector(), raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states), invalid_features); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states, raw.smoothing), invalid_features); // Different number of features auto net3 = bayesnet::Network(); auto test2y = { 1, 2, 3, 4, 5 }; @@ -296,23 +296,23 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto features3 = raw.features; features3.pop_back(); std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)"; - REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states), invalid_features2); + REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2); // Uninitialized network std::string network_invalid = "The network has not been initialized. 
You must call addNode() before calling fit()"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), network_invalid); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid); // Classname std::string invalid_classname = "Class Name not found in Network::features"; - REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), invalid_classname); + REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname); // Invalid feature auto features2 = raw.features; features2.pop_back(); features2.push_back("duck"); std::string invalid_feature = "Feature duck not found in Network::features"; - REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), invalid_feature); + REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature); // Add twice the same node name to the network => Nothing should happen net.addNode("A"); net.addNode("A"); @@ -320,8 +320,8 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto net4 = 
bayesnet::Network(); buildModel(net4, raw.features, raw.className); std::string invalid_state = "Feature sepallength not found in states"; - REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>()), std::invalid_argument); - REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>()), invalid_state); + REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>(), raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>(), raw.smoothing), invalid_state); } } @@ -366,7 +366,7 @@ TEST_CASE("Dump CPT", "[Network]") auto net = bayesnet::Network(); auto raw = RawDatasets("iris", true); buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto res = net.dump_cpt(); std::string expected = R"(* class: (3) : [3] 0.3333 diff --git a/tests/TestBoostA2DE.cc b/tests/TestBoostA2DE.cc index b841bc3..41e7dd7 100644 --- a/tests/TestBoostA2DE.cc +++ b/tests/TestBoostA2DE.cc @@ -17,7 +17,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") { auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::BoostA2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 342); REQUIRE(clf.getNumberOfEdges() == 684); REQUIRE(clf.getNotes().size() == 3); @@ -32,7 +32,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // auto raw = RawDatasets("glass", true); // auto clf = bayesnet::BoostAODE(); // clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, 
raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNotes().size() == 2); @@ -44,7 +44,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // auto raw = RawDatasets("glass", true); // auto clf = bayesnet::BoostAODE(); // clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNotes().size() == 2); @@ -60,7 +60,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"convergence", true}, // {"select_features","CFS"}, // }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 72); // REQUIRE(clf.getNumberOfEdges() == 120); // REQUIRE(clf.getNotes().size() == 2); @@ -75,7 +75,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // { // auto raw = RawDatasets("iris", true); // auto clf = bayesnet::BoostAODE(false); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // auto score_proba = clf.score(raw.Xv, raw.yv); // auto pred_proba = clf.predict_proba(raw.Xv); // clf.setHyperparameters({ @@ -104,7 +104,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"maxTolerance", 1}, // {"convergence", false}, // }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // auto score = clf.score(raw.Xv, raw.yv); // auto scoret = clf.score(raw.Xt, raw.yt); // INFO("BoostAODE order: " + order); @@ -136,7 +136,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // for (const 
auto& hyper : bad_hyper_fit.items()) { // INFO("BoostAODE hyper: " + hyper.value().dump()); // clf.setHyperparameters(hyper.value()); -// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); +// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); // } // } @@ -151,7 +151,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"block_update", false}, // {"convergence_best", false}, // }); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 210); // REQUIRE(clf.getNumberOfEdges() == 378); // REQUIRE(clf.getNotes().size() == 1); @@ -172,13 +172,13 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"convergence_best", true}, // }; // clf.setHyperparameters(hyperparameters); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // auto score_best = clf.score(raw.X_test, raw.y_test); // REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); // // Now we will set the hyperparameter to use the last accuracy // hyperparameters["convergence_best"] = false; // clf.setHyperparameters(hyperparameters); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // auto score_last = clf.score(raw.X_test, raw.y_test); // REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); // } @@ -193,7 +193,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"maxTolerance", 3}, // {"convergence", true}, // }); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, 
raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 868); // REQUIRE(clf.getNumberOfEdges() == 1724); // REQUIRE(clf.getNotes().size() == 3); diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc index 66fa7fb..1a8a0f0 100644 --- a/tests/TestBoostAODE.cc +++ b/tests/TestBoostAODE.cc @@ -18,7 +18,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "CFS"} }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -30,7 +30,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -42,7 +42,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -58,7 +58,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]") {"convergence", true}, {"select_features","CFS"}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, 
raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfEdges() == 120); REQUIRE(clf.getNotes().size() == 2); @@ -73,7 +73,7 @@ TEST_CASE("Voting vs proba", "[BoostAODE]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::BoostAODE(false); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score_proba = clf.score(raw.Xv, raw.yv); auto pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({ @@ -102,7 +102,7 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]") {"maxTolerance", 1}, {"convergence", false}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); INFO("BoostAODE order: " << order); @@ -134,7 +134,7 @@ TEST_CASE("Oddities", "[BoostAODE]") for (const auto& hyper : bad_hyper_fit.items()) { INFO("BoostAODE hyper: " << hyper.value().dump()); clf.setHyperparameters(hyper.value()); - REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); + REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); } } @@ -149,7 +149,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]") {"block_update", false}, {"convergence_best", false}, }); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 210); REQUIRE(clf.getNumberOfEdges() == 378); REQUIRE(clf.getNotes().size() == 1); @@ -170,13 +170,13 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]") {"convergence_best", true}, }; clf.setHyperparameters(hyperparameters); - clf.fit(raw.X_train, raw.y_train, raw.features, 
raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_best = clf.score(raw.X_test, raw.y_test); REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); // Now we will set the hyperparameter to use the last accuracy hyperparameters["convergence_best"] = false; clf.setHyperparameters(hyperparameters); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_last = clf.score(raw.X_test, raw.y_test); REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); } @@ -191,7 +191,7 @@ TEST_CASE("Block Update", "[BoostAODE]") {"maxTolerance", 3}, {"convergence", true}, }); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 868); REQUIRE(clf.getNumberOfEdges() == 1724); REQUIRE(clf.getNotes().size() == 3); diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 5b29178..5ee5d40 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,7 +16,7 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "1.2.0" }, + { "mdlp", "1.2.1" }, { "Folding", "1.1.0" }, { "json", "3.11" }, { "ArffFiles", "1.0.0" } diff --git a/tests/TestUtils.h b/tests/TestUtils.h index 96b6775..b986278 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -14,6 +14,7 @@ #include #include #include +#include class RawDatasets { @@ -32,6 +33,7 @@ public: bool discretize; int num_samples = 0; bool shuffle = false; + bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::OLD_LAPLACE; private: std::string to_string() { From ca0ae4dacfca1282f67017c5809e45a3bf0bfb2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 13 Jun 2024 09:11:47 
+0200 Subject: [PATCH 04/18] Refactor Cestnik smoothing factor assuming m=1 --- README.md | 2 +- bayesnet/network/Network.cc | 4 +-- tests/TestBayesNetwork.cc | 69 +++++++++++++++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f387482..bdc77a3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-96,9%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-97,0%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 419585b..d028bb6 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -204,8 +204,8 @@ namespace bayesnet { case Smoothing_t::LAPLACE: smoothing_factor = 1.0; break; - case Smoothing_t::CESTNIK: - smoothing_factor = n_samples / numStates; + case Smoothing_t::CESTNIK: // Considering m=1 pa = 1/numStates + smoothing_factor = 1 / numStates; break; default: throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast(smoothing))); diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index b1d6911..3638a19 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -15,6 +15,7 @@ #include "bayesnet/network/Node.h" #include "bayesnet/utils/bayesnetUtils.h" +const double threshold = 1e-4; void 
buildModel(bayesnet::Network& net, const std::vector& features, const std::string& className) { std::vector> network = { {0, 1}, {0, 2}, {1, 3} }; @@ -29,13 +30,11 @@ void buildModel(bayesnet::Network& net, const std::vector& features net.addEdge(className, feature); } } - TEST_CASE("Test Bayesian Network", "[Network]") { auto raw = RawDatasets("iris", true); auto net = bayesnet::Network(); - double threshold = 1e-4; SECTION("Test get features") { @@ -459,3 +458,69 @@ TEST_CASE("Dump CPT", "[Network]") REQUIRE(res == expected); } +TEST_CASE("Test Smoothing", "[Network]") +{ + /* + + Tomando m = 1 Pa = 0.5 + Si estoy calculando P(A | C), con C en{ 0,1,2 } y tengo : + AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 } + Entonces: + P(A = 1 | C = 0) = (3 + 1 / 2 * 1) / (4 + 1) = 3.5 / 5 + P(A = 0 | C = 0) = (1 + 1 / 2 * 1) / (4 + 1) = 1.5 / 5 + Donde m aquí es el número de veces de C = 0 que es la que condiciona y la a priori vuelve a ser sobre A que es sobre las que estaríamos calculando esas marginales. 
+ P(A = 1 | C = 1) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 4 + P(A = 0 | C = 1) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 4 + P(A = 1 | C = 2) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 4 + P(A = 0 | C = 2) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 4 + En realidad es parecido a Laplace, que en este caso p.e. con C = 0 sería + P(A = 1 | C = 0) = (3 + 1) / (4 + 2) = 4 / 6 + P(A = 0 | C = 0) = (1 + 1) / (4 + 2) = 2 / 6 + */ + auto net = bayesnet::Network(); + net.addNode("A"); + net.addNode("C"); + net.addEdge("C", "A"); + std::vector C = { 1, 2, 1, 0, 0, 2, 0, 1, 0, 2 }; + std::vector> A = { { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } }; + std::map> states = { { "A", {0, 1} }, { "C", {0, 1, 2} } }; + auto weights = std::vector(C.size(), 1); + // + // Laplace + // + net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::LAPLACE); + auto cpt_c_laplace = net.getNodes().at("C")->getCPT(); + REQUIRE(cpt_c_laplace.size(0) == 3); + auto laplace_c = std::vector({ 0.3846, 0.3077, 0.3077 }); + for (int i = 0; i < laplace_c.size(); ++i) { + REQUIRE(cpt_c_laplace.index({ i }).item() == Catch::Approx(laplace_c[i]).margin(threshold)); + } + auto cpt_a_laplace = net.getNodes().at("A")->getCPT(); + REQUIRE(cpt_a_laplace.size(0) == 2); + REQUIRE(cpt_a_laplace.size(1) == 3); + auto laplace_a = std::vector>({ {0.3333, 0.4000,0.4000}, {0.6667, 0.6000, 0.6000} }); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + REQUIRE(cpt_a_laplace.index({ i, j }).item() == Catch::Approx(laplace_a[i][j]).margin(threshold)); + } + } + // + // Cestnik + // + net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::CESTNIK); + auto cpt_c_cestnik = net.getNodes().at("C")->getCPT(); + REQUIRE(cpt_c_cestnik.size(0) == 3); + auto cestnik_c = std::vector({ 0.3939, 0.3030, 0.3030 }); + for (int i = 0; i < laplace_c.size(); ++i) { + REQUIRE(cpt_c_cestnik.index({ i }).item() == Catch::Approx(cestnik_c[i]).margin(threshold)); + } + auto cpt_a_cestnik = net.getNodes().at("A")->getCPT(); + 
REQUIRE(cpt_a_cestnik.size(0) == 2); + REQUIRE(cpt_a_cestnik.size(1) == 3); + auto cestnik_a = std::vector>({ {0.3000, 0.3750, 0.3750}, {0.7000, 0.6250, 0.6250} }); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + REQUIRE(cpt_a_cestnik.index({ i, j }).item() == Catch::Approx(cestnik_a[i][j]).margin(threshold)); + } + } +} From 3eb61905fb8ae57339a6c93dc9b60dd57b32d026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 13 Jun 2024 12:33:54 +0200 Subject: [PATCH 05/18] Upgrade ArffFiles Module version --- tests/TestModulesVersions.cc | 2 +- tests/lib/Files | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 5ee5d40..5c9cc83 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -19,7 +19,7 @@ std::map modules = { { "mdlp", "1.2.1" }, { "Folding", "1.1.0" }, { "json", "3.11" }, - { "ArffFiles", "1.0.0" } + { "ArffFiles", "1.1.0" } }; TEST_CASE("MDLP", "[Modules]") diff --git a/tests/lib/Files b/tests/lib/Files index dbefa02..a531692 160000 --- a/tests/lib/Files +++ b/tests/lib/Files @@ -1 +1 @@ -Subproject commit dbefa02d9c0ca0f029f77e744cd80cb0150725c8 +Subproject commit a5316928d408266aa425f64131ab0f592b010a8d From fa26aa80f7407d217b1a21360956e2f3f1dcc52b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 13 Jun 2024 15:04:15 +0200 Subject: [PATCH 06/18] Rename OLD_LAPLACE to ORIGINAL --- bayesnet/classifiers/Proposal.cc | 2 +- bayesnet/network/Network.cc | 2 +- bayesnet/network/Network.h | 2 +- tests/TestUtils.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index a5d5f12..6fce507 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -70,7 +70,7 @@ namespace bayesnet { states[pFeatures[index]] = xStates; } const torch::Tensor weights = 
torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); - model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::OLD_LAPLACE); + model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL); } return states; } diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index d028bb6..b19a054 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -198,7 +198,7 @@ namespace bayesnet { double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; switch (smoothing) { - case Smoothing_t::OLD_LAPLACE: + case Smoothing_t::ORIGINAL: smoothing_factor = 1.0 / n_samples; break; case Smoothing_t::LAPLACE: diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index b3417cd..3485e64 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -14,7 +14,7 @@ namespace bayesnet { enum class Smoothing_t { NONE = -1, - OLD_LAPLACE = 0, + ORIGINAL = 0, LAPLACE, CESTNIK }; diff --git a/tests/TestUtils.h b/tests/TestUtils.h index b986278..652680b 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -33,7 +33,7 @@ public: bool discretize; int num_samples = 0; bool shuffle = false; - bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::OLD_LAPLACE; + bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL; private: std::string to_string() { From 0b31780d390ffc41d848e4e0aa6019fd5da42924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 18 Jun 2024 23:18:24 +0200 Subject: [PATCH 07/18] Add Thread max spawning to Network --- bayesnet/network/Network.cc | 97 +++++++++++++++++++++++++++---------- bayesnet/network/Network.h | 3 +- 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index b19a054..d034a41 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -6,6 +6,7 @@ #include #include 
+#include #include #include #include "Network.h" @@ -13,10 +14,17 @@ namespace bayesnet { Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 } { + maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); + if (maxThreadsRunning < 1) { + maxThreadsRunning = 1; + } } Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 } { - + maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); + if (maxThreadsRunning < 1 || maxT > 1) { + maxThreadsRunning = 1; + } } Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples) @@ -192,30 +200,52 @@ namespace bayesnet { { setStates(states); std::vector threads; + std::mutex mtx; + std::condition_variable cv; + size_t activeThreads = 0; const double n_samples = static_cast(samples.size(1)); + + auto worker = [&](std::pair>& node) { + { + std::unique_lock lock(mtx); + cv.wait(lock, [&] { return activeThreads < maxThreadsRunning; }); + ++activeThreads; + } + + double numStates = static_cast(node.second->getNumStates()); + double smoothing_factor = 0.0; + + switch (smoothing) { + case Smoothing_t::ORIGINAL: + smoothing_factor = 1.0 / n_samples; + break; + case Smoothing_t::LAPLACE: + smoothing_factor = 1.0; + break; + case Smoothing_t::CESTNIK: + smoothing_factor = 1 / numStates; + break; + default: + throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast(smoothing))); + } + + node.second->computeCPT(samples, features, smoothing_factor, weights); + + { + std::lock_guard lock(mtx); + --activeThreads; + } + cv.notify_one(); + }; + for (auto& node : nodes) { - threads.emplace_back([this, &node, &weights, n_samples, smoothing]() { - double numStates = static_cast(node.second->getNumStates()); - double smoothing_factor = 0.0; - switch 
(smoothing) { - case Smoothing_t::ORIGINAL: - smoothing_factor = 1.0 / n_samples; - break; - case Smoothing_t::LAPLACE: - smoothing_factor = 1.0; - break; - case Smoothing_t::CESTNIK: // Considering m=1 pa = 1/numStates - smoothing_factor = 1 / numStates; - break; - default: - throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast(smoothing))); - } - node.second->computeCPT(samples, features, smoothing_factor, weights); - }); + threads.emplace_back(worker, std::ref(node)); } + for (auto& thread : threads) { thread.join(); } + fitted = true; } torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) @@ -340,15 +370,32 @@ namespace bayesnet { std::vector result(classNumStates, 0.0); std::vector threads; std::mutex mtx; - for (int i = 0; i < classNumStates; ++i) { - threads.emplace_back([this, &result, &evidence, i, &mtx]() { - auto completeEvidence = std::map(evidence); - completeEvidence[getClassName()] = i; - double factor = computeFactor(completeEvidence); + std::condition_variable cv; + size_t activeThreads = 0; + + auto worker = [&](int i) { + { + std::unique_lock lock(mtx); + cv.wait(lock, [&] { return activeThreads < maxThreadsRunning; }); + ++activeThreads; + } + + auto completeEvidence = std::map(evidence); + completeEvidence[getClassName()] = i; + double factor = computeFactor(completeEvidence); + + { std::lock_guard lock(mtx); result[i] = factor; - }); + --activeThreads; + } + cv.notify_one(); + }; + + for (int i = 0; i < classNumStates; ++i) { + threads.emplace_back(worker, i); } + for (auto& thread : threads) { thread.join(); } diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 3485e64..a14540d 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -56,7 +56,8 @@ namespace bayesnet { private: std::map> nodes; bool fitted; - float maxThreads = 0.95; + float maxThreads = 0.95; // Coefficient to multiply by the number of threads available + int 
maxThreadsRunning; // Effective max number of threads running int classNumStates; std::vector features; // Including classname std::string className; From 716748e18cc65548dc0bd9a4199f07671a94e637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 20 Jun 2024 10:36:09 +0200 Subject: [PATCH 08/18] Add Counting Semaphore class Fix threading in Network --- bayesnet/network/CountingSemaphore.h | 33 ++++++++++++++++ bayesnet/network/Network.cc | 59 ++++++---------------------- 2 files changed, 46 insertions(+), 46 deletions(-) create mode 100644 bayesnet/network/CountingSemaphore.h diff --git a/bayesnet/network/CountingSemaphore.h b/bayesnet/network/CountingSemaphore.h new file mode 100644 index 0000000..6f65e71 --- /dev/null +++ b/bayesnet/network/CountingSemaphore.h @@ -0,0 +1,33 @@ +#ifndef COUNTING_SEMAPHORE_H +#define COUNTING_SEMAPHORE_H +#include +#include +class CountingSemaphore { +public: + explicit CountingSemaphore(size_t max_count) : max_count_(max_count), count_(max_count) {} + + // Acquires a permit, blocking if necessary until one becomes available + void acquire() + { + std::unique_lock lock(mtx_); + cv_.wait(lock, [this]() { return count_ > 0; }); + --count_; + } + + // Releases a permit, potentially waking up a blocked acquirer + void release() + { + std::lock_guard lock(mtx_); + ++count_; + if (count_ <= max_count_) { + cv_.notify_one(); + } + } + +private: + std::mutex mtx_; + std::condition_variable cv_; + size_t max_count_; + size_t count_; +}; +#endif \ No newline at end of file diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index d034a41..aa9eb16 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -5,29 +5,25 @@ // *************************************************************** #include -#include -#include #include #include +#include +#include "CountingSemaphore.h" #include "Network.h" #include "bayesnet/utils/bayesnetUtils.h" namespace bayesnet { 
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 } { - maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); - if (maxThreadsRunning < 1) { - maxThreadsRunning = 1; - } + maxThreadsRunning = std::max(1, static_cast(std::thread::hardware_concurrency() * maxThreads)); + maxThreadsRunning = std::min(maxThreadsRunning, static_cast(std::thread::hardware_concurrency())); } Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 } { - maxThreadsRunning = static_cast(std::thread::hardware_concurrency() * maxThreads); - if (maxThreadsRunning < 1 || maxT > 1) { - maxThreadsRunning = 1; - } + maxThreadsRunning = std::max(1, static_cast(std::thread::hardware_concurrency() * maxThreads)); + maxThreadsRunning = std::min(maxThreadsRunning, static_cast(std::thread::hardware_concurrency())); } Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), - maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples) + maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples), maxThreadsRunning(other.maxThreadsRunning) { if (samples.defined()) samples = samples.clone(); @@ -200,21 +196,12 @@ namespace bayesnet { { setStates(states); std::vector threads; - std::mutex mtx; - std::condition_variable cv; - size_t activeThreads = 0; + CountingSemaphore semaphore(maxThreadsRunning); const double n_samples = static_cast(samples.size(1)); - auto worker = [&](std::pair>& node) { - { - std::unique_lock lock(mtx); - cv.wait(lock, [&] { return activeThreads < maxThreadsRunning; }); - ++activeThreads; - } - + semaphore.acquire(); double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; - switch (smoothing) { case Smoothing_t::ORIGINAL: smoothing_factor = 1.0 / n_samples; @@ -228,24 +215,15 @@ namespace bayesnet { default: throw std::invalid_argument("Smoothing 
method not recognized " + std::to_string(static_cast(smoothing))); } - node.second->computeCPT(samples, features, smoothing_factor, weights); - - { - std::lock_guard lock(mtx); - --activeThreads; - } - cv.notify_one(); + semaphore.release(); }; - for (auto& node : nodes) { threads.emplace_back(worker, std::ref(node)); } - for (auto& thread : threads) { thread.join(); } - fitted = true; } torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) @@ -370,32 +348,21 @@ namespace bayesnet { std::vector result(classNumStates, 0.0); std::vector threads; std::mutex mtx; - std::condition_variable cv; - size_t activeThreads = 0; - + CountingSemaphore semaphore(maxThreadsRunning); auto worker = [&](int i) { - { - std::unique_lock lock(mtx); - cv.wait(lock, [&] { return activeThreads < maxThreadsRunning; }); - ++activeThreads; - } - + semaphore.acquire(); auto completeEvidence = std::map(evidence); completeEvidence[getClassName()] = i; double factor = computeFactor(completeEvidence); - { std::lock_guard lock(mtx); result[i] = factor; - --activeThreads; } - cv.notify_one(); + semaphore.release(); }; - for (int i = 0; i < classNumStates; ++i) { threads.emplace_back(worker, i); } - for (auto& thread : threads) { thread.join(); } From 02bcab01be26a213e302bc628f29e8c4944612ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 21 Jun 2024 09:30:24 +0200 Subject: [PATCH 09/18] Refactor CountingSemaphore as singleton --- bayesnet/ensembles/Ensemble.cc | 51 +++++++--------------------- bayesnet/network/CountingSemaphore.h | 33 ------------------ bayesnet/network/Network.cc | 38 +++++++++++---------- bayesnet/network/Network.h | 2 -- bayesnet/utils/CountingSemaphore.h | 46 +++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 91 deletions(-) delete mode 100644 bayesnet/network/CountingSemaphore.h create mode 100644 bayesnet/utils/CountingSemaphore.h diff --git a/bayesnet/ensembles/Ensemble.cc 
b/bayesnet/ensembles/Ensemble.cc index 68f3ee5..4b71a16 100644 --- a/bayesnet/ensembles/Ensemble.cc +++ b/bayesnet/ensembles/Ensemble.cc @@ -3,14 +3,13 @@ // SPDX-FileType: SOURCE // SPDX-License-Identifier: MIT // *************************************************************** - #include "Ensemble.h" +#include "bayesnet/utils/CountingSemaphore.h" namespace bayesnet { Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) { - }; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) @@ -85,17 +84,9 @@ namespace bayesnet { { auto n_states = models[0]->getClassNumStates(); torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); - auto threads{ std::vector() }; - std::mutex mtx; for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict_proba(X); - std::lock_guard lock(mtx); - y_pred += ypredict * significanceModels[i]; - })); - } - for (auto& thread : threads) { - thread.join(); + auto ypredict = models[i]->predict_proba(X); + y_pred += ypredict * significanceModels[i]; } auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); y_pred /= sum; @@ -105,23 +96,15 @@ namespace bayesnet { { auto n_states = models[0]->getClassNumStates(); std::vector> y_pred(X[0].size(), std::vector(n_states, 0.0)); - auto threads{ std::vector() }; - std::mutex mtx; for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict_proba(X); - assert(ypredict.size() == y_pred.size()); - assert(ypredict[0].size() == y_pred[0].size()); - std::lock_guard lock(mtx); - // Multiply each prediction by the significance of the model and then add it to the final prediction - for (auto j = 0; j < ypredict.size(); ++j) { - std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), 
y_pred[j].begin(), - [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; }); - } - })); - } - for (auto& thread : threads) { - thread.join(); + auto ypredict = models[i]->predict_proba(X); + assert(ypredict.size() == y_pred.size()); + assert(ypredict[0].size() == y_pred[0].size()); + // Multiply each prediction by the significance of the model and then add it to the final prediction + for (auto j = 0; j < ypredict.size(); ++j) { + std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(), + [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; }); + } } auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); //Divide each element of the prediction by the sum of the significances @@ -141,17 +124,9 @@ namespace bayesnet { { // Build a m x n_models tensor with the predictions of each model torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); - auto threads{ std::vector() }; - std::mutex mtx; for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict(X); - std::lock_guard lock(mtx); - y_pred.index_put_({ "...", i }, ypredict); - })); - } - for (auto& thread : threads) { - thread.join(); + auto ypredict = models[i]->predict(X); + y_pred.index_put_({ "...", i }, ypredict); } return voting(y_pred); } diff --git a/bayesnet/network/CountingSemaphore.h b/bayesnet/network/CountingSemaphore.h deleted file mode 100644 index 6f65e71..0000000 --- a/bayesnet/network/CountingSemaphore.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef COUNTING_SEMAPHORE_H -#define COUNTING_SEMAPHORE_H -#include -#include -class CountingSemaphore { -public: - explicit CountingSemaphore(size_t max_count) : max_count_(max_count), count_(max_count) {} - - // Acquires a permit, blocking if necessary until one becomes available - void acquire() - { - std::unique_lock lock(mtx_); - 
cv_.wait(lock, [this]() { return count_ > 0; }); - --count_; - } - - // Releases a permit, potentially waking up a blocked acquirer - void release() - { - std::lock_guard lock(mtx_); - ++count_; - if (count_ <= max_count_) { - cv_.notify_one(); - } - } - -private: - std::mutex mtx_; - std::condition_variable cv_; - size_t max_count_; - size_t count_; -}; -#endif \ No newline at end of file diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index aa9eb16..8aee16d 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -8,22 +8,16 @@ #include #include #include -#include "CountingSemaphore.h" #include "Network.h" #include "bayesnet/utils/bayesnetUtils.h" +#include "bayesnet/utils/CountingSemaphore.h" +#include namespace bayesnet { - Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 } + Network::Network() : fitted{ false }, classNumStates{ 0 } { - maxThreadsRunning = std::max(1, static_cast(std::thread::hardware_concurrency() * maxThreads)); - maxThreadsRunning = std::min(maxThreadsRunning, static_cast(std::thread::hardware_concurrency())); - } - Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 } - { - maxThreadsRunning = std::max(1, static_cast(std::thread::hardware_concurrency() * maxThreads)); - maxThreadsRunning = std::min(maxThreadsRunning, static_cast(std::thread::hardware_concurrency())); } Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), - maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples), maxThreadsRunning(other.maxThreadsRunning) + fitted(other.fitted), samples(other.samples) { if (samples.defined()) samples = samples.clone(); @@ -40,10 +34,6 @@ namespace bayesnet { nodes.clear(); samples = torch::Tensor(); } - float Network::getMaxThreads() const - { - return maxThreads; - } torch::Tensor& Network::getSamples() { return samples; @@ -196,9 
+186,11 @@ namespace bayesnet { { setStates(states); std::vector threads; - CountingSemaphore semaphore(maxThreadsRunning); + auto& semaphore = CountingSemaphore::getInstance(); const double n_samples = static_cast(samples.size(1)); - auto worker = [&](std::pair>& node) { + auto worker = [&](std::pair>& node, int i) { + std::string threadName = "FitWorker-" + std::to_string(i); + pthread_setname_np(pthread_self(), threadName.c_str()); semaphore.acquire(); double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; @@ -218,8 +210,9 @@ namespace bayesnet { node.second->computeCPT(samples, features, smoothing_factor, weights); semaphore.release(); }; + int i = 0; for (auto& node : nodes) { - threads.emplace_back(worker, std::ref(node)); + threads.emplace_back(worker, std::ref(node), i++); } for (auto& thread : threads) { thread.join(); @@ -345,12 +338,21 @@ namespace bayesnet { } std::vector Network::exactInference(std::map& evidence) { + + + //Implementar una cache para acelerar la inferencia. + // Cambiar la estrategia de crear hilos en la inferencia (por nodos como en fit?) 
+ + + std::vector result(classNumStates, 0.0); std::vector threads; std::mutex mtx; - CountingSemaphore semaphore(maxThreadsRunning); + auto& semaphore = CountingSemaphore::getInstance(); auto worker = [&](int i) { semaphore.acquire(); + std::string threadName = "InferenceWorker-" + std::to_string(i); + pthread_setname_np(pthread_self(), threadName.c_str()); auto completeEvidence = std::map(evidence); completeEvidence[getClassName()] = i; double factor = computeFactor(completeEvidence); diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index a14540d..1aea190 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -56,8 +56,6 @@ namespace bayesnet { private: std::map> nodes; bool fitted; - float maxThreads = 0.95; // Coefficient to multiply by the number of threads available - int maxThreadsRunning; // Effective max number of threads running int classNumStates; std::vector features; // Including classname std::string className; diff --git a/bayesnet/utils/CountingSemaphore.h b/bayesnet/utils/CountingSemaphore.h new file mode 100644 index 0000000..25d1ac7 --- /dev/null +++ b/bayesnet/utils/CountingSemaphore.h @@ -0,0 +1,46 @@ +#ifndef COUNTING_SEMAPHORE_H +#define COUNTING_SEMAPHORE_H +#include +#include +#include +#include +#include +#include +#include + +class CountingSemaphore { +public: + static CountingSemaphore& getInstance() + { + static CountingSemaphore instance; + return instance; + } + // Delete copy constructor and assignment operator + CountingSemaphore(const CountingSemaphore&) = delete; + CountingSemaphore& operator=(const CountingSemaphore&) = delete; + void acquire() + { + std::unique_lock lock(mtx_); + cv_.wait(lock, [this]() { return count_ > 0; }); + --count_; + } + void release() + { + std::lock_guard lock(mtx_); + ++count_; + if (count_ <= max_count_) { + cv_.notify_one(); + } + } +private: + CountingSemaphore() + : max_count_(std::max(1u, static_cast(0.95 * std::thread::hardware_concurrency()))), + 
count_(max_count_) + { + } + std::mutex mtx_; + std::condition_variable cv_; + const uint max_count_; + uint count_; +}; +#endif \ No newline at end of file From 8e9090d283883bcd444e519cb9055f4f3ebd778b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 21 Jun 2024 13:58:42 +0200 Subject: [PATCH 10/18] Fix tests --- .vscode/launch.json | 6 +- README.md | 2 +- bayesnet/network/Network.cc | 115 +++++++++++++++++++----------------- bayesnet/network/Network.h | 3 - tests/TestBayesNetwork.cc | 25 ++++---- 5 files changed, 76 insertions(+), 75 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 66b5438..40fbf2f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,11 +14,11 @@ "type": "lldb", "request": "launch", "name": "test", - "program": "${workspaceFolder}/build_debug/tests/TestBayesNet", + "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet", "args": [ - "[Node]" + "[Network]" ], - "cwd": "${workspaceFolder}/build_debug/tests" + "cwd": "${workspaceFolder}/build_Debug/tests" }, { "name": "(gdb) Launch", diff --git a/README.md b/README.md index bdc77a3..7cad08d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-97,0%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-97,1%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git 
a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 8aee16d..0458d3a 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -224,14 +224,34 @@ namespace bayesnet { if (!fitted) { throw std::logic_error("You must call fit() before calling predict()"); } + // Ensure the sample size is equal to the number of features + if (samples.size(0) != features.size() - 1) { + throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } torch::Tensor result; + std::vector threads; + std::mutex mtx; + auto& semaphore = CountingSemaphore::getInstance(); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); - for (int i = 0; i < samples.size(1); ++i) { - const torch::Tensor sample = samples.index({ "...", i }); + auto worker = [&](const torch::Tensor& sample, int i) { + std::string threadName = "PredictWorker-" + std::to_string(i); + pthread_setname_np(pthread_self(), threadName.c_str()); + semaphore.acquire(); auto psample = predict_sample(sample); auto temp = torch::tensor(psample, torch::kFloat64); - // result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); - result.index_put_({ i, "..." }, temp); + { + std::lock_guard lock(mtx); + result.index_put_({ i, "..." 
}, temp); + } + semaphore.release(); + }; + for (int i = 0; i < samples.size(1); ++i) { + const torch::Tensor sample = samples.index({ "...", i }); + threads.emplace_back(worker, sample, i); + } + for (auto& thread : threads) { + thread.join(); } if (proba) return result; @@ -256,18 +276,36 @@ namespace bayesnet { if (!fitted) { throw std::logic_error("You must call fit() before calling predict()"); } - std::vector predictions; + // Ensure the sample size is equal to the number of features + if (tsamples.size() != features.size() - 1) { + throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } + std::vector predictions(tsamples[0].size(), 0); std::vector sample; + std::vector threads; + std::mutex mtx; + auto& semaphore = CountingSemaphore::getInstance(); + auto worker = [&](const std::vector& sample, const int row, std::vector& predictions) { + semaphore.acquire(); + auto classProbabilities = predict_sample(sample); + auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); + int predictedClass = distance(classProbabilities.begin(), maxElem); + { + std::lock_guard lock(mtx); + predictions[row] = predictedClass; + } + semaphore.release(); + }; for (int row = 0; row < tsamples[0].size(); ++row) { sample.clear(); for (int col = 0; col < tsamples.size(); ++col) { sample.push_back(tsamples[col][row]); } - std::vector classProbabilities = predict_sample(sample); - // Find the class with the maximum posterior probability - auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); - int predictedClass = distance(classProbabilities.begin(), maxElem); - predictions.push_back(predictedClass); + threads.emplace_back(worker, sample, row, std::ref(predictions)); + } + for (auto& thread : threads) { + thread.join(); } return predictions; } @@ -278,6 +316,11 @@ namespace bayesnet { if (!fitted) { throw 
std::logic_error("You must call fit() before calling predict_proba()"); } + // Ensure the sample size is equal to the number of features + if (tsamples.size() != features.size() - 1) { + throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); + } std::vector> predictions; std::vector sample; for (int row = 0; row < tsamples[0].size(); ++row) { @@ -303,11 +346,6 @@ namespace bayesnet { // Return 1xn std::vector of probabilities std::vector Network::predict_sample(const std::vector& sample) { - // Ensure the sample size is equal to the number of features - if (sample.size() != features.size() - 1) { - throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) + - ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); - } std::map evidence; for (int i = 0; i < sample.size(); ++i) { evidence[features[i]] = sample[i]; @@ -317,56 +355,23 @@ namespace bayesnet { // Return 1xn std::vector of probabilities std::vector Network::predict_sample(const torch::Tensor& sample) { - // Ensure the sample size is equal to the number of features - if (sample.size(0) != features.size() - 1) { - throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) + - ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); - } std::map evidence; for (int i = 0; i < sample.size(0); ++i) { evidence[features[i]] = sample[i].item(); } return exactInference(evidence); } - double Network::computeFactor(std::map& completeEvidence) - { - double result = 1.0; - for (auto& node : getNodes()) { - result *= node.second->getFactorValue(completeEvidence); - } - return result; - } std::vector Network::exactInference(std::map& evidence) { - - - //Implementar una cache para acelerar la inferencia. - // Cambiar la estrategia de crear hilos en la inferencia (por nodos como en fit?) 
- - - std::vector result(classNumStates, 0.0); - std::vector threads; - std::mutex mtx; - auto& semaphore = CountingSemaphore::getInstance(); - auto worker = [&](int i) { - semaphore.acquire(); - std::string threadName = "InferenceWorker-" + std::to_string(i); - pthread_setname_np(pthread_self(), threadName.c_str()); - auto completeEvidence = std::map(evidence); - completeEvidence[getClassName()] = i; - double factor = computeFactor(completeEvidence); - { - std::lock_guard lock(mtx); - result[i] = factor; - } - semaphore.release(); - }; + auto completeEvidence = std::map(evidence); for (int i = 0; i < classNumStates; ++i) { - threads.emplace_back(worker, i); - } - for (auto& thread : threads) { - thread.join(); + completeEvidence[getClassName()] = i; + double partial = 1.0; + for (auto& node : getNodes()) { + partial *= node.second->getFactorValue(completeEvidence); + } + result[i] = partial; } // Normalize result double sum = std::accumulate(result.begin(), result.end(), 0.0); diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 1aea190..0210877 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -21,11 +21,9 @@ namespace bayesnet { class Network { public: Network(); - explicit Network(float); explicit Network(const Network&); ~Network() = default; torch::Tensor& getSamples(); - float getMaxThreads() const; void addNode(const std::string&); void addEdge(const std::string&, const std::string&); std::map>& getNodes(); @@ -64,7 +62,6 @@ namespace bayesnet { std::vector predict_sample(const std::vector&); std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); - double computeFactor(std::map&); void completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void 
setStates(const std::map>&); diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 3638a19..0663ca2 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -149,6 +149,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test show") { + INFO("Test show"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -162,6 +163,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test topological_sort") { + INFO("Test topological sort"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -175,6 +177,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test graph") { + INFO("Test graph"); net.addNode("A"); net.addNode("B"); net.addNode("C"); @@ -192,6 +195,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test predict") { + INFO("Test predict"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -201,6 +205,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test predict_proba") { + INFO("Test predict_proba"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -222,6 +227,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test score") { + INFO("Test score"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto score = net.score(raw.Xv, raw.yv); @@ -229,6 +235,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Copy constructor") { + INFO("Test copy constructor"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, 
raw.smoothing); auto net2 = bayesnet::Network(net); @@ -252,6 +259,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") } SECTION("Test oddities") { + INFO("Test oddities"); buildModel(net, raw.features, raw.className); // predict without fitting std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; @@ -270,10 +278,10 @@ TEST_CASE("Test Bayesian Network", "[Network]") netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} }; auto test_tensor2 = bayesnet::vectorToTensor(test2, false); - REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error); - REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)"); - REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error); - REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)"); + REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument); + REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)"); + REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument); + REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)"); // fit with wrong data // Weights auto net2 = bayesnet::Network(); @@ -341,15 +349,6 @@ TEST_CASE("Cicle in Network", "[Network]") REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument); REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph."); } -TEST_CASE("Test max threads constructor", "[Network]") -{ - auto net = bayesnet::Network(); - REQUIRE(net.getMaxThreads() == 0.95f); - auto net2 = bayesnet::Network(4); - REQUIRE(net2.getMaxThreads() == 4); - auto net3 = bayesnet::Network(1.75); - REQUIRE(net3.getMaxThreads() == 1.75); -} TEST_CASE("Edges troubles", "[Network]") { auto net = 
bayesnet::Network(); From 59c1cf5b3b344389992456b185ee155e97b97ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 21 Jun 2024 19:56:35 +0200 Subject: [PATCH 11/18] Fix number of threads spawned --- bayesnet/network/Network.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 0458d3a..b399782 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -190,8 +190,11 @@ namespace bayesnet { const double n_samples = static_cast(samples.size(1)); auto worker = [&](std::pair>& node, int i) { std::string threadName = "FitWorker-" + std::to_string(i); +#if defined(__linux__) pthread_setname_np(pthread_self(), threadName.c_str()); - semaphore.acquire(); +#else + pthread_setname_np(threadName.c_str()); +#endif double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; switch (smoothing) { @@ -212,6 +215,7 @@ namespace bayesnet { }; int i = 0; for (auto& node : nodes) { + semaphore.acquire(); threads.emplace_back(worker, std::ref(node), i++); } for (auto& thread : threads) { @@ -236,8 +240,11 @@ namespace bayesnet { result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); auto worker = [&](const torch::Tensor& sample, int i) { std::string threadName = "PredictWorker-" + std::to_string(i); +#if defined(__linux__) pthread_setname_np(pthread_self(), threadName.c_str()); - semaphore.acquire(); +#else + pthread_setname_np(threadName.c_str()); +#endif auto psample = predict_sample(sample); auto temp = torch::tensor(psample, torch::kFloat64); { @@ -247,6 +254,7 @@ namespace bayesnet { semaphore.release(); }; for (int i = 0; i < samples.size(1); ++i) { + semaphore.acquire(); const torch::Tensor sample = samples.index({ "...", i }); threads.emplace_back(worker, sample, i); } From 9a14133be5d730676e9cbf7dcd3b2c4df2e5ccba Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 23 Jun 2024 13:02:40 +0200 Subject: [PATCH 12/18] Add thread control to vectors predict --- bayesnet/network/Network.cc | 39 +++++++++++++++++++++++++++---------- sample/CMakeLists.txt | 2 +- sample/sample.cc | 4 ++-- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index b399782..ceede5b 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -292,17 +292,18 @@ namespace bayesnet { std::vector predictions(tsamples[0].size(), 0); std::vector sample; std::vector threads; - std::mutex mtx; auto& semaphore = CountingSemaphore::getInstance(); - auto worker = [&](const std::vector& sample, const int row, std::vector& predictions) { - semaphore.acquire(); + auto worker = [&](const std::vector& sample, const int row, int& prediction) { + std::string threadName = "(V)PWorker-" + std::to_string(row); +#if defined(__linux__) + pthread_setname_np(pthread_self(), threadName.c_str()); +#else + pthread_setname_np(threadName.c_str()); +#endif auto classProbabilities = predict_sample(sample); auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); int predictedClass = distance(classProbabilities.begin(), maxElem); - { - std::lock_guard lock(mtx); - predictions[row] = predictedClass; - } + prediction = predictedClass; semaphore.release(); }; for (int row = 0; row < tsamples[0].size(); ++row) { @@ -310,7 +311,8 @@ namespace bayesnet { for (int col = 0; col < tsamples.size(); ++col) { sample.push_back(tsamples[col][row]); } - threads.emplace_back(worker, sample, row, std::ref(predictions)); + semaphore.acquire(); + threads.emplace_back(worker, sample, row, std::ref(predictions[row])); } for (auto& thread : threads) { thread.join(); @@ -329,14 +331,31 @@ namespace bayesnet { throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) + ") does not match the number of features (" + 
std::to_string(features.size() - 1) + ")"); } - std::vector> predictions; + std::vector> predictions(tsamples[0].size(), std::vector(classNumStates, 0.0)); std::vector sample; + std::vector threads; + auto& semaphore = CountingSemaphore::getInstance(); + auto worker = [&](const std::vector& sample, int row, std::vector& predictions) { + std::string threadName = "(V)PWorker-" + std::to_string(row); +#if defined(__linux__) + pthread_setname_np(pthread_self(), threadName.c_str()); +#else + pthread_setname_np(threadName.c_str()); +#endif + std::vector classProbabilities = predict_sample(sample); + predictions = classProbabilities; + semaphore.release(); + }; for (int row = 0; row < tsamples[0].size(); ++row) { sample.clear(); for (int col = 0; col < tsamples.size(); ++col) { sample.push_back(tsamples[col][row]); } - predictions.push_back(predict_sample(sample)); + semaphore.acquire(); + threads.emplace_back(worker, sample, row, std::ref(predictions[row])); + } + for (auto& thread : threads) { + thread.join(); } return predictions; } diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index d50030e..b36cfc7 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(Torch REQUIRED) find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED) include_directories( - lib/Files + ../tests/lib/Files lib/mdlp lib/json/include /usr/local/include diff --git a/sample/sample.cc b/sample/sample.cc index 511230f..478ff85 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -60,9 +60,9 @@ int main(int argc, char* argv[]) auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict std::cout << "Library version: " << clf.getVersion() << std::endl; tie(X, y, features, className, states) = loadDataset(file_name, true); - clf.fit(X, y, features, className, states); + clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE); auto score = clf.score(X, y); - std::cout << "File: " << file_name << " score: " << 
score << std::endl; + std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl; return 0; } From 6fcc15d39aa312d59fbe6ef9d2b3a018c00828a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 24 Jun 2024 12:38:44 +0200 Subject: [PATCH 13/18] Upgrade mdlp library --- lib/mdlp | 2 +- tests/TestModulesVersions.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mdlp b/lib/mdlp index c4e6c04..7b0673f 160000 --- a/lib/mdlp +++ b/lib/mdlp @@ -1 +1 @@ -Subproject commit c4e6c041fe7f769ec24c0a2bd66a5aff482fd630 +Subproject commit 7b0673fd4b05135ce90f711b2dc410eb1a2fa5ff diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 5c9cc83..5a64fe4 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,7 +16,7 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "1.2.1" }, + { "mdlp", "1.2.2" }, { "Folding", "1.1.0" }, { "json", "3.11" }, { "ArffFiles", "1.1.0" } From 26eb58b10436859a6e37935340def236cd4ceb2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 4 Jul 2024 18:52:41 +0200 Subject: [PATCH 14/18] Forbids to insert the same edge twice --- Makefile | 2 +- bayesnet/CMakeLists.txt | 2 +- bayesnet/network/Network.cc | 6 +++++ bayesnet/network/Node.cc | 1 + lib/mdlp | 2 +- tests/CMakeLists.txt | 2 +- tests/TestBayesNetwork.cc | 46 ++++++++++++++++++++++++++++++++++-- tests/TestModulesVersions.cc | 2 +- 8 files changed, 56 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 3515580..d597d4a 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu coverage: ## Run tests and generate coverage report (build/index.html) @echo ">>> Building tests with coverage..." - @which $(lcov) || (echo ">>> Please install lcov"; exit 1) + @which $(lcov) || (echo ">>> Please install lcov"; exit 1) @if [ !
-f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi @echo ">>> Building report..." @cd $(f_debug)/tests; \ diff --git a/bayesnet/CMakeLists.txt b/bayesnet/CMakeLists.txt index 2aef26a..62929ce 100644 --- a/bayesnet/CMakeLists.txt +++ b/bayesnet/CMakeLists.txt @@ -1,5 +1,5 @@ include_directories( - ${BayesNet_SOURCE_DIR}/lib/mdlp + ${BayesNet_SOURCE_DIR}/lib/mdlp/src ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR} diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index ceede5b..f26e19d 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -95,6 +95,12 @@ namespace bayesnet { if (nodes.find(child) == nodes.end()) { throw std::invalid_argument("Child node " + child + " does not exist"); } + // Check if the edge is already in the graph + for (auto& node : nodes[parent]->getChildren()) { + if (node->getName() == child) { + throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists"); + } + } // Temporarily add edge to check for cycles nodes[parent]->addChild(nodes[child].get()); nodes[child]->addParent(nodes[parent].get()); diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index fcb1e53..28eba72 100644 --- a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -119,6 +119,7 @@ namespace bayesnet { cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item()); } // Normalize the counts + // Divide each row by the sum of the row cpTable = cpTable / cpTable.sum(0); } float Node::getFactorValue(std::map& evidence) diff --git a/lib/mdlp b/lib/mdlp index 7b0673f..e36d9af 160000 --- a/lib/mdlp +++ b/lib/mdlp @@ -1 +1 @@ -Subproject commit 7b0673fd4b05135ce90f711b2dc410eb1a2fa5ff +Subproject commit e36d9af8f939a57266e30ca96e1cf84fc7d107b0 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1ff33ee..9abab26 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,7 +2,7 
@@ if(ENABLE_TESTING) include_directories( ${BayesNet_SOURCE_DIR}/tests/lib/Files ${BayesNet_SOURCE_DIR}/lib/folding - ${BayesNet_SOURCE_DIR}/lib/mdlp + ${BayesNet_SOURCE_DIR}/lib/mdlp/src ${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR} ${CMAKE_BINARY_DIR}/configured_files/include diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 0663ca2..9a22214 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -358,6 +358,9 @@ TEST_CASE("Edges troubles", "[Network]") REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist"); REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument); REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist"); + net.addEdge("A", "B"); + REQUIRE_THROWS_AS(net.addEdge("A", "B"), std::invalid_argument); + REQUIRE_THROWS_WITH(net.addEdge("A", "B"), "Edge A -> B already exists"); } TEST_CASE("Dump CPT", "[Network]") { @@ -457,10 +460,9 @@ TEST_CASE("Dump CPT", "[Network]") REQUIRE(res == expected); } -TEST_CASE("Test Smoothing", "[Network]") +TEST_CASE("Test Smoothing A", "[Network]") { /* - Tomando m = 1 Pa = 0.5 Si estoy calculando P(A | C), con C en{ 0,1,2 } y tengo : AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 } @@ -523,3 +525,43 @@ TEST_CASE("Test Smoothing", "[Network]") } } } +TEST_CASE("Test Smoothing B", "[Network]") +{ + auto net = bayesnet::Network(); + net.addNode("X"); + net.addNode("Y"); + net.addNode("Z"); + net.addNode("C"); + net.addEdge("C", "X"); + net.addEdge("C", "Y"); + net.addEdge("C", "Z"); + net.addEdge("Y", "Z"); + std::vector C = { 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1 }; + std::vector> Data = { + { 0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0}, + { 1,2,0,2,2,2,1,0,0,1,1,1,0,1,2,1,0,2}, + { 2,1,3,3,2,0,0,1,3,2,1,2,2,3,0,0,1,2} + }; + std::map> states = { + { "X", {0, 1} }, + { "Y", {0, 1, 2} }, + { "Z", {0, 1, 2, 3} }, + { "C", {0, 1} } + }; + auto weights = std::vector(C.size(), 1); + // Simple + std::cout << 
"LAPLACE\n"; + net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::LAPLACE); + std::cout << net.dump_cpt(); + std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl; + std::cout << "ORIGINAL\n"; + net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::ORIGINAL); + std::cout << net.dump_cpt(); + std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl; + std::cout << "CESTNIK\n"; + net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::CESTNIK); + std::cout << net.dump_cpt(); + std::cout << "Predict proba of {0, 1, 2} y {1, 2, 3} = " << net.predict_proba({ {0, 1}, {1, 2}, {2, 3} }) << std::endl; + + +} diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 5a64fe4..fdcaa05 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,7 +16,7 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "1.2.2" }, + { "mdlp", "2.0.0" }, { "Folding", "1.1.0" }, { "json", "3.11" }, { "ArffFiles", "1.1.0" } From 35ca862ecaeaa93922b3947aca6b130b97876632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 7 Jul 2024 21:06:59 +0200 Subject: [PATCH 15/18] Don't allow add node nor add edge on fitted networks --- bayesnet/network/Network.cc | 17 +++++++++++++++++ bayesnet/network/Node.cc | 12 +++++++----- lib/json | 2 +- lib/mdlp | 2 +- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index f26e19d..b6482db 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -12,6 +12,7 @@ #include "bayesnet/utils/bayesnetUtils.h" #include "bayesnet/utils/CountingSemaphore.h" #include +#include namespace bayesnet { Network::Network() : fitted{ false }, classNumStates{ 0 } { @@ -40,6 +41,9 @@ namespace bayesnet { 
} void Network::addNode(const std::string& name) { + if (fitted) { + throw std::invalid_argument("Cannot add node to a fitted network. Initialize first."); + } if (name == "") { throw std::invalid_argument("Node name cannot be empty"); } @@ -89,6 +93,9 @@ namespace bayesnet { } void Network::addEdge(const std::string& parent, const std::string& child) { + if (fitted) { + throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first."); + } if (nodes.find(parent) == nodes.end()) { throw std::invalid_argument("Parent node " + parent + " does not exist"); } @@ -227,6 +234,16 @@ namespace bayesnet { for (auto& thread : threads) { thread.join(); } + // std::fstream file; + // file.open("cpt.txt", std::fstream::out | std::fstream::app); + // file << std::string(80, '*') << std::endl; + // for (const auto& item : graph("Test")) { + // file << item << std::endl; + // } + // file << std::string(80, '-') << std::endl; + // file << dump_cpt() << std::endl; + // file << std::string(80, '=') << std::endl; + // file.close(); fitted = true; } torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index 28eba72..44fc900 100644 --- a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -104,16 +104,18 @@ namespace bayesnet { throw std::logic_error("Feature " + name + " not found in dataset"); } int name_index = pos - features.begin(); + c10::List> coordinates; for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { - c10::List> coordinates; - coordinates.push_back(dataset.index({ name_index, n_sample })); + coordinates.clear(); + auto sample = dataset.index({ "...", n_sample }); + coordinates.push_back(sample[name_index]); for (auto parent : parents) { pos = find(features.begin(), features.end(), parent->getName()); if (pos == features.end()) { throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset"); } int parent_index = 
pos - features.begin(); - coordinates.push_back(dataset.index({ parent_index, n_sample })); + coordinates.push_back(sample[parent_index]); } // Increment the count of the corresponding coordinate cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item()); @@ -134,8 +136,8 @@ namespace bayesnet { { auto output = std::vector(); auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : ""; - output.push_back(name + " [shape=circle" + suffix + "] \n"); - transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); }); + output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n"); + transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; }); return output; } } \ No newline at end of file diff --git a/lib/json b/lib/json index 8c391e0..960b763 160000 --- a/lib/json +++ b/lib/json @@ -1 +1 @@ -Subproject commit 8c391e04fe4195d8be862c97f38cfe10e2a3472e +Subproject commit 960b763ecd144f156d05ec61f577b04107290137 diff --git a/lib/mdlp b/lib/mdlp index e36d9af..2db60e0 160000 --- a/lib/mdlp +++ b/lib/mdlp @@ -1 +1 @@ -Subproject commit e36d9af8f939a57266e30ca96e1cf84fc7d107b0 +Subproject commit 2db60e007d70da876379373c53b6421f281daeac From 0bbc8328a9688a64a0c3986c707e83d83587d454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 8 Jul 2024 13:27:55 +0200 Subject: [PATCH 16/18] Change cpt table type to float --- bayesnet/network/Node.cc | 8 ++++---- bayesnet/network/Node.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index 44fc900..b62e275 100644 --- a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -97,7 +97,7 @@ namespace bayesnet { dimensions.push_back(numStates); transform(parents.begin(), 
parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); // Create a tensor of zeros with the dimensions of the CPT - cpTable = torch::zeros(dimensions, torch::kFloat) + smoothing; + cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing; // Fill table with counts auto pos = find(features.begin(), features.end(), name); if (pos == features.end()) { @@ -118,19 +118,19 @@ namespace bayesnet { coordinates.push_back(sample[parent_index]); } // Increment the count of the corresponding coordinate - cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item()); + cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true); } // Normalize the counts // Divide each row by the sum of the row cpTable = cpTable / cpTable.sum(0); } - float Node::getFactorValue(std::map& evidence) + double Node::getFactorValue(std::map& evidence) { c10::List> coordinates; // following predetermined order of indices in the cpTable (see Node.h) coordinates.push_back(at::tensor(evidence[name])); transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); - return cpTable.index({ coordinates }).item(); + return cpTable.index({ coordinates }).item(); } std::vector Node::graph(const std::string& className) { diff --git a/bayesnet/network/Node.h b/bayesnet/network/Node.h index dc21119..b950d70 100644 --- a/bayesnet/network/Node.h +++ b/bayesnet/network/Node.h @@ -28,7 +28,7 @@ namespace bayesnet { void setNumStates(int); unsigned minFill(); std::vector graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format - float getFactorValue(std::map&); + double getFactorValue(std::map&); private: std::string name; std::vector parents; From 0159c397fa74e3639f71a0cf618d7afac7429cc8 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 11 Jul 2024 12:29:57 +0200 Subject: [PATCH 17/18] Update optimization flag in CMakeLists --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e35773..784ed4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast") if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline") endif() From 2d34eb8c898b0f07d4aadf754063661ecee9a420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 31 Aug 2024 12:43:39 +0200 Subject: [PATCH 18/18] Update Makefile to get parallel info from env --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d597d4a..6416950 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,6 @@ plantuml = plantuml lcov = lcov genhtml = genhtml dot = dot -n_procs = -j 16 docsrcdir = docs/manual mansrcdir = docs/man3 mandestdir = /usr/local/share/man @@ -59,10 +58,10 @@ diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/ @$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg buildd: ## Build the debug targets - cmake --build $(f_debug) -t $(app_targets) $(n_procs) + cmake --build $(f_debug) -t $(app_targets) --parallel buildr: ## Build the release targets - cmake --build $(f_release) -t $(app_targets) $(n_procs) + cmake --build $(f_release) -t $(app_targets) --parallel clean: ## Clean the tests info @echo ">>> Cleaning Debug BayesNet 
tests..."; @@ -106,7 +105,7 @@ opt = "" test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section @echo ">>> Running BayesNet tests..."; @$(MAKE) clean - @cmake --build $(f_debug) -t $(test_targets) $(n_procs) + @cmake --build $(f_debug) -t $(test_targets) --parallel @for t in $(test_targets); do \ echo ">>> Running $$t...";\ if [ -f $(f_debug)/tests/$$t ]; then \