From e1c4221c115897c666af69e99927101b9a76a143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 20 Feb 2024 10:58:21 +0100 Subject: [PATCH 1/7] Add predict_voting and predict_prob to ensemble --- src/BayesNet/Ensemble.cc | 66 +++++++++++++++++++++++++++++++--------- src/BayesNet/Ensemble.h | 22 ++++++++------ 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 4702733..2eedc8d 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -2,8 +2,8 @@ namespace bayesnet { - Ensemble::Ensemble() : Classifier(Network()), n_models(0) {} - + Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) {}; + const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; void Ensemble::trainModel(const torch::Tensor& weights) { n_models = models.size(); @@ -12,6 +12,7 @@ namespace bayesnet { models[i]->fit(dataset, features, className, states); } } + std::vector Ensemble::voting(torch::Tensor& y_pred) { auto y_pred_ = y_pred.accessor(); @@ -31,11 +32,55 @@ namespace bayesnet { } return y_pred_final; } + std::vector Ensemble::predict(std::vector>& X) + { + if (!fitted) { + throw std::logic_error(ENSEMBLE_NOT_FITTED); + } + return predict_voting ? do_predict_voting(X) : do_predict_prob(X); + + } torch::Tensor Ensemble::predict(torch::Tensor& X) { if (!fitted) { - throw std::logic_error("Ensemble has not been fitted"); + throw std::logic_error(ENSEMBLE_NOT_FITTED); } + return predict_voting ? do_predict_voting(X) : do_predict_prob(X); + } + torch::Tensor Ensemble::do_predict_prob(torch::Tensor& X) + { + torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kFloat32); + // auto threads{ std::vector() }; + // std::mutex mtx; + // for (auto i = 0; i < n_models; ++i) { + // threads.push_back(std::thread([&, i]() { + // auto ypredict = models[i]->predict(X); + // std::lock_guard lock(mtx); + // y_pred.index_put_({ "...", i }, ypredict); + // })); + // } + // for (auto& thread : threads) { + // thread.join(); + // } + return y_pred; + } + std::vector Ensemble::do_predict_prob(std::vector>& X) + { + // long m_ = X[0].size(); + // long n_ = X.size(); + // vector> Xd(n_, vector(m_, 0)); + // for (auto i = 0; i < n_; i++) { + // Xd[i] = vector(X[i].begin(), X[i].end()); + // } + // torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32); + // for (auto i = 0; i < n_models; ++i) { + // y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32)); + // } + // return voting(y_pred); + return std::vector(); + } + torch::Tensor Ensemble::do_predict_voting(torch::Tensor& X) + { torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); auto threads{ std::vector() }; std::mutex mtx; @@ -51,11 +96,8 @@ namespace bayesnet { } return torch::tensor(voting(y_pred)); } - std::vector Ensemble::predict(std::vector>& X) + std::vector Ensemble::do_predict_voting(std::vector>& X) { - if (!fitted) { - throw std::logic_error("Ensemble has not been fitted"); - } long m_ = X[0].size(); long n_ = X.size(); std::vector> Xd(n_, std::vector(m_, 0)); @@ -70,10 +112,7 @@ namespace bayesnet { } float Ensemble::score(torch::Tensor& X, torch::Tensor& y) { - if (!fitted) { - throw std::logic_error("Ensemble has not been fitted"); - } - auto y_pred = predict(X); + auto y_pred = predict_voting ? 
do_predict_voting(X) : do_predict_prob(X); int correct = 0; for (int i = 0; i < y_pred.size(0); ++i) { if (y_pred[i].item() == y[i].item()) { @@ -84,10 +123,7 @@ namespace bayesnet { } float Ensemble::score(std::vector>& X, std::vector& y) { - if (!fitted) { - throw std::logic_error("Ensemble has not been fitted"); - } - auto y_pred = predict(X); + auto y_pred = predict_voting ? do_predict_voting(X) : do_predict_prob(X); int correct = 0; for (int i = 0; i < y_pred.size(); ++i) { if (y_pred[i] == y[i]) { diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index 07fda9b..b748235 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -7,19 +7,15 @@ namespace bayesnet { class Ensemble : public Classifier { - private: - Ensemble& build(std::vector& features, std::string className, std::map>& states); - protected: - unsigned n_models; - std::vector> models; - std::vector significanceModels; - void trainModel(const torch::Tensor& weights) override; - std::vector voting(torch::Tensor& y_pred); public: - Ensemble(); + Ensemble(bool predict_voting = true); virtual ~Ensemble() = default; torch::Tensor predict(torch::Tensor& X) override; std::vector predict(std::vector>& X) override; + torch::Tensor do_predict_voting(torch::Tensor& X); + std::vector do_predict_voting(std::vector>& X); + torch::Tensor do_predict_prob(torch::Tensor& X); + std::vector do_predict_prob(std::vector>& X); float score(torch::Tensor& X, torch::Tensor& y) override; float score(std::vector>& X, std::vector& y) override; int getNumberOfNodes() const override; @@ -34,6 +30,14 @@ namespace bayesnet { void dump_cpt() const override { } + protected: + unsigned n_models; + std::vector> models; + std::vector significanceModels; + void trainModel(const torch::Tensor& weights) override; + std::vector voting(torch::Tensor& y_pred); + private: + bool predict_voting; }; } #endif -- 2.45.2 From 443e5cc88204452f4c40e195e0c0572904f676c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 22 Feb 2024 11:45:40 +0100 Subject: [PATCH 2/7] Implement classifier.predict_proba & test --- CHANGELOG.md | 8 ++++- src/BayesNet/BaseClassifier.h | 3 ++ src/BayesNet/BoostAODE.cc | 2 +- src/BayesNet/Classifier.cc | 34 ++++++++++++++++--- src/BayesNet/Classifier.h | 51 +++++++++++++++-------------- src/BayesNet/Ensemble.cc | 61 ++++++++++++++++++++++------------- src/BayesNet/Ensemble.h | 4 +-- src/BayesNet/Network.h | 34 +++++++++---------- tests/TestBayesModels.cc | 40 ++++++++++++++++++++++- 9 files changed, 165 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fa6c5a..24c41a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## =[Unreleased] + +### Added + +- predict_proba method in Classifier +- predict_proba method in BoostAODE +- predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting) ## [1.0.2] - 2024-02-20 diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index e84f494..8e5631c 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -16,12 +16,15 @@ namespace bayesnet { virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; std::vector virtual predict(std::vector>& X) = 0; + torch::Tensor virtual predict_proba(torch::Tensor& X) = 0; + std::vector> virtual predict_proba(std::vector>& X) = 0; status_t virtual getStatus() const = 0; float virtual score(std::vector>& X, std::vector& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; int virtual getNumberOfNodes()const = 0; int virtual getNumberOfEdges()const = 0; int virtual getNumberOfStates() const = 0; + int virtual getClassNumStates() const = 0; std::vector virtual show() const = 0; std::vector virtual graph(const std::string& title = "") const = 0; virtual std::string getVersion() = 0; diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 959cada..fe6ab72 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -8,7 +8,7 @@ #include "folding.hpp" namespace bayesnet { - BoostAODE::BoostAODE() : Ensemble() + BoostAODE::BoostAODE() : Ensemble(false) { validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" }; diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index c8ee3ef..176f369 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -3,6 +3,7 @@ namespace bayesnet { Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} + const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) { this->features = features; @@ -87,14 +88,14 @@ namespace bayesnet { torch::Tensor Classifier::predict(torch::Tensor& X) { if (!fitted) { - throw std::logic_error("Classifier has not been fitted"); + throw std::logic_error(CLASSIFIER_NOT_FITTED); } return model.predict(X); } std::vector Classifier::predict(std::vector>& X) { if (!fitted) { - throw std::logic_error("Classifier has not been fitted"); + throw std::logic_error(CLASSIFIER_NOT_FITTED); } auto m_ = X[0].size(); auto n_ = X.size(); @@ -105,10 +106,31 @@ namespace bayesnet { auto yp = model.predict(Xd); return yp; } + torch::Tensor Classifier::predict_proba(torch::Tensor& X) + { + if (!fitted) { + throw std::logic_error(CLASSIFIER_NOT_FITTED); + } + return model.predict_proba(X); + } + std::vector> Classifier::predict_proba(std::vector>& X) + { + if (!fitted) { + throw std::logic_error(CLASSIFIER_NOT_FITTED); + } + auto m_ = X[0].size(); + auto n_ = X.size(); + std::vector> Xd(n_, std::vector(m_, 0)); + for (auto i = 0; i < n_; i++) { + Xd[i] = std::vector(X[i].begin(), X[i].end()); + } + auto yp = model.predict_proba(Xd); + return yp; + } float Classifier::score(torch::Tensor& X, torch::Tensor& y) { if (!fitted) { - throw std::logic_error("Classifier has not been fitted"); + throw std::logic_error(CLASSIFIER_NOT_FITTED); } torch::Tensor y_pred = predict(X); return (y_pred == y).sum().item() / y.size(0); 
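A minimal caller-side sketch of the predict_proba overloads introduced above. It is not part of the patch: it assumes the fit/predict_proba/predict signatures declared in this diff, a discretized dataset in the layout the tests use, and the "TAN.h" header path of this source tree.

// Sketch only: assumes the bayesnet::TAN API as declared in this patch series.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <map>
#include <numeric>
#include <string>
#include <vector>
#include "TAN.h"

void predict_proba_usage_sketch(std::vector<std::vector<int>>& X,
                                std::vector<int>& y,
                                const std::vector<std::string>& features,
                                const std::string& className,
                                std::map<std::string, std::vector<int>>& states)
{
    auto clf = bayesnet::TAN();
    clf.fit(X, y, features, className, states);
    auto proba = clf.predict_proba(X);   // one row per sample, one column per class state
    auto labels = clf.predict(X);        // hard class labels
    for (size_t i = 0; i < proba.size(); ++i) {
        // each row is expected to be a probability distribution over the class states
        double sum = std::accumulate(proba[i].begin(), proba[i].end(), 0.0);
        assert(std::fabs(sum - 1.0) < 1e-6);
        // predict() should agree with the arg max of predict_proba()
        auto maxIt = std::max_element(proba[i].begin(), proba[i].end());
        assert(static_cast<int>(std::distance(proba[i].begin(), maxIt)) == labels[i]);
    }
}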
@@ -116,7 +138,7 @@ namespace bayesnet { float Classifier::score(std::vector>& X, std::vector& y) { if (!fitted) { - throw std::logic_error("Classifier has not been fitted"); + throw std::logic_error(CLASSIFIER_NOT_FITTED); } return model.score(X, y); } @@ -145,6 +167,10 @@ namespace bayesnet { { return fitted ? model.getStates() : 0; } + int Classifier::getClassNumStates() const + { + return fitted ? model.getClassNumStates() : 0; + } std::vector Classifier::topological_order() { return model.topological_sort(); diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index 24657ca..639ab8f 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -7,8 +7,31 @@ namespace bayesnet { class Classifier : public BaseClassifier { - private: - Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights); + public: + Classifier(Network model); + virtual ~Classifier() = default; + Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override; + void addNodes(); + int getNumberOfNodes() const override; + int getNumberOfEdges() const override; + int getNumberOfStates() const override; + int getClassNumStates() const override; + torch::Tensor predict(torch::Tensor& X) override; + std::vector predict(std::vector>& X) override; + torch::Tensor predict_proba(torch::Tensor& X) override; + std::vector> predict_proba(std::vector>& X) override; + status_t getStatus() const override { return status; } + std::string getVersion() override { return { project_version.begin(), project_version.end() }; }; + float score(torch::Tensor& X, torch::Tensor& y) override; + float score(std::vector>& X, std::vector& y) override; + std::vector show() const override; + std::vector topological_order() override; + std::vector getNotes() const override { return notes; } + void dump_cpt() const override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters protected: bool fitted; int m, n; // m: number of samples, n: number of features @@ -24,28 +47,8 @@ namespace bayesnet { virtual void buildModel(const torch::Tensor& weights) = 0; void trainModel(const torch::Tensor& weights) override; void buildDataset(torch::Tensor& y); - public: - Classifier(Network model); - virtual ~Classifier() = default; - Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override; - void addNodes(); - int getNumberOfNodes() const override; - int getNumberOfEdges() const override; - int 
getNumberOfStates() const override; - torch::Tensor predict(torch::Tensor& X) override; - status_t getStatus() const override { return status; } - std::string getVersion() override { return { project_version.begin(), project_version.end() }; }; - std::vector predict(std::vector>& X) override; - float score(torch::Tensor& X, torch::Tensor& y) override; - float score(std::vector>& X, std::vector& y) override; - std::vector show() const override; - std::vector topological_order() override; - std::vector getNotes() const override { return notes; } - void dump_cpt() const override; - void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters + private: + Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights); }; } #endif diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 2eedc8d..966275d 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -2,7 +2,10 @@ namespace bayesnet { - Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) {}; + Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) + { + + }; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; void Ensemble::trainModel(const torch::Tensor& weights) { @@ -37,7 +40,7 @@ namespace bayesnet { if (!fitted) { throw std::logic_error(ENSEMBLE_NOT_FITTED); } - return predict_voting ? do_predict_voting(X) : do_predict_prob(X); + return do_predict_voting(X); } torch::Tensor Ensemble::predict(torch::Tensor& X) @@ -45,27 +48,32 @@ namespace bayesnet { if (!fitted) { throw std::logic_error(ENSEMBLE_NOT_FITTED); } - return predict_voting ? 
do_predict_voting(X) : do_predict_prob(X); + return do_predict_voting(X); } - torch::Tensor Ensemble::do_predict_prob(torch::Tensor& X) + torch::Tensor Ensemble::predict_proba(torch::Tensor& X) { - torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kFloat32); - // auto threads{ std::vector() }; - // std::mutex mtx; - // for (auto i = 0; i < n_models; ++i) { - // threads.push_back(std::thread([&, i]() { - // auto ypredict = models[i]->predict(X); - // std::lock_guard lock(mtx); - // y_pred.index_put_({ "...", i }, ypredict); - // })); - // } - // for (auto& thread : threads) { - // thread.join(); - // } + auto n_states = getClassNumStates(); + torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); + auto threads{ std::vector() }; + std::mutex mtx; + for (auto i = 0; i < n_models; ++i) { + threads.push_back(std::thread([&, i]() { + auto ypredict = models[i]->predict_proba(X); + ypredict *= significanceModels[i]; + std::lock_guard lock(mtx); + y_pred.index_put_({ "...", i }, ypredict); + })); + } + for (auto& thread : threads) { + thread.join(); + } + auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); + y_pred /= sum; return y_pred; } - std::vector Ensemble::do_predict_prob(std::vector>& X) + std::vector> Ensemble::predict_proba(std::vector>& X) { + // long m_ = X[0].size(); // long n_ = X.size(); // vector> Xd(n_, vector(m_, 0)); @@ -77,7 +85,7 @@ namespace bayesnet { // y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32)); // } // return voting(y_pred); - return std::vector(); + return std::vector>(); } torch::Tensor Ensemble::do_predict_voting(torch::Tensor& X) { @@ -105,14 +113,23 @@ namespace bayesnet { Xd[i] = std::vector(X[i].begin(), X[i].end()); } torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32); + auto threads{ std::vector() }; + std::mutex mtx; for (auto i = 0; i < n_models; ++i) { - y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32)); + threads.push_back(std::thread([&, i]() { + auto ypredict = models[i]->predict(Xd); + std::lock_guard lock(mtx); + y_pred.index_put_({ "...", i }, torch::tensor(ypredict, torch::kInt32)); + })); + } + for (auto& thread : threads) { + thread.join(); } return voting(y_pred); } float Ensemble::score(torch::Tensor& X, torch::Tensor& y) { - auto y_pred = predict_voting ? do_predict_voting(X) : do_predict_prob(X); + auto y_pred = do_predict_voting(X); int correct = 0; for (int i = 0; i < y_pred.size(0); ++i) { if (y_pred[i].item() == y[i].item()) { @@ -123,7 +140,7 @@ namespace bayesnet { } float Ensemble::score(std::vector>& X, std::vector& y) { - auto y_pred = predict_voting ? 
do_predict_voting(X) : do_predict_prob(X); + auto y_pred = do_predict_voting(X); int correct = 0; for (int i = 0; i < y_pred.size(); ++i) { if (y_pred[i] == y[i]) { diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index b748235..da18fd6 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -12,10 +12,10 @@ namespace bayesnet { virtual ~Ensemble() = default; torch::Tensor predict(torch::Tensor& X) override; std::vector predict(std::vector>& X) override; + torch::Tensor predict_proba(torch::Tensor& X) override; + std::vector> predict_proba(std::vector>& X) override; torch::Tensor do_predict_voting(torch::Tensor& X); std::vector do_predict_voting(std::vector>& X); - torch::Tensor do_predict_prob(torch::Tensor& X); - std::vector do_predict_prob(std::vector>& X); float score(torch::Tensor& X, torch::Tensor& y) override; float score(std::vector>& X, std::vector& y) override; int getNumberOfNodes() const override; diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index 2a3795e..ad45055 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -7,23 +7,6 @@ namespace bayesnet { class Network { - private: - std::map> nodes; - bool fitted; - float maxThreads = 0.95; - int classNumStates; - std::vector features; // Including classname - std::string className; - double laplaceSmoothing; - torch::Tensor samples; // nxm tensor used to fit the model - bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); - std::vector predict_sample(const std::vector&); - std::vector predict_sample(const torch::Tensor&); - std::vector exactInference(std::map&); - double computeFactor(std::map&); - void completeFit(const std::map>& states, const torch::Tensor& weights); - void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); - void setStates(const std::map>&); public: Network(); explicit Network(float); @@ -58,6 +41,23 @@ namespace bayesnet { void initialize(); void dump_cpt() const; inline std::string version() { return { project_version.begin(), project_version.end() }; } + private: + std::map> nodes; + bool fitted; + float maxThreads = 0.95; + int classNumStates; + std::vector features; // Including classname + std::string className; + double laplaceSmoothing; + torch::Tensor samples; // nxm tensor used to fit the model + bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); + std::vector predict_sample(const std::vector&); + std::vector predict_sample(const torch::Tensor&); + std::vector exactInference(std::map&); + double computeFactor(std::map&); + void completeFit(const std::map>& states, const torch::Tensor& weights); + void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); + void setStates(const std::map>&); }; } #endif \ No newline at end of file diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index bed1783..26912c0 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -165,7 +165,6 @@ TEST_CASE("BoostAODE test used features in train note", "[BayesNet]") {"convergence", true}, {"repeatSparent",true}, {"select_features","CFS"}, - {"tolerance", 3} }); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); REQUIRE(clf.getNumberOfNodes() == 72); @@ -175,3 +174,42 @@ TEST_CASE("BoostAODE test used features in train 
note", "[BayesNet]") REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); REQUIRE(clf.getNotes()[2] == "Number of models: 8"); } +TEST_CASE("TAN predict_proba", "[BayesNet]") +{ + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::TAN(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto y_pred_proba = clf.predict_proba(raw.Xv); + auto y_pred = clf.predict(raw.Xv); + auto yt_pred_proba = clf.predict_proba(raw.Xt); + REQUIRE(y_pred.size() == y_pred_proba.size()); + REQUIRE(y_pred.size() == yt_pred_proba.size(0)); + REQUIRE(y_pred.size() == raw.yv.size()); + REQUIRE(y_pred_proba[0].size() == 3); + REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); + for (int i = 0; i < y_pred_proba.size(); ++i) { + auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); + int predictedClass = distance(y_pred_proba[i].begin(), maxElem); + REQUIRE(predictedClass == y_pred[i]); + REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); + } +} + +// TEST_CASE("BoostAODE predict_proba", "[BayesNet]") +// { +// auto raw = RawDatasets("iris", true); +// auto clf = bayesnet::BoostAODE(); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// auto y_pred = clf.predict_proba(raw.Xv); +// REQUIRE(y_pred.size(0) == raw.yv.size(0)); +// REQUIRE(y_pred.size(1) == 3); +// auto y_pred2 = clf.predict_proba(raw.Xv); +// REQUIRE(y_pred2.size(0) == raw.yv.size(0)); +// REQUIRE(y_pred2.size(1) == 3); +// REQUIRE(y_pred.equal(y_pred2)); +// for (int i = 0; i < y_pred.size(0); ++i) { +// for (int j = 0; j < y_pred.size(1); ++j) { +// REQUIRE(y_pred[i][j].item() == y_pred2[i][j].item()); +// } +// } +// } -- 2.45.2 From 3116eaa76326af2bafa9a21e3f3f49bbf4753c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 22 Feb 2024 18:44:40 +0100 Subject: [PATCH 3/7] Begin testing ensemble predict_proba --- src/BayesNet/BoostAODE.cc | 2 +- src/BayesNet/BoostAODE.h | 2 +- src/BayesNet/Ensemble.h | 1 - tests/TestBayesModels.cc | 85 ++++++++++++++++++++++++++++++--------- 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index fe6ab72..cc617eb 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -8,7 +8,7 @@ #include "folding.hpp" namespace bayesnet { - BoostAODE::BoostAODE() : Ensemble(false) + BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting) { validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" }; diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 4b0b063..f9c8429 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -7,7 +7,7 @@ namespace bayesnet { class BoostAODE : public Ensemble { public: - BoostAODE(); + BoostAODE(bool predict_voting = false); virtual ~BoostAODE() = default; std::vector graph(const std::string& title = "BoostAODE") const override; void setHyperparameters(const nlohmann::json& hyperparameters) override; diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index da18fd6..3d31882 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -36,7 +36,6 @@ namespace bayesnet { std::vector significanceModels; void trainModel(const torch::Tensor& weights) override; std::vector voting(torch::Tensor& y_pred); - private: bool predict_voting; }; } diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 26912c0..1832cd9 100644 --- 
a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -156,7 +156,7 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]") REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -TEST_CASE("BoostAODE test used features in train note", "[BayesNet]") +TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") { auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::BoostAODE(); @@ -173,9 +173,25 @@ TEST_CASE("BoostAODE test used features in train note", "[BayesNet]") REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); REQUIRE(clf.getNotes()[2] == "Number of models: 8"); + auto score = clf.score(raw.Xv, raw.yv); + auto scoret = clf.score(raw.Xt, raw.yt); + REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); } TEST_CASE("TAN predict_proba", "[BayesNet]") { + auto res_prob = std::vector>({ + { 0.00375671, 0.994457, 0.00178621 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00218225, 0.992877, 0.00494094 }, + { 0.00494209, 0.0978534, 0.897205 }, + { 0.0054192, 0.974275, 0.0203054 }, + { 0.00433012, 0.985054, 0.0106159 }, + { 0.000860806, 0.996922, 0.00221698 } + }); + int init_index = 78; auto raw = RawDatasets("iris", true); auto clf = bayesnet::TAN(); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); @@ -191,25 +207,54 @@ TEST_CASE("TAN predict_proba", "[BayesNet]") auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); int predictedClass = distance(y_pred_proba[i].begin(), maxElem); REQUIRE(predictedClass == y_pred[i]); + // Check predict is coherent with predict_proba REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); } + // Check predict_proba values for vectors and tensors + for (int i = 0; i < res_prob.size(); i++) { + for (int j = 0; j < 3; j++) { + REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); + REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); + } + } +} +TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]") +{ + // auto res_prob = std::vector>({ + // { 0.00375671, 0.994457, 0.00178621 }, + // { 0.00137462, 0.992734, 0.00589123 }, + // { 0.00137462, 0.992734, 0.00589123 }, + // { 0.00137462, 0.992734, 0.00589123 }, + // { 0.00218225, 0.992877, 0.00494094 }, + // { 0.00494209, 0.0978534, 0.897205 }, + // { 0.0054192, 0.974275, 0.0203054 }, + // { 0.00433012, 0.985054, 0.0106159 }, + // { 0.000860806, 0.996922, 0.00221698 } + // }); + // int init_index = 78; + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::BoostAODE(true); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto y_pred_proba = clf.predict_proba(raw.Xv); + auto y_pred = clf.predict(raw.Xv); + auto yt_pred_proba = clf.predict_proba(raw.Xt); + // REQUIRE(y_pred.size() == y_pred_proba.size()); + // REQUIRE(y_pred.size() == yt_pred_proba.size(0)); + // REQUIRE(y_pred.size() == raw.yv.size()); + // REQUIRE(y_pred_proba[0].size() == 3); + // REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); + // for (int i = 0; i < y_pred_proba.size(); ++i) { + // auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); + // int predictedClass = 
distance(y_pred_proba[i].begin(), maxElem); + // REQUIRE(predictedClass == y_pred[i]); + // // Check predict is coherent with predict_proba + // REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); + // } + // // Check predict_proba values for vectors and tensors + // for (int i = 0; i < res_prob.size(); i++) { + // for (int j = 0; j < 3; j++) { + // REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); + // REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); + // } + // } } - -// TEST_CASE("BoostAODE predict_proba", "[BayesNet]") -// { -// auto raw = RawDatasets("iris", true); -// auto clf = bayesnet::BoostAODE(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto y_pred = clf.predict_proba(raw.Xv); -// REQUIRE(y_pred.size(0) == raw.yv.size(0)); -// REQUIRE(y_pred.size(1) == 3); -// auto y_pred2 = clf.predict_proba(raw.Xv); -// REQUIRE(y_pred2.size(0) == raw.yv.size(0)); -// REQUIRE(y_pred2.size(1) == 3); -// REQUIRE(y_pred.equal(y_pred2)); -// for (int i = 0; i < y_pred.size(0); ++i) { -// for (int j = 0; j < y_pred.size(1); ++j) { -// REQUIRE(y_pred[i][j].item() == y_pred2[i][j].item()); -// } -// } -// } -- 2.45.2 From 52abd2d6708f902cf36be925ce5e16dd603c85f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 23 Feb 2024 20:36:11 +0100 Subject: [PATCH 4/7] Implement the proba branch and begin with the voting one --- CMakeLists.txt | 6 +- src/{BayesNet => }/AODE.cc | 0 src/{BayesNet => }/AODE.h | 0 src/{BayesNet => }/AODELd.cc | 0 src/{BayesNet => }/AODELd.h | 0 src/{BayesNet => }/BaseClassifier.h | 0 src/{BayesNet => }/BayesMetrics.cc | 0 src/{BayesNet => }/BayesMetrics.h | 0 src/BayesNet/Ensemble.cc | 194 ------------- src/BayesNet/bayesnetUtils.cc | 25 -- src/{BayesNet => }/BoostAODE.cc | 0 src/{BayesNet => }/BoostAODE.h | 0 src/{BayesNet => }/CFS.cc | 0 src/{BayesNet => }/CFS.h | 0 src/{BayesNet => }/CMakeLists.txt | 2 +- src/{BayesNet => }/Classifier.cc | 4 +- src/{BayesNet => }/Classifier.h | 2 +- src/Ensemble.cc | 251 ++++++++++++++++ src/{BayesNet => }/Ensemble.h | 11 +- src/{BayesNet => }/FCBF.cc | 0 src/{BayesNet => }/FCBF.h | 0 src/{BayesNet => }/FeatureSelect.cc | 0 src/{BayesNet => }/FeatureSelect.h | 0 src/{BayesNet => }/IWSS.cc | 0 src/{BayesNet => }/IWSS.h | 0 src/{BayesNet => }/KDB.cc | 0 src/{BayesNet => }/KDB.h | 0 src/{BayesNet => }/KDBLd.cc | 0 src/{BayesNet => }/KDBLd.h | 0 src/{BayesNet => }/Mst.cc | 0 src/{BayesNet => }/Mst.h | 0 src/{BayesNet => }/Network.cc | 1 + src/{BayesNet => }/Network.h | 0 src/{BayesNet => }/Node.cc | 0 src/{BayesNet => }/Node.h | 0 src/{BayesNet => }/Proposal.cc | 0 src/{BayesNet => }/Proposal.h | 0 src/{BayesNet => }/SPODE.cc | 0 src/{BayesNet => }/SPODE.h | 0 src/{BayesNet => }/SPODELd.cc | 0 src/{BayesNet => }/SPODELd.h | 0 src/{BayesNet => }/TAN.cc | 0 src/{BayesNet => }/TAN.h | 0 src/{BayesNet => }/TANLd.cc | 0 src/{BayesNet => }/TANLd.h | 0 src/bayesnetUtils.cc | 39 +++ src/{BayesNet => }/bayesnetUtils.h | 4 +- tests/CMakeLists.txt | 4 +- tests/TestBayesModels.cc | 427 +++++++++++++++++----------- 49 files changed, 574 insertions(+), 396 deletions(-) rename src/{BayesNet => }/AODE.cc (100%) rename src/{BayesNet => }/AODE.h (100%) rename src/{BayesNet => }/AODELd.cc (100%) rename src/{BayesNet => }/AODELd.h (100%) rename src/{BayesNet => }/BaseClassifier.h (100%) rename src/{BayesNet => }/BayesMetrics.cc (100%) rename src/{BayesNet => }/BayesMetrics.h (100%) delete 
mode 100644 src/BayesNet/Ensemble.cc delete mode 100644 src/BayesNet/bayesnetUtils.cc rename src/{BayesNet => }/BoostAODE.cc (100%) rename src/{BayesNet => }/BoostAODE.h (100%) rename src/{BayesNet => }/CFS.cc (100%) rename src/{BayesNet => }/CFS.h (100%) rename src/{BayesNet => }/CMakeLists.txt (93%) rename src/{BayesNet => }/Classifier.cc (98%) rename src/{BayesNet => }/Classifier.h (97%) create mode 100644 src/Ensemble.cc rename src/{BayesNet => }/Ensemble.h (71%) rename src/{BayesNet => }/FCBF.cc (100%) rename src/{BayesNet => }/FCBF.h (100%) rename src/{BayesNet => }/FeatureSelect.cc (100%) rename src/{BayesNet => }/FeatureSelect.h (100%) rename src/{BayesNet => }/IWSS.cc (100%) rename src/{BayesNet => }/IWSS.h (100%) rename src/{BayesNet => }/KDB.cc (100%) rename src/{BayesNet => }/KDB.h (100%) rename src/{BayesNet => }/KDBLd.cc (100%) rename src/{BayesNet => }/KDBLd.h (100%) rename src/{BayesNet => }/Mst.cc (100%) rename src/{BayesNet => }/Mst.h (100%) rename src/{BayesNet => }/Network.cc (99%) rename src/{BayesNet => }/Network.h (100%) rename src/{BayesNet => }/Node.cc (100%) rename src/{BayesNet => }/Node.h (100%) rename src/{BayesNet => }/Proposal.cc (100%) rename src/{BayesNet => }/Proposal.h (100%) rename src/{BayesNet => }/SPODE.cc (100%) rename src/{BayesNet => }/SPODE.h (100%) rename src/{BayesNet => }/SPODELd.cc (100%) rename src/{BayesNet => }/SPODELd.h (100%) rename src/{BayesNet => }/TAN.cc (100%) rename src/{BayesNet => }/TAN.h (100%) rename src/{BayesNet => }/TANLd.cc (100%) rename src/{BayesNet => }/TANLd.h (100%) create mode 100644 src/bayesnetUtils.cc rename src/{BayesNet => }/bayesnetUtils.h (53%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 072c729..95cd197 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,14 +58,12 @@ add_git_submodule("lib/json") # -------------- add_subdirectory(config) add_subdirectory(lib/Files) -add_subdirectory(src/BayesNet) +add_subdirectory(src) -file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h) -file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp) +file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/*.cc) # Testing # ------- - if (ENABLE_TESTING) MESSAGE("Testing enabled") add_git_submodule("lib/catch2") diff --git a/src/BayesNet/AODE.cc b/src/AODE.cc similarity index 100% rename from src/BayesNet/AODE.cc rename to src/AODE.cc diff --git a/src/BayesNet/AODE.h b/src/AODE.h similarity index 100% rename from src/BayesNet/AODE.h rename to src/AODE.h diff --git a/src/BayesNet/AODELd.cc b/src/AODELd.cc similarity index 100% rename from src/BayesNet/AODELd.cc rename to src/AODELd.cc diff --git a/src/BayesNet/AODELd.h b/src/AODELd.h similarity index 100% rename from src/BayesNet/AODELd.h rename to src/AODELd.h diff --git a/src/BayesNet/BaseClassifier.h b/src/BaseClassifier.h similarity index 100% rename from src/BayesNet/BaseClassifier.h rename to src/BaseClassifier.h diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesMetrics.cc similarity index 100% rename from src/BayesNet/BayesMetrics.cc rename to src/BayesMetrics.cc diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesMetrics.h similarity index 100% rename from src/BayesNet/BayesMetrics.h rename to src/BayesMetrics.h diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc deleted file mode 100644 index 966275d..0000000 --- a/src/BayesNet/Ensemble.cc +++ /dev/null @@ -1,194 +0,0 @@ -#include "Ensemble.h" - 
-namespace bayesnet { - - Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) - { - - }; - const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; - void Ensemble::trainModel(const torch::Tensor& weights) - { - n_models = models.size(); - for (auto i = 0; i < n_models; ++i) { - // fit with std::vectors - models[i]->fit(dataset, features, className, states); - } - } - - std::vector Ensemble::voting(torch::Tensor& y_pred) - { - auto y_pred_ = y_pred.accessor(); - std::vector y_pred_final; - int numClasses = states.at(className).size(); - // y_pred is m x n_models with the prediction of every model for each sample - for (int i = 0; i < y_pred.size(0); ++i) { - // votes store in each index (value of class) the significance added by each model - // i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions - std::vector votes(numClasses, 0.0); - for (int j = 0; j < n_models; ++j) { - votes[y_pred_[i][j]] += significanceModels.at(j); - } - // argsort in descending order - auto indices = argsort(votes); - y_pred_final.push_back(indices[0]); - } - return y_pred_final; - } - std::vector Ensemble::predict(std::vector>& X) - { - if (!fitted) { - throw std::logic_error(ENSEMBLE_NOT_FITTED); - } - return do_predict_voting(X); - - } - torch::Tensor Ensemble::predict(torch::Tensor& X) - { - if (!fitted) { - throw std::logic_error(ENSEMBLE_NOT_FITTED); - } - return do_predict_voting(X); - } - torch::Tensor Ensemble::predict_proba(torch::Tensor& X) - { - auto n_states = getClassNumStates(); - torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); - auto threads{ std::vector() }; - std::mutex mtx; - for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict_proba(X); - ypredict *= significanceModels[i]; - std::lock_guard lock(mtx); - y_pred.index_put_({ "...", i }, ypredict); - })); - } - for (auto& thread : threads) { - thread.join(); - } - auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); - y_pred /= sum; - return y_pred; - } - std::vector> Ensemble::predict_proba(std::vector>& X) - { - - // long m_ = X[0].size(); - // long n_ = X.size(); - // vector> Xd(n_, vector(m_, 0)); - // for (auto i = 0; i < n_; i++) { - // Xd[i] = vector(X[i].begin(), X[i].end()); - // } - // torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32); - // for (auto i = 0; i < n_models; ++i) { - // y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32)); - // } - // return voting(y_pred); - return std::vector>(); - } - torch::Tensor Ensemble::do_predict_voting(torch::Tensor& X) - { - torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); - auto threads{ std::vector() }; - std::mutex mtx; - for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict(X); - std::lock_guard lock(mtx); - y_pred.index_put_({ "...", i }, ypredict); - })); - } - for (auto& thread : threads) { - thread.join(); - } - return torch::tensor(voting(y_pred)); - } - std::vector Ensemble::do_predict_voting(std::vector>& X) - { - long m_ = X[0].size(); - long n_ = X.size(); - std::vector> Xd(n_, std::vector(m_, 0)); - for (auto i = 0; i < n_; i++) { - Xd[i] = std::vector(X[i].begin(), X[i].end()); - } - torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32); - auto threads{ std::vector() }; - 
std::mutex mtx; - for (auto i = 0; i < n_models; ++i) { - threads.push_back(std::thread([&, i]() { - auto ypredict = models[i]->predict(Xd); - std::lock_guard lock(mtx); - y_pred.index_put_({ "...", i }, torch::tensor(ypredict, torch::kInt32)); - })); - } - for (auto& thread : threads) { - thread.join(); - } - return voting(y_pred); - } - float Ensemble::score(torch::Tensor& X, torch::Tensor& y) - { - auto y_pred = do_predict_voting(X); - int correct = 0; - for (int i = 0; i < y_pred.size(0); ++i) { - if (y_pred[i].item() == y[i].item()) { - correct++; - } - } - return (double)correct / y_pred.size(0); - } - float Ensemble::score(std::vector>& X, std::vector& y) - { - auto y_pred = do_predict_voting(X); - int correct = 0; - for (int i = 0; i < y_pred.size(); ++i) { - if (y_pred[i] == y[i]) { - correct++; - } - } - return (double)correct / y_pred.size(); - } - std::vector Ensemble::show() const - { - auto result = std::vector(); - for (auto i = 0; i < n_models; ++i) { - auto res = models[i]->show(); - result.insert(result.end(), res.begin(), res.end()); - } - return result; - } - std::vector Ensemble::graph(const std::string& title) const - { - auto result = std::vector(); - for (auto i = 0; i < n_models; ++i) { - auto res = models[i]->graph(title + "_" + std::to_string(i)); - result.insert(result.end(), res.begin(), res.end()); - } - return result; - } - int Ensemble::getNumberOfNodes() const - { - int nodes = 0; - for (auto i = 0; i < n_models; ++i) { - nodes += models[i]->getNumberOfNodes(); - } - return nodes; - } - int Ensemble::getNumberOfEdges() const - { - int edges = 0; - for (auto i = 0; i < n_models; ++i) { - edges += models[i]->getNumberOfEdges(); - } - return edges; - } - int Ensemble::getNumberOfStates() const - { - int nstates = 0; - for (auto i = 0; i < n_models; ++i) { - nstates += models[i]->getNumberOfStates(); - } - return nstates; - } -} \ No newline at end of file diff --git a/src/BayesNet/bayesnetUtils.cc b/src/BayesNet/bayesnetUtils.cc deleted file mode 100644 index accef68..0000000 --- a/src/BayesNet/bayesnetUtils.cc +++ /dev/null @@ -1,25 +0,0 @@ - -#include "bayesnetUtils.h" -namespace bayesnet { - // Return the indices in descending order - std::vector argsort(std::vector& nums) - { - int n = nums.size(); - std::vector indices(n); - iota(indices.begin(), indices.end(), 0); - sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); - return indices; - } - std::vector> tensorToVector(torch::Tensor& tensor) - { - // convert mxn tensor to nxm std::vector - std::vector> result; - // Iterate over cols - for (int i = 0; i < tensor.size(1); ++i) { - auto col_tensor = tensor.index({ "...", i }); - auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); - result.push_back(col); - } - return result; - } -} \ No newline at end of file diff --git a/src/BayesNet/BoostAODE.cc b/src/BoostAODE.cc similarity index 100% rename from src/BayesNet/BoostAODE.cc rename to src/BoostAODE.cc diff --git a/src/BayesNet/BoostAODE.h b/src/BoostAODE.h similarity index 100% rename from src/BayesNet/BoostAODE.h rename to src/BoostAODE.h diff --git a/src/BayesNet/CFS.cc b/src/CFS.cc similarity index 100% rename from src/BayesNet/CFS.cc rename to src/CFS.cc diff --git a/src/BayesNet/CFS.h b/src/CFS.h similarity index 100% rename from src/BayesNet/CFS.h rename to src/CFS.h diff --git a/src/BayesNet/CMakeLists.txt b/src/CMakeLists.txt similarity index 93% rename from src/BayesNet/CMakeLists.txt rename to src/CMakeLists.txt index 
d02c671..461c6a9 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ include_directories( ${BayesNet_SOURCE_DIR}/lib/Files ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include - ${BayesNet_SOURCE_DIR}/src/BayesNet + ${BayesNet_SOURCE_DIR}/src ${CMAKE_BINARY_DIR}/configured_files/include ) diff --git a/src/BayesNet/Classifier.cc b/src/Classifier.cc similarity index 98% rename from src/BayesNet/Classifier.cc rename to src/Classifier.cc index 176f369..ba63753 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/Classifier.cc @@ -121,6 +121,7 @@ namespace bayesnet { auto m_ = X[0].size(); auto n_ = X.size(); std::vector> Xd(n_, std::vector(m_, 0)); + // Convert to nxm vector for (auto i = 0; i < n_; i++) { Xd[i] = std::vector(X[i].begin(), X[i].end()); } @@ -129,9 +130,6 @@ namespace bayesnet { } float Classifier::score(torch::Tensor& X, torch::Tensor& y) { - if (!fitted) { - throw std::logic_error(CLASSIFIER_NOT_FITTED); - } torch::Tensor y_pred = predict(X); return (y_pred == y).sum().item() / y.size(0); } diff --git a/src/BayesNet/Classifier.h b/src/Classifier.h similarity index 97% rename from src/BayesNet/Classifier.h rename to src/Classifier.h index 639ab8f..2c43533 100644 --- a/src/BayesNet/Classifier.h +++ b/src/Classifier.h @@ -34,7 +34,7 @@ namespace bayesnet { void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters protected: bool fitted; - int m, n; // m: number of samples, n: number of features + unsigned int m, n; // m: number of samples, n: number of features Network model; Metrics metrics; std::vector features; diff --git a/src/Ensemble.cc b/src/Ensemble.cc new file mode 100644 index 0000000..cebcb9a --- /dev/null +++ b/src/Ensemble.cc @@ -0,0 +1,251 @@ +#include "Ensemble.h" + +namespace bayesnet { + + Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) + { + + }; + const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; + void Ensemble::trainModel(const torch::Tensor& weights) + { + n_models = models.size(); + for (auto i = 0; i < n_models; ++i) { + // fit with std::vectors + models[i]->fit(dataset, features, className, states); + } + } + std::vector Ensemble::compute_arg_max(std::vector>& X) + { + std::vector y_pred; + for (auto i = 0; i < X.size(); ++i) { + auto max = std::max_element(X[i].begin(), X[i].end()); + y_pred.push_back(std::distance(X[i].begin(), max)); + } + return y_pred; + } + torch::Tensor Ensemble::compute_arg_max(torch::Tensor& X) + { + auto y_pred = torch::argmax(X, 1); + return y_pred; + } + torch::Tensor Ensemble::voting(torch::Tensor& votes) + { + // Convert m x n_models tensor to a m x n_class_states with voting probabilities + auto y_pred_ = votes.accessor(); + std::vector y_pred_final; + int numClasses = states.at(className).size(); + // votes is m x n_models with the prediction of every model for each sample + auto result = torch::zeros({ votes.size(0), numClasses }, torch::kFloat32); + auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); + for (int i = 0; i < votes.size(0); ++i) { + // n_votes store in each index (value of class) the significance added by each model + // i.e. n_votes[0] contains how much value has the value 0 of class. 
That value is generated by the models predictions + std::vector n_votes(numClasses, 0.0); + for (int j = 0; j < n_models; ++j) { + n_votes[y_pred_[i][j]] += significanceModels.at(j); + } + result[i] = torch::tensor(n_votes); + } + // To only do one division and gain precision + result /= sum; + return result; + } + std::vector> Ensemble::voting(std::vector>& votes) + { + // Convert n_models x m matrix to a m x n_class_states matrix + std::vector> y_pred_final; + int numClasses = states.at(className).size(); + auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); + // y_pred is m x n_models with the prediction of every model for each sample + std::cout << std::string(80, '*') << std::endl; + for (int i = 0; i < votes.size(); ++i) { + // n_votes store in each index (value of class) the significance added by each model + // i.e. n_votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions + std::vector n_votes(numClasses, 0.0); + for (int j = 0; j < n_models; ++j) { + n_votes[votes[i][j]] += significanceModels.at(j); + } + for (auto& x : n_votes) { + std::cout << x << " "; + } + std::cout << std::endl; + // To only do one division per result and gain precision + std::transform(n_votes.begin(), n_votes.end(), n_votes.begin(), [sum](double x) { return x / sum; }); + y_pred_final.push_back(n_votes); + } + std::cout << std::string(80, '*') << std::endl; + return y_pred_final; + } + std::vector> Ensemble::predict_proba(std::vector>& X) + { + if (!fitted) { + throw std::logic_error(ENSEMBLE_NOT_FITTED); + } + return predict_voting ? predict_average_voting(X) : predict_average_proba(X); + } + torch::Tensor Ensemble::predict_proba(torch::Tensor& X) + { + if (!fitted) { + throw std::logic_error(ENSEMBLE_NOT_FITTED); + } + return predict_voting ? 
predict_average_voting(X) : predict_average_proba(X); + } + std::vector Ensemble::predict(std::vector>& X) + { + auto res = predict_proba(X); + std::cout << "res: " << res.size() << ", " << res[0].size() << std::endl; + return compute_arg_max(res); + } + torch::Tensor Ensemble::predict(torch::Tensor& X) + { + auto res = predict_proba(X); + return compute_arg_max(res); + } + torch::Tensor Ensemble::predict_average_proba(torch::Tensor& X) + { + auto n_states = models[0]->getClassNumStates(); + torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); + auto threads{ std::vector() }; + std::mutex mtx; + for (auto i = 0; i < n_models; ++i) { + threads.push_back(std::thread([&, i]() { + auto ypredict = models[i]->predict_proba(X); + std::lock_guard lock(mtx); + y_pred += ypredict * significanceModels[i]; + })); + } + for (auto& thread : threads) { + thread.join(); + } + auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); + y_pred /= sum; + return y_pred; + } + std::vector> Ensemble::predict_average_proba(std::vector>& X) + { + auto n_states = models[0]->getClassNumStates(); + std::vector> y_pred(X[0].size(), std::vector(n_states, 0.0)); + auto threads{ std::vector() }; + std::mutex mtx; + for (auto i = 0; i < n_models; ++i) { + threads.push_back(std::thread([&, i]() { + auto ypredict = models[i]->predict_proba(X); + assert(ypredict.size() == y_pred.size()); + assert(ypredict[0].size() == y_pred[0].size()); + std::lock_guard lock(mtx); + // Multiply each prediction by the significance of the model and then add it to the final prediction + for (auto j = 0; j < ypredict.size(); ++j) { + std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(), + [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; }); + } + })); + } + for (auto& thread : threads) { + thread.join(); + } + auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); + //Divide each element of the prediction by the sum of the significances + for (auto j = 0; j < y_pred.size(); ++j) { + std::transform(y_pred[j].begin(), y_pred[j].end(), y_pred[j].begin(), [sum](double x) { return x / sum; }); + } + return y_pred; + } + torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X) + { + // Build a m x n_models tensor with the predictions of each model + torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); + auto threads{ std::vector() }; + std::mutex mtx; + for (auto i = 0; i < n_models; ++i) { + threads.push_back(std::thread([&, i]() { + auto ypredict = models[i]->predict(X); + std::lock_guard lock(mtx); + y_pred.index_put_({ "...", i }, ypredict); + })); + } + for (auto& thread : threads) { + thread.join(); + } + return voting(y_pred); + } + std::vector> Ensemble::predict_average_voting(std::vector>& X) + { + auto Xt = vectorToTensor(X); + auto y_pred = predict_average_voting(Xt); + auto res = voting(y_pred); + std::vector> result; + // Iterate over cols + for (int i = 0; i < res.size(1); ++i) { + auto col_tensor = res.index({ "...", i }); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + res.size(0)); + result.push_back(col); + } + return result; + //return tensorToVector(res); + } + float Ensemble::score(torch::Tensor& X, torch::Tensor& y) + { + auto y_pred = predict(X); + int correct = 0; + for (int i = 0; i < y_pred.size(0); ++i) { + if (y_pred[i].item() == y[i].item()) { + correct++; + } + } + return (double)correct / y_pred.size(0); + } 
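To state the combination rules of the two prediction branches explicitly, the following standalone per-sample sketch (plain standard C++, not part of the patch) mirrors what predict_average_proba and voting do with the model significances:

#include <cstddef>
#include <numeric>
#include <vector>

// Weighted average of the class distributions returned by each model:
// combined[c] = (sum_i significance[i] * proba_i[c]) / sum_i significance[i]
std::vector<double> combine_proba(const std::vector<std::vector<double>>& perModelProba,
                                  const std::vector<double>& significance)
{
    std::vector<double> combined(perModelProba.front().size(), 0.0);
    for (size_t i = 0; i < perModelProba.size(); ++i) {
        for (size_t c = 0; c < combined.size(); ++c) {
            combined[c] += significance[i] * perModelProba[i][c];
        }
    }
    const double total = std::accumulate(significance.begin(), significance.end(), 0.0);
    for (auto& p : combined) {
        p /= total;   // single division at the end, as in the patch, to keep precision
    }
    return combined;
}

// Weighted voting: each model casts its predicted class weighted by its significance,
// and normalizing by the total significance turns the votes into a distribution.
std::vector<double> combine_votes(const std::vector<int>& perModelPrediction,
                                  const std::vector<double>& significance,
                                  int numClasses)
{
    std::vector<double> votes(numClasses, 0.0);
    for (size_t i = 0; i < perModelPrediction.size(); ++i) {
        votes[perModelPrediction[i]] += significance[i];
    }
    const double total = std::accumulate(significance.begin(), significance.end(), 0.0);
    for (auto& v : votes) {
        v /= total;
    }
    return votes;
}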
+ float Ensemble::score(std::vector>& X, std::vector& y) + { + auto y_pred = predict(X); + int correct = 0; + for (int i = 0; i < y_pred.size(); ++i) { + if (y_pred[i] == y[i]) { + correct++; + } + } + return (double)correct / y_pred.size(); + } + std::vector Ensemble::show() const + { + auto result = std::vector(); + for (auto i = 0; i < n_models; ++i) { + auto res = models[i]->show(); + result.insert(result.end(), res.begin(), res.end()); + } + return result; + } + std::vector Ensemble::graph(const std::string& title) const + { + auto result = std::vector(); + for (auto i = 0; i < n_models; ++i) { + auto res = models[i]->graph(title + "_" + std::to_string(i)); + result.insert(result.end(), res.begin(), res.end()); + } + return result; + } + int Ensemble::getNumberOfNodes() const + { + int nodes = 0; + for (auto i = 0; i < n_models; ++i) { + nodes += models[i]->getNumberOfNodes(); + } + return nodes; + } + int Ensemble::getNumberOfEdges() const + { + int edges = 0; + for (auto i = 0; i < n_models; ++i) { + edges += models[i]->getNumberOfEdges(); + } + return edges; + } + int Ensemble::getNumberOfStates() const + { + int nstates = 0; + for (auto i = 0; i < n_models; ++i) { + nstates += models[i]->getNumberOfStates(); + } + return nstates; + } +} \ No newline at end of file diff --git a/src/BayesNet/Ensemble.h b/src/Ensemble.h similarity index 71% rename from src/BayesNet/Ensemble.h rename to src/Ensemble.h index 3d31882..dd14046 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/Ensemble.h @@ -14,8 +14,6 @@ namespace bayesnet { std::vector predict(std::vector>& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; std::vector> predict_proba(std::vector>& X) override; - torch::Tensor do_predict_voting(torch::Tensor& X); - std::vector do_predict_voting(std::vector>& X); float score(torch::Tensor& X, torch::Tensor& y) override; float score(std::vector>& X, std::vector& y) override; int getNumberOfNodes() const override; @@ -31,11 +29,18 @@ namespace bayesnet { { } protected: + torch::Tensor predict_average_voting(torch::Tensor& X); + std::vector> predict_average_voting(std::vector>& X); + torch::Tensor predict_average_proba(torch::Tensor& X); + std::vector> predict_average_proba(std::vector>& X); + torch::Tensor compute_arg_max(torch::Tensor& X); + std::vector compute_arg_max(std::vector>& X); + torch::Tensor voting(torch::Tensor& votes); + std::vector> voting(std::vector>& votes); unsigned n_models; std::vector> models; std::vector significanceModels; void trainModel(const torch::Tensor& weights) override; - std::vector voting(torch::Tensor& y_pred); bool predict_voting; }; } diff --git a/src/BayesNet/FCBF.cc b/src/FCBF.cc similarity index 100% rename from src/BayesNet/FCBF.cc rename to src/FCBF.cc diff --git a/src/BayesNet/FCBF.h b/src/FCBF.h similarity index 100% rename from src/BayesNet/FCBF.h rename to src/FCBF.h diff --git a/src/BayesNet/FeatureSelect.cc b/src/FeatureSelect.cc similarity index 100% rename from src/BayesNet/FeatureSelect.cc rename to src/FeatureSelect.cc diff --git a/src/BayesNet/FeatureSelect.h b/src/FeatureSelect.h similarity index 100% rename from src/BayesNet/FeatureSelect.h rename to src/FeatureSelect.h diff --git a/src/BayesNet/IWSS.cc b/src/IWSS.cc similarity index 100% rename from src/BayesNet/IWSS.cc rename to src/IWSS.cc diff --git a/src/BayesNet/IWSS.h b/src/IWSS.h similarity index 100% rename from src/BayesNet/IWSS.h rename to src/IWSS.h diff --git a/src/BayesNet/KDB.cc b/src/KDB.cc similarity index 100% rename from src/BayesNet/KDB.cc rename to 
src/KDB.cc diff --git a/src/BayesNet/KDB.h b/src/KDB.h similarity index 100% rename from src/BayesNet/KDB.h rename to src/KDB.h diff --git a/src/BayesNet/KDBLd.cc b/src/KDBLd.cc similarity index 100% rename from src/BayesNet/KDBLd.cc rename to src/KDBLd.cc diff --git a/src/BayesNet/KDBLd.h b/src/KDBLd.h similarity index 100% rename from src/BayesNet/KDBLd.h rename to src/KDBLd.h diff --git a/src/BayesNet/Mst.cc b/src/Mst.cc similarity index 100% rename from src/BayesNet/Mst.cc rename to src/Mst.cc diff --git a/src/BayesNet/Mst.h b/src/Mst.h similarity index 100% rename from src/BayesNet/Mst.h rename to src/Mst.h diff --git a/src/BayesNet/Network.cc b/src/Network.cc similarity index 99% rename from src/BayesNet/Network.cc rename to src/Network.cc index e8a7da8..32e6ecf 100644 --- a/src/BayesNet/Network.cc +++ b/src/Network.cc @@ -238,6 +238,7 @@ namespace bayesnet { return predictions; } // Return mxn std::vector of probabilities + // tsamples is nxm std::vector of samples std::vector> Network::predict_proba(const std::vector>& tsamples) { if (!fitted) { diff --git a/src/BayesNet/Network.h b/src/Network.h similarity index 100% rename from src/BayesNet/Network.h rename to src/Network.h diff --git a/src/BayesNet/Node.cc b/src/Node.cc similarity index 100% rename from src/BayesNet/Node.cc rename to src/Node.cc diff --git a/src/BayesNet/Node.h b/src/Node.h similarity index 100% rename from src/BayesNet/Node.h rename to src/Node.h diff --git a/src/BayesNet/Proposal.cc b/src/Proposal.cc similarity index 100% rename from src/BayesNet/Proposal.cc rename to src/Proposal.cc diff --git a/src/BayesNet/Proposal.h b/src/Proposal.h similarity index 100% rename from src/BayesNet/Proposal.h rename to src/Proposal.h diff --git a/src/BayesNet/SPODE.cc b/src/SPODE.cc similarity index 100% rename from src/BayesNet/SPODE.cc rename to src/SPODE.cc diff --git a/src/BayesNet/SPODE.h b/src/SPODE.h similarity index 100% rename from src/BayesNet/SPODE.h rename to src/SPODE.h diff --git a/src/BayesNet/SPODELd.cc b/src/SPODELd.cc similarity index 100% rename from src/BayesNet/SPODELd.cc rename to src/SPODELd.cc diff --git a/src/BayesNet/SPODELd.h b/src/SPODELd.h similarity index 100% rename from src/BayesNet/SPODELd.h rename to src/SPODELd.h diff --git a/src/BayesNet/TAN.cc b/src/TAN.cc similarity index 100% rename from src/BayesNet/TAN.cc rename to src/TAN.cc diff --git a/src/BayesNet/TAN.h b/src/TAN.h similarity index 100% rename from src/BayesNet/TAN.h rename to src/TAN.h diff --git a/src/BayesNet/TANLd.cc b/src/TANLd.cc similarity index 100% rename from src/BayesNet/TANLd.cc rename to src/TANLd.cc diff --git a/src/BayesNet/TANLd.h b/src/TANLd.h similarity index 100% rename from src/BayesNet/TANLd.h rename to src/TANLd.h diff --git a/src/bayesnetUtils.cc b/src/bayesnetUtils.cc new file mode 100644 index 0000000..4b4e3c2 --- /dev/null +++ b/src/bayesnetUtils.cc @@ -0,0 +1,39 @@ + +#include "bayesnetUtils.h" +namespace bayesnet { + // Return the indices in descending order + std::vector argsort(std::vector& nums) + { + int n = nums.size(); + std::vector indices(n); + iota(indices.begin(), indices.end(), 0); + sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); + return indices; + } + template + std::vector> tensorToVector(torch::Tensor& dtensor) + { + // convert mxn tensor to nxm std::vector + std::vector> result; + // Iterate over cols + for (int i = 0; i < dtensor.size(1); ++i) { + auto col_tensor = dtensor.index({ "...", i }); + auto col = std::vector(col_tensor.data_ptr(), 
col_tensor.data_ptr() + dtensor.size(0)); + result.push_back(col); + } + return result; + } + torch::Tensor vectorToTensor(std::vector>& vector) + { + // convert nxm std::vector to mxn tensor + long int m = vector[0].size(); + long int n = vector.size(); + auto tensor = torch::zeros({ m, n }, torch::kInt32); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + tensor[i][j] = vector[j][i]; + } + } + return tensor; + } +} \ No newline at end of file diff --git a/src/BayesNet/bayesnetUtils.h b/src/bayesnetUtils.h similarity index 53% rename from src/BayesNet/bayesnetUtils.h rename to src/bayesnetUtils.h index 4f477a0..2790d16 100644 --- a/src/BayesNet/bayesnetUtils.h +++ b/src/bayesnetUtils.h @@ -4,6 +4,8 @@ #include namespace bayesnet { std::vector argsort(std::vector& nums); - std::vector> tensorToVector(torch::Tensor& tensor); + template + std::vector> tensorToVector(torch::Tensor& dtensor); + torch::Tensor vectorToTensor(std::vector>& vector); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fca574b..efccc48 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ if(ENABLE_TESTING) set(TEST_BAYESNET "unit_tests_bayesnet") include_directories( - ${BayesNet_SOURCE_DIR}/src/BayesNet + ${BayesNet_SOURCE_DIR}/src ${BayesNet_SOURCE_DIR}/src/Platform ${BayesNet_SOURCE_DIR}/lib/Files ${BayesNet_SOURCE_DIR}/lib/mdlp @@ -11,6 +11,6 @@ if(ENABLE_TESTING) ) set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES}) add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET}) - target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) + target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain ) add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET}) endif(ENABLE_TESTING) diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 1832cd9..3ecf4f3 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -21,104 +21,104 @@ TEST_CASE("Library check version", "[BayesNet]") auto clf = bayesnet::KDB(2); REQUIRE(clf.getVersion() == "1.0.2"); } -TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") -{ - map , float> scores = { - // Diabetes - {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, - {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, - // Ecoli - {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, - {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, - // Glass - {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, - {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, - // Iris - {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, - {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 
0.98f} - }; +// TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") +// { +// map , float> scores = { +// // Diabetes +// {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, +// {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, +// // Ecoli +// {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, +// {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, +// // Glass +// {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, +// {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, +// // Iris +// {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, +// {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} +// }; - std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); - auto raw = RawDatasets(file_name, false); +// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); +// auto raw = RawDatasets(file_name, false); - SECTION("Test TAN classifier (" + file_name + ")") - { - auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - //scores[{file_name, "TAN"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); - } - SECTION("Test TANLd classifier (" + file_name + ")") - { - auto clf = bayesnet::TANLd(); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - //scores[{file_name, "TANLd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); - } - SECTION("Test KDB classifier (" + file_name + ")") - { - auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - //scores[{file_name, "KDB"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" - }]).epsilon(raw.epsilon)); - } - SECTION("Test KDBLd classifier (" + file_name + ")") - { - auto clf = bayesnet::KDBLd(2); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - //scores[{file_name, "KDBLd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" - }]).epsilon(raw.epsilon)); - } - SECTION("Test SPODE classifier (" + file_name + ")") - { - auto clf = bayesnet::SPODE(1); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "SPODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); - } - SECTION("Test SPODELd classifier (" + file_name + ")") - { - auto clf = bayesnet::SPODELd(1); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - // scores[{file_name, "SPODELd"}] = score; - REQUIRE(score == 
Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); - } - SECTION("Test AODE classifier (" + file_name + ")") - { - auto clf = bayesnet::AODE(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "AODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); - } - SECTION("Test AODELd classifier (" + file_name + ")") - { - auto clf = bayesnet::AODELd(); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - // scores[{file_name, "AODELd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); - } - SECTION("Test BoostAODE classifier (" + file_name + ")") - { - auto clf = bayesnet::BoostAODE(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "BoostAODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); - } - // for (auto scores : scores) { - // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; - // } -} +// SECTION("Test TAN classifier (" + file_name + ")") +// { +// auto clf = bayesnet::TAN(); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// auto score = clf.score(raw.Xv, raw.yv); +// //scores[{file_name, "TAN"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test TANLd classifier (" + file_name + ")") +// { +// auto clf = bayesnet::TANLd(); +// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); +// auto score = clf.score(raw.Xt, raw.yt); +// //scores[{file_name, "TANLd"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test KDB classifier (" + file_name + ")") +// { +// auto clf = bayesnet::KDB(2); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// auto score = clf.score(raw.Xv, raw.yv); +// //scores[{file_name, "KDB"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" +// }]).epsilon(raw.epsilon)); +// } +// SECTION("Test KDBLd classifier (" + file_name + ")") +// { +// auto clf = bayesnet::KDBLd(2); +// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); +// auto score = clf.score(raw.Xt, raw.yt); +// //scores[{file_name, "KDBLd"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" +// }]).epsilon(raw.epsilon)); +// } +// SECTION("Test SPODE classifier (" + file_name + ")") +// { +// auto clf = bayesnet::SPODE(1); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// auto score = clf.score(raw.Xv, raw.yv); +// // scores[{file_name, "SPODE"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test SPODELd classifier (" + file_name + ")") +// { +// auto clf = bayesnet::SPODELd(1); +// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); +// auto score = clf.score(raw.Xt, raw.yt); +// // scores[{file_name, "SPODELd"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test AODE classifier (" + file_name + ")") +// { +// auto clf = bayesnet::AODE(); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); 
+// auto score = clf.score(raw.Xv, raw.yv); +// // scores[{file_name, "AODE"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test AODELd classifier (" + file_name + ")") +// { +// auto clf = bayesnet::AODELd(); +// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); +// auto score = clf.score(raw.Xt, raw.yt); +// // scores[{file_name, "AODELd"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); +// } +// SECTION("Test BoostAODE classifier (" + file_name + ")") +// { +// auto clf = bayesnet::BoostAODE(); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// auto score = clf.score(raw.Xv, raw.yv); +// // scores[{file_name, "BoostAODE"}] = score; +// REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); +// } +// // for (auto scores : scores) { +// // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; +// // } +// } TEST_CASE("Models features", "[BayesNet]") { auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", @@ -133,6 +133,8 @@ TEST_CASE("Models features", "[BayesNet]") clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); + REQUIRE(clf.getNumberOfStates() == 19); + REQUIRE(clf.getClassNumStates() == 3); REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); REQUIRE(clf.graph("Test") == graph); } @@ -156,48 +158,178 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]") REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") +// TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") +// { +// auto raw = RawDatasets("diabetes", true); +// auto clf = bayesnet::BoostAODE(); +// clf.setHyperparameters({ +// {"ascending",true}, +// {"convergence", true}, +// {"repeatSparent",true}, +// {"select_features","CFS"}, +// }); +// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); +// REQUIRE(clf.getNumberOfNodes() == 72); +// REQUIRE(clf.getNumberOfEdges() == 120); +// REQUIRE(clf.getNotes().size() == 3); +// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); +// REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); +// REQUIRE(clf.getNotes()[2] == "Number of models: 8"); +// auto score = clf.score(raw.Xv, raw.yv); +// auto scoret = clf.score(raw.Xt, raw.yt); +// REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); +// REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); +// } +TEST_CASE("Model predict_proba", "[BayesNet]") { - auto raw = RawDatasets("diabetes", true); - auto clf = bayesnet::BoostAODE(); - clf.setHyperparameters({ - {"ascending",true}, - {"convergence", true}, - {"repeatSparent",true}, - {"select_features","CFS"}, + // std::string model = GENERATE("TAN", "SPODE", "BoostAODEprobabilities", "BoostAODEvoting"); + std::string model = GENERATE("TAN", "SPODE"); + std::cout << string(100, '*') << std::endl; + std::cout << "************************************* CHANGE MODEL 
GENERATE ****************************************" << std::endl; + std::cout << string(100, '*') << std::endl; + auto res_prob_tan = std::vector>({ + { 0.00375671, 0.994457, 0.00178621 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00137462, 0.992734, 0.00589123 }, + { 0.00218225, 0.992877, 0.00494094 }, + { 0.00494209, 0.0978534, 0.897205 }, + { 0.0054192, 0.974275, 0.0203054 }, + { 0.00433012, 0.985054, 0.0106159 }, + { 0.000860806, 0.996922, 0.00221698 } }); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - REQUIRE(clf.getNumberOfNodes() == 72); - REQUIRE(clf.getNumberOfEdges() == 120); - REQUIRE(clf.getNotes().size() == 3); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); - REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); - REQUIRE(clf.getNotes()[2] == "Number of models: 8"); - auto score = clf.score(raw.Xv, raw.yv); - auto scoret = clf.score(raw.Xt, raw.yt); - REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); - REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); + auto res_prob_spode = std::vector>({ + {0.00419032, 0.994247, 0.00156265}, + {0.00172808, 0.993433, 0.00483862}, + {0.00172808, 0.993433, 0.00483862}, + {0.00172808, 0.993433, 0.00483862}, + {0.00279211, 0.993737, 0.00347077}, + {0.0120674, 0.357909, 0.630024}, + {0.00386239, 0.913919, 0.0822185}, + {0.0244389, 0.966447, 0.00911374}, + {0.003135, 0.991799, 0.0050661} + }); + auto res_prob_baode = std::vector>({ + {0.00803291, 0.9676, 0.0243672}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00189227, 0.859575, 0.138533}, + {0.0118341, 0.442149, 0.546017}, + {0.0216135, 0.785781, 0.192605}, + {0.0204803, 0.844276, 0.135244}, + {0.00576313, 0.961665, 0.0325716}, + }); + std::map>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_baode } }; + std::map models = { {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} }; + int init_index = 78; + auto raw = RawDatasets("iris", true); + + SECTION("Test " + model + " predict_proba") + { + auto clf = models[model]; + clf->fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto y_pred_proba = clf->predict_proba(raw.Xv); + auto y_pred = clf->predict(raw.Xv); + auto yt_pred = clf->predict(raw.Xt); + auto yt_pred_proba = clf->predict_proba(raw.Xt); + REQUIRE(y_pred.size() == yt_pred.size(0)); + REQUIRE(y_pred.size() == y_pred_proba.size()); + REQUIRE(y_pred.size() == yt_pred_proba.size(0)); + REQUIRE(y_pred.size() == raw.yv.size()); + REQUIRE(y_pred_proba[0].size() == 3); + REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); + for (int i = 0; i < y_pred_proba.size(); ++i) { + auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); + int predictedClass = distance(y_pred_proba[i].begin(), maxElem); + REQUIRE(predictedClass == y_pred[i]); + // Check predict is coherent with predict_proba + REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); + } + // Check predict_proba values for vectors and tensors + for (int i = 0; i < res_prob.size(); i++) { + REQUIRE(y_pred[i] == yt_pred[i].item()); + for (int j = 0; j < 3; j++) { + REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); + REQUIRE(res_prob[model][i][j] 
== Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); + } + } + delete clf; + } } -TEST_CASE("TAN predict_proba", "[BayesNet]") +TEST_CASE("BoostAODE predict_proba proba", "[BayesNet]") { auto res_prob = std::vector>({ - { 0.00375671, 0.994457, 0.00178621 }, - { 0.00137462, 0.992734, 0.00589123 }, - { 0.00137462, 0.992734, 0.00589123 }, - { 0.00137462, 0.992734, 0.00589123 }, - { 0.00218225, 0.992877, 0.00494094 }, - { 0.00494209, 0.0978534, 0.897205 }, - { 0.0054192, 0.974275, 0.0203054 }, - { 0.00433012, 0.985054, 0.0106159 }, - { 0.000860806, 0.996922, 0.00221698 } + {0.00803291, 0.9676, 0.0243672}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00189227, 0.859575, 0.138533}, + {0.0118341, 0.442149, 0.546017}, + {0.0216135, 0.785781, 0.192605}, + {0.0204803, 0.844276, 0.135244}, + {0.00576313, 0.961665, 0.0325716}, }); int init_index = 78; auto raw = RawDatasets("iris", true); - auto clf = bayesnet::TAN(); + auto clf = bayesnet::BoostAODE(false); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); auto y_pred_proba = clf.predict_proba(raw.Xv); auto y_pred = clf.predict(raw.Xv); + auto yt_pred = clf.predict(raw.Xt); auto yt_pred_proba = clf.predict_proba(raw.Xt); + std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; + REQUIRE(y_pred.size() == yt_pred.size(0)); + REQUIRE(y_pred.size() == y_pred_proba.size()); + REQUIRE(y_pred.size() == yt_pred_proba.size(0)); + REQUIRE(y_pred.size() == raw.yv.size()); + REQUIRE(y_pred_proba[0].size() == 3); + REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); + for (int i = 0; i < y_pred_proba.size(); ++i) { + // Check predict is coherent with predict_proba + auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); + int predictedClass = distance(y_pred_proba[i].begin(), maxElem); + REQUIRE(predictedClass == y_pred[i]); + REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); + } + // Check predict_proba values for vectors and tensors + for (int i = 0; i < res_prob.size(); i++) { + REQUIRE(y_pred[i] == yt_pred[i].item()); + for (int j = 0; j < 3; j++) { + REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); + REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); + } + } + // for (int i = 0; i < res_prob.size(); i++) { + // for (int j = 0; j < 3; j++) { + // std::cout << y_pred_proba[i + init_index][j] << " "; + // } + // std::cout << std::endl; + // } +} +TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]") +{ + auto res_prob = std::vector>({ + {0.00803291, 0.9676, 0.0243672}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00398714, 0.945126, 0.050887}, + {0.00189227, 0.859575, 0.138533}, + {0.0118341, 0.442149, 0.546017}, + {0.0216135, 0.785781, 0.192605}, + {0.0204803, 0.844276, 0.135244}, + {0.00576313, 0.961665, 0.0325716}, + }); + int init_index = 78; + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::BoostAODE(true); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto y_pred_proba = clf.predict_proba(raw.Xv); + auto y_pred = clf.predict(raw.Xv); + auto yt_pred = clf.predict(raw.Xt); + auto yt_pred_proba = clf.predict_proba(raw.Xt); + std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; + REQUIRE(y_pred.size() == yt_pred.size(0)); REQUIRE(y_pred.size() == y_pred_proba.size()); 
REQUIRE(y_pred.size() == yt_pred_proba.size(0)); REQUIRE(y_pred.size() == raw.yv.size()); @@ -208,53 +340,24 @@ TEST_CASE("TAN predict_proba", "[BayesNet]") int predictedClass = distance(y_pred_proba[i].begin(), maxElem); REQUIRE(predictedClass == y_pred[i]); // Check predict is coherent with predict_proba + for (int k = 0; k < yt_pred_proba[i].size(0); k++) { + std::cout << yt_pred_proba[i][k].item() << " "; + } + std::cout << "-> " << y_pred[i] << std::endl; REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); } // Check predict_proba values for vectors and tensors for (int i = 0; i < res_prob.size(); i++) { + REQUIRE(y_pred[i] == yt_pred[i].item()); for (int j = 0; j < 3; j++) { REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); } } -} -TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]") -{ - // auto res_prob = std::vector>({ - // { 0.00375671, 0.994457, 0.00178621 }, - // { 0.00137462, 0.992734, 0.00589123 }, - // { 0.00137462, 0.992734, 0.00589123 }, - // { 0.00137462, 0.992734, 0.00589123 }, - // { 0.00218225, 0.992877, 0.00494094 }, - // { 0.00494209, 0.0978534, 0.897205 }, - // { 0.0054192, 0.974275, 0.0203054 }, - // { 0.00433012, 0.985054, 0.0106159 }, - // { 0.000860806, 0.996922, 0.00221698 } - // }); - // int init_index = 78; - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::BoostAODE(true); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto y_pred_proba = clf.predict_proba(raw.Xv); - auto y_pred = clf.predict(raw.Xv); - auto yt_pred_proba = clf.predict_proba(raw.Xt); - // REQUIRE(y_pred.size() == y_pred_proba.size()); - // REQUIRE(y_pred.size() == yt_pred_proba.size(0)); - // REQUIRE(y_pred.size() == raw.yv.size()); - // REQUIRE(y_pred_proba[0].size() == 3); - // REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - // for (int i = 0; i < y_pred_proba.size(); ++i) { - // auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); - // int predictedClass = distance(y_pred_proba[i].begin(), maxElem); - // REQUIRE(predictedClass == y_pred[i]); - // // Check predict is coherent with predict_proba - // REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); - // } - // // Check predict_proba values for vectors and tensors // for (int i = 0; i < res_prob.size(); i++) { // for (int j = 0; j < 3; j++) { - // REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); - // REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); + // std::cout << y_pred_proba[i + init_index][j] << " "; // } + // std::cout << std::endl; // } } -- 2.45.2 From 8477698d8dc04410b3983c99bc86943c4bbcd255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 23 Feb 2024 23:11:14 +0100 Subject: [PATCH 5/7] Complete predict & predict_proba with voting & probabilities --- src/Ensemble.cc | 49 +----- src/Ensemble.h | 1 - src/bayesnetUtils.cc | 29 +++- src/bayesnetUtils.h | 6 +- tests/TestBayesModels.cc | 359 ++++++++++++++------------------------- 5 files changed, 161 insertions(+), 283 deletions(-) diff --git a/src/Ensemble.cc b/src/Ensemble.cc index cebcb9a..b0b8d9c 100644 --- a/src/Ensemble.cc +++ b/src/Ensemble.cc @@ -51,32 +51,6 @@ namespace bayesnet { result /= sum; return result; } - std::vector> Ensemble::voting(std::vector>& votes) - { - // Convert n_models x m matrix 
to a m x n_class_states matrix - std::vector> y_pred_final; - int numClasses = states.at(className).size(); - auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); - // y_pred is m x n_models with the prediction of every model for each sample - std::cout << std::string(80, '*') << std::endl; - for (int i = 0; i < votes.size(); ++i) { - // n_votes store in each index (value of class) the significance added by each model - // i.e. n_votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions - std::vector n_votes(numClasses, 0.0); - for (int j = 0; j < n_models; ++j) { - n_votes[votes[i][j]] += significanceModels.at(j); - } - for (auto& x : n_votes) { - std::cout << x << " "; - } - std::cout << std::endl; - // To only do one division per result and gain precision - std::transform(n_votes.begin(), n_votes.end(), n_votes.begin(), [sum](double x) { return x / sum; }); - y_pred_final.push_back(n_votes); - } - std::cout << std::string(80, '*') << std::endl; - return y_pred_final; - } std::vector> Ensemble::predict_proba(std::vector>& X) { if (!fitted) { @@ -94,7 +68,6 @@ namespace bayesnet { std::vector Ensemble::predict(std::vector>& X) { auto res = predict_proba(X); - std::cout << "res: " << res.size() << ", " << res[0].size() << std::endl; return compute_arg_max(res); } torch::Tensor Ensemble::predict(torch::Tensor& X) @@ -151,6 +124,13 @@ namespace bayesnet { } return y_pred; } + std::vector> Ensemble::predict_average_voting(std::vector>& X) + { + torch::Tensor Xt = bayesnet::vectorToTensor(X, false); + auto y_pred = predict_average_voting(Xt); + std::vector> result = tensorToVectorDouble(y_pred); + return result; + } torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X) { // Build a m x n_models tensor with the predictions of each model @@ -169,21 +149,6 @@ namespace bayesnet { } return voting(y_pred); } - std::vector> Ensemble::predict_average_voting(std::vector>& X) - { - auto Xt = vectorToTensor(X); - auto y_pred = predict_average_voting(Xt); - auto res = voting(y_pred); - std::vector> result; - // Iterate over cols - for (int i = 0; i < res.size(1); ++i) { - auto col_tensor = res.index({ "...", i }); - auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + res.size(0)); - result.push_back(col); - } - return result; - //return tensorToVector(res); - } float Ensemble::score(torch::Tensor& X, torch::Tensor& y) { auto y_pred = predict(X); diff --git a/src/Ensemble.h b/src/Ensemble.h index dd14046..cd42cee 100644 --- a/src/Ensemble.h +++ b/src/Ensemble.h @@ -36,7 +36,6 @@ namespace bayesnet { torch::Tensor compute_arg_max(torch::Tensor& X); std::vector compute_arg_max(std::vector>& X); torch::Tensor voting(torch::Tensor& votes); - std::vector> voting(std::vector>& votes); unsigned n_models; std::vector> models; std::vector significanceModels; diff --git a/src/bayesnetUtils.cc b/src/bayesnetUtils.cc index 4b4e3c2..f620983 100644 --- a/src/bayesnetUtils.cc +++ b/src/bayesnetUtils.cc @@ -10,28 +10,39 @@ namespace bayesnet { sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); return indices; } - template - std::vector> tensorToVector(torch::Tensor& dtensor) + std::vector> tensorToVector(torch::Tensor& dtensor) { // convert mxn tensor to nxm std::vector - std::vector> result; + std::vector> result; // Iterate over cols for (int i = 0; i < dtensor.size(1); ++i) { auto col_tensor = dtensor.index({ "...", i }); - auto col = std::vector(col_tensor.data_ptr(), 
col_tensor.data_ptr() + dtensor.size(0)); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + dtensor.size(0)); result.push_back(col); } return result; } - torch::Tensor vectorToTensor(std::vector>& vector) + std::vector> tensorToVectorDouble(torch::Tensor& dtensor) { - // convert nxm std::vector to mxn tensor - long int m = vector[0].size(); - long int n = vector.size(); + // convert mxn tensor to mxn std::vector + std::vector> result; + // Iterate over cols + for (int i = 0; i < dtensor.size(0); ++i) { + auto col_tensor = dtensor.index({ i, "..." }); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + dtensor.size(1)); + result.push_back(col); + } + return result; + } + torch::Tensor vectorToTensor(std::vector>& vector, bool transpose) + { + // convert nxm std::vector to mxn tensor if transpose + long int m = transpose ? vector[0].size() : vector.size(); + long int n = transpose ? vector.size() : vector[0].size(); auto tensor = torch::zeros({ m, n }, torch::kInt32); for (int i = 0; i < m; ++i) { for (int j = 0; j < n; ++j) { - tensor[i][j] = vector[j][i]; + tensor[i][j] = transpose ? vector[j][i] : vector[i][j]; } } return tensor; diff --git a/src/bayesnetUtils.h b/src/bayesnetUtils.h index 2790d16..0e741be 100644 --- a/src/bayesnetUtils.h +++ b/src/bayesnetUtils.h @@ -4,8 +4,8 @@ #include namespace bayesnet { std::vector argsort(std::vector& nums); - template - std::vector> tensorToVector(torch::Tensor& dtensor); - torch::Tensor vectorToTensor(std::vector>& vector); + std::vector> tensorToVector(torch::Tensor& dtensor); + std::vector> tensorToVectorDouble(torch::Tensor& dtensor); + torch::Tensor vectorToTensor(std::vector>& vector, bool transpose = true); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 3ecf4f3..f8ae718 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -21,104 +21,104 @@ TEST_CASE("Library check version", "[BayesNet]") auto clf = bayesnet::KDB(2); REQUIRE(clf.getVersion() == "1.0.2"); } -// TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") -// { -// map , float> scores = { -// // Diabetes -// {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, -// {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, -// // Ecoli -// {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, -// {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, -// // Glass -// {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, -// {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, -// // Iris -// {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, -// {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} -// }; +TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") +{ + map , float> scores = { + // Diabetes + {{"diabetes", 
"AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, + {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, + // Ecoli + {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, + {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, + // Glass + {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, + {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, + // Iris + {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, + {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} + }; -// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); -// auto raw = RawDatasets(file_name, false); + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + auto raw = RawDatasets(file_name, false); -// SECTION("Test TAN classifier (" + file_name + ")") -// { -// auto clf = bayesnet::TAN(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// //scores[{file_name, "TAN"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test TANLd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::TANLd(); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// //scores[{file_name, "TANLd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test KDB classifier (" + file_name + ")") -// { -// auto clf = bayesnet::KDB(2); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// //scores[{file_name, "KDB"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" -// }]).epsilon(raw.epsilon)); -// } -// SECTION("Test KDBLd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::KDBLd(2); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// //scores[{file_name, "KDBLd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" -// }]).epsilon(raw.epsilon)); -// } -// SECTION("Test SPODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::SPODE(1); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "SPODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test SPODELd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::SPODELd(1); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// // scores[{file_name, "SPODELd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, 
"SPODELd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test AODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::AODE(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "AODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test AODELd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::AODELd(); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// // scores[{file_name, "AODELd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test BoostAODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::BoostAODE(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "BoostAODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); -// } -// // for (auto scores : scores) { -// // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; -// // } -// } + SECTION("Test TAN classifier (" + file_name + ")") + { + auto clf = bayesnet::TAN(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + //scores[{file_name, "TAN"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); + } + SECTION("Test TANLd classifier (" + file_name + ")") + { + auto clf = bayesnet::TANLd(); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + //scores[{file_name, "TANLd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); + } + SECTION("Test KDB classifier (" + file_name + ")") + { + auto clf = bayesnet::KDB(2); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + //scores[{file_name, "KDB"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" + }]).epsilon(raw.epsilon)); + } + SECTION("Test KDBLd classifier (" + file_name + ")") + { + auto clf = bayesnet::KDBLd(2); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + //scores[{file_name, "KDBLd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" + }]).epsilon(raw.epsilon)); + } + SECTION("Test SPODE classifier (" + file_name + ")") + { + auto clf = bayesnet::SPODE(1); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "SPODE"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); + } + SECTION("Test SPODELd classifier (" + file_name + ")") + { + auto clf = bayesnet::SPODELd(1); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + // scores[{file_name, "SPODELd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); + } + SECTION("Test AODE classifier (" + file_name + ")") + { + auto clf = bayesnet::AODE(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "AODE"}] = score; 
+ REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); + } + SECTION("Test AODELd classifier (" + file_name + ")") + { + auto clf = bayesnet::AODELd(); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + // scores[{file_name, "AODELd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); + } + SECTION("Test BoostAODE classifier (" + file_name + ")") + { + auto clf = bayesnet::BoostAODE(true); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "BoostAODE"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); + } + // for (auto scores : scores) { + // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; + // } +} TEST_CASE("Models features", "[BayesNet]") { auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", @@ -158,35 +158,31 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]") REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -// TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") -// { -// auto raw = RawDatasets("diabetes", true); -// auto clf = bayesnet::BoostAODE(); -// clf.setHyperparameters({ -// {"ascending",true}, -// {"convergence", true}, -// {"repeatSparent",true}, -// {"select_features","CFS"}, -// }); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// REQUIRE(clf.getNumberOfNodes() == 72); -// REQUIRE(clf.getNumberOfEdges() == 120); -// REQUIRE(clf.getNotes().size() == 3); -// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); -// REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); -// REQUIRE(clf.getNotes()[2] == "Number of models: 8"); -// auto score = clf.score(raw.Xv, raw.yv); -// auto scoret = clf.score(raw.Xt, raw.yt); -// REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); -// REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); -// } +TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") +{ + auto raw = RawDatasets("diabetes", true); + auto clf = bayesnet::BoostAODE(true); + clf.setHyperparameters({ + {"ascending",true}, + {"convergence", true}, + {"repeatSparent",true}, + {"select_features","CFS"}, + }); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + REQUIRE(clf.getNumberOfNodes() == 72); + REQUIRE(clf.getNumberOfEdges() == 120); + REQUIRE(clf.getNotes().size() == 3); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); + REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); + REQUIRE(clf.getNotes()[2] == "Number of models: 8"); + auto score = clf.score(raw.Xv, raw.yv); + auto scoret = clf.score(raw.Xt, raw.yt); + REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); +} TEST_CASE("Model predict_proba", "[BayesNet]") { - // std::string model = GENERATE("TAN", "SPODE", "BoostAODEprobabilities", "BoostAODEvoting"); - std::string model = GENERATE("TAN", "SPODE"); - std::cout << string(100, '*') << std::endl; - std::cout << "************************************* CHANGE MODEL GENERATE 
****************************************" << std::endl; - std::cout << string(100, '*') << std::endl; + std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting"); auto res_prob_tan = std::vector>({ { 0.00375671, 0.994457, 0.00178621 }, { 0.00137462, 0.992734, 0.00589123 }, @@ -220,7 +216,18 @@ TEST_CASE("Model predict_proba", "[BayesNet]") {0.0204803, 0.844276, 0.135244}, {0.00576313, 0.961665, 0.0325716}, }); - std::map>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_baode } }; + auto res_prob_voting = std::vector>({ + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 0.447909, 0.552091}, + {0, 0.811482, 0.188517}, + {0, 1, 0}, + {0, 1, 0} + }); + std::map>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_voting } }; std::map models = { {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} }; int init_index = 78; auto raw = RawDatasets("iris", true); @@ -257,107 +264,3 @@ TEST_CASE("Model predict_proba", "[BayesNet]") delete clf; } } -TEST_CASE("BoostAODE predict_proba proba", "[BayesNet]") -{ - auto res_prob = std::vector>({ - {0.00803291, 0.9676, 0.0243672}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00189227, 0.859575, 0.138533}, - {0.0118341, 0.442149, 0.546017}, - {0.0216135, 0.785781, 0.192605}, - {0.0204803, 0.844276, 0.135244}, - {0.00576313, 0.961665, 0.0325716}, - }); - int init_index = 78; - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::BoostAODE(false); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto y_pred_proba = clf.predict_proba(raw.Xv); - auto y_pred = clf.predict(raw.Xv); - auto yt_pred = clf.predict(raw.Xt); - auto yt_pred_proba = clf.predict_proba(raw.Xt); - std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; - REQUIRE(y_pred.size() == yt_pred.size(0)); - REQUIRE(y_pred.size() == y_pred_proba.size()); - REQUIRE(y_pred.size() == yt_pred_proba.size(0)); - REQUIRE(y_pred.size() == raw.yv.size()); - REQUIRE(y_pred_proba[0].size() == 3); - REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - for (int i = 0; i < y_pred_proba.size(); ++i) { - // Check predict is coherent with predict_proba - auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); - int predictedClass = distance(y_pred_proba[i].begin(), maxElem); - REQUIRE(predictedClass == y_pred[i]); - REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); - } - // Check predict_proba values for vectors and tensors - for (int i = 0; i < res_prob.size(); i++) { - REQUIRE(y_pred[i] == yt_pred[i].item()); - for (int j = 0; j < 3; j++) { - REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); - REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); - } - } - // for (int i = 0; i < res_prob.size(); i++) { - // for (int j = 0; j < 3; j++) { - // std::cout << y_pred_proba[i + init_index][j] << " "; - // } - // std::cout << std::endl; - // } -} -TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]") -{ - auto res_prob = std::vector>({ - {0.00803291, 0.9676, 0.0243672}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 
0.945126, 0.050887}, - {0.00189227, 0.859575, 0.138533}, - {0.0118341, 0.442149, 0.546017}, - {0.0216135, 0.785781, 0.192605}, - {0.0204803, 0.844276, 0.135244}, - {0.00576313, 0.961665, 0.0325716}, - }); - int init_index = 78; - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::BoostAODE(true); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto y_pred_proba = clf.predict_proba(raw.Xv); - auto y_pred = clf.predict(raw.Xv); - auto yt_pred = clf.predict(raw.Xt); - auto yt_pred_proba = clf.predict_proba(raw.Xt); - std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; - REQUIRE(y_pred.size() == yt_pred.size(0)); - REQUIRE(y_pred.size() == y_pred_proba.size()); - REQUIRE(y_pred.size() == yt_pred_proba.size(0)); - REQUIRE(y_pred.size() == raw.yv.size()); - REQUIRE(y_pred_proba[0].size() == 3); - REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - for (int i = 0; i < y_pred_proba.size(); ++i) { - auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); - int predictedClass = distance(y_pred_proba[i].begin(), maxElem); - REQUIRE(predictedClass == y_pred[i]); - // Check predict is coherent with predict_proba - for (int k = 0; k < yt_pred_proba[i].size(0); k++) { - std::cout << yt_pred_proba[i][k].item() << " "; - } - std::cout << "-> " << y_pred[i] << std::endl; - REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); - } - // Check predict_proba values for vectors and tensors - for (int i = 0; i < res_prob.size(); i++) { - REQUIRE(y_pred[i] == yt_pred[i].item()); - for (int j = 0; j < 3; j++) { - REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); - REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); - } - } - // for (int i = 0; i < res_prob.size(); i++) { - // for (int j = 0; j < 3; j++) { - // std::cout << y_pred_proba[i + init_index][j] << " "; - // } - // std::cout << std::endl; - // } -} -- 2.45.2 From 02e456befb7bd877a90d0ec6f8ec86cbdd108b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 24 Feb 2024 18:36:09 +0100 Subject: [PATCH 6/7] Complete predict & predict_proba in ensemble --- CHANGELOG.md | 2 + CMakeLists.txt | 2 +- src/AODE.cc | 18 +++++- src/AODE.h | 9 +-- src/AODELd.cc | 17 +++++- src/AODELd.h | 12 ++-- src/BoostAODE.cc | 15 ++++- src/BoostAODE.h | 2 +- tests/TestBayesModels.cc | 128 +++++++++++++-------------------------- 9 files changed, 104 insertions(+), 101 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24c41a3..56f6cb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - predict_proba method in Classifier - predict_proba method in BoostAODE - predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting) +- hyperparameter predict_voting to AODE, AODELd and BoostAODE (Ensemble child classes) +- tests to check predict & predict_proba coherence ## [1.0.2] - 2024-02-20 diff --git a/CMakeLists.txt b/CMakeLists.txt index 95cd197..9d42041 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(BayesNet - VERSION 1.0.2 + VERSION 1.0.3 DESCRIPTION "Bayesian Network and basic classifiers Library." 
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" LANGUAGES CXX diff --git a/src/AODE.cc b/src/AODE.cc index 850980c..f984f9d 100644 --- a/src/AODE.cc +++ b/src/AODE.cc @@ -1,10 +1,26 @@ #include "AODE.h" namespace bayesnet { - AODE::AODE() : Ensemble() {} + AODE::AODE(bool predict_voting) : Ensemble(predict_voting) + { + validHyperparameters = { "predict_voting" }; + + } + void AODE::setHyperparameters(const nlohmann::json& hyperparameters_) + { + auto hyperparameters = hyperparameters_; + if (hyperparameters.contains("predict_voting")) { + predict_voting = hyperparameters["predict_voting"]; + hyperparameters.erase("predict_voting"); + } + if (!hyperparameters.empty()) { + throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump()); + } + } void AODE::buildModel(const torch::Tensor& weights) { models.clear(); + significanceModels.clear(); for (int i = 0; i < features.size(); ++i) { models.push_back(std::make_unique(i)); } diff --git a/src/AODE.h b/src/AODE.h index 98f87fe..b8bec94 100644 --- a/src/AODE.h +++ b/src/AODE.h @@ -4,12 +4,13 @@ #include "SPODE.h" namespace bayesnet { class AODE : public Ensemble { + public: + AODE(bool predict_voting = true); + virtual ~AODE() {}; + void setHyperparameters(const nlohmann::json& hyperparameters) override; + std::vector graph(const std::string& title = "AODE") const override; protected: void buildModel(const torch::Tensor& weights) override; - public: - AODE(); - virtual ~AODE() {}; - std::vector graph(const std::string& title = "AODE") const override; }; } #endif \ No newline at end of file diff --git a/src/AODELd.cc b/src/AODELd.cc index 776e37c..022138a 100644 --- a/src/AODELd.cc +++ b/src/AODELd.cc @@ -1,7 +1,22 @@ #include "AODELd.h" namespace bayesnet { - AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} + AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) + { + validHyperparameters = { "predict_voting" }; + + } + void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_) + { + auto hyperparameters = hyperparameters_; + if (hyperparameters.contains("predict_voting")) { + predict_voting = hyperparameters["predict_voting"]; + hyperparameters.erase("predict_voting"); + } + if (!hyperparameters.empty()) { + throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump()); + } + } AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) { checkInput(X_, y_); diff --git a/src/AODELd.h b/src/AODELd.h index c8c3347..dd8f29f 100644 --- a/src/AODELd.h +++ b/src/AODELd.h @@ -6,15 +6,15 @@ namespace bayesnet { class AODELd : public Ensemble, public Proposal { + public: + AODELd(bool predict_voting = true); + virtual ~AODELd() = default; + AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) override; + void setHyperparameters(const nlohmann::json& hyperparameters) override; + std::vector graph(const std::string& name = "AODELd") const override; protected: void trainModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override; - public: - AODELd(); - AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) override; - virtual ~AODELd() = default; - std::vector graph(const std::string& name = "AODELd") const override; - static inline std::string version() { return "0.0.1"; }; 
}; } #endif // !AODELD_H \ No newline at end of file diff --git a/src/BoostAODE.cc b/src/BoostAODE.cc index cc617eb..dc7edb5 100644 --- a/src/BoostAODE.cc +++ b/src/BoostAODE.cc @@ -10,13 +10,14 @@ namespace bayesnet { BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting) { - validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" }; + validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance", "predict_voting" }; } void BoostAODE::buildModel(const torch::Tensor& weights) { // Models shall be built in trainModel models.clear(); + significanceModels.clear(); n_models = 0; // Prepare the validation dataset auto y_ = dataset.index({ -1, "..." }); @@ -72,6 +73,10 @@ namespace bayesnet { tolerance = hyperparameters["tolerance"]; hyperparameters.erase("tolerance"); } + if (hyperparameters.contains("predict_voting")) { + predict_voting = hyperparameters["predict_voting"]; + hyperparameters.erase("predict_voting"); + } if (hyperparameters.contains("select_features")) { auto selectedAlgorithm = hyperparameters["select_features"]; std::vector algos = { "IWSS", "FCBF", "CFS" }; @@ -128,8 +133,11 @@ namespace bayesnet { if (selectFeatures) { featuresUsed = initializeModels(); } - if (maxModels == 0) + bool resetMaxModels = false; + if (maxModels == 0) { maxModels = .1 * n > 10 ? .1 * n : n; + resetMaxModels = true; // Flag to unset maxModels + } torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); bool exitCondition = false; // Variables to control the accuracy finish condition @@ -211,6 +219,9 @@ namespace bayesnet { status = WARNING; } notes.push_back("Number of models: " + std::to_string(n_models)); + if (resetMaxModels) { + maxModels = 0; + } } std::vector BoostAODE::graph(const std::string& title) const { diff --git a/src/BoostAODE.h b/src/BoostAODE.h index f9c8429..551f4be 100644 --- a/src/BoostAODE.h +++ b/src/BoostAODE.h @@ -7,7 +7,7 @@ namespace bayesnet { class BoostAODE : public Ensemble { public: - BoostAODE(bool predict_voting = false); + BoostAODE(bool predict_voting = true); virtual ~BoostAODE() = default; std::vector graph(const std::string& title = "BoostAODE") const override; void setHyperparameters(const nlohmann::json& hyperparameters) override; diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index f8ae718..eb641fa 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -2,9 +2,6 @@ #include #include #include -#include -#include -#include #include "KDB.h" #include "TAN.h" #include "SPODE.h" @@ -16,12 +13,9 @@ #include "AODELd.h" #include "TestUtils.h" -TEST_CASE("Library check version", "[BayesNet]") -{ - auto clf = bayesnet::KDB(2); - REQUIRE(clf.getVersion() == "1.0.2"); -} -TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") +const std::string ACTUAL_VERSION = "1.0.3"; + +TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]") { map , float> scores = { // Diabetes @@ -37,87 +31,34 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} }; + std::map models = { + {"AODE", new bayesnet::AODE()}, {"AODELd", new bayesnet::AODELd()}, + {"BoostAODE", new bayesnet::BoostAODE()}, + {"KDB", 
new bayesnet::KDB(2)}, {"KDBLd", new bayesnet::KDBLd(2)}, + {"SPODE", new bayesnet::SPODE(1)}, {"SPODELd", new bayesnet::SPODELd(1)}, + {"TAN", new bayesnet::TAN()}, {"TANLd", new bayesnet::TANLd()} + }; + std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "SPODELd", "TAN", "TANLd"); + auto clf = models[name]; - std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); - auto raw = RawDatasets(file_name, false); - - SECTION("Test TAN classifier (" + file_name + ")") + SECTION("Test " + name + " classifier") { - auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - //scores[{file_name, "TAN"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); + for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) { + auto clf = models[name]; + auto discretize = name.substr(name.length() - 2) != "Ld"; + auto raw = RawDatasets(file_name, discretize); + clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf->score(raw.Xt, raw.yt); + INFO("File: " + file_name); + REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); + } } - SECTION("Test TANLd classifier (" + file_name + ")") + SECTION("Library check version") { - auto clf = bayesnet::TANLd(); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - //scores[{file_name, "TANLd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); + INFO("Checking version of " + name + " classifier"); + REQUIRE(clf->getVersion() == ACTUAL_VERSION); } - SECTION("Test KDB classifier (" + file_name + ")") - { - auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - //scores[{file_name, "KDB"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" - }]).epsilon(raw.epsilon)); - } - SECTION("Test KDBLd classifier (" + file_name + ")") - { - auto clf = bayesnet::KDBLd(2); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - //scores[{file_name, "KDBLd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" - }]).epsilon(raw.epsilon)); - } - SECTION("Test SPODE classifier (" + file_name + ")") - { - auto clf = bayesnet::SPODE(1); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "SPODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); - } - SECTION("Test SPODELd classifier (" + file_name + ")") - { - auto clf = bayesnet::SPODELd(1); - clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - // scores[{file_name, "SPODELd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); - } - SECTION("Test AODE classifier (" + file_name + ")") - { - auto clf = bayesnet::AODE(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "AODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); - } - SECTION("Test AODELd classifier (" + file_name + ")") - { - auto clf = bayesnet::AODELd(); - clf.fit(raw.Xt, raw.yt, 
raw.featurest, raw.classNamet, raw.statest); - auto score = clf.score(raw.Xt, raw.yt); - // scores[{file_name, "AODELd"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); - } - SECTION("Test BoostAODE classifier (" + file_name + ")") - { - auto clf = bayesnet::BoostAODE(true); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto score = clf.score(raw.Xv, raw.yv); - // scores[{file_name, "BoostAODE"}] = score; - REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); - } - // for (auto scores : scores) { - // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; - // } + delete clf; } TEST_CASE("Models features", "[BayesNet]") { @@ -264,3 +205,20 @@ TEST_CASE("Model predict_proba", "[BayesNet]") delete clf; } } +TEST_CASE("BoostAODE voting-proba", "[BayesNet]") +{ + auto raw = RawDatasets("iris", false); + auto clf = bayesnet::BoostAODE(false); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score_proba = clf.score(raw.Xv, raw.yv); + auto pred_proba = clf.predict_proba(raw.Xv); + clf.setHyperparameters({ + {"predict_voting",true}, + }); + auto score_voting = clf.score(raw.Xv, raw.yv); + auto pred_voting = clf.predict_proba(raw.Xv); + REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon)); + REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon)); + REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon)); + REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon)); +} -- 2.45.2 From 3007e22a7dc3fb6e6fd0b6b9dccbf638df00662f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 24 Feb 2024 21:33:28 +0100 Subject: [PATCH 7/7] Add info to CHANGELOG Update submodules --- .gitmodules | 6 ++++-- CHANGELOG.md | 3 ++- lib/argparse | 1 - lib/catch2 | 2 +- lib/json | 2 +- lib/libxlsxwriter | 1 - 6 files changed, 8 insertions(+), 7 deletions(-) delete mode 160000 lib/argparse delete mode 160000 lib/libxlsxwriter diff --git a/.gitmodules b/.gitmodules index 0912fa4..549c379 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,14 +5,16 @@ update = merge [submodule "lib/catch2"] path = lib/catch2 - main = v2.x + main = v2.x update = merge url = https://github.com/catchorg/Catch2.git [submodule "lib/json"] path = lib/json url = https://github.com/nlohmann/json.git - master = master + master = master update = merge [submodule "lib/folding"] path = lib/folding url = https://github.com/rmontanana/folding + main = main + update = merge diff --git a/CHANGELOG.md b/CHANGELOG.md index 56f6cb8..18b67a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,10 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## =[Unreleased] +## [Unreleased] ### Added +- Voting / probability aggregation in Ensemble classes - predict_proba method in Classifier - predict_proba method in BoostAODE - predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting) diff --git a/lib/argparse b/lib/argparse deleted file mode 160000 index 69dabd8..0000000 --- a/lib/argparse +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 69dabd88a8e6680b1a1a18397eb3e165e4019ce6 diff --git a/lib/catch2 b/lib/catch2 index 863c662..ed6ac8a 160000 --- a/lib/catch2 +++ b/lib/catch2 @@ -1 +1 @@ -Subproject commit 863c662c0eff026300f4d729a7054e90d6d12cdd +Subproject commit ed6ac8a629f9a4206575be784c1e340da2a94855 diff --git a/lib/json b/lib/json index a259ecc..0457de2 160000 --- a/lib/json +++ b/lib/json @@ -1 +1 @@ -Subproject commit a259ecc51e1951e12f757ce17db958e9881e9c6c +Subproject commit 0457de21cffb298c22b629e538036bfeb96130b7 diff --git a/lib/libxlsxwriter b/lib/libxlsxwriter deleted file mode 160000 index 29355a0..0000000 --- a/lib/libxlsxwriter +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 29355a0887475488c7cc470ad43cc867fcfa92e2 -- 2.45.2
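Usage note (not part of the patches above): the sketch below summarises the predict_voting behaviour introduced in this series, mirroring the "BoostAODE voting-proba" test case added in tests/TestBayesModels.cc. It is a minimal, illustrative driver, assuming the RawDatasets helper from tests/TestUtils.h and the tests' include paths are available; any name not present in the diffs is hypothetical.

#include "BoostAODE.h"
#include "TestUtils.h"  // assumed: provides the RawDatasets helper used by the test suite

int main() {
    // Load iris with the same RawDatasets arguments the test uses.
    auto raw = RawDatasets("iris", false);

    // predict_voting = false -> the ensemble aggregates class probabilities.
    auto clf = bayesnet::BoostAODE(false);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto pred_proba = clf.predict_proba(raw.Xv);   // per-sample class probabilities
    auto score_proba = clf.score(raw.Xv, raw.yv);  // ~0.97333 on iris in the test

    // The same fitted model can be switched to majority voting at runtime
    // through the new "predict_voting" hyperparameter.
    clf.setHyperparameters({ {"predict_voting", true} });
    auto score_voting = clf.score(raw.Xv, raw.yv); // ~0.98 on iris in the test
    return 0;
}

Since the constructor default was also changed to predict_voting = true in this series, BoostAODE() with no arguments predicts by voting unless overridden via the constructor or the hyperparameter shown above.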