From b34869cc61c7413fd204e42759efd28f523d0a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 11 Jun 2024 11:40:45 +0200 Subject: [PATCH] Set smoothing as fit parameter --- README.md | 2 +- bayesnet/BaseClassifier.h | 12 +++---- bayesnet/classifiers/Classifier.cc | 25 +++++++-------- bayesnet/classifiers/Classifier.h | 12 +++---- bayesnet/classifiers/KDBLd.cc | 4 +-- bayesnet/classifiers/KDBLd.h | 2 +- bayesnet/classifiers/Proposal.cc | 2 +- bayesnet/classifiers/SPODELd.cc | 12 +++---- bayesnet/classifiers/SPODELd.h | 8 ++--- bayesnet/classifiers/TANLd.cc | 4 +-- bayesnet/classifiers/TANLd.h | 5 ++- bayesnet/ensembles/AODELd.cc | 9 +++--- bayesnet/ensembles/AODELd.h | 4 +-- bayesnet/ensembles/BoostA2DE.cc | 12 +++---- bayesnet/ensembles/BoostA2DE.h | 4 +-- bayesnet/ensembles/BoostAODE.cc | 12 +++---- bayesnet/ensembles/BoostAODE.h | 4 +-- bayesnet/ensembles/Ensemble.cc | 5 ++- bayesnet/ensembles/Ensemble.h | 2 +- bayesnet/network/Network.cc | 22 ++++++------- bayesnet/network/Network.h | 10 +++--- tests/TestA2DE.cc | 8 ++--- tests/TestBayesClassifier.cc | 32 +++++++++---------- tests/TestBayesEnsemble.cc | 14 ++++----- tests/TestBayesModels.cc | 22 ++++++------- tests/TestBayesNetwork.cc | 50 +++++++++++++++--------------- tests/TestBoostA2DE.cc | 22 ++++++------- tests/TestBoostAODE.cc | 22 ++++++------- tests/TestModulesVersions.cc | 2 +- tests/TestUtils.h | 2 ++ 30 files changed, 168 insertions(+), 178 deletions(-) diff --git a/README.md b/README.md index ba22453..f387482 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) ![Gitea Last 
Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-97,2%25-green)](html/index.html) +[![Coverage Badge](https://img.shields.io/badge/Coverage-96,9%25-green)](html/index.html) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/BaseClassifier.h b/bayesnet/BaseClassifier.h index 1f80271..81fbe26 100644 --- a/bayesnet/BaseClassifier.h +++ b/bayesnet/BaseClassifier.h @@ -14,13 +14,12 @@ namespace bayesnet { enum status_t { NORMAL, WARNING, ERROR }; class BaseClassifier { public: - void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; } // To call before fit // X is nxm std::vector, y is nx1 std::vector - virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) = 0; + virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; // X is nxm tensor, y is nx1 tensor - virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) = 0; + virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const 
std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0; virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; std::vector virtual predict(std::vector>& X) = 0; @@ -42,8 +41,7 @@ namespace bayesnet { virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; std::vector& getValidHyperparameters() { return validHyperparameters; } protected: - virtual void trainModel(const torch::Tensor& weights) = 0; + virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0; std::vector validHyperparameters; - Smoothing_t smoothing = Smoothing_t::NONE; }; } \ No newline at end of file diff --git a/bayesnet/classifiers/Classifier.cc b/bayesnet/classifiers/Classifier.cc index 287e1bf..5262401 100644 --- a/bayesnet/classifiers/Classifier.cc +++ b/bayesnet/classifiers/Classifier.cc @@ -11,7 +11,7 @@ namespace bayesnet { Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; - Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) + Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { this->features = features; this->className = className; @@ -22,9 +22,8 @@ namespace bayesnet { auto n_classes = states.at(className).size(); metrics = Metrics(dataset, features, className, n_classes); model.initialize(); - model.setSmoothing(smoothing); buildModel(weights); - trainModel(weights); + trainModel(weights, smoothing); fitted = true; return *this; } @@ -42,20 +41,20 @@ namespace bayesnet { throw std::runtime_error(oss.str()); } } - void Classifier::trainModel(const torch::Tensor& weights) + void 
Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing) { - model.fit(dataset, weights, features, className, states); + model.fit(dataset, weights, features, className, states, smoothing); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { dataset = X; buildDataset(y); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { dataset = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kInt32); for (int i = 0; i < X.size(); ++i) { @@ -64,18 +63,18 @@ namespace bayesnet { auto ytmp = torch::tensor(y, torch::kInt32); buildDataset(ytmp); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } - Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) { 
this->dataset = dataset; const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } - Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { this->dataset = dataset; - return build(features, className, states, weights); + return build(features, className, states, weights, smoothing); } void Classifier::checkFitParameters() { diff --git a/bayesnet/classifiers/Classifier.h b/bayesnet/classifiers/Classifier.h index 9be1e3f..4d3ea83 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -15,10 +15,10 @@ namespace bayesnet { public: Classifier(Network model); virtual ~Classifier() = default; - Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override; - Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override; + Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) override; + Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) 
override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override; void addNodes(); int getNumberOfNodes() const override; int getNumberOfEdges() const override; @@ -50,10 +50,10 @@ namespace bayesnet { std::vector notes; // Used to store messages occurred during the fit process void checkFitParameters(); virtual void buildModel(const torch::Tensor& weights) = 0; - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void buildDataset(torch::Tensor& y); private: - Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights); + Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); }; } #endif diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 9f1647c..a285da1 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -8,7 +8,7 @@ namespace bayesnet { KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} - KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -19,7 +19,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian 
network - KDB::fit(dataset, features, className, states); + KDB::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; } diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h index 9150bba..77b9eec 100644 --- a/bayesnet/classifiers/KDBLd.h +++ b/bayesnet/classifiers/KDBLd.h @@ -15,7 +15,7 @@ namespace bayesnet { public: explicit KDBLd(int k); virtual ~KDBLd() = default; - KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; + KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "KDB") const override; torch::Tensor predict(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 2dfadb7..a5d5f12 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -70,7 +70,7 @@ namespace bayesnet { states[pFeatures[index]] = xStates; } const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); - model.fit(pDataset, weights, pFeatures, pClassName, states); + model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::OLD_LAPLACE); } return states; } diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index 98c41ff..d733253 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -8,25 +8,25 @@ namespace bayesnet { SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} - SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& 
features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); Xf = X_; y = y_; - return commonFit(features_, className_, states_); + return commonFit(features_, className_, states_, smoothing); } - SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { if (!torch::is_floating_point(dataset)) { throw std::runtime_error("Dataset must be a floating point tensor"); } Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); - return commonFit(features_, className_, states_); + return commonFit(features_, className_, states_, smoothing); } - SPODELd& SPODELd::commonFit(const std::vector& features_, const std::string& className_, map>& states_) + SPODELd& SPODELd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { features = features_; className = className_; @@ -34,7 +34,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network - SPODE::fit(dataset, features, className, states); + SPODE::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; } diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h index f24a030..b92d24c 100644 --- a/bayesnet/classifiers/SPODELd.h +++ b/bayesnet/classifiers/SPODELd.h @@ -14,10 +14,10 @@ namespace bayesnet { public: explicit SPODELd(int root); virtual ~SPODELd() = default; - SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& 
className, map>& states) override; - SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states) override; - SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states); - std::vector graph(const std::string& name = "SPODE") const override; + SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); + std::vector graph(const std::string& name = "SPODELd") const override; torch::Tensor predict(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; }; diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index ab86dc4..6e7d443 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -8,7 +8,7 @@ namespace bayesnet { TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} - TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -19,7 +19,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network - TAN::fit(dataset, features, className, states); + TAN::fit(dataset, features, className, states, smoothing); states = localDiscretizationProposal(states, model); return *this; diff --git 
a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h index e6c3c75..d05a9c3 100644 --- a/bayesnet/classifiers/TANLd.h +++ b/bayesnet/classifiers/TANLd.h @@ -15,10 +15,9 @@ namespace bayesnet { public: TANLd(); virtual ~TANLd() = default; - TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; - std::vector graph(const std::string& name = "TAN") const override; + TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + std::vector graph(const std::string& name = "TANLd") const override; torch::Tensor predict(torch::Tensor& X) override; - static inline std::string version() { return "0.0.1"; }; }; } #endif // !TANLD_H \ No newline at end of file diff --git a/bayesnet/ensembles/AODELd.cc b/bayesnet/ensembles/AODELd.cc index 29d66f8..1de8218 100644 --- a/bayesnet/ensembles/AODELd.cc +++ b/bayesnet/ensembles/AODELd.cc @@ -10,7 +10,7 @@ namespace bayesnet { AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) { } - AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) + AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); features = features_; @@ -21,7 +21,7 @@ namespace bayesnet { states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network - Ensemble::fit(dataset, features, className, states); + Ensemble::fit(dataset, features, className, states, smoothing); return *this; } @@ -34,11 +34,10 @@ namespace bayesnet { n_models = models.size(); significanceModels = std::vector(n_models, 1.0); } - void 
AODELd::trainModel(const torch::Tensor& weights) + void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { for (const auto& model : models) { - model->setSmoothing(smoothing); - model->fit(Xf, y, features, className, states); + model->fit(Xf, y, features, className, states, smoothing); } } std::vector AODELd::graph(const std::string& name) const diff --git a/bayesnet/ensembles/AODELd.h b/bayesnet/ensembles/AODELd.h index 9c87090..4bf0b63 100644 --- a/bayesnet/ensembles/AODELd.h +++ b/bayesnet/ensembles/AODELd.h @@ -15,10 +15,10 @@ namespace bayesnet { public: AODELd(bool predict_voting = true); virtual ~AODELd() = default; - AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) override; + AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "AODELd") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void buildModel(const torch::Tensor& weights) override; }; } diff --git a/bayesnet/ensembles/BoostA2DE.cc b/bayesnet/ensembles/BoostA2DE.cc index 481bfab..236c165 100644 --- a/bayesnet/ensembles/BoostA2DE.cc +++ b/bayesnet/ensembles/BoostA2DE.cc @@ -19,7 +19,7 @@ namespace bayesnet { BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting) { } - std::vector BoostA2DE::initializeModels() + std::vector BoostA2DE::initializeModels(const Smoothing_t smoothing) { torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); std::vector featuresSelected = featureSelection(weights_); @@ -32,8 +32,7 @@ namespace bayesnet { for (int j = i + 1; j < featuresSelected.size(); j++) { auto parents = { featuresSelected[i], featuresSelected[j] }; std::unique_ptr model = std::make_unique(parents); 
- model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel n_models++; @@ -42,7 +41,7 @@ namespace bayesnet { notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); return featuresSelected; } - void BoostA2DE::trainModel(const torch::Tensor& weights) + void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { // // Logging setup @@ -59,7 +58,7 @@ namespace bayesnet { bool finished = false; std::vector featuresUsed; if (selectFeatures) { - featuresUsed = initializeModels(); + featuresUsed = initializeModels(smoothing); auto ypred = predict(X_train); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); // Update significance of the models @@ -97,8 +96,7 @@ namespace bayesnet { pairSelection.erase(pairSelection.begin()); std::unique_ptr model; model = std::make_unique(std::vector({ feature_pair.first, feature_pair.second })); - model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); alpha_t = 0.0; if (!block_update) { auto ypred = model->predict(X_train); diff --git a/bayesnet/ensembles/BoostA2DE.h b/bayesnet/ensembles/BoostA2DE.h index ff56b79..71cfa99 100644 --- a/bayesnet/ensembles/BoostA2DE.h +++ b/bayesnet/ensembles/BoostA2DE.h @@ -17,9 +17,9 @@ namespace bayesnet { virtual ~BoostA2DE() = default; std::vector graph(const std::string& title = "BoostA2DE") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; private: - std::vector 
initializeModels(); + std::vector initializeModels(const Smoothing_t smoothing); }; } #endif \ No newline at end of file diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index f37cf2d..0638d78 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -16,14 +16,13 @@ namespace bayesnet { BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting) { } - std::vector BoostAODE::initializeModels() + std::vector BoostAODE::initializeModels(const Smoothing_t smoothing) { torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); std::vector featuresSelected = featureSelection(weights_); for (const int& feature : featuresSelected) { std::unique_ptr model = std::make_unique(feature); - model->setSmoothing(smoothing); - model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); models.push_back(std::move(model)); significanceModels.push_back(1.0); // They will be updated later in trainModel n_models++; @@ -31,7 +30,7 @@ namespace bayesnet { notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); return featuresSelected; } - void BoostAODE::trainModel(const torch::Tensor& weights) + void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { // // Logging setup @@ -48,7 +47,7 @@ namespace bayesnet { bool finished = false; std::vector featuresUsed; if (selectFeatures) { - featuresUsed = initializeModels(); + featuresUsed = initializeModels(smoothing); auto ypred = predict(X_train); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); // Update significance of the models @@ -90,8 +89,7 @@ namespace bayesnet { featureSelection.erase(featureSelection.begin()); std::unique_ptr model; model = std::make_unique(feature); - model->setSmoothing(smoothing); - 
model->fit(dataset, features, className, states, weights_); + model->fit(dataset, features, className, states, weights_, smoothing); alpha_t = 0.0; if (!block_update) { auto ypred = model->predict(X_train); diff --git a/bayesnet/ensembles/BoostAODE.h b/bayesnet/ensembles/BoostAODE.h index e4eb250..bc66ec1 100644 --- a/bayesnet/ensembles/BoostAODE.h +++ b/bayesnet/ensembles/BoostAODE.h @@ -18,9 +18,9 @@ namespace bayesnet { virtual ~BoostAODE() = default; std::vector graph(const std::string& title = "BoostAODE") const override; protected: - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; private: - std::vector initializeModels(); + std::vector initializeModels(const Smoothing_t smoothing); }; } #endif \ No newline at end of file diff --git a/bayesnet/ensembles/Ensemble.cc b/bayesnet/ensembles/Ensemble.cc index 6ef096b..68f3ee5 100644 --- a/bayesnet/ensembles/Ensemble.cc +++ b/bayesnet/ensembles/Ensemble.cc @@ -13,13 +13,12 @@ namespace bayesnet { }; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; - void Ensemble::trainModel(const torch::Tensor& weights) + void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) { n_models = models.size(); for (auto i = 0; i < n_models; ++i) { // fit with std::vectors - models[i]->setSmoothing(smoothing); - models[i]->fit(dataset, features, className, states); + models[i]->fit(dataset, features, className, states, smoothing); } } std::vector Ensemble::compute_arg_max(std::vector>& X) diff --git a/bayesnet/ensembles/Ensemble.h b/bayesnet/ensembles/Ensemble.h index 2c072a8..5172a40 100644 --- a/bayesnet/ensembles/Ensemble.h +++ b/bayesnet/ensembles/Ensemble.h @@ -46,7 +46,7 @@ namespace bayesnet { unsigned n_models; std::vector> models; std::vector significanceModels; - void trainModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const Smoothing_t 
smoothing) override; bool predict_voting; }; } diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 2cdb3f1..419585b 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -11,14 +11,14 @@ #include "Network.h" #include "bayesnet/utils/bayesnetUtils.h" namespace bayesnet { - Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } + Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 } { } - Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE } + Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 } { } - Network::Network(const Network& other) : smoothing(other.smoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), + Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples) { if (samples.defined()) @@ -156,7 +156,7 @@ namespace bayesnet { classNumStates = nodes.at(className)->getNumStates(); } // X comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); this->className = className; @@ -165,17 +165,17 @@ namespace bayesnet { for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." 
}); } - completeFit(states, weights); + completeFit(states, weights, smoothing); } - void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; this->samples = samples; - completeFit(states, weights); + completeFit(states, weights, smoothing); } // input_data comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states) + void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing) { const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); @@ -186,15 +186,15 @@ namespace bayesnet { samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); } samples.index_put_({ -1, "..." 
}, torch::tensor(labels, torch::kInt32)); - completeFit(states, weights); + completeFit(states, weights, smoothing); } - void Network::completeFit(const std::map>& states, const torch::Tensor& weights) + void Network::completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) { setStates(states); std::vector threads; const double n_samples = static_cast(samples.size(1)); for (auto& node : nodes) { - threads.emplace_back([this, &node, &weights, n_samples]() { + threads.emplace_back([this, &node, &weights, n_samples, smoothing]() { double numStates = static_cast(node.second->getNumStates()); double smoothing_factor = 0.0; switch (smoothing) { diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 37177cc..b3417cd 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -38,10 +38,9 @@ namespace bayesnet { /* Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. 
*/ - void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; }; - void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); - void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); - void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); + void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); + void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); + void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states, const Smoothing_t smoothing); std::vector predict(const std::vector>&); // Return mx1 std::vector of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); @@ -61,14 +60,13 @@ namespace bayesnet { int classNumStates; std::vector features; // Including classname std::string className; - Smoothing_t smoothing; torch::Tensor samples; // n+1xm tensor used to fit the model bool isCyclic(const std::string&, std::unordered_set&, std::unordered_set&); std::vector predict_sample(const std::vector&); std::vector predict_sample(const torch::Tensor&); std::vector exactInference(std::map&); double computeFactor(std::map&); - void completeFit(const std::map>& states, const torch::Tensor& weights); + 
void completeFit(const std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); void setStates(const std::map>&); }; diff --git a/tests/TestA2DE.cc b/tests/TestA2DE.cc index 31809a4..60b876f 100644 --- a/tests/TestA2DE.cc +++ b/tests/TestA2DE.cc @@ -16,7 +16,7 @@ TEST_CASE("Fit and Score", "[A2DE]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::A2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon)); REQUIRE(clf.getNumberOfNodes() == 360); REQUIRE(clf.getNumberOfEdges() == 756); @@ -30,18 +30,18 @@ TEST_CASE("Test score with predict_voting", "[A2DE]") {"predict_voting", true}, }; clf.setHyperparameters(hyperparameters); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon)); hyperparameters["predict_voting"] = false; clf.setHyperparameters(hyperparameters); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon)); } TEST_CASE("Test graph", "[A2DE]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::A2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = clf.graph(); REQUIRE(graph.size() == 78); REQUIRE(graph[0] == "digraph BayesNet 
{\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n"); diff --git a/tests/TestBayesClassifier.cc b/tests/TestBayesClassifier.cc index 3eb0a3f..ce5887b 100644 --- a/tests/TestBayesClassifier.cc +++ b/tests/TestBayesClassifier.cc @@ -18,38 +18,38 @@ TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]") auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); raw.yv.pop_back(); - REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::runtime_error); - REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); + REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); } TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); auto yshort = torch::zeros({ 149 }, torch::kInt32); - REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), std::runtime_error); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]"); } TEST_CASE("Invalid data type", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", false); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, 
raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), "dataset (X, y) must be of type Integer"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer"); } TEST_CASE("Invalid number of features", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0); - REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), "Classifier: X 5 and features 4 must have the same number of features"); + REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features"); } TEST_CASE("Invalid class name", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), "class name not found in states"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states"); } TEST_CASE("Invalid feature name", "[Classifier]") { @@ -57,8 +57,8 @@ TEST_CASE("Invalid feature name", "[Classifier]") auto raw = RawDatasets("iris", true); auto statest 
= raw.states; statest.erase("petallength"); - REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), std::invalid_argument); - REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), "feature [petallength] not found in states"); + REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states"); } TEST_CASE("Invalid hyperparameter", "[Classifier]") { @@ -71,7 +71,7 @@ TEST_CASE("Topological order", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto order = model.topological_order(); REQUIRE(order.size() == 4); REQUIRE(order[0] == "petallength"); @@ -83,7 +83,7 @@ TEST_CASE("Dump_cpt", "[Classifier]") { auto model = bayesnet::TAN(); auto raw = RawDatasets("iris", true); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto cpt = model.dump_cpt(); REQUIRE(cpt.size() == 1713); } @@ -111,7 +111,7 @@ TEST_CASE("KDB Graph", "[Classifier]") { auto model = bayesnet::KDB(2); auto raw = RawDatasets("iris", true); - model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = model.graph(); REQUIRE(graph.size() == 15); } @@ -119,7 +119,7 @@ TEST_CASE("KDBLd Graph", "[Classifier]") { auto model = bayesnet::KDBLd(2); auto raw = RawDatasets("iris", false); - model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto graph = 
model.graph(); REQUIRE(graph.size() == 15); } \ No newline at end of file diff --git a/tests/TestBayesEnsemble.cc b/tests/TestBayesEnsemble.cc index bbfe086..da9f839 100644 --- a/tests/TestBayesEnsemble.cc +++ b/tests/TestBayesEnsemble.cc @@ -18,7 +18,7 @@ TEST_CASE("Topological Order", "[Ensemble]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto order = clf.topological_order(); REQUIRE(order.size() == 0); } @@ -26,7 +26,7 @@ TEST_CASE("Dump CPT", "[Ensemble]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto dump = clf.dump_cpt(); REQUIRE(dump == ""); } @@ -34,7 +34,7 @@ TEST_CASE("Number of States", "[Ensemble]") { auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfStates() == 76); } TEST_CASE("Show", "[Ensemble]") @@ -46,7 +46,7 @@ TEST_CASE("Show", "[Ensemble]") {"maxTolerance", 1}, {"convergence", false}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); std::vector expected = { "class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, sepalwidth, petalwidth, ", @@ -78,16 +78,16 @@ TEST_CASE("Graph", "[Ensemble]") { auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto graph = clf.graph(); 
REQUIRE(graph.size() == 56); auto clf2 = bayesnet::AODE(); - clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); graph = clf2.graph(); REQUIRE(graph.size() == 56); raw = RawDatasets("glass", false); auto clf3 = bayesnet::AODELd(); - clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); graph = clf3.graph(); REQUIRE(graph.size() == 261); } diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 2d60d5e..cbc56a9 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -54,7 +54,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") auto clf = models[name]; auto discretize = name.substr(name.length() - 2) != "Ld"; auto raw = RawDatasets(file_name, discretize); - clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf->score(raw.Xt, raw.yt); INFO("Classifier: " << name << " File: " << file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); @@ -81,7 +81,7 @@ TEST_CASE("Models features & Graph", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfStates() == 19); @@ -93,7 +93,7 @@ TEST_CASE("Models features & Graph", "[Models]") { auto clf = bayesnet::TANLd(); auto raw = RawDatasets("iris", false); - clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() 
== 7); REQUIRE(clf.getNumberOfStates() == 19); @@ -106,7 +106,7 @@ TEST_CASE("Get num features & num edges", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 8); } @@ -166,7 +166,7 @@ TEST_CASE("Model predict_proba", "[Models]") SECTION("Test " + model + " predict_proba") { auto clf = models[model]; - clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto y_pred_proba = clf->predict_proba(raw.Xv); auto yt_pred_proba = clf->predict_proba(raw.Xt); auto y_pred = clf->predict(raw.Xv); @@ -203,7 +203,7 @@ TEST_CASE("AODE voting-proba", "[Models]") { auto raw = RawDatasets("glass", true); auto clf = bayesnet::AODE(false); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score_proba = clf.score(raw.Xv, raw.yv); auto pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({ @@ -222,9 +222,9 @@ TEST_CASE("SPODELd dataset", "[Models]") auto raw = RawDatasets("iris", false); auto clf = bayesnet::SPODELd(0); // raw.dataset.to(torch::kFloat32); - clf.fit(raw.dataset, raw.features, raw.className, raw.states); + clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xt, raw.yt); - clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states); + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto scoret = clf.score(raw.Xt, raw.yt); REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon)); @@ -233,13 +233,13 @@ TEST_CASE("KDB with hyperparameters", "[Models]") { 
auto raw = RawDatasets("glass", true); auto clf = bayesnet::KDB(2); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); clf.setHyperparameters({ {"k", 3}, {"theta", 0.7}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto scoret = clf.score(raw.Xv, raw.yv); REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); @@ -248,7 +248,7 @@ TEST_CASE("Incorrect type of data for SPODELd", "[Models]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::SPODELd(0); - REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states), std::runtime_error); + REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); } TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 680a349..b1d6911 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -115,9 +115,9 @@ TEST_CASE("Test Bayesian Network", "[Network]") REQUIRE(children == children3); } // Fit networks - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); - net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states); - net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing); + net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(net.getStates() == net2.getStates()); REQUIRE(net.getStates() == net3.getStates()); 
REQUIRE(net.getFeatures() == net2.getFeatures()); @@ -194,7 +194,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test predict") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector y_test = { 2, 2, 0, 2, 1 }; auto y_pred = net.predict(test); @@ -203,7 +203,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test predict_proba") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} }; std::vector> y_test = { {0.450237, 0.0866621, 0.463101}, @@ -224,14 +224,14 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test score") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto score = net.score(raw.Xv, raw.yv); REQUIRE(score == Catch::Approx(0.97333333).margin(threshold)); } SECTION("Copy constructor") { buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto net2 = bayesnet::Network(net); REQUIRE(net.getFeatures() == net2.getFeatures()); REQUIRE(net.getEdges() == net2.getEdges()); @@ -268,7 +268,7 @@ TEST_CASE("Test Bayesian Network", "[Network]") // predict with wrong data auto netx = bayesnet::Network(); buildModel(netx, raw.features, raw.className); - 
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); std::vector> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} }; auto test_tensor2 = bayesnet::vectorToTensor(test2, false); REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error); @@ -278,17 +278,17 @@ TEST_CASE("Test Bayesian Network", "[Network]") // fit with wrong data // Weights auto net2 = bayesnet::Network(); - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states), std::invalid_argument); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit"; - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states), invalid_weights); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights); // X & y std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states), invalid_labels); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels); // Features std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, 
std::vector(), raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states), invalid_features); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector(), raw.className, raw.states, raw.smoothing), invalid_features); // Different number of features auto net3 = bayesnet::Network(); auto test2y = { 1, 2, 3, 4, 5 }; @@ -296,23 +296,23 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto features3 = raw.features; features3.pop_back(); std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)"; - REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states), invalid_features2); + REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2); // Uninitialized network std::string network_invalid = "The network has not been initialized. 
You must call addNode() before calling fit()"; - REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), network_invalid); + REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid); // Classname std::string invalid_classname = "Class Name not found in Network::features"; - REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), invalid_classname); + REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname); // Invalid feature auto features2 = raw.features; features2.pop_back(); features2.push_back("duck"); std::string invalid_feature = "Feature duck not found in Network::features"; - REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), std::invalid_argument); - REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), invalid_feature); + REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature); // Add twice the same node name to the network => Nothing should happen net.addNode("A"); net.addNode("A"); @@ -320,8 +320,8 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto net4 = 
bayesnet::Network(); buildModel(net4, raw.features, raw.className); std::string invalid_state = "Feature sepallength not found in states"; - REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>()), std::invalid_argument); - REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>()), invalid_state); + REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>(), raw.smoothing), std::invalid_argument); + REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map>(), raw.smoothing), invalid_state); } } @@ -366,7 +366,7 @@ TEST_CASE("Dump CPT", "[Network]") auto net = bayesnet::Network(); auto raw = RawDatasets("iris", true); buildModel(net, raw.features, raw.className); - net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); auto res = net.dump_cpt(); std::string expected = R"(* class: (3) : [3] 0.3333 diff --git a/tests/TestBoostA2DE.cc b/tests/TestBoostA2DE.cc index b841bc3..41e7dd7 100644 --- a/tests/TestBoostA2DE.cc +++ b/tests/TestBoostA2DE.cc @@ -17,7 +17,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") { auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::BoostA2DE(); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 342); REQUIRE(clf.getNumberOfEdges() == 684); REQUIRE(clf.getNotes().size() == 3); @@ -32,7 +32,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // auto raw = RawDatasets("glass", true); // auto clf = bayesnet::BoostAODE(); // clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, 
raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNotes().size() == 2); @@ -44,7 +44,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // auto raw = RawDatasets("glass", true); // auto clf = bayesnet::BoostAODE(); // clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 90); // REQUIRE(clf.getNumberOfEdges() == 153); // REQUIRE(clf.getNotes().size() == 2); @@ -60,7 +60,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"convergence", true}, // {"select_features","CFS"}, // }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 72); // REQUIRE(clf.getNumberOfEdges() == 120); // REQUIRE(clf.getNotes().size() == 2); @@ -75,7 +75,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // { // auto raw = RawDatasets("iris", true); // auto clf = bayesnet::BoostAODE(false); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // auto score_proba = clf.score(raw.Xv, raw.yv); // auto pred_proba = clf.predict_proba(raw.Xv); // clf.setHyperparameters({ @@ -104,7 +104,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"maxTolerance", 1}, // {"convergence", false}, // }); -// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); +// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); // auto score = clf.score(raw.Xv, raw.yv); // auto scoret = clf.score(raw.Xt, raw.yt); // INFO("BoostAODE order: " + order); @@ -136,7 +136,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // for (const 
auto& hyper : bad_hyper_fit.items()) { // INFO("BoostAODE hyper: " + hyper.value().dump()); // clf.setHyperparameters(hyper.value()); -// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); +// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); // } // } @@ -151,7 +151,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"block_update", false}, // {"convergence_best", false}, // }); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 210); // REQUIRE(clf.getNumberOfEdges() == 378); // REQUIRE(clf.getNotes().size() == 1); @@ -172,13 +172,13 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"convergence_best", true}, // }; // clf.setHyperparameters(hyperparameters); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // auto score_best = clf.score(raw.X_test, raw.y_test); // REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); // // Now we will set the hyperparameter to use the last accuracy // hyperparameters["convergence_best"] = false; // clf.setHyperparameters(hyperparameters); -// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); +// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); // auto score_last = clf.score(raw.X_test, raw.y_test); // REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); // } @@ -193,7 +193,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]") // {"maxTolerance", 3}, // {"convergence", true}, // }); -// clf.fit(raw.X_train, raw.y_train, raw.features,
raw.className, raw.states, raw.smoothing); // REQUIRE(clf.getNumberOfNodes() == 868); // REQUIRE(clf.getNumberOfEdges() == 1724); // REQUIRE(clf.getNotes().size() == 3); diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc index 66fa7fb..1a8a0f0 100644 --- a/tests/TestBoostAODE.cc +++ b/tests/TestBoostAODE.cc @@ -18,7 +18,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "CFS"} }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -30,7 +30,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -42,7 +42,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]") auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); @@ -58,7 +58,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]") {"convergence", true}, {"select_features","CFS"}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, 
raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfEdges() == 120); REQUIRE(clf.getNotes().size() == 2); @@ -73,7 +73,7 @@ TEST_CASE("Voting vs proba", "[BoostAODE]") { auto raw = RawDatasets("iris", true); auto clf = bayesnet::BoostAODE(false); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score_proba = clf.score(raw.Xv, raw.yv); auto pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({ @@ -102,7 +102,7 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]") {"maxTolerance", 1}, {"convergence", false}, }); - clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states); + clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); INFO("BoostAODE order: " << order); @@ -134,7 +134,7 @@ TEST_CASE("Oddities", "[BoostAODE]") for (const auto& hyper : bad_hyper_fit.items()) { INFO("BoostAODE hyper: " << hyper.value().dump()); clf.setHyperparameters(hyper.value()); - REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument); + REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument); } } @@ -149,7 +149,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]") {"block_update", false}, {"convergence_best", false}, }); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 210); REQUIRE(clf.getNumberOfEdges() == 378); REQUIRE(clf.getNotes().size() == 1); @@ -170,13 +170,13 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]") {"convergence_best", true}, }; clf.setHyperparameters(hyperparameters); - clf.fit(raw.X_train, raw.y_train, raw.features, 
raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_best = clf.score(raw.X_test, raw.y_test); REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon)); // Now we will set the hyperparameter to use the last accuracy hyperparameters["convergence_best"] = false; clf.setHyperparameters(hyperparameters); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_last = clf.score(raw.X_test, raw.y_test); REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); } @@ -191,7 +191,7 @@ TEST_CASE("Block Update", "[BoostAODE]") {"maxTolerance", 3}, {"convergence", true}, }); - clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states); + clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 868); REQUIRE(clf.getNumberOfEdges() == 1724); REQUIRE(clf.getNotes().size() == 3); diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 5b29178..5ee5d40 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,7 +16,7 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "1.2.0" }, + { "mdlp", "1.2.1" }, { "Folding", "1.1.0" }, { "json", "3.11" }, { "ArffFiles", "1.0.0" } diff --git a/tests/TestUtils.h b/tests/TestUtils.h index 96b6775..b986278 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -14,6 +14,7 @@ #include #include #include +#include class RawDatasets { @@ -32,6 +33,7 @@ public: bool discretize; int num_samples = 0; bool shuffle = false; + bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::OLD_LAPLACE; private: std::string to_string() {