From ed380b14945e01a5b8842d9c9a5b3d630599adbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 11:42:20 +0200 Subject: [PATCH] Complete implementation with tests --- README.md | 2 +- bayesnet/classifiers/KDBLd.cc | 36 ++++---- bayesnet/classifiers/KDBLd.h | 9 +- bayesnet/classifiers/Proposal.cc | 53 ++--------- bayesnet/classifiers/Proposal.h | 3 +- bayesnet/classifiers/SPODELd.cc | 6 +- bayesnet/classifiers/SPODELd.h | 6 ++ bayesnet/classifiers/TANLd.cc | 23 +++-- bayesnet/classifiers/TANLd.h | 8 ++ bayesnet/ensembles/AODELd.h | 4 + tests/TestBayesModels.cc | 152 +++++++++++++++++++++---------- tests/TestBayesNetwork.cc | 80 ++++++++-------- tests/TestBayesNode.cc | 43 +++++++++ 13 files changed, 255 insertions(+), 170 deletions(-) diff --git a/README.md b/README.md index 2ca0195..1226f6d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Doctorado-ML/BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-98,0%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) +[![Coverage Badge](https://img.shields.io/badge/Coverage-98,5%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) [![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344) Bayesian Network Classifiers library diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 32aa690..1b96bce 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -14,33 +14,29 @@ namespace bayesnet { validHyperparameters.push_back("k"); validHyperparameters.push_back("theta"); } - void KDBLd::setHyperparameters(const nlohmann::json& hyperparameters_) - { - auto hyperparameters = hyperparameters_; - if (hyperparameters.contains("k")) { - k = hyperparameters["k"]; - hyperparameters.erase("k"); - } - if (hyperparameters.contains("theta")) { - theta = hyperparameters["theta"]; - hyperparameters.erase("theta"); - } - Proposal::setHyperparameters(hyperparameters); - } KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); - features = features_; - className = className_; Xf = X_; y = y_; - - // Use iterative local discretization instead of the two-phase approach + return commonFit(features_, className_, states_, smoothing); + } + KDBLd& KDBLd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + if (!torch::is_floating_point(dataset)) { + throw std::runtime_error("Dataset must be a floating point tensor"); + } + Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); + y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); + return commonFit(features_, className_, states_, smoothing); + } + + KDBLd& KDBLd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + features = features_; + className = className_; states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); - - // Final fit with converged discretization KDB::fit(dataset, features, className, states, smoothing); - return *this; } torch::Tensor KDBLd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h index 4fa5f82..e19da24 100644 --- a/bayesnet/classifiers/KDBLd.h +++ b/bayesnet/classifiers/KDBLd.h @@ -15,8 +15,15 @@ namespace bayesnet { explicit KDBLd(int k); virtual ~KDBLd() = default; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + KDBLd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + KDBLd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "KDB") const override; - void setHyperparameters(const nlohmann::json& hyperparameters_) override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + KDB::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 4dde35a..b634b6e 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -11,6 +11,7 @@ #include "Classifier.h" #include "KDB.h" #include "TAN.h" +#include "SPODE.h" #include "KDBLd.h" #include "TANLd.h" @@ -18,9 +19,8 @@ namespace bayesnet { Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) { } - void Proposal::setHyperparameters(const nlohmann::json& hyperparameters_) + void Proposal::setHyperparameters(nlohmann::json& hyperparameters) { - auto hyperparameters = hyperparameters_; if (hyperparameters.contains("ld_proposed_cuts")) { ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"]; hyperparameters.erase("ld_proposed_cuts"); @@ -55,9 +55,6 @@ namespace bayesnet { convergence_params.verbose = hyperparameters["verbose_convergence"]; hyperparameters.erase("verbose_convergence"); } - if (!hyperparameters.empty()) { - throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump()); - } } void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y) @@ -209,7 +206,7 @@ namespace bayesnet { // Phase 2: Build model with current discretization classifier->fit(dataset, features, className, currentStates, weights, smoothing); - + // Phase 3: Network-aware discretization refinement currentStates = localDiscretizationProposal(currentStates, classifier->getModel()); @@ -228,51 +225,15 @@ namespace bayesnet { return currentStates; } - double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset) - { - double logLikelihood = 0.0; - int n_samples = dataset.size(0); - int n_features = dataset.size(1); - - for (int i = 0; i < n_samples; ++i) { - double sampleLogLikelihood = 0.0; - - // Get class value for this sample - int classValue = dataset[i][n_features - 1].item(); - - // Compute log-likelihood for each feature given its parents and class - for (const auto& node : model.getNodes()) { - if (node.first == model.getClassName()) { - // For class node, add log P(class) - auto classCounts = node.second->getCPT(); - double classProb = classCounts[classValue].item() / dataset.size(0); - sampleLogLikelihood += std::log(std::max(classProb, 1e-10)); - } else { - // For feature nodes, add log P(feature | parents, class) - int featureIdx = std::distance(model.getFeatures().begin(), - std::find(model.getFeatures().begin(), - model.getFeatures().end(), - node.first)); - int featureValue = dataset[i][featureIdx].item(); - - // Simplified probability computation - in practice would need full CPT lookup - double featureProb = 0.1; // Placeholder - would compute from CPT - sampleLogLikelihood += std::log(std::max(featureProb, 1e-10)); - } - } - - logLikelihood += sampleLogLikelihood; - } - - return logLikelihood; - } - // Explicit template instantiation for common classifier types template map> Proposal::iterativeLocalDiscretization( const torch::Tensor&, KDB*, torch::Tensor&, const std::vector&, const std::string&, const map>&, Smoothing_t); - + template map> Proposal::iterativeLocalDiscretization( const torch::Tensor&, TAN*, torch::Tensor&, const std::vector&, const std::string&, const map>&, Smoothing_t); + template map> Proposal::iterativeLocalDiscretization( + const torch::Tensor&, SPODE*, torch::Tensor&, const std::vector&, + const std::string&, const map>&, Smoothing_t); } diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h index b5685d9..9f23283 100644 --- a/bayesnet/classifiers/Proposal.h +++ b/bayesnet/classifiers/Proposal.h @@ -19,7 +19,7 @@ namespace bayesnet { class Proposal { public: Proposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_); - void setHyperparameters(const nlohmann::json& hyperparameters_); + void setHyperparameters(nlohmann::json& hyperparameters_); protected: void checkInput(const torch::Tensor& X, const torch::Tensor& y); torch::Tensor prepareX(torch::Tensor& X); @@ -61,7 +61,6 @@ namespace bayesnet { }; private: std::vector factorize(const std::vector& labels_t); - double computeLogLikelihood(Network& model, const torch::Tensor& dataset); torch::Tensor& pDataset; // (n+1)xm tensor std::vector& pFeatures; std::string& pClassName; diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index 1bb55fb..8cdbdec 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -34,12 +34,8 @@ namespace bayesnet { { features = features_; className = className_; - // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y - states = fit_local_discretization(y); - // We have discretized the input data - // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network + states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); SPODE::fit(dataset, features, className, states, smoothing); - states = localDiscretizationProposal(states, model); return *this; } torch::Tensor SPODELd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h index faa3a48..ff02149 100644 --- a/bayesnet/classifiers/SPODELd.h +++ b/bayesnet/classifiers/SPODELd.h @@ -18,6 +18,12 @@ namespace bayesnet { SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "SPODELd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + SPODE::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index 783681c..32bd7b8 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -12,17 +12,26 @@ namespace bayesnet { TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); - features = features_; - className = className_; Xf = X_; y = y_; - - // Use iterative local discretization instead of the two-phase approach + return commonFit(features_, className_, states_, smoothing); + } + TANLd& TANLd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + if (!torch::is_floating_point(dataset)) { + throw std::runtime_error("Dataset must be a floating point tensor"); + } + Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); + y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); + return commonFit(features_, className_, states_, smoothing); + } + + TANLd& TANLd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + features = features_; + className = className_; states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); - - // Final fit with converged discretization TAN::fit(dataset, features, className, states, smoothing); - return *this; } torch::Tensor TANLd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h index a904235..bc119fc 100644 --- a/bayesnet/classifiers/TANLd.h +++ b/bayesnet/classifiers/TANLd.h @@ -16,7 +16,15 @@ namespace bayesnet { TANLd(); virtual ~TANLd() = default; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + TANLd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + TANLd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "TANLd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + TAN::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; }; diff --git a/bayesnet/ensembles/AODELd.h b/bayesnet/ensembles/AODELd.h index d697554..63739b3 100644 --- a/bayesnet/ensembles/AODELd.h +++ b/bayesnet/ensembles/AODELd.h @@ -17,6 +17,10 @@ namespace bayesnet { virtual ~AODELd() = default; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "AODELd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + hyperparameters = hyperparameters_; + } protected: void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void buildModel(const torch::Tensor& weights) override; diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index f22eabc..9bf98b4 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -31,9 +31,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, {{"diabetes", "AODELd"}, 0.8125f}, - {{"diabetes", "KDBLd"}, 0.80208f}, + {{"diabetes", "KDBLd"}, 0.804688f}, {{"diabetes", "SPODELd"}, 0.7890625f}, - {{"diabetes", "TANLd"}, 0.803385437f}, + {{"diabetes", "TANLd"}, 0.8125f}, {{"diabetes", "BoostAODE"}, 0.83984f}, // Ecoli {{"ecoli", "AODE"}, 0.889881}, @@ -42,9 +42,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, {{"ecoli", "AODELd"}, 0.875f}, - {{"ecoli", "KDBLd"}, 0.880952358f}, + {{"ecoli", "KDBLd"}, 0.872024f}, {{"ecoli", "SPODELd"}, 0.839285731f}, - {{"ecoli", "TANLd"}, 0.848214269f}, + {{"ecoli", "TANLd"}, 0.869047642f}, {{"ecoli", "BoostAODE"}, 0.89583f}, // Glass {{"glass", "AODE"}, 0.79439}, @@ -53,9 +53,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, {{"glass", "AODELd"}, 0.799065411f}, - {{"glass", "KDBLd"}, 0.82710278f}, + {{"glass", "KDBLd"}, 0.864485979f}, {{"glass", "SPODELd"}, 0.780373812f}, - {{"glass", "TANLd"}, 0.869158864f}, + {{"glass", "TANLd"}, 0.831775725f}, {{"glass", "BoostAODE"}, 0.84579f}, // Iris {{"iris", "AODE"}, 0.973333}, @@ -68,29 +68,29 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} }; - std::map models{ {"AODE", new bayesnet::AODE()}, - {"AODELd", new bayesnet::AODELd()}, - {"BoostAODE", new bayesnet::BoostAODE()}, - {"KDB", new bayesnet::KDB(2)}, - {"KDBLd", new bayesnet::KDBLd(2)}, - {"XSPODE", new bayesnet::XSpode(1)}, - {"SPODE", new bayesnet::SPODE(1)}, - {"SPODELd", new bayesnet::SPODELd(1)}, - {"TAN", new bayesnet::TAN()}, - {"TANLd", new bayesnet::TANLd()} }; + std::map> models; + models["AODE"] = std::make_unique(); + models["AODELd"] = std::make_unique(); + models["BoostAODE"] = std::make_unique(); + models["KDB"] = std::make_unique(2); + models["KDBLd"] = std::make_unique(2); + models["XSPODE"] = std::make_unique(1); + models["SPODE"] = std::make_unique(1); + models["SPODELd"] = std::make_unique(1); + models["TAN"] = std::make_unique(); + models["TANLd"] = std::make_unique(); std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd"); - auto clf = models[name]; + auto clf = std::move(models[name]); SECTION("Test " + name + " classifier") { for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) { - auto clf = models[name]; auto discretize = name.substr(name.length() - 2) != "Ld"; auto raw = RawDatasets(file_name, discretize); clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf->score(raw.Xt, raw.yt); // std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " << - // scores[{file_name, name}] << std::endl; + // scores[{file_name, name}] << std::endl; INFO("Classifier: " << name << " File: " << file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); REQUIRE(clf->getStatus() == bayesnet::NORMAL); @@ -101,7 +101,6 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") INFO("Checking version of " << name << " classifier"); REQUIRE(clf->getVersion() == ACTUAL_VERSION); } - delete clf; } TEST_CASE("Models features & Graph", "[Models]") { @@ -133,7 +132,7 @@ TEST_CASE("Models features & Graph", "[Models]") clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); - REQUIRE(clf.getNumberOfStates() == 27); + REQUIRE(clf.getNumberOfStates() == 26); REQUIRE(clf.getClassNumStates() == 3); REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", @@ -149,7 +148,6 @@ TEST_CASE("Get num features & num edges", "[Models]") REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 8); } - TEST_CASE("Model predict_proba", "[Models]") { std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting", "TANLd", "SPODELd", "KDBLd"); @@ -180,15 +178,15 @@ TEST_CASE("Model predict_proba", "[Models]") {0.0284828, 0.770524, 0.200993}, {0.0213182, 0.857189, 0.121493}, {0.00868436, 0.949494, 0.0418215} }); - auto res_prob_tanld = std::vector>({ {0.000544493, 0.995796, 0.00365992 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.00228423, 0.994645, 0.00307078 }, - {0.00120539, 0.0666788, 0.932116 }, - {0.00361847, 0.979203, 0.017179 }, - {0.00483293, 0.985326, 0.00984064 }, - {0.000595606, 0.9977, 0.00170441 } }); + auto res_prob_tanld = std::vector>({ {0.000597557, 0.9957, 0.00370254}, + {0.000731377, 0.997914, 0.0013544}, + {0.000731377, 0.997914, 0.0013544}, + {0.000731377, 0.997914, 0.0013544}, + {0.000838614, 0.998122, 0.00103923}, + {0.00130852, 0.0659492, 0.932742}, + {0.00365946, 0.979412, 0.0169281}, + {0.00435035, 0.986248, 0.00940212}, + {0.000583815, 0.997746, 0.00167066} }); auto res_prob_spodeld = std::vector>({ {0.000908024, 0.993742, 0.00535024 }, {0.00187726, 0.99167, 0.00645308 }, {0.00187726, 0.99167, 0.00645308 }, @@ -216,29 +214,33 @@ TEST_CASE("Model predict_proba", "[Models]") {"TANLd", res_prob_tanld}, {"SPODELd", res_prob_spodeld}, {"KDBLd", res_prob_kdbld} }; - std::map models{ {"TAN", new bayesnet::TAN()}, - {"SPODE", new bayesnet::SPODE(0)}, - {"BoostAODEproba", new bayesnet::BoostAODE(false)}, - {"BoostAODEvoting", new bayesnet::BoostAODE(true)}, - {"TANLd", new bayesnet::TANLd()}, - {"SPODELd", new bayesnet::SPODELd(0)}, - {"KDBLd", new bayesnet::KDBLd(2)} }; + + std::map> models; + models["TAN"] = std::make_unique(); + models["SPODE"] = std::make_unique(0); + models["BoostAODEproba"] = std::make_unique(false); + models["BoostAODEvoting"] = std::make_unique(true); + models["TANLd"] = std::make_unique(); + models["SPODELd"] = std::make_unique(0); + models["KDBLd"] = std::make_unique(2); + int init_index = 78; SECTION("Test " + model + " predict_proba") { + INFO("Testing " << model << " predict_proba"); auto ld_model = model.substr(model.length() - 2) == "Ld"; auto discretize = !ld_model; auto raw = RawDatasets("iris", discretize); - auto clf = models[model]; - clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); - auto yt_pred_proba = clf->predict_proba(raw.Xt); - auto yt_pred = clf->predict(raw.Xt); + auto& clf = *models[model]; + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); + auto yt_pred_proba = clf.predict_proba(raw.Xt); + auto yt_pred = clf.predict(raw.Xt); std::vector y_pred; std::vector> y_pred_proba; if (!ld_model) { - y_pred = clf->predict(raw.Xv); - y_pred_proba = clf->predict_proba(raw.Xv); + y_pred = clf.predict(raw.Xv); + y_pred_proba = clf.predict_proba(raw.Xv); REQUIRE(y_pred.size() == y_pred_proba.size()); REQUIRE(y_pred.size() == yt_pred.size(0)); REQUIRE(y_pred.size() == yt_pred_proba.size(0)); @@ -267,18 +269,20 @@ TEST_CASE("Model predict_proba", "[Models]") } else { // Check predict_proba values for vectors and tensors auto predictedClasses = yt_pred_proba.argmax(1); + // std::cout << model << std::endl; for (int i = 0; i < 9; i++) { REQUIRE(predictedClasses[i].item() == yt_pred[i].item()); + // std::cout << "{"; for (int j = 0; j < 3; j++) { + // std::cout << yt_pred_proba[i + init_index][j].item() << ", "; REQUIRE(res_prob[model][i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); } + // std::cout << "\b\b}," << std::endl; } } - delete clf; } } - TEST_CASE("AODE voting-proba", "[Models]") { auto raw = RawDatasets("glass", true); @@ -324,11 +328,15 @@ TEST_CASE("KDB with hyperparameters", "[Models]") REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); } -TEST_CASE("Incorrect type of data for SPODELd", "[Models]") +TEST_CASE("Incorrect type of data for Ld models", "[Models]") { auto raw = RawDatasets("iris", true); - auto clf = bayesnet::SPODELd(0); - REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clfs = bayesnet::SPODELd(0); + REQUIRE_THROWS_AS(clfs.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clft = bayesnet::TANLd(); + REQUIRE_THROWS_AS(clft.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clfk = bayesnet::KDBLd(0); + REQUIRE_THROWS_AS(clfk.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); } TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { @@ -428,3 +436,49 @@ TEST_CASE("Check KDB loop detection", "[Models]") REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights)); REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights)); } +TEST_CASE("Local discretization hyperparameters", "[Models]") +{ + auto raw = RawDatasets("iris", false); + auto clfs = bayesnet::SPODELd(0); + clfs.setHyperparameters({ + {"max_iterations", 7}, + {"verbose_convergence", true}, + }); + REQUIRE_NOTHROW(clfs.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfs.getStatus() == bayesnet::NORMAL); + auto clfk = bayesnet::KDBLd(0); + clfk.setHyperparameters({ + {"k", 3}, + {"theta", 1e-4}, + }); + REQUIRE_NOTHROW(clfk.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfk.getStatus() == bayesnet::NORMAL); + auto clfa = bayesnet::AODELd(); + clfa.setHyperparameters({ + {"ld_proposed_cuts", 9}, + {"ld_algorithm", "BINQ"}, + }); + REQUIRE_NOTHROW(clfa.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfa.getStatus() == bayesnet::NORMAL); + auto clft = bayesnet::TANLd(); + clft.setHyperparameters({ + {"ld_proposed_cuts", 7}, + {"mdlp_max_depth", 5}, + {"mdlp_min_length", 3}, + {"ld_algorithm", "MDLP"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); + clft.setHyperparameters({ + {"ld_proposed_cuts", 9}, + {"ld_algorithm", "BINQ"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); + clft.setHyperparameters({ + {"ld_proposed_cuts", 5}, + {"ld_algorithm", "BINU"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); +} diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index c024f32..8e0f47a 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -345,12 +345,12 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto net1 = bayesnet::Network(); buildModel(net1, raw.features, raw.className); net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + // Create empty network and assign auto net2 = bayesnet::Network(); net2.addNode("TempNode"); // Add something to make sure it gets cleared net2 = net1; - + // Verify they are equal REQUIRE(net1.getFeatures() == net2.getFeatures()); REQUIRE(net1.getEdges() == net2.getEdges()); @@ -361,10 +361,10 @@ TEST_CASE("Test Bayesian Network", "[Network]") REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0)); REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1)); REQUIRE(net1.getNodes().size() == net2.getNodes().size()); - + // Verify topology equality REQUIRE(net1 == net2); - + // Verify they are separate objects by modifying one net2.initialize(); net2.addNode("OnlyInNet2"); @@ -376,46 +376,47 @@ TEST_CASE("Test Bayesian Network", "[Network]") INFO("Test self assignment"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + int original_edges = net.getNumEdges(); int original_nodes = net.getNodes().size(); - + // Self assignment should not corrupt the network net = net; - + auto all_features = raw.features; + all_features.push_back(raw.className); REQUIRE(net.getNumEdges() == original_edges); REQUIRE(net.getNodes().size() == original_nodes); - REQUIRE(net.getFeatures() == raw.features); + REQUIRE(net.getFeatures() == all_features); REQUIRE(net.getClassName() == raw.className); } SECTION("Test operator== topology comparison") { INFO("Test operator== topology comparison"); - + // Test 1: Two identical networks auto net1 = bayesnet::Network(); auto net2 = bayesnet::Network(); - + net1.addNode("A"); net1.addNode("B"); net1.addNode("C"); net1.addEdge("A", "B"); net1.addEdge("B", "C"); - + net2.addNode("A"); net2.addNode("B"); net2.addNode("C"); net2.addEdge("A", "B"); net2.addEdge("B", "C"); - + REQUIRE(net1 == net2); - + // Test 2: Different nodes auto net3 = bayesnet::Network(); net3.addNode("A"); net3.addNode("D"); // Different node REQUIRE_FALSE(net1 == net3); - + // Test 3: Same nodes, different edges auto net4 = bayesnet::Network(); net4.addNode("A"); @@ -424,12 +425,12 @@ TEST_CASE("Test Bayesian Network", "[Network]") net4.addEdge("A", "C"); // Different topology net4.addEdge("B", "C"); REQUIRE_FALSE(net1 == net4); - + // Test 4: Empty networks auto net5 = bayesnet::Network(); auto net6 = bayesnet::Network(); REQUIRE(net5 == net6); - + // Test 5: Same topology, different edge order auto net7 = bayesnet::Network(); net7.addNode("A"); @@ -442,35 +443,36 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test RAII compliance with smart pointers") { INFO("Test RAII compliance with smart pointers"); - + std::unique_ptr net1 = std::make_unique(); buildModel(*net1, raw.features, raw.className); net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + // Test that copy constructor works with smart pointers std::unique_ptr net2 = std::make_unique(*net1); - + REQUIRE(*net1 == *net2); REQUIRE(net1->getNumEdges() == net2->getNumEdges()); REQUIRE(net1->getNodes().size() == net2->getNodes().size()); - + // Destroy original net1.reset(); - + + // Test predictions still work + std::vector> test = { {1}, {2}, {0}, {1} }; + REQUIRE_NOTHROW(net2->predict(test)); + // net2 should still be valid and functional + net2->initialize(); REQUIRE_NOTHROW(net2->addNode("NewNode")); REQUIRE(net2->getNodes().count("NewNode") == 1); - - // Test predictions still work - std::vector> test = { {1, 2, 0, 1, 1} }; - REQUIRE_NOTHROW(net2->predict(test)); } SECTION("Test complex topology copy") { INFO("Test complex topology copy"); - + auto original = bayesnet::Network(); - + // Create a more complex network original.addNode("Root"); original.addNode("Child1"); @@ -478,45 +480,45 @@ TEST_CASE("Test Bayesian Network", "[Network]") original.addNode("Grandchild1"); original.addNode("Grandchild2"); original.addNode("Grandchild3"); - + original.addEdge("Root", "Child1"); original.addEdge("Root", "Child2"); original.addEdge("Child1", "Grandchild1"); original.addEdge("Child1", "Grandchild2"); original.addEdge("Child2", "Grandchild3"); - + // Copy it auto copy = original; - + // Verify topology is identical REQUIRE(original == copy); REQUIRE(original.getNodes().size() == copy.getNodes().size()); REQUIRE(original.getNumEdges() == copy.getNumEdges()); - + // Verify edges are properly reconstructed auto originalEdges = original.getEdges(); auto copyEdges = copy.getEdges(); REQUIRE(originalEdges.size() == copyEdges.size()); - + // Verify node relationships are properly copied for (const auto& nodePair : original.getNodes()) { const std::string& nodeName = nodePair.first; auto* originalNode = nodePair.second.get(); auto* copyNode = copy.getNodes().at(nodeName).get(); - + REQUIRE(originalNode->getParents().size() == copyNode->getParents().size()); REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size()); - + // Verify parent names match for (size_t i = 0; i < originalNode->getParents().size(); ++i) { - REQUIRE(originalNode->getParents()[i]->getName() == - copyNode->getParents()[i]->getName()); + REQUIRE(originalNode->getParents()[i]->getName() == + copyNode->getParents()[i]->getName()); } - + // Verify child names match for (size_t i = 0; i < originalNode->getChildren().size(); ++i) { - REQUIRE(originalNode->getChildren()[i]->getName() == - copyNode->getChildren()[i]->getName()); + REQUIRE(originalNode->getChildren()[i]->getName() == + copyNode->getChildren()[i]->getName()); } } } diff --git a/tests/TestBayesNode.cc b/tests/TestBayesNode.cc index 8cbd757..a1fd04f 100644 --- a/tests/TestBayesNode.cc +++ b/tests/TestBayesNode.cc @@ -158,4 +158,47 @@ TEST_CASE("TEST MinFill method", "[Node]") REQUIRE(node_2.minFill() == 6); REQUIRE(node_3.minFill() == 3); REQUIRE(node_4.minFill() == 1); +} +TEST_CASE("Test operator =", "[Node]") +{ + // Generate a test to test the operator = of the Node class + // Create a node with 3 parents and 2 children + auto node = bayesnet::Node("N1"); + auto parent_1 = bayesnet::Node("P1"); + parent_1.setNumStates(3); + auto child_1 = bayesnet::Node("H1"); + child_1.setNumStates(2); + node.addParent(&parent_1); + node.addChild(&child_1); + // Create a cpt in the node using computeCPT + auto dataset = torch::tensor({ {1, 0, 0, 1}, {0, 1, 2, 1}, {0, 1, 1, 0} }); + auto states = std::vector({ 2, 3, 3 }); + auto features = std::vector{ "N1", "P1", "H1" }; + auto className = std::string("Class"); + auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble); + node.setNumStates(2); + node.computeCPT(dataset, features, 0.0, weights); + // Get the cpt of the node + auto cpt = node.getCPT(); + // Check that the cpt is not empty + REQUIRE(cpt.numel() > 0); + // Check that the cpt has the correct dimensions + auto dimensions = cpt.sizes(); + REQUIRE(dimensions.size() == 2); + REQUIRE(dimensions[0] == 2); // Number of states of the node + REQUIRE(dimensions[1] == 3); // Number of states of the first parent + // Create a copy of the node + auto node_copy = node; + // Check that the copy has not any parents or children + auto parents = node_copy.getParents(); + auto children = node_copy.getChildren(); + REQUIRE(parents.size() == 0); + REQUIRE(children.size() == 0); + // Check that the copy has the same name + REQUIRE(node_copy.getName() == "N1"); + // Check that the copy has the same cpt + auto cpt_copy = node_copy.getCPT(); + REQUIRE(cpt_copy.equal(cpt)); + // Check that the copy has the same number of states + REQUIRE(node_copy.getNumStates() == node.getNumStates()); } \ No newline at end of file