diff --git a/README.md b/README.md
index e227b0b..e9042f4 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)]()
 ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
 ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
+[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 
 Bayesian Network Classifiers using libtorch from scratch
 
diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc
index 329ecce..d41471d 100644
--- a/bayesnet/classifiers/SPODELd.cc
+++ b/bayesnet/classifiers/SPODELd.cc
@@ -5,25 +5,23 @@ namespace bayesnet {
     SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
     {
         checkInput(X_, y_);
-        features = features_;
-        className = className_;
         Xf = X_;
         y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
-        SPODE::fit(dataset, features, className, states);
-        states = localDiscretizationProposal(states, model);
-        return *this;
+        return commonFit(features_, className_, states_);
     }
+
     SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
     {
         if (!torch::is_floating_point(dataset)) {
             throw std::runtime_error("Dataset must be a floating point tensor");
         }
         Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
-        y = dataset.index({ -1, "..." }).clone();
+        y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
+        return commonFit(features_, className_, states_);
+    }
+
+    SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+    {
         features = features_;
         className = className_;
         // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
@@ -34,7 +32,6 @@ namespace bayesnet {
         states = localDiscretizationProposal(states, model);
         return *this;
     }
-
     torch::Tensor SPODELd::predict(torch::Tensor& X)
     {
         auto Xt = prepareX(X);
diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h
index 9cc3310..001dc73 100644
--- a/bayesnet/classifiers/SPODELd.h
+++ b/bayesnet/classifiers/SPODELd.h
@@ -10,6 +10,7 @@ namespace bayesnet {
         virtual ~SPODELd() = default;
         SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
         SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
+        SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
         std::vector<std::string> graph(const std::string& name = "SPODE") const override;
         torch::Tensor predict(torch::Tensor& X) override;
         static inline std::string version() { return "0.0.1"; };
diff --git a/bayesnet/utils/bayesnetUtils.cc b/bayesnet/utils/bayesnetUtils.cc
index f620983..5082a5f 100644
--- a/bayesnet/utils/bayesnetUtils.cc
+++ b/bayesnet/utils/bayesnetUtils.cc
@@ -10,18 +10,6 @@ namespace bayesnet {
         sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
         return indices;
     }
-    std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor)
-    {
-        // convert mxn tensor to nxm std::vector
-        std::vector<std::vector<int>> result;
-        // Iterate over cols
-        for (int i = 0; i < dtensor.size(1); ++i) {
-            auto col_tensor = dtensor.index({ "...", i });
-            auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + dtensor.size(0));
-            result.push_back(col);
-        }
-        return result;
-    }
     std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
     {
         // convert mxn tensor to mxn std::vector
diff --git a/bayesnet/utils/bayesnetUtils.h b/bayesnet/utils/bayesnetUtils.h
index 8b18974..b75e3f4 100644
--- a/bayesnet/utils/bayesnetUtils.h
+++ b/bayesnet/utils/bayesnetUtils.h
@@ -4,7 +4,6 @@
 #include <vector>
 namespace bayesnet {
     std::vector<int> argsort(std::vector<double>& nums);
-    std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor);
     std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
     torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
 }
diff --git a/gcovr.cfg b/gcovr.cfg
index 1739d47..816d464 100644
--- a/gcovr.cfg
+++ b/gcovr.cfg
@@ -1,4 +1,5 @@
 filter = bayesnet/
 exclude-directories = build_debug/lib/
+exclude = bayesnet/utils/loguru.*
 print-summary = yes
 sort = uncovered-percent
diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc
index 71527d8..17605b0 100644
--- a/tests/TestBayesModels.cc
+++ b/tests/TestBayesModels.cc
@@ -207,7 +207,7 @@ TEST_CASE("Model predict_proba", "[Models]")
 }
 TEST_CASE("BoostAODE voting-proba", "[Models]")
 {
-    auto raw = RawDatasets("iris", false);
+    auto raw = RawDatasets("iris", true);
     auto clf = bayesnet::BoostAODE(false);
     clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
     auto score_proba = clf.score(raw.Xv, raw.yv);
@@ -224,9 +224,53 @@ TEST_CASE("BoostAODE voting-proba", "[Models]")
     clf.dump_cpt();
     REQUIRE(clf.topological_order() == std::vector<std::string>());
 }
+TEST_CASE("AODE voting-proba", "[Models]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::AODE(false);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto score_proba = clf.score(raw.Xv, raw.yv);
+    auto pred_proba = clf.predict_proba(raw.Xv);
+    clf.setHyperparameters({
+        {"predict_voting",true},
+        });
+    auto score_voting = clf.score(raw.Xv, raw.yv);
+    auto pred_voting = clf.predict_proba(raw.Xv);
+    REQUIRE(score_proba == Catch::Approx(0.79439f).epsilon(raw.epsilon));
+    REQUIRE(score_voting == Catch::Approx(0.78972f).epsilon(raw.epsilon));
+    REQUIRE(pred_voting[67][0] == Catch::Approx(0.888889).epsilon(raw.epsilon));
+    REQUIRE(pred_proba[67][0] == Catch::Approx(0.702184).epsilon(raw.epsilon));
+    REQUIRE(clf.topological_order() == std::vector<std::string>());
+}
+TEST_CASE("SPODELd dataset", "[Models]")
+{
+    auto raw = RawDatasets("iris", false);
+    auto clf = bayesnet::SPODELd(0);
+    raw.dataset.to(torch::kFloat32);
+    clf.fit(raw.dataset, raw.featuresv, raw.classNamev, raw.statesv);
+    auto score = clf.score(raw.Xt, raw.yt);
+    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
+    auto scoret = clf.score(raw.Xt, raw.yt);
+    REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
+    REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
+}
+TEST_CASE("KDB with hyperparameters", "[Models]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::KDB(2);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto score = clf.score(raw.Xv, raw.yv);
+    clf.setHyperparameters({
+        {"k", 3},
+        {"theta", 0.7},
+        });
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto scoret = clf.score(raw.Xv, raw.yv);
+    REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
+    REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
+}
 TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
 {
-    auto raw = RawDatasets("glass", true);
     std::map<std::string, double> scores{
         {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }