diff --git a/README.md b/README.md index a33a6e6..e1c8400 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -![Static Badge](https://img.shields.io/badge/Coverage-95,8%25-green) +![Static Badge](https://img.shields.io/badge/Coverage-97,1%25-green) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/utils/Mst.cc b/bayesnet/utils/Mst.cc index b0897b2..b463205 100644 --- a/bayesnet/utils/Mst.cc +++ b/bayesnet/utils/Mst.cc @@ -1,3 +1,4 @@ +#include #include #include #include "Mst.h" @@ -45,15 +46,6 @@ namespace bayesnet { } } } - void Graph::display_mst() - { - std::cout << "Edge :" << " Weight" << std::endl; - for (int i = 0; i < T.size(); i++) { - std::cout << T[i].second.first << " - " << T[i].second.second << " : " - << T[i].first; - std::cout << std::endl; - } - } void insertElement(std::list& variables, int variable) { diff --git a/bayesnet/utils/Mst.h b/bayesnet/utils/Mst.h index 9fa951b..6c9b421 100644 --- a/bayesnet/utils/Mst.h +++ b/bayesnet/utils/Mst.h @@ -5,29 +5,28 @@ #include namespace bayesnet { class MST { - private: - torch::Tensor weights; - std::vector features; - int root = 0; public: MST() = default; MST(const std::vector& features, const torch::Tensor& weights, const int root); std::vector> maximumSpanningTree(); + private: + torch::Tensor weights; + std::vector features; + int root = 0; }; class Graph { - private: - int V; // number of nodes in graph - std::vector >> G; // std::vector for graph - std::vector >> T; // std::vector for mst - std::vector parent; public: explicit Graph(int V); void addEdge(int u, int v, float wt); int find_set(int i); void union_set(int u, int v); void kruskal_algorithm(); - void display_mst(); std::vector >> get_mst() { return T; } + private: + int V; // number of nodes in graph + std::vector >> G; // std::vector for graph + std::vector >> T; // std::vector for mst + std::vector parent; }; } #endif \ No newline at end of file diff --git a/tests/TestBayesClassifier.cc b/tests/TestBayesClassifier.cc index 31fa660..c6b0031 100644 --- a/tests/TestBayesClassifier.cc +++ b/tests/TestBayesClassifier.cc @@ -54,6 +54,13 @@ TEST_CASE("Invalid feature name", "[Classifier]") REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument); REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states"); } +TEST_CASE("Invalid hyperparameter", "[Classifier]") +{ + auto model = bayesnet::KDB(2); + auto raw = RawDatasets("iris", true); + REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument); + REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}"); +} TEST_CASE("Topological order", "[Classifier]") { auto model = bayesnet::TAN(); diff --git a/tests/TestBayesEnsemble.cc b/tests/TestBayesEnsemble.cc index 79c3228..215f293 100644 --- a/tests/TestBayesEnsemble.cc +++ b/tests/TestBayesEnsemble.cc @@ -3,6 +3,8 @@ #include #include #include "bayesnet/ensembles/BoostAODE.h" +#include "bayesnet/ensembles/AODE.h" +#include "bayesnet/ensembles/AODELd.h" #include "TestUtils.h" @@ -73,6 +75,15 @@ TEST_CASE("Graph", "[Ensemble]") clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); auto graph = clf.graph(); REQUIRE(graph.size() == 56); + auto clf2 = bayesnet::AODE(); + clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + graph = clf2.graph(); + REQUIRE(graph.size() == 56); + raw = RawDatasets("glass", false); + auto clf3 = bayesnet::AODELd(); + clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + graph = clf3.graph(); + REQUIRE(graph.size() == 261); } TEST_CASE("Compute ArgMax", "[Ensemble]") { diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 8d071d8..a52df09 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -14,7 +14,7 @@ #include "bayesnet/ensembles/BoostAODE.h" #include "TestUtils.h" -const std::string ACTUAL_VERSION = "1.0.4"; +const std::string ACTUAL_VERSION = "1.0.4.1"; TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") { @@ -52,6 +52,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") auto score = clf->score(raw.Xt, raw.yt); INFO("Classifier: " + name + " File: " + file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); + REQUIRE(clf->getStatus() == bayesnet::NORMAL); } } SECTION("Library check version") @@ -61,7 +62,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") } delete clf; } -TEST_CASE("Models features", "[Models]") +TEST_CASE("Models features & Graph", "[Models]") { auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", "class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n", @@ -70,15 +71,30 @@ TEST_CASE("Models features", "[Models]") "sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n" } ); - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - REQUIRE(clf.getNumberOfNodes() == 5); - REQUIRE(clf.getNumberOfEdges() == 7); - REQUIRE(clf.getNumberOfStates() == 19); - REQUIRE(clf.getClassNumStates() == 3); - REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); - REQUIRE(clf.graph("Test") == graph); + SECTION("Test TAN") + { + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::TAN(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + REQUIRE(clf.getNumberOfNodes() == 5); + REQUIRE(clf.getNumberOfEdges() == 7); + REQUIRE(clf.getNumberOfStates() == 19); + REQUIRE(clf.getClassNumStates() == 3); + REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); + REQUIRE(clf.graph("Test") == graph); + } + SECTION("Test TANLd") + { + auto clf = bayesnet::TANLd(); + auto raw = RawDatasets("iris", false); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + REQUIRE(clf.getNumberOfNodes() == 5); + REQUIRE(clf.getNumberOfEdges() == 7); + REQUIRE(clf.getNumberOfStates() == 19); + REQUIRE(clf.getClassNumStates() == 3); + REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); + REQUIRE(clf.graph("Test") == graph); + } } TEST_CASE("Get num features & num edges", "[Models]") { @@ -222,6 +238,12 @@ TEST_CASE("KDB with hyperparameters", "[Models]") REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); } +TEST_CASE("Incorrect type of data for SPODELd", "[Models]") +{ + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::SPODELd(0); + REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest), std::runtime_error); +} TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { auto clf = bayesnet::AODE(); diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc index 3ba8303..73669b5 100644 --- a/tests/TestBoostAODE.cc +++ b/tests/TestBoostAODE.cc @@ -157,18 +157,13 @@ TEST_CASE("Bisection", "[BoostAODE]") TEST_CASE("Block Update", "[BoostAODE]") { auto clf = bayesnet::BoostAODE(); - // auto raw = RawDatasets("mfeat-factors", true); - auto raw = RawDatasets("glass", true); + auto raw = RawDatasets("mfeat-factors", true); clf.setHyperparameters({ {"bisection", true}, {"block_update", true}, {"maxTolerance", 3}, {"convergence", true}, }); - // clf.setHyperparameters({ - // {"block_update", true}, - // }); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); REQUIRE(clf.getNumberOfNodes() == 217); REQUIRE(clf.getNumberOfEdges() == 431); diff --git a/tests/TestFeatureSelection.cc b/tests/TestFeatureSelection.cc index 2feb723..159d3b8 100644 --- a/tests/TestFeatureSelection.cc +++ b/tests/TestFeatureSelection.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include "bayesnet/utils/BayesMetrics.h" #include "bayesnet/feature_selection/CFS.h" #include "bayesnet/feature_selection/FCBF.h" @@ -68,4 +69,15 @@ TEST_CASE("Features Selected", "[FeatureSelection]") delete featureSelector; } } +} +TEST_CASE("Oddities", "[FeatureSelection]") +{ + auto raw = RawDatasets("iris", true); + // FCBF Limits + REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7"); + REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]"); + REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]"); } \ No newline at end of file