From a1178554ff7487e8f380a254c07634316a08d81a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
Date: Mon, 8 Apr 2024 19:09:51 +0200
Subject: [PATCH] Add Ensemble tests

---
 Makefile                   |   3 +-
 README.md                  |   2 +-
 tests/CMakeLists.txt       |   6 +-
 tests/TestBayesEnsemble.cc | 104 ++++++++++++++++++++++++++++++++++
 tests/TestBayesModels.cc   | 113 +++++++------------------------------
 tests/TestBoostAODE.cc     | 106 ++++++++++++++++++++++++++++++++++
 update_coverage.py         |   5 ++
 7 files changed, 242 insertions(+), 97 deletions(-)
 create mode 100644 tests/TestBayesEnsemble.cc
 create mode 100644 tests/TestBoostAODE.cc

diff --git a/Makefile b/Makefile
index a730cfb..c43c01e 100644
--- a/Makefile
+++ b/Makefile
@@ -103,7 +103,6 @@ coverage: ## Run tests and generate coverage report (build/index.html)
 
 viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
 	@echo ">>> Building tests with coverage..."
-	@folder=`pwd` ;
 	@$(MAKE) coverage
 	@echo ">>> Building report..."
 	@cd $(f_debug)/tests; \
@@ -113,7 +112,7 @@ viewcoverage: ## Run tests, generate coverage report and upload it to codecov (b
 	lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
-	genhtml coverage.info --output-directory $(f_debug)/tests/coverage >/dev/null 2>&1;
+	genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
 	@$(MAKE) updatebadge
 	@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
 	@echo ">>> Done";
diff --git a/README.md b/README.md
index 60f8b4b..1a9c677 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
-![Static Badge](https://img.shields.io/badge/Coverage-92,4%25-green)
+![Static Badge](https://img.shields.io/badge/Coverage-94,0%25-green)
 
 Bayesian Network Classifiers using libtorch from scratch
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 02fd775..978e47a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,7 +8,9 @@ if(ENABLE_TESTING)
         ${CMAKE_BINARY_DIR}/configured_files/include
     )
     file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
-    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
+    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
+        TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
+        TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
     target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
     add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
     add_test(NAME Network COMMAND TestBayesNet "[Network]")
@@ -16,5 +18,7 @@ if(ENABLE_TESTING)
     add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
     add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
     add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
+    add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
     add_test(NAME Models COMMAND TestBayesNet "[Models]")
+    add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
 endif(ENABLE_TESTING)
diff --git a/tests/TestBayesEnsemble.cc b/tests/TestBayesEnsemble.cc
new file mode 100644
index 0000000..d009d11
--- /dev/null
+++ b/tests/TestBayesEnsemble.cc
@@ -0,0 +1,104 @@
+#include <catch2/catch_test_macros.hpp>
+#include <string>
+#include <vector>
+#include <torch/torch.h>
+#include "bayesnet/ensembles/BoostAODE.h"
+#include "TestUtils.h"
+
+
+TEST_CASE("Topological Order", "[Ensemble]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::BoostAODE();
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto order = clf.topological_order();
+    REQUIRE(order.size() == 0);
+}
+TEST_CASE("Dump CPT", "[Ensemble]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::BoostAODE();
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto dump = clf.dump_cpt();
+    REQUIRE(dump == "");
+}
+TEST_CASE("Number of States", "[Ensemble]")
+{
+    auto clf = bayesnet::BoostAODE();
+    auto raw = RawDatasets("iris", true);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    REQUIRE(clf.getNumberOfStates() == 76);
+}
+TEST_CASE("Show", "[Ensemble]")
+{
+    auto clf = bayesnet::BoostAODE();
+    auto raw = RawDatasets("iris", true);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    std::vector<std::string> expected = {
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> sepallength, sepalwidth, petalwidth, ",
+        "petalwidth -> ",
+        "sepallength -> ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> sepallength, sepalwidth, petallength, ",
+        "sepallength -> ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> ",
+        "sepallength -> sepalwidth, petallength, petalwidth, ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> ",
+        "sepallength -> ",
+        "sepalwidth -> sepallength, petallength, petalwidth, ",
+    };
+    auto show = clf.show();
+    REQUIRE(show.size() == expected.size());
+    for (size_t i = 0; i < show.size(); i++)
+        REQUIRE(show[i] == expected[i]);
+}
+TEST_CASE("Graph", "[Ensemble]")
+{
+    auto clf = bayesnet::BoostAODE();
+    auto raw = RawDatasets("iris", true);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto graph = clf.graph();
+    REQUIRE(graph.size() == 56);
+}
+TEST_CASE("Compute ArgMax", "[Ensemble]")
+{
+    class TestEnsemble : public bayesnet::BoostAODE {
+    public:
+        TestEnsemble() : bayesnet::BoostAODE() {}
+        torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
+        std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
+    };
+    TestEnsemble clf;
+    std::vector<std::vector<double>> X = {
+        {0.1f, 0.2f, 0.3f},
+        {0.4f, 0.9f, 0.6f},
+        {0.7f, 0.8f, 0.9f},
+        {0.5f, 0.2f, 0.1f},
+        {0.3f, 0.7f, 0.2f},
+        {0.5f, 0.5f, 0.2f}
+    };
+    std::vector<int> expected = { 2, 1, 2, 0, 1, 0 };
+    auto argmax = clf.compute_arg_max(X);
+    REQUIRE(argmax.size() == expected.size());
+    REQUIRE(argmax == expected);
+    auto Xt = torch::zeros({ 6, 3 }, torch::kFloat32);
+    Xt[0][0] = 0.1f; Xt[0][1] = 0.2f; Xt[0][2] = 0.3f;
+    Xt[1][0] = 0.4f; Xt[1][1] = 0.9f; Xt[1][2] = 0.6f;
+    Xt[2][0] = 0.7f; Xt[2][1] = 0.8f; Xt[2][2] = 0.9f;
+    Xt[3][0] = 0.5f; Xt[3][1] = 0.2f; Xt[3][2] = 0.1f;
+    Xt[4][0] = 0.3f; Xt[4][1] = 0.7f; Xt[4][2] = 0.2f;
+    Xt[5][0] = 0.5f; Xt[5][1] = 0.5f; Xt[5][2] = 0.2f;
+    auto argmaxt = clf.compute_arg_max(Xt);
+    REQUIRE(argmaxt.size(0) == expected.size());
+    for (int i = 0; i < argmaxt.size(0); i++)
+        REQUIRE(argmaxt[i].item<int>() == expected[i]);
+}
\ No newline at end of file
diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc
index 51d5091..55a932f 100644
--- a/tests/TestBayesModels.cc
+++ b/tests/TestBayesModels.cc
@@ -2,6 +2,7 @@
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
+#include <catch2/matchers/catch_matchers.hpp>
 #include "bayesnet/classifiers/KDB.h"
 #include "bayesnet/classifiers/TAN.h"
 #include "bayesnet/classifiers/SPODE.h"
@@ -87,62 +88,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
     REQUIRE(clf.getNumberOfNodes() == 5);
     REQUIRE(clf.getNumberOfEdges() == 8);
 }
-TEST_CASE("BoostAODE feature_select CFS", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "CFS"} });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select IWSS", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select FCBF", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE test used features in train note and score", "[Models]")
-{
-    auto raw = RawDatasets("diabetes", true);
-    auto clf = bayesnet::BoostAODE(true);
-    clf.setHyperparameters({
-        {"order", "asc"},
-        {"convergence", true},
-        {"select_features","CFS"},
-        });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 72);
-    REQUIRE(clf.getNumberOfEdges() == 120);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 8");
-    auto score = clf.score(raw.Xv, raw.yv);
-    auto scoret = clf.score(raw.Xt, raw.yt);
-    REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
-    REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
-}
+
 TEST_CASE("Model predict_proba", "[Models]")
 {
     std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
@@ -230,25 +176,7 @@ TEST_CASE("Model predict_proba", "[Models]")
         delete clf;
     }
 }
-TEST_CASE("BoostAODE voting-proba", "[Models]")
-{
-    auto raw = RawDatasets("iris", true);
-    auto clf = bayesnet::BoostAODE(false);
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    auto score_proba = clf.score(raw.Xv, raw.yv);
-    auto pred_proba = clf.predict_proba(raw.Xv);
-    clf.setHyperparameters({
-        {"predict_voting",true},
-        });
-    auto score_voting = clf.score(raw.Xv, raw.yv);
-    auto pred_voting = clf.predict_proba(raw.Xv);
-    REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
-    REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
-    REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
-    REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
-    REQUIRE(clf.dump_cpt() == "");
-    REQUIRE(clf.topological_order() == std::vector<std::string>());
-}
+
 TEST_CASE("AODE voting-proba", "[Models]")
 {
     auto raw = RawDatasets("glass", true);
@@ -294,22 +222,21 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
     REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
     REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
 }
-TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
+TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
 {
-    auto raw = RawDatasets("glass", true);
-    std::map<std::string, double> scores{
-        {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
-    };
-    for (const std::string& order : { "asc", "desc", "rand" }) {
-        auto clf = bayesnet::BoostAODE();
-        clf.setHyperparameters({
-            {"order", order},
-            });
-        clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-        auto score = clf.score(raw.Xv, raw.yv);
-        auto scoret = clf.score(raw.Xt, raw.yt);
-        INFO("BoostAODE order: " + order);
-        REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-        REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-    }
-}
+    auto clf = bayesnet::AODE();
+    auto raw = RawDatasets("iris", true);
+    std::string message = "Ensemble has not been fitted";
+    REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error);
+    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
+    REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
+    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
+    REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
+    REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
+    REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message);
+    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
+    REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
+    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
+    REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
+    REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
+}
\ No newline at end of file
diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc
new file mode 100644
index 0000000..9b140fa
--- /dev/null
+++ b/tests/TestBoostAODE.cc
@@ -0,0 +1,106 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <map>
+#include <string>
+#include "bayesnet/ensembles/BoostAODE.h"
+#include "TestUtils.h"
+
+
+TEST_CASE("Feature_select CFS", "[BoostAODE]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::BoostAODE();
+    clf.setHyperparameters({ {"select_features", "CFS"} });
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    REQUIRE(clf.getNumberOfNodes() == 90);
+    REQUIRE(clf.getNumberOfEdges() == 153);
+    REQUIRE(clf.getNotes().size() == 2);
+    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
+    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Feature_select IWSS", "[BoostAODE]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::BoostAODE();
+    clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    REQUIRE(clf.getNumberOfNodes() == 90);
+    REQUIRE(clf.getNumberOfEdges() == 153);
+    REQUIRE(clf.getNotes().size() == 2);
+    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
+    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Feature_select FCBF", "[BoostAODE]")
+{
+    auto raw = RawDatasets("glass", true);
+    auto clf = bayesnet::BoostAODE();
+    clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    REQUIRE(clf.getNumberOfNodes() == 90);
+    REQUIRE(clf.getNumberOfEdges() == 153);
+    REQUIRE(clf.getNotes().size() == 2);
+    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
+    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Test used features in train note and score", "[BoostAODE]")
+{
+    auto raw = RawDatasets("diabetes", true);
+    auto clf = bayesnet::BoostAODE(true);
+    clf.setHyperparameters({
+        {"order", "asc"},
+        {"convergence", true},
+        {"select_features","CFS"},
+        });
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    REQUIRE(clf.getNumberOfNodes() == 72);
+    REQUIRE(clf.getNumberOfEdges() == 120);
+    REQUIRE(clf.getNotes().size() == 2);
+    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
+    REQUIRE(clf.getNotes()[1] == "Number of models: 8");
+    auto score = clf.score(raw.Xv, raw.yv);
+    auto scoret = clf.score(raw.Xt, raw.yt);
+    REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
+    REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
+}
+TEST_CASE("Voting vs proba", "[BoostAODE]")
+{
+    auto raw = RawDatasets("iris", true);
+    auto clf = bayesnet::BoostAODE(false);
+    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+    auto score_proba = clf.score(raw.Xv, raw.yv);
+    auto pred_proba = clf.predict_proba(raw.Xv);
+    clf.setHyperparameters({
+        {"predict_voting",true},
+        });
+    auto score_voting = clf.score(raw.Xv, raw.yv);
+    auto pred_voting = clf.predict_proba(raw.Xv);
+    REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
+    REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
+    REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
+    REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
+    REQUIRE(clf.dump_cpt() == "");
+    REQUIRE(clf.topological_order() == std::vector<std::string>());
+}
+TEST_CASE("Order asc, desc & random", "[BoostAODE]")
+{
+    auto raw = RawDatasets("glass", true);
+    std::map<std::string, double> scores{
+        {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
+    };
+    for (const std::string& order : { "asc", "desc", "rand" }) {
+        auto clf = bayesnet::BoostAODE();
+        clf.setHyperparameters({
+            {"order", order},
+            });
+        clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+        auto score = clf.score(raw.Xv, raw.yv);
+        auto scoret = clf.score(raw.Xt, raw.yt);
+        INFO("BoostAODE order: " + order);
+        REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
+        REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
+    }
+}
+TEST_CASE("Oddities", "[BoostAODE]")
+{
+
+}
\ No newline at end of file
diff --git a/update_coverage.py b/update_coverage.py
index 126e4d8..5379cb3 100644
--- a/update_coverage.py
+++ b/update_coverage.py
@@ -10,6 +10,10 @@ output = subprocess.check_output(
     "tail -1",
     shell=True,
 )
+value = float(output.decode("utf-8").strip().replace("%", ""))
+if value < 90:
+    print("Coverage is less than 90%. I won't update the badge.")
+    sys.exit(1)
 percentage = output.decode("utf-8").strip().replace(".", ",")
 coverage_line = (
     f"![Static Badge](https://img.shields.io/badge/Coverage-{percentage}25-green)"
 )
@@ -23,3 +27,4 @@ with open(readme_file, "w") as f:
         f.write(coverage_line + "\n")
     else:
         f.write(line)
+print(f"Coverage updated with value: {percentage}")