Add Ensemble tests

2024-04-08 19:09:51 +02:00 · 2024-04-08 19:09:51 +02:00 · a1178554ff
commit a1178554ff
parent d12a779bd9
7 changed files with 242 additions and 97 deletions
--- a/3
+++ b/3
@ -103,7 +103,6 @@ coverage: ## Run tests and generate coverage report (build/index.html)

 viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
 	@echo ">>> Building tests with coverage..."
-	@folder=`pwd` ;
 	@$(MAKE) coverage
 	@echo ">>> Building report..."
 	@cd $(f_debug)/tests; \
@ -113,7 +112,7 @@ viewcoverage: ## Run tests, generate coverage report and upload it to codecov (b
 	lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
-	genhtml coverage.info --output-directory $(f_debug)/tests/coverage >/dev/null 2>&1;
+	genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
 	@$(MAKE) updatebadge
 	@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
 	@echo ">>> Done";
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@
 ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
-![Static Badge](https://img.shields.io/badge/Coverage-92,4%25-green)
+![Static Badge](https://img.shields.io/badge/Coverage-94,0%25-green)

 Bayesian Network Classifiers using libtorch from scratch

--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -8,7 +8,9 @@ if(ENABLE_TESTING)
        ${CMAKE_BINARY_DIR}/configured_files/include
    )
    file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
-    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
+    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc 
+        TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
+        TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
    target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
    add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
    add_test(NAME Network COMMAND TestBayesNet "[Network]")
@ -16,5 +18,7 @@ if(ENABLE_TESTING)
    add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
    add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
    add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
+    add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
    add_test(NAME Models COMMAND TestBayesNet "[Models]")
+    add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
 endif(ENABLE_TESTING)
--- a/tests/TestBayesEnsemble.cc
+++ b/tests/TestBayesEnsemble.cc
@ -0,0 +1,104 @@
+#include <type_traits>
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <catch2/generators/catch_generators.hpp>
+#include "bayesnet/ensembles/BoostAODE.h"
+#include "TestUtils.h"
+
+
+TEST_CASE("Topological Order", "[Ensemble]")
+{
+    // A BoostAODE fitted on glass is expected to report an empty topological order.
+    auto dataset = RawDatasets("glass", true);
+    bayesnet::BoostAODE model;
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.topological_order().size() == 0);
+}
+TEST_CASE("Dump CPT", "[Ensemble]")
+{
+    // A BoostAODE fitted on glass is expected to return an empty string from dump_cpt().
+    auto dataset = RawDatasets("glass", true);
+    bayesnet::BoostAODE model;
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.dump_cpt() == "");
+}
+TEST_CASE("Number of States", "[Ensemble]")
+{
+    auto dataset = RawDatasets("iris", true);
+    bayesnet::BoostAODE model;
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.getNumberOfStates() == 76); // total expected from the ensemble once fitted on iris
+}
+TEST_CASE("Show", "[Ensemble]")
+{
+    auto dataset = RawDatasets("iris", true);
+    bayesnet::BoostAODE model;
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    const std::vector<std::string> expected = { // exact show() lines for iris, in the order they must appear
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> sepallength, sepalwidth, petalwidth, ",
+        "petalwidth -> ",
+        "sepallength -> ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> sepallength, sepalwidth, petallength, ",
+        "sepallength -> ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> ",
+        "sepallength -> sepalwidth, petallength, petalwidth, ",
+        "sepalwidth -> ",
+        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
+        "petallength -> ",
+        "petalwidth -> ",
+        "sepallength -> ",
+        "sepalwidth -> sepallength, petallength, petalwidth, ",
+    };
+    const auto lines = model.show();
+    REQUIRE(lines.size() == expected.size());
+    for (size_t idx = 0; idx < lines.size(); ++idx) // one assertion per line so a mismatch names the line
+        REQUIRE(lines[idx] == expected[idx]);
+}
+TEST_CASE("Graph", "[Ensemble]")
+{
+    // graph() on a BoostAODE fitted with iris is expected to produce 56 entries.
+    auto dataset = RawDatasets("iris", true);
+    bayesnet::BoostAODE model;
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.graph().size() == 56);
+}
+TEST_CASE("Compute ArgMax", "[Ensemble]")
+{
+    // Test-only subclass forwarding to both Ensemble::compute_arg_max overloads so they can be called from here.
+    struct TestEnsemble : public bayesnet::BoostAODE {
+        TestEnsemble() : bayesnet::BoostAODE() {}
+        torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
+        std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
+    };
+    TestEnsemble clf;
+    std::vector<std::vector<double>> X = { // double literals: the container holds double, no float widening
+        {0.1, 0.2, 0.3},
+        {0.4, 0.9, 0.6},
+        {0.7, 0.8, 0.9},
+        {0.5, 0.2, 0.1},
+        {0.3, 0.7, 0.2},
+        {0.5, 0.5, 0.2} // tie between columns 0 and 1: index 0 is the expected answer
+    };
+    std::vector<int> expected = { 2, 1, 2, 0, 1, 0 }; // per-row index of the largest value
+    auto argmax = clf.compute_arg_max(X);
+    REQUIRE(argmax.size() == expected.size());
+    REQUIRE(argmax == expected);
+    auto Xt = torch::zeros({ 6, 3 }, torch::kFloat32); // same rows again, as a 6x3 float tensor
+    Xt[0][0] = 0.1f; Xt[0][1] = 0.2f; Xt[0][2] = 0.3f;
+    Xt[1][0] = 0.4f; Xt[1][1] = 0.9f; Xt[1][2] = 0.6f;
+    Xt[2][0] = 0.7f; Xt[2][1] = 0.8f; Xt[2][2] = 0.9f;
+    Xt[3][0] = 0.5f; Xt[3][1] = 0.2f; Xt[3][2] = 0.1f;
+    Xt[4][0] = 0.3f; Xt[4][1] = 0.7f; Xt[4][2] = 0.2f;
+    Xt[5][0] = 0.5f; Xt[5][1] = 0.5f; Xt[5][2] = 0.2f;
+    auto argmaxt = clf.compute_arg_max(Xt);
+    REQUIRE(static_cast<size_t>(argmaxt.size(0)) == expected.size()); // size(0) is signed; cast avoids a signed/unsigned compare
+    for (int i = 0; i < argmaxt.size(0); i++)
+        REQUIRE(argmaxt[i].item<int>() == expected[i]);
+}
--- a/tests/TestBayesModels.cc
+++ b/tests/TestBayesModels.cc
@ -2,6 +2,7 @@
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
+#include <catch2/matchers/catch_matchers.hpp>
 #include "bayesnet/classifiers/KDB.h"
 #include "bayesnet/classifiers/TAN.h"
 #include "bayesnet/classifiers/SPODE.h"
@ -87,62 +88,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
    REQUIRE(clf.getNumberOfNodes() == 5);
    REQUIRE(clf.getNumberOfEdges() == 8);
 }
-TEST_CASE("BoostAODE feature_select CFS", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "CFS"} });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select IWSS", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select FCBF", "[Models]")
-{
-    auto raw = RawDatasets("glass", true);
-    auto clf = bayesnet::BoostAODE();
-    clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 90);
-    REQUIRE(clf.getNumberOfEdges() == 153);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE test used features in train note and score", "[Models]")
-{
-    auto raw = RawDatasets("diabetes", true);
-    auto clf = bayesnet::BoostAODE(true);
-    clf.setHyperparameters({
-        {"order", "asc"},
-        {"convergence", true},
-        {"select_features","CFS"},
-        });
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    REQUIRE(clf.getNumberOfNodes() == 72);
-    REQUIRE(clf.getNumberOfEdges() == 120);
-    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
-    REQUIRE(clf.getNotes()[1] == "Number of models: 8");
-    auto score = clf.score(raw.Xv, raw.yv);
-    auto scoret = clf.score(raw.Xt, raw.yt);
-    REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
-    REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
-}
+
 TEST_CASE("Model predict_proba", "[Models]")
 {
    std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
@ -230,25 +176,7 @@ TEST_CASE("Model predict_proba", "[Models]")
        delete clf;
    }
 }
-TEST_CASE("BoostAODE voting-proba", "[Models]")
-{
-    auto raw = RawDatasets("iris", true);
-    auto clf = bayesnet::BoostAODE(false);
-    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-    auto score_proba = clf.score(raw.Xv, raw.yv);
-    auto pred_proba = clf.predict_proba(raw.Xv);
-    clf.setHyperparameters({
-        {"predict_voting",true},
-        });
-    auto score_voting = clf.score(raw.Xv, raw.yv);
-    auto pred_voting = clf.predict_proba(raw.Xv);
-    REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
-    REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
-    REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
-    REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
-    REQUIRE(clf.dump_cpt() == "");
-    REQUIRE(clf.topological_order() == std::vector<std::string>());
-}
+
 TEST_CASE("AODE voting-proba", "[Models]")
 {
    auto raw = RawDatasets("glass", true);
@ -294,22 +222,21 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
    REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
    REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
 }
-TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
+TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
 {
-    auto raw = RawDatasets("glass", true);
-    std::map<std::string, double> scores{
-        {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
-    };
-    for (const std::string& order : { "asc", "desc", "rand" }) {
-        auto clf = bayesnet::BoostAODE();
-        clf.setHyperparameters({
-            {"order", order},
-            });
-        clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-        auto score = clf.score(raw.Xv, raw.yv);
-        auto scoret = clf.score(raw.Xt, raw.yt);
-        INFO("BoostAODE order: " + order);
-        REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-        REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-    }
-}
+    auto clf = bayesnet::AODE(); // deliberately never fitted: every call below must be rejected
+    auto raw = RawDatasets("iris", true);
+    std::string message = "Ensemble has not been fitted"; // exact exception text each call must produce
+    REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error); // first pass: check the exception type
+    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
+    REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
+    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
+    REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
+    REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
+    REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message); // second pass: same calls, now checking the message
+    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
+    REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
+    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
+    REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
+    REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
+}
--- a/tests/TestBoostAODE.cc
+++ b/tests/TestBoostAODE.cc
@ -0,0 +1,106 @@
+#include <type_traits>
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <catch2/generators/catch_generators.hpp>
+#include "bayesnet/ensembles/BoostAODE.h"
+#include "TestUtils.h"
+
+
+TEST_CASE("Feature_select CFS", "[BoostAODE]")
+{
+    auto dataset = RawDatasets("glass", true);
+    bayesnet::BoostAODE model;
+    model.setHyperparameters({ {"select_features", "CFS"} }); // CFS is the selector named in note 0 below
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.getNumberOfNodes() == 90);
+    REQUIRE(model.getNumberOfEdges() == 153);
+    REQUIRE(model.getNotes().size() == 2); // exactly two notes: selection summary, then model count
+    REQUIRE(model.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
+    REQUIRE(model.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Feature_select IWSS", "[BoostAODE]")
+{
+    auto dataset = RawDatasets("glass", true);
+    bayesnet::BoostAODE model;
+    model.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); // IWSS selector, threshold 0.5
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.getNumberOfNodes() == 90);
+    REQUIRE(model.getNumberOfEdges() == 153);
+    REQUIRE(model.getNotes().size() == 2); // exactly two notes: selection summary, then model count
+    REQUIRE(model.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
+    REQUIRE(model.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Feature_select FCBF", "[BoostAODE]")
+{
+    auto dataset = RawDatasets("glass", true);
+    bayesnet::BoostAODE model;
+    model.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } }); // FCBF selector, threshold 1e-7
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.getNumberOfNodes() == 90);
+    REQUIRE(model.getNumberOfEdges() == 153);
+    REQUIRE(model.getNotes().size() == 2); // exactly two notes: selection summary, then model count
+    REQUIRE(model.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
+    REQUIRE(model.getNotes()[1] == "Number of models: 9");
+}
+TEST_CASE("Test used features in train note and score", "[BoostAODE]")
+{
+    // diabetes with BoostAODE(true), order "asc", convergence on and CFS selection:
+    // checks the graph totals, the notes recorded by the model and both score() overloads.
+    auto dataset = RawDatasets("diabetes", true);
+    bayesnet::BoostAODE model(true);
+    model.setHyperparameters(
+        { {"order", "asc"}, {"convergence", true}, {"select_features","CFS"} });
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    REQUIRE(model.getNumberOfNodes() == 72);
+    REQUIRE(model.getNumberOfEdges() == 120);
+    REQUIRE(model.getNotes().size() == 2);
+    REQUIRE(model.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
+    REQUIRE(model.getNotes()[1] == "Number of models: 8");
+    // Xv/yv and Xt/yt are presumably the vector and tensor forms of the same data: both must score alike.
+    const auto accuracy = model.score(dataset.Xv, dataset.yv);
+    const auto accuracy_t = model.score(dataset.Xt, dataset.yt);
+    REQUIRE(accuracy == Catch::Approx(0.80078).epsilon(dataset.epsilon));
+    REQUIRE(accuracy_t == Catch::Approx(0.80078).epsilon(dataset.epsilon));
+}
+TEST_CASE("Voting vs proba", "[BoostAODE]")
+{
+    auto dataset = RawDatasets("iris", true);
+    bayesnet::BoostAODE model(false);
+    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
+    // Score and probabilities with the model as constructed (BoostAODE(false)) ...
+    const auto score_before = model.score(dataset.Xv, dataset.yv);
+    const auto proba_before = model.predict_proba(dataset.Xv);
+    // ... then again on the same fitted model after switching predict_voting on.
+    model.setHyperparameters({ {"predict_voting",true} });
+    const auto score_after = model.score(dataset.Xv, dataset.yv);
+    const auto proba_after = model.predict_proba(dataset.Xv);
+    REQUIRE(score_before == Catch::Approx(0.97333).epsilon(dataset.epsilon));
+    REQUIRE(score_after == Catch::Approx(0.98).epsilon(dataset.epsilon));
+    REQUIRE(proba_after[83][2] == Catch::Approx(0.552091).epsilon(dataset.epsilon));
+    REQUIRE(proba_before[83][2] == Catch::Approx(0.546017).epsilon(dataset.epsilon));
+    REQUIRE(model.dump_cpt() == "");
+    REQUIRE(model.topological_order() == std::vector<std::string>());
+}
+TEST_CASE("Order asc, desc & random", "[BoostAODE]")
+{
+    // Fits a fresh BoostAODE on glass once per "order" setting and compares both
+    // score() overloads against the accuracy stored for that setting.
+    auto raw = RawDatasets("glass", true);
+    // Expected accuracy per ordering; plain double literals so every entry carries the same precision.
+    const std::map<std::string, double> scores{ {"asc", 0.83645}, {"desc", 0.84579}, {"rand", 0.84112} };
+    for (const auto& entry : scores) { // walk the table itself: no second key list, no operator[] default-insert
+        const std::string& order = entry.first;
+        INFO("BoostAODE order: " + order); // registered before fit() so any failure below names the ordering
+        auto clf = bayesnet::BoostAODE();
+        clf.setHyperparameters({ {"order", order} });
+        clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
+        auto score = clf.score(raw.Xv, raw.yv);
+        auto scoret = clf.score(raw.Xt, raw.yt);
+        REQUIRE(score == Catch::Approx(entry.second).epsilon(raw.epsilon));
+        REQUIRE(scoret == Catch::Approx(entry.second).epsilon(raw.epsilon));
+    }
+}
+TEST_CASE("Oddities", "[BoostAODE]")
+{
+    // TODO: placeholder — this test case has no body and makes no assertions yet.
+}
--- a/update_coverage.py
+++ b/update_coverage.py
@ -10,6 +10,10 @@ output = subprocess.check_output(
    "tail -1",
    shell=True,
 )
+value = float(output.decode("utf-8").strip().replace("%", ""))  # command output parsed as a number, '%' removed
+if value < 90:  # below 90: stop before the README badge is rewritten
+    print("Coverage is less than 90%. I won't update the badge.")
+    sys.exit(1)  # NOTE(review): needs `import sys` above this hunk — not visible here, confirm
 percentage = output.decode("utf-8").strip().replace(".", ",")
 coverage_line = (
    f"![Static Badge](https://img.shields.io/badge/Coverage-{percentage}25-green)"
@ -23,3 +27,4 @@ with open(readme_file, "w") as f:
            f.write(coverage_line + "\n")
        else:
            f.write(line)
+print(f"Coverage updated with value: {percentage}")