Add Ensemble tests

This commit is contained in:
Ricardo Montañana Gómez 2024-04-08 19:09:51 +02:00
parent d12a779bd9
commit a1178554ff
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
7 changed files with 242 additions and 97 deletions

View File

@ -103,7 +103,6 @@ coverage: ## Run tests and generate coverage report (build/index.html)
viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
@echo ">>> Building tests with coverage..."
@folder=`pwd` ;
@$(MAKE) coverage
@echo ">>> Building report..."
@cd $(f_debug)/tests; \
@ -113,7 +112,7 @@ viewcoverage: ## Run tests, generate coverage report and upload it to codecov (b
lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
genhtml coverage.info --output-directory $(f_debug)/tests/coverage >/dev/null 2>&1;
genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
@$(MAKE) updatebadge
@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
@echo ">>> Done";

View File

@ -5,7 +5,7 @@
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
![Static Badge](https://img.shields.io/badge/Coverage-92,4%25-green)
![Static Badge](https://img.shields.io/badge/Coverage-94,0%25-green)
Bayesian Network Classifiers using libtorch from scratch

View File

@ -8,7 +8,9 @@ if(ENABLE_TESTING)
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
add_test(NAME Network COMMAND TestBayesNet "[Network]")
@ -16,5 +18,7 @@ if(ENABLE_TESTING)
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
add_test(NAME Models COMMAND TestBayesNet "[Models]")
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
endif(ENABLE_TESTING)

104
tests/TestBayesEnsemble.cc Normal file
View File

@ -0,0 +1,104 @@
#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h"
TEST_CASE("Topological Order", "[Ensemble]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto order = clf.topological_order();
REQUIRE(order.size() == 0);
}
TEST_CASE("Dump CPT", "[Ensemble]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto dump = clf.dump_cpt();
REQUIRE(dump == "");
}
TEST_CASE("Number of States", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfStates() == 76);
}
TEST_CASE("Show", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
std::vector<std::string> expected = {
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, sepalwidth, petalwidth, ",
"petalwidth -> ",
"sepallength -> ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> sepallength, sepalwidth, petallength, ",
"sepallength -> ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> ",
"sepallength -> sepalwidth, petallength, petalwidth, ",
"sepalwidth -> ",
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> ",
"petalwidth -> ",
"sepallength -> ",
"sepalwidth -> sepallength, petallength, petalwidth, ",
};
auto show = clf.show();
REQUIRE(show.size() == expected.size());
for (size_t i = 0; i < show.size(); i++)
REQUIRE(show[i] == expected[i]);
}
TEST_CASE("Graph", "[Ensemble]")
{
auto clf = bayesnet::BoostAODE();
auto raw = RawDatasets("iris", true);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto graph = clf.graph();
REQUIRE(graph.size() == 56);
}
TEST_CASE("Compute ArgMax", "[Ensemble]")
{
class TestEnsemble : public bayesnet::BoostAODE {
public:
TestEnsemble() : bayesnet::BoostAODE() {}
torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
};
TestEnsemble clf;
std::vector<std::vector<double>> X = {
{0.1f, 0.2f, 0.3f},
{0.4f, 0.9f, 0.6f},
{0.7f, 0.8f, 0.9f},
{0.5f, 0.2f, 0.1f},
{0.3f, 0.7f, 0.2f},
{0.5f, 0.5f, 0.2f}
};
std::vector<int> expected = { 2, 1, 2, 0, 1, 0 };
auto argmax = clf.compute_arg_max(X);
REQUIRE(argmax.size() == expected.size());
REQUIRE(argmax == expected);
auto Xt = torch::zeros({ 6, 3 }, torch::kFloat32);
Xt[0][0] = 0.1f; Xt[0][1] = 0.2f; Xt[0][2] = 0.3f;
Xt[1][0] = 0.4f; Xt[1][1] = 0.9f; Xt[1][2] = 0.6f;
Xt[2][0] = 0.7f; Xt[2][1] = 0.8f; Xt[2][2] = 0.9f;
Xt[3][0] = 0.5f; Xt[3][1] = 0.2f; Xt[3][2] = 0.1f;
Xt[4][0] = 0.3f; Xt[4][1] = 0.7f; Xt[4][2] = 0.2f;
Xt[5][0] = 0.5f; Xt[5][1] = 0.5f; Xt[5][2] = 0.2f;
auto argmaxt = clf.compute_arg_max(Xt);
REQUIRE(argmaxt.size(0) == expected.size());
for (int i = 0; i < argmaxt.size(0); i++)
REQUIRE(argmaxt[i].item<int>() == expected[i]);
}

View File

@ -2,6 +2,7 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "bayesnet/classifiers/KDB.h"
#include "bayesnet/classifiers/TAN.h"
#include "bayesnet/classifiers/SPODE.h"
@ -87,62 +88,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
TEST_CASE("BoostAODE feature_select CFS", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "CFS"} });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE feature_select IWSS", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE feature_select FCBF", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE test used features in train note and score", "[Models]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features","CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
}
TEST_CASE("Model predict_proba", "[Models]")
{
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
@ -230,25 +176,7 @@ TEST_CASE("Model predict_proba", "[Models]")
delete clf;
}
}
TEST_CASE("BoostAODE voting-proba", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(false);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({
{"predict_voting",true},
});
auto score_voting = clf.score(raw.Xv, raw.yv);
auto pred_voting = clf.predict_proba(raw.Xv);
REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
REQUIRE(clf.dump_cpt() == "");
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("AODE voting-proba", "[Models]")
{
auto raw = RawDatasets("glass", true);
@ -294,22 +222,21 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
}
TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
{
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{
{"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
};
for (const std::string& order : { "asc", "desc", "rand" }) {
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"order", order},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("BoostAODE order: " + order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
auto clf = bayesnet::AODE();
auto raw = RawDatasets("iris", true);
std::string message = "Ensemble has not been fitted";
REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error);
REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message);
REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
}

106
tests/TestBoostAODE.cc Normal file
View File

@ -0,0 +1,106 @@
#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h"
TEST_CASE("Feature_select CFS", "[BoostAODE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "CFS"} });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select IWSS", "[BoostAODE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select FCBF", "[BoostAODE]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Test used features in train note and score", "[BoostAODE]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features","CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
}
TEST_CASE("Voting vs proba", "[BoostAODE]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(false);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score_proba = clf.score(raw.Xv, raw.yv);
auto pred_proba = clf.predict_proba(raw.Xv);
clf.setHyperparameters({
{"predict_voting",true},
});
auto score_voting = clf.score(raw.Xv, raw.yv);
auto pred_voting = clf.predict_proba(raw.Xv);
REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
REQUIRE(clf.dump_cpt() == "");
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("Order asc, desc & random", "[BoostAODE]")
{
auto raw = RawDatasets("glass", true);
std::map<std::string, double> scores{
{"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
};
for (const std::string& order : { "asc", "desc", "rand" }) {
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"order", order},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("BoostAODE order: " + order);
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
}
}
TEST_CASE("Oddities", "[BoostAODE]")
{
}

View File

@ -10,6 +10,10 @@ output = subprocess.check_output(
"tail -1",
shell=True,
)
value = float(output.decode("utf-8").strip().replace("%", ""))
if value < 90:
print("Coverage is less than 90%. I won't update the badge.")
sys.exit(1)
percentage = output.decode("utf-8").strip().replace(".", ",")
coverage_line = (
f"![Static Badge](https://img.shields.io/badge/Coverage-{percentage}25-green)"
@ -23,3 +27,4 @@ with open(readme_file, "w") as f:
f.write(coverage_line + "\n")
else:
f.write(line)
print(f"Coverage updated with value: {percentage}")