Refactor tests and add FeatureSelection tests

This commit is contained in:
Ricardo Montañana Gómez 2024-04-02 17:38:48 +02:00
parent 56b5158ff3
commit de23303801
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
6 changed files with 159 additions and 36 deletions

View File

@ -5,7 +5,7 @@ SHELL := /bin/bash
f_release = build_release
f_debug = build_debug
app_targets = BayesNet
test_targets = unit_tests_bayesnet
test_targets = TestBayesNet
n_procs = -j 16
define ClearTests
@ -85,9 +85,11 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
@$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
@for t in $(test_targets); do \
echo ">>> Running $$t...";\
if [ -f $(f_debug)/tests/$$t ]; then \
cd $(f_debug)/tests ; \
./$$t $(opt) ; \
cd ../.. ; \
fi ; \
done
@echo ">>> Done";

View File

@ -1,5 +1,4 @@
if(ENABLE_TESTING)
set(TEST_BAYESNET "unit_tests_bayesnet")
include_directories(
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp
@ -9,8 +8,11 @@ if(ENABLE_TESTING)
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
add_test(NAME Network COMMAND TestBayesNet "[Network]")
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
add_test(NAME Models COMMAND TestBayesNet "[Models]")
endif(ENABLE_TESTING)

View File

@ -5,7 +5,7 @@
#include "TestUtils.h"
TEST_CASE("Metrics Test", "[BayesNet]")
TEST_CASE("Metrics Test", "[Metrics]")
{
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
map<std::string, pair<int, std::vector<int>>> resultsKBest = {

View File

@ -1,4 +1,3 @@
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
@ -15,7 +14,7 @@
const std::string ACTUAL_VERSION = "1.0.4";
TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{
map <pair<std::string, std::string>, float> scores{
// Diabetes
@ -60,7 +59,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
}
delete clf;
}
TEST_CASE("Models features", "[BayesNet]")
TEST_CASE("Models features", "[Models]")
{
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
@ -79,7 +78,7 @@ TEST_CASE("Models features", "[BayesNet]")
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
TEST_CASE("Get num features & num edges", "[BayesNet]")
TEST_CASE("Get num features & num edges", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::KDB(2);
@ -87,7 +86,7 @@ TEST_CASE("Get num features & num edges", "[BayesNet]")
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
TEST_CASE("BoostAODE feature_select CFS", "[Models]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
@ -99,27 +98,27 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({
{"order", "asc"},
{"convergence", true},
{"select_features","CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.82031).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.82031).epsilon(raw.epsilon));
}
TEST_CASE("Model predict_proba", "[BayesNet]")
// TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
// {
// auto raw = RawDatasets("diabetes", true);
// auto clf = bayesnet::BoostAODE(true);
// clf.setHyperparameters({
// {"order", "asc"},
// {"convergence", true},
// {"select_features","CFS"},
// });
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// REQUIRE(clf.getNumberOfNodes() == 72);
// REQUIRE(clf.getNumberOfEdges() == 120);
// REQUIRE(clf.getNotes().size() == 2);
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 7 of 8 with CFS");
// REQUIRE(clf.getNotes()[1] == "Number of models: 8");
// auto score = clf.score(raw.Xv, raw.yv);
// auto scoret = clf.score(raw.Xt, raw.yt);
// REQUIRE(score == Catch::Approx(0.82031).epsilon(raw.epsilon));
// REQUIRE(scoret == Catch::Approx(0.82031).epsilon(raw.epsilon));
// }
TEST_CASE("Model predict_proba", "[Models]")
{
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
auto res_prob_tan = std::vector<std::vector<double>>({
@ -206,7 +205,7 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
delete clf;
}
}
TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
TEST_CASE("BoostAODE voting-proba", "[Models]")
{
auto raw = RawDatasets("iris", false);
auto clf = bayesnet::BoostAODE(false);
@ -225,7 +224,7 @@ TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
clf.dump_cpt();
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("BoostAODE order asc, desc & random", "[BayesNet]")
TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
{
auto raw = RawDatasets("glass", true);

View File

@ -1,3 +1,4 @@
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
@ -20,7 +21,7 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
}
}
TEST_CASE("Test Bayesian Network", "[BayesNet]")
TEST_CASE("Test Bayesian Network", "[Network]")
{
auto raw = RawDatasets("iris", true);

View File

@ -0,0 +1,119 @@
#include <memory>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "TestUtils.h"
// Factory for the feature-selection algorithm named by `selector`.
//
// raw       - dataset bundle; its dataset, featuresv, classNamev,
//             classNumStates and weights members are forwarded to the
//             selector constructor.
// selector  - algorithm name: "CFS", "FCBF" or "IWSS".
// threshold - forwarded to FCBF and IWSS only; CFS is built without it.
//
// Returns a selector allocated with `new` (the caller is responsible for
// deleting it), or nullptr when `selector` is not one of the names above.
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold)
{
    // Every selector receives the number of features as its fourth argument.
    const auto numFeatures = raw.featuresv.size();

    if (selector == "CFS")
        return new bayesnet::CFS(raw.dataset, raw.featuresv, raw.classNamev, numFeatures, raw.classNumStates, raw.weights);

    if (selector == "FCBF")
        return new bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, numFeatures, raw.classNumStates, raw.weights, threshold);

    if (selector == "IWSS")
        return new bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, numFeatures, raw.classNumStates, raw.weights, threshold);

    // Unrecognised algorithm name.
    return nullptr;
}
// Verifies that each feature-selection algorithm (CFS, IWSS, FCBF) returns the
// expected feature indices, in the expected order, for every reference dataset.
TEST_CASE("Features Selected", "[FeatureSelection]")
{
    // GENERATE makes Catch2 run this test case once per dataset name.
    std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
    auto raw = RawDatasets(file_name, true);
    SECTION("Test features selected and size")
    {
        // Expected selected-feature indices, keyed by {dataset, selector}.
        const std::map<std::pair<std::string, std::string>, std::vector<int>> results = {
            { {"glass", "CFS"}, { 2, 3, 6, 1, 8, 4 } },
            { {"iris", "CFS"}, { 3, 2, 1, 0 } },
            { {"ecoli", "CFS"}, { 5, 0, 4, 2, 1, 6 } },
            { {"diabetes", "CFS"}, { 1, 5, 7, 6, 4, 2 } },
            { {"glass", "IWSS" }, { 2, 3, 5, 7, 6 } },
            { {"iris", "IWSS"}, { 3, 2, 0 } },
            { {"ecoli", "IWSS"}, { 5, 6, 0, 1, 4 } },
            { {"diabetes", "IWSS"}, { 1, 5, 4, 7, 3 } },
            { {"glass", "FCBF" }, { 2, 3, 5, 7, 6 } },
            { {"iris", "FCBF"}, { 3, 2 } },
            { {"ecoli", "FCBF"}, { 5, 0, 1, 4, 2 } },
            { {"diabetes", "FCBF"}, { 1, 5, 7, 6 } }
        };
        // Selector name paired with the threshold handed to build_selector
        // (build_selector does not forward the threshold to CFS).
        const std::vector<std::pair<std::string, double>> selectors = {
            { "CFS", 0.0 },
            { "IWSS", 0.5 },
            { "FCBF", 1e-7 }
        };
        for (const auto& item : selectors) {
            const std::string& selector = item.first;
            const double threshold = item.second;
            // Emit the context first so it is attached to any failure below,
            // including one raised while fitting.
            INFO("file_name: " << file_name << ", selector: " << selector);
            // Own the selector through unique_ptr: a failing REQUIRE aborts the
            // test case, which would skip a manual delete and leak the object.
            std::unique_ptr<bayesnet::FeatureSelect> featureSelector(build_selector(raw, selector, threshold));
            // build_selector returns nullptr for an unknown selector name.
            REQUIRE(featureSelector.get() != nullptr);
            featureSelector->fit();
            const std::vector<int> selected = featureSelector->getFeatures();
            const std::vector<int>& expected = results.at({ file_name, selector });
            REQUIRE(selected.size() == expected.size());
            REQUIRE(selected == expected);
        }
    }
}
// TEST_CASE("Feature Selection Test", "[BayesNet]")
// {
// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
// std::string selector = GENERATE("CFS", "FCBF", "IWSS");
// map<std::string, pair<int, std::vector<int>>> resultsKBest = {
// {"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}},
// {"iris", {3, { 0, 3, 2 }} },
// {"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}},
// {"diabetes", {2, { 7, 1 }}}
// };
// map<std::string, double> resultsMI = {
// {"glass", 0.12805398},
// {"iris", 0.3158139948},
// {"ecoli", 0.0089431099},
// {"diabetes", 0.0345470614}
// };
// map<pair<std::string, int>, std::vector<pair<int, int>>> resultsMST = {
// { {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } },
// { {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } },
// { {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } },
// { {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } },
// { {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } },
// { {"ecoli", 1}, { {1, 0}, {1, 5}, {1, 3}, {5, 6}, {5, 4}, {0, 2} } },
// { {"diabetes", 0}, { {0, 7}, {0, 2}, {0, 6}, {2, 3}, {3, 4}, {3, 5}, {4, 1} } },
// { {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } }
// };
// auto raw = RawDatasets(file_name, true);
// FeatureSelect* featureSelector = build_selector(raw, selector);
// SECTION("Test Constructor")
// {
// REQUIRE(metrics.getScoresKBest().size() == 0);
// }
// SECTION("Test SelectKBestWeighted")
// {
// std::vector<int> kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
// REQUIRE(kBest.size() == resultsKBest.at(file_name).first);
// REQUIRE(kBest == resultsKBest.at(file_name).second);
// }
// SECTION("Test Mutual Information")
// {
// auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
// REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
// }
// SECTION("Test Maximum Spanning Tree")
// {
// auto weights_matrix = metrics.conditionalEdge(raw.weights);
// for (int i = 0; i < 2; ++i) {
// auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i);
// REQUIRE(result == resultsMST.at({ file_name, i }));
// }
// }
// }