From ad72bb355b02f48abd46ecc70d8a2c98ea53ff38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 1 Jun 2025 13:54:18 +0200 Subject: [PATCH] Fix CFS merit computation error --- bayesnet/feature_selection/FeatureSelect.cc | 8 +- tests/TestBoostA2DE.cc | 24 +++-- tests/TestBoostAODE.cc | 75 ++++++++------- tests/TestFeatureSelection.cc | 18 ++-- tests/TestXBA2DE.cc | 101 +++++++++++--------- tests/TestXBAODE.cc | 75 ++++++++------- 6 files changed, 164 insertions(+), 137 deletions(-) diff --git a/bayesnet/feature_selection/FeatureSelect.cc b/bayesnet/feature_selection/FeatureSelect.cc index 130bd3e..5103ddd 100644 --- a/bayesnet/feature_selection/FeatureSelect.cc +++ b/bayesnet/feature_selection/FeatureSelect.cc @@ -1,8 +1,8 @@ -// ** +// *************************************************************** // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez // SPDX-FileType: SOURCE // SPDX-License-Identifier: MIT -// ** +// *************************************************************** #include "bayesnet/utils/bayesnetUtils.h" #include "FeatureSelect.h" @@ -136,6 +136,4 @@ namespace bayesnet { if (!fitted) throw std::runtime_error("FeatureSelect not fitted"); return selectedScores; } - -} // namespace bayesnet - \ No newline at end of file +} \ No newline at end of file diff --git a/tests/TestBoostA2DE.cc b/tests/TestBoostA2DE.cc index ed5159d..1235d96 100644 --- a/tests/TestBoostA2DE.cc +++ b/tests/TestBoostA2DE.cc @@ -33,13 +33,11 @@ TEST_CASE("Feature_select IWSS", "[BoostA2DE]") auto clf = bayesnet::BoostA2DE(); clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); - REQUIRE(clf.getNumberOfNodes() == 140); - REQUIRE(clf.getNumberOfEdges() == 294); - REQUIRE(clf.getNotes().size() == 4); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS"); - REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated"); - REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2"); - REQUIRE(clf.getNotes()[3] == "Number of models: 14"); + REQUIRE(clf.getNumberOfNodes() == 360); + REQUIRE(clf.getNumberOfEdges() == 756); + REQUIRE(clf.getNotes().size() == 2); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with IWSS"); + REQUIRE(clf.getNotes()[1] == "Number of models: 36"); } TEST_CASE("Feature_select FCBF", "[BoostA2DE]") { @@ -64,15 +62,15 @@ TEST_CASE("Test used features in train note and score", "[BoostA2DE]") {"select_features","CFS"}, }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); - REQUIRE(clf.getNumberOfNodes() == 144); - REQUIRE(clf.getNumberOfEdges() == 288); + REQUIRE(clf.getNumberOfNodes() == 189); + REQUIRE(clf.getNumberOfEdges() == 378); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); - REQUIRE(clf.getNotes()[1] == "Number of models: 16"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 7 of 8 with CFS"); + REQUIRE(clf.getNotes()[1] == "Number of models: 21"); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); - REQUIRE(score == Catch::Approx(0.856771).epsilon(raw.epsilon)); - REQUIRE(scoret == Catch::Approx(0.856771).epsilon(raw.epsilon)); + REQUIRE(score == Catch::Approx(0.85546875f).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.85546875f).epsilon(raw.epsilon)); } TEST_CASE("Voting vs proba", "[BoostA2DE]") { diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc index 44b3ed5..0f2bb07 100644 --- a/tests/TestBoostAODE.cc +++ b/tests/TestBoostAODE.cc @@ -11,32 +11,35 @@ #include "TestUtils.h" #include "bayesnet/ensembles/BoostAODE.h" -TEST_CASE("Feature_select CFS", "[BoostAODE]") { +TEST_CASE("Feature_select CFS", "[BoostAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.setHyperparameters({{"select_features", "CFS"}}); + clf.setHyperparameters({ {"select_features", "CFS"} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -TEST_CASE("Feature_select IWSS", "[BoostAODE]") { +TEST_CASE("Feature_select IWSS", "[BoostAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}}); + clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with IWSS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -TEST_CASE("Feature_select FCBF", "[BoostAODE]") { +TEST_CASE("Feature_select FCBF", "[BoostAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::BoostAODE(); - clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}}); + clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); @@ -44,26 +47,28 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]") { REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -TEST_CASE("Test used features in train note and score", "[BoostAODE]") { +TEST_CASE("Test used features in train note and score", "[BoostAODE]") +{ auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::BoostAODE(true); clf.setHyperparameters({ {"order", "asc"}, {"convergence", true}, {"select_features", "CFS"}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfEdges() == 120); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 7 of 8 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 8"); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); - REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon)); - REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon)); + REQUIRE(score == Catch::Approx(0.8046875f).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.8046875f).epsilon(raw.epsilon)); } -TEST_CASE("Voting vs proba", "[BoostAODE]") { +TEST_CASE("Voting vs proba", "[BoostAODE]") +{ auto raw = RawDatasets("iris", true); auto clf = bayesnet::BoostAODE(false); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); @@ -71,7 +76,7 @@ TEST_CASE("Voting vs proba", "[BoostAODE]") { auto pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({ {"predict_voting", true}, - }); + }); auto score_voting = clf.score(raw.Xv, raw.yv); auto pred_voting = clf.predict_proba(raw.Xv); REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon)); @@ -81,17 +86,18 @@ TEST_CASE("Voting vs proba", "[BoostAODE]") { REQUIRE(clf.dump_cpt().size() == 7004); REQUIRE(clf.topological_order() == std::vector()); } -TEST_CASE("Order asc, desc & random", "[BoostAODE]") { +TEST_CASE("Order asc, desc & random", "[BoostAODE]") +{ auto raw = RawDatasets("glass", true); - std::map scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}}; - for (const std::string &order : {"asc", "desc", "rand"}) { + std::map scores{ {"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112} }; + for (const std::string& order : { "asc", "desc", "rand" }) { auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"order", order}, {"bisection", false}, {"maxTolerance", 1}, {"convergence", false}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); @@ -100,7 +106,8 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]") { REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon)); } } -TEST_CASE("Oddities", "[BoostAODE]") { +TEST_CASE("Oddities", "[BoostAODE]") +{ auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); auto bad_hyper = nlohmann::json{ @@ -109,34 +116,35 @@ TEST_CASE("Oddities", "[BoostAODE]") { {{"maxTolerance", 0}}, {{"maxTolerance", 7}}, }; - for (const auto &hyper : bad_hyper.items()) { + for (const auto& hyper : bad_hyper.items()) { INFO("BoostAODE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } - REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument); + REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0} }), std::invalid_argument); auto bad_hyper_fit = nlohmann::json{ {{"select_features", "IWSS"}, {"threshold", -0.01}}, {{"select_features", "IWSS"}, {"threshold", 0.51}}, {{"select_features", "FCBF"}, {"threshold", 1e-8}}, {{"select_features", "FCBF"}, {"threshold", 1.01}}, }; - for (const auto &hyper : bad_hyper_fit.items()) { + for (const auto& hyper : bad_hyper_fit.items()) { INFO("BoostAODE hyper: " << hyper.value().dump()); clf.setHyperparameters(hyper.value()); REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), - std::invalid_argument); + std::invalid_argument); } auto bad_hyper_fit2 = nlohmann::json{ {{"alpha_block", true}, {"block_update", true}}, {{"bisection", false}, {"block_update", true}}, }; - for (const auto &hyper : bad_hyper_fit2.items()) { + for (const auto& hyper : bad_hyper_fit2.items()) { INFO("BoostAODE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } } -TEST_CASE("Bisection Best", "[BoostAODE]") { +TEST_CASE("Bisection Best", "[BoostAODE]") +{ auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false); clf.setHyperparameters({ @@ -145,7 +153,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]") { {"convergence", true}, {"block_update", false}, {"convergence_best", false}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 210); REQUIRE(clf.getNumberOfEdges() == 378); @@ -156,7 +164,8 @@ TEST_CASE("Bisection Best", "[BoostAODE]") { REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon)); } -TEST_CASE("Bisection Best vs Last", "[BoostAODE]") { +TEST_CASE("Bisection Best vs Last", "[BoostAODE]") +{ auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false); auto clf = bayesnet::BoostAODE(true); auto hyperparameters = nlohmann::json{ @@ -176,7 +185,8 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]") { auto score_last = clf.score(raw.X_test, raw.y_test); REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); } -TEST_CASE("Block Update", "[BoostAODE]") { +TEST_CASE("Block Update", "[BoostAODE]") +{ auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("mfeat-factors", true, 500); clf.setHyperparameters({ @@ -184,7 +194,7 @@ TEST_CASE("Block Update", "[BoostAODE]") { {"block_update", true}, {"maxTolerance", 3}, {"convergence", true}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 868); REQUIRE(clf.getNumberOfEdges() == 1724); @@ -205,13 +215,14 @@ TEST_CASE("Block Update", "[BoostAODE]") { // } // std::cout << "Score " << score << std::endl; } -TEST_CASE("Alphablock", "[BoostAODE]") { +TEST_CASE("Alphablock", "[BoostAODE]") +{ auto clf_alpha = bayesnet::BoostAODE(); auto clf_no_alpha = bayesnet::BoostAODE(); auto raw = RawDatasets("diabetes", true); clf_alpha.setHyperparameters({ {"alpha_block", true}, - }); + }); clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test); diff --git a/tests/TestFeatureSelection.cc b/tests/TestFeatureSelection.cc index 7de8229..6c90e89 100644 --- a/tests/TestFeatureSelection.cc +++ b/tests/TestFeatureSelection.cc @@ -36,14 +36,14 @@ TEST_CASE("Features Selected", "[FeatureSelection]") SECTION("Test features selected, scores and sizes") { map, pair, std::vector>> results = { - { {"glass", "CFS"}, { { 2, 3, 6, 1, 8, 4 }, {0.365513, 0.42895, 0.369809, 0.298294, 0.240952, 0.200915} } }, - { {"iris", "CFS"}, { { 3, 2, 1, 0 }, {0.870521, 0.890375, 0.588155, 0.41843} } }, - { {"ecoli", "CFS"}, { { 5, 0, 4, 2, 1, 6 }, {0.512319, 0.565381, 0.486025, 0.41087, 0.331423, 0.266251} } }, - { {"diabetes", "CFS"}, { { 1, 5, 7, 6, 4, 2 }, {0.132858, 0.151209, 0.14244, 0.126591, 0.106028, 0.0825904} } }, - { {"glass", "IWSS" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.42895, 0.359907, 0.273784, 0.223346} } }, - { {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.585426} }}, - { {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4 }, {0.512319, 0.550978, 0.475025, 0.382607, 0.308203} } }, - { {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.136576, 0.122097, 0.0802232} } }, + { {"glass", "CFS"}, { { 2, 3, 5, 6, 7, 1, 0, 8, 4 }, {0.365513, 0.42895, 0.46186, 0.481897, 0.500943, 0.504027, 0.505625, 0.493256, 0.478226} } }, + { {"iris", "CFS"}, { { 3, 2, 0, 1 }, {0.870521, 0.890375, 0.84104719, 0.799310961} } }, + { {"ecoli", "CFS"}, { { 5, 0, 6, 1, 4, 2, 3 }, {0.512319, 0.565381, 0.61824, 0.637094, 0.637759, 0.633802, 0.598266} } }, + { {"diabetes", "CFS"}, { { 1, 5, 7, 4, 6, 0 }, {0.132858, 0.151209, 0.148887, 0.14862, 0.142902, 0.137233} } }, + { {"glass", "IWSS" }, { { 2, 3, 5, 7, 6, 1, 0, 8, 4 }, {0.365513, 0.42895, 0.46186, 0.479866, 0.500943, 0.504027, 0.505625, 0.493256, 0.478226} } }, + { {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.841047} }}, + { {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4, 2, 3}, {0.512319, 0.550978, 0.61824, 0.637094, 0.637759, 0.633802, 0.598266} } }, + { {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.146771, 0.14862, 0.136493,} } }, { {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } }, { {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }}, { {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }}, @@ -53,7 +53,7 @@ TEST_CASE("Features Selected", "[FeatureSelection]") std::string selector; std::vector> selectors = { { "CFS", 0.0 }, - { "IWSS", 0.5 }, + { "IWSS", 0.1 }, { "FCBF", 1e-7 } }; for (const auto item : selectors) { diff --git a/tests/TestXBA2DE.cc b/tests/TestXBA2DE.cc index 9e1b26f..6b84616 100644 --- a/tests/TestXBA2DE.cc +++ b/tests/TestXBA2DE.cc @@ -11,7 +11,8 @@ #include "TestUtils.h" #include "bayesnet/ensembles/XBA2DE.h" -TEST_CASE("Normal test", "[XBA2DE]") { +TEST_CASE("Normal test", "[XBA2DE]") +{ auto raw = RawDatasets("iris", true); auto clf = bayesnet::XBA2DE(); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); @@ -25,37 +26,38 @@ TEST_CASE("Normal test", "[XBA2DE]") { REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(1.0f)); REQUIRE(clf.graph().size() == 1); } -TEST_CASE("Feature_select CFS", "[XBA2DE]") { +TEST_CASE("Feature_select CFS", "[XBA2DE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBA2DE(); - clf.setHyperparameters({{"select_features", "CFS"}}); + clf.setHyperparameters({ {"select_features", "CFS"} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); - REQUIRE(clf.getNumberOfNodes() == 220); - REQUIRE(clf.getNumberOfEdges() == 506); + REQUIRE(clf.getNumberOfNodes() == 360); + REQUIRE(clf.getNumberOfEdges() == 828); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); - REQUIRE(clf.getNotes()[1] == "Number of models: 22"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with CFS"); + REQUIRE(clf.getNotes()[1] == "Number of models: 36"); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219)); } -TEST_CASE("Feature_select IWSS", "[XBA2DE]") { +TEST_CASE("Feature_select IWSS", "[XBA2DE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBA2DE(); - clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}}); + clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); - REQUIRE(clf.getNumberOfNodes() == 220); - REQUIRE(clf.getNumberOfEdges() == 506); - REQUIRE(clf.getNotes().size() == 4); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS"); - REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated"); - REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2"); - REQUIRE(clf.getNotes()[3] == "Number of models: 22"); - REQUIRE(clf.getNumberOfStates() == 5346); + REQUIRE(clf.getNumberOfNodes() == 360); + REQUIRE(clf.getNumberOfEdges() == 828); + REQUIRE(clf.getNotes().size() == 2); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with IWSS"); + REQUIRE(clf.getNotes()[1] == "Number of models: 36"); + REQUIRE(clf.getNumberOfStates() == 8748); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.72093)); } -TEST_CASE("Feature_select FCBF", "[XBA2DE]") { +TEST_CASE("Feature_select FCBF", "[XBA2DE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBA2DE(); - clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}}); + clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 290); REQUIRE(clf.getNumberOfEdges() == 667); @@ -66,37 +68,39 @@ TEST_CASE("Feature_select FCBF", "[XBA2DE]") { REQUIRE(clf.getNotes()[2] == "Number of models: 29"); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.744186)); } -TEST_CASE("Test used features in train note and score", "[XBA2DE]") { +TEST_CASE("Test used features in train note and score", "[XBA2DE]") +{ auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::XBA2DE(); clf.setHyperparameters({ {"order", "asc"}, {"convergence", true}, {"select_features", "CFS"}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); - REQUIRE(clf.getNumberOfNodes() == 144); - REQUIRE(clf.getNumberOfEdges() == 320); - REQUIRE(clf.getNumberOfStates() == 5504); + REQUIRE(clf.getNumberOfNodes() == 189); + REQUIRE(clf.getNumberOfEdges() == 420); + REQUIRE(clf.getNumberOfStates() == 7224); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); - REQUIRE(clf.getNotes()[1] == "Number of models: 16"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 7 of 8 with CFS"); + REQUIRE(clf.getNotes()[1] == "Number of models: 21"); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); - REQUIRE(score == Catch::Approx(0.850260437f).epsilon(raw.epsilon)); - REQUIRE(scoret == Catch::Approx(0.850260437f).epsilon(raw.epsilon)); + REQUIRE(score == Catch::Approx(0.854166687f).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.854166687f).epsilon(raw.epsilon)); } -TEST_CASE("Order asc, desc & random", "[XBA2DE]") { +TEST_CASE("Order asc, desc & random", "[XBA2DE]") +{ auto raw = RawDatasets("glass", true); - std::map scores{{"asc", 0.827103}, {"desc", 0.808411}, {"rand", 0.827103}}; - for (const std::string &order : {"asc", "desc", "rand"}) { + std::map scores{ {"asc", 0.827103}, {"desc", 0.808411}, {"rand", 0.827103} }; + for (const std::string& order : { "asc", "desc", "rand" }) { auto clf = bayesnet::XBA2DE(); clf.setHyperparameters({ {"order", order}, {"bisection", false}, {"maxTolerance", 1}, {"convergence", true}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); @@ -105,7 +109,8 @@ TEST_CASE("Order asc, desc & random", "[XBA2DE]") { REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon)); } } -TEST_CASE("Oddities", "[XBA2DE]") { +TEST_CASE("Oddities", "[XBA2DE]") +{ auto clf = bayesnet::XBA2DE(); auto raw = RawDatasets("iris", true); auto bad_hyper = nlohmann::json{ @@ -114,28 +119,28 @@ TEST_CASE("Oddities", "[XBA2DE]") { {{"maxTolerance", 0}}, {{"maxTolerance", 7}}, }; - for (const auto &hyper : bad_hyper.items()) { + for (const auto& hyper : bad_hyper.items()) { INFO("XBA2DE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } - REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument); + REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0} }), std::invalid_argument); auto bad_hyper_fit = nlohmann::json{ {{"select_features", "IWSS"}, {"threshold", -0.01}}, {{"select_features", "IWSS"}, {"threshold", 0.51}}, {{"select_features", "FCBF"}, {"threshold", 1e-8}}, {{"select_features", "FCBF"}, {"threshold", 1.01}}, }; - for (const auto &hyper : bad_hyper_fit.items()) { + for (const auto& hyper : bad_hyper_fit.items()) { INFO("XBA2DE hyper: " << hyper.value().dump()); clf.setHyperparameters(hyper.value()); REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), - std::invalid_argument); + std::invalid_argument); } auto bad_hyper_fit2 = nlohmann::json{ {{"alpha_block", true}, {"block_update", true}}, {{"bisection", false}, {"block_update", true}}, }; - for (const auto &hyper : bad_hyper_fit2.items()) { + for (const auto& hyper : bad_hyper_fit2.items()) { INFO("XBA2DE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } @@ -146,12 +151,13 @@ TEST_CASE("Oddities", "[XBA2DE]") { raw.features.pop_back(); raw.features.pop_back(); raw.features.pop_back(); - clf.setHyperparameters({{"select_features", "CFS"}, {"alpha_block", false}, {"block_update", false}}); + clf.setHyperparameters({ {"select_features", "CFS"}, {"alpha_block", false}, {"block_update", false} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNotes().size() == 1); REQUIRE(clf.getNotes()[0] == "No features selected in initialization"); } -TEST_CASE("Bisection Best", "[XBA2DE]") { +TEST_CASE("Bisection Best", "[XBA2DE]") +{ auto clf = bayesnet::XBA2DE(); auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false); clf.setHyperparameters({ @@ -159,7 +165,7 @@ TEST_CASE("Bisection Best", "[XBA2DE]") { {"maxTolerance", 3}, {"convergence", true}, {"convergence_best", false}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 330); REQUIRE(clf.getNumberOfEdges() == 836); @@ -173,7 +179,8 @@ TEST_CASE("Bisection Best", "[XBA2DE]") { REQUIRE(score == Catch::Approx(0.975).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.975).epsilon(raw.epsilon)); } -TEST_CASE("Bisection Best vs Last", "[XBA2DE]") { +TEST_CASE("Bisection Best vs Last", "[XBA2DE]") +{ auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false); auto clf = bayesnet::XBA2DE(); auto hyperparameters = nlohmann::json{ @@ -193,7 +200,8 @@ TEST_CASE("Bisection Best vs Last", "[XBA2DE]") { auto score_last = clf.score(raw.X_test, raw.y_test); REQUIRE(score_last == Catch::Approx(0.99).epsilon(raw.epsilon)); } -TEST_CASE("Block Update", "[XBA2DE]") { +TEST_CASE("Block Update", "[XBA2DE]") +{ auto clf = bayesnet::XBA2DE(); auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false); clf.setHyperparameters({ @@ -201,7 +209,7 @@ TEST_CASE("Block Update", "[XBA2DE]") { {"block_update", true}, {"maxTolerance", 3}, {"convergence", true}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 120); REQUIRE(clf.getNumberOfEdges() == 304); @@ -221,13 +229,14 @@ TEST_CASE("Block Update", "[XBA2DE]") { /*}*/ /*std::cout << "Score " << score << std::endl;*/ } -TEST_CASE("Alphablock", "[XBA2DE]") { +TEST_CASE("Alphablock", "[XBA2DE]") +{ auto clf_alpha = bayesnet::XBA2DE(); auto clf_no_alpha = bayesnet::XBA2DE(); auto raw = RawDatasets("diabetes", true); clf_alpha.setHyperparameters({ {"alpha_block", true}, - }); + }); clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test); diff --git a/tests/TestXBAODE.cc b/tests/TestXBAODE.cc index 0ab62dc..6999e04 100644 --- a/tests/TestXBAODE.cc +++ b/tests/TestXBAODE.cc @@ -11,7 +11,8 @@ #include "TestUtils.h" #include "bayesnet/ensembles/XBAODE.h" -TEST_CASE("Normal test", "[XBAODE]") { +TEST_CASE("Normal test", "[XBAODE]") +{ auto raw = RawDatasets("iris", true); auto clf = bayesnet::XBAODE(); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); @@ -23,34 +24,37 @@ TEST_CASE("Normal test", "[XBAODE]") { REQUIRE(clf.getNumberOfStates() == 256); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.933333)); } -TEST_CASE("Feature_select CFS", "[XBAODE]") { +TEST_CASE("Feature_select CFS", "[XBAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBAODE(); - clf.setHyperparameters({{"select_features", "CFS"}}); + clf.setHyperparameters({ {"select_features", "CFS"} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 171); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219)); } -TEST_CASE("Feature_select IWSS", "[XBAODE]") { +TEST_CASE("Feature_select IWSS", "[XBAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBAODE(); - clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}}); + clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 171); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 9 of 9 with IWSS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); - REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.697674394)); + REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219f)); } -TEST_CASE("Feature_select FCBF", "[XBAODE]") { +TEST_CASE("Feature_select FCBF", "[XBAODE]") +{ auto raw = RawDatasets("glass", true); auto clf = bayesnet::XBAODE(); - clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}}); + clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7} }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 171); @@ -59,36 +63,38 @@ TEST_CASE("Feature_select FCBF", "[XBAODE]") { REQUIRE(clf.getNotes()[1] == "Number of models: 9"); REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219)); } -TEST_CASE("Test used features in train note and score", "[XBAODE]") { +TEST_CASE("Test used features in train note and score", "[XBAODE]") +{ auto raw = RawDatasets("diabetes", true); auto clf = bayesnet::XBAODE(); clf.setHyperparameters({ {"order", "asc"}, {"convergence", true}, {"select_features", "CFS"}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfEdges() == 136); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 7 of 8 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 8"); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); - REQUIRE(score == Catch::Approx(0.819010437f).epsilon(raw.epsilon)); - REQUIRE(scoret == Catch::Approx(0.819010437f).epsilon(raw.epsilon)); + REQUIRE(score == Catch::Approx(0.82421875f).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.82421875f).epsilon(raw.epsilon)); } -TEST_CASE("Order asc, desc & random", "[XBAODE]") { +TEST_CASE("Order asc, desc & random", "[XBAODE]") +{ auto raw = RawDatasets("glass", true); - std::map scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}}; - for (const std::string &order : {"asc", "desc", "rand"}) { + std::map scores{ {"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112} }; + for (const std::string& order : { "asc", "desc", "rand" }) { auto clf = bayesnet::XBAODE(); clf.setHyperparameters({ {"order", order}, {"bisection", false}, {"maxTolerance", 1}, {"convergence", false}, - }); + }); clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt); @@ -97,7 +103,8 @@ TEST_CASE("Order asc, desc & random", "[XBAODE]") { REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon)); } } -TEST_CASE("Oddities", "[XBAODE]") { +TEST_CASE("Oddities", "[XBAODE]") +{ auto clf = bayesnet::XBAODE(); auto raw = RawDatasets("iris", true); auto bad_hyper = nlohmann::json{ @@ -106,33 +113,34 @@ TEST_CASE("Oddities", "[XBAODE]") { {{"maxTolerance", 0}}, {{"maxTolerance", 7}}, }; - for (const auto &hyper : bad_hyper.items()) { + for (const auto& hyper : bad_hyper.items()) { INFO("XBAODE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } - REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument); + REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0} }), std::invalid_argument); auto bad_hyper_fit = nlohmann::json{ {{"select_features", "IWSS"}, {"threshold", -0.01}}, {{"select_features", "IWSS"}, {"threshold", 0.51}}, {{"select_features", "FCBF"}, {"threshold", 1e-8}}, {{"select_features", "FCBF"}, {"threshold", 1.01}}, }; - for (const auto &hyper : bad_hyper_fit.items()) { + for (const auto& hyper : bad_hyper_fit.items()) { INFO("XBAODE hyper: " << hyper.value().dump()); clf.setHyperparameters(hyper.value()); REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), - std::invalid_argument); + std::invalid_argument); } auto bad_hyper_fit2 = nlohmann::json{ {{"alpha_block", true}, {"block_update", true}}, {{"bisection", false}, {"block_update", true}}, }; - for (const auto &hyper : bad_hyper_fit2.items()) { + for (const auto& hyper : bad_hyper_fit2.items()) { INFO("XBAODE hyper: " << hyper.value().dump()); REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument); } } -TEST_CASE("Bisection Best", "[XBAODE]") { +TEST_CASE("Bisection Best", "[XBAODE]") +{ auto clf = bayesnet::XBAODE(); auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false); clf.setHyperparameters({ @@ -140,7 +148,7 @@ TEST_CASE("Bisection Best", "[XBAODE]") { {"maxTolerance", 3}, {"convergence", true}, {"convergence_best", false}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 210); REQUIRE(clf.getNumberOfEdges() == 406); @@ -151,7 +159,8 @@ TEST_CASE("Bisection Best", "[XBAODE]") { REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon)); } -TEST_CASE("Bisection Best vs Last", "[XBAODE]") { +TEST_CASE("Bisection Best vs Last", "[XBAODE]") +{ auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false); auto clf = bayesnet::XBAODE(); auto hyperparameters = nlohmann::json{ @@ -171,7 +180,8 @@ TEST_CASE("Bisection Best vs Last", "[XBAODE]") { auto score_last = clf.score(raw.X_test, raw.y_test); REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon)); } -TEST_CASE("Block Update", "[XBAODE]") { +TEST_CASE("Block Update", "[XBAODE]") +{ auto clf = bayesnet::XBAODE(); auto raw = RawDatasets("mfeat-factors", true, 500); clf.setHyperparameters({ @@ -179,7 +189,7 @@ TEST_CASE("Block Update", "[XBAODE]") { {"block_update", true}, {"maxTolerance", 3}, {"convergence", true}, - }); + }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 1085); REQUIRE(clf.getNumberOfEdges() == 2165); @@ -200,13 +210,14 @@ TEST_CASE("Block Update", "[XBAODE]") { // } // std::cout << "Score " << score << std::endl; } -TEST_CASE("Alphablock", "[XBAODE]") { +TEST_CASE("Alphablock", "[XBAODE]") +{ auto clf_alpha = bayesnet::XBAODE(); auto clf_no_alpha = bayesnet::XBAODE(); auto raw = RawDatasets("diabetes", true); clf_alpha.setHyperparameters({ {"alpha_block", true}, - }); + }); clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);