From a1f90867808b85ba48e4e7b04c96ed262e8d5c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 2 Apr 2024 22:53:00 +0200 Subject: [PATCH] Fix CFS mistake --- .vscode/launch.json | 3 +- bayesnet/ensembles/BoostAODE.cc | 4 ++ bayesnet/feature_selection/CFS.cc | 2 +- tests/TestFeatureSelection.cc | 102 ++++++++---------------------- 4 files changed, 34 insertions(+), 77 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 65760c3..5871dd6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,8 +14,9 @@ "type": "lldb", "request": "launch", "name": "test", - "program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet", + "program": "${workspaceFolder}/build_debug/tests/TestBayesNet", "args": [ + "[FeatureSelection]" //"-c=\"Metrics Test\"", // "-s", ], diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index 1ce4bbb..8426638 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -145,6 +145,10 @@ namespace bayesnet { } featureSelector->fit(); auto cfsFeatures = featureSelector->getFeatures(); + auto scores = featureSelector->getScores(); + for (int i = 0; i < cfsFeatures.size(); ++i) { + LOG_F(INFO, "Feature: %d Score: %f", cfsFeatures[i], scores[i]); + } for (const int& feature : cfsFeatures) { featuresUsed.push_back(feature); std::unique_ptr model = std::make_unique(feature); diff --git a/bayesnet/feature_selection/CFS.cc b/bayesnet/feature_selection/CFS.cc index bf0c746..d7b55ca 100644 --- a/bayesnet/feature_selection/CFS.cc +++ b/bayesnet/feature_selection/CFS.cc @@ -11,7 +11,7 @@ namespace bayesnet { auto feature = featureOrder[0]; selectedFeatures.push_back(feature); selectedScores.push_back(suLabels[feature]); - selectedFeatures.erase(selectedFeatures.begin()); + featureOrder.erase(featureOrder.begin()); while (continueCondition) { double merit = std::numeric_limits::lowest(); int bestFeature = -1; diff --git a/tests/TestFeatureSelection.cc b/tests/TestFeatureSelection.cc index 06f9c2d..2feb723 100644 --- a/tests/TestFeatureSelection.cc +++ b/tests/TestFeatureSelection.cc @@ -25,21 +25,21 @@ TEST_CASE("Features Selected", "[FeatureSelection]") auto raw = RawDatasets(file_name, true); - SECTION("Test features selected and size") + SECTION("Test features selected, scores and sizes") { - map, std::vector> results = { - { {"glass", "CFS"}, { 2, 3, 6, 1, 8, 4 } }, - { {"iris", "CFS"}, { 3, 2, 1, 0 } }, - { {"ecoli", "CFS"}, { 5, 0, 4, 2, 1, 6 } }, - { {"diabetes", "CFS"}, { 1, 5, 7, 6, 4, 2 } }, - { {"glass", "IWSS" }, { 2, 3, 5, 7, 6 } }, - { {"iris", "IWSS"}, { 3, 2, 0 } }, - { {"ecoli", "IWSS"}, { 5, 6, 0, 1, 4 } }, - { {"diabetes", "IWSS"}, { 1, 5, 4, 7, 3 } }, - { {"glass", "FCBF" }, { 2, 3, 5, 7, 6 } }, - { {"iris", "FCBF"}, { 3, 2 } }, - { {"ecoli", "FCBF"}, { 5, 0, 1, 4, 2 } }, - { {"diabetes", "FCBF"}, { 1, 5, 7, 6 } } + map, pair, std::vector>> results = { + { {"glass", "CFS"}, { { 2, 3, 6, 1, 8, 4 }, {0.365513, 0.42895, 0.369809, 0.298294, 0.240952, 0.200915} } }, + { {"iris", "CFS"}, { { 3, 2, 1, 0 }, {0.870521, 0.890375, 0.588155, 0.41843} } }, + { {"ecoli", "CFS"}, { { 5, 0, 4, 2, 1, 6 }, {0.512319, 0.565381, 0.486025, 0.41087, 0.331423, 0.266251} } }, + { {"diabetes", "CFS"}, { { 1, 5, 7, 6, 4, 2 }, {0.132858, 0.151209, 0.14244, 0.126591, 0.106028, 0.0825904} } }, + { {"glass", "IWSS" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.42895, 0.359907, 0.273784, 0.223346} } }, + { {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.585426} }}, + { {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4 }, {0.512319, 0.550978, 0.475025, 0.382607, 0.308203} } }, + { {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.136576, 0.122097, 0.0802232} } }, + { {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } }, + { {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }}, + { {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }}, + { {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }} }; double threshold; std::string selector; @@ -52,68 +52,20 @@ TEST_CASE("Features Selected", "[FeatureSelection]") selector = item.first; threshold = item.second; bayesnet::FeatureSelect* featureSelector = build_selector(raw, selector, threshold); featureSelector->fit(); - std::vector selected = featureSelector->getFeatures(); INFO("file_name: " << file_name << ", selector: " << selector); - REQUIRE(selected.size() == results.at({ file_name, selector }).size()); - REQUIRE(selected == results.at({ file_name, selector })); + // Features + auto expected_features = results.at({ file_name, selector }).first; + std::vector selected_features = featureSelector->getFeatures(); + REQUIRE(selected_features.size() == expected_features.size()); + REQUIRE(selected_features == expected_features); + // Scores + auto expected_scores = results.at({ file_name, selector }).second; + std::vector selected_scores = featureSelector->getScores(); + REQUIRE(selected_scores.size() == selected_features.size()); + for (int i = 0; i < selected_scores.size(); i++) { + REQUIRE(selected_scores[i] == Catch::Approx(expected_scores[i]).epsilon(raw.epsilon)); + } delete featureSelector; } } -} - -// TEST_CASE("Feature Selection Test", "[BayesNet]") -// { -// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); -// std::string selector = GENERATE("CFS", "FCBF", "IWSS"); -// map>> resultsKBest = { -// {"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}}, -// {"iris", {3, { 0, 3, 2 }} }, -// {"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}}, -// {"diabetes", {2, { 7, 1 }}} -// }; -// map resultsMI = { -// {"glass", 0.12805398}, -// {"iris", 0.3158139948}, -// {"ecoli", 0.0089431099}, -// {"diabetes", 0.0345470614} -// }; -// map, std::vector>> resultsMST = { -// { {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } }, -// { {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } }, -// { {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } }, -// { {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } }, -// { {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } }, -// { {"ecoli", 1}, { {1, 0}, {1, 5}, {1, 3}, {5, 6}, {5, 4}, {0, 2} } }, -// { {"diabetes", 0}, { {0, 7}, {0, 2}, {0, 6}, {2, 3}, {3, 4}, {3, 5}, {4, 1} } }, -// { {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } } -// }; -// auto raw = RawDatasets(file_name, true); -// FeatureSelect* featureSelector = build_selector(raw, selector); - -// SECTION("Test Constructor") -// { -// REQUIRE(metrics.getScoresKBest().size() == 0); -// } - -// SECTION("Test SelectKBestWeighted") -// { -// std::vector kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first); -// REQUIRE(kBest.size() == resultsKBest.at(file_name).first); -// REQUIRE(kBest == resultsKBest.at(file_name).second); -// } - -// SECTION("Test Mutual Information") -// { -// auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights); -// REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon)); -// } - -// SECTION("Test Maximum Spanning Tree") -// { -// auto weights_matrix = metrics.conditionalEdge(raw.weights); -// for (int i = 0; i < 2; ++i) { -// auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i); -// REQUIRE(result == resultsMST.at({ file_name, i })); -// } -// } -// } \ No newline at end of file +} \ No newline at end of file