// *************************************************************** // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez // SPDX-FileType: SOURCE // SPDX-License-Identifier: MIT // *************************************************************** #include #include #include #include "bayesnet/utils/BayesMetrics.h" #include "TestUtils.h" #include "Timer.h" TEST_CASE("Metrics Test", "[Metrics]") { std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); map>> resultsKBest = { {"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}}, {"iris", {3, { 0, 3, 2 }} }, {"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}}, {"diabetes", {2, { 7, 1 }}} }; map resultsMI = { {"glass", 0.12805398}, {"iris", 0.3158139948}, {"ecoli", 0.0089431099}, {"diabetes", 0.0345470614} }; map, std::vector>> resultsMST = { { {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } }, { {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } }, { {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } }, { {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } }, { {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } }, { {"ecoli", 1}, { {1, 0}, {1, 5}, {1, 3}, {5, 6}, {5, 4}, {0, 2} } }, { {"diabetes", 0}, { {0, 7}, {0, 2}, {0, 6}, {2, 3}, {3, 4}, {3, 5}, {4, 1} } }, { {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } } }; auto raw = RawDatasets(file_name, true); bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.features, raw.className, raw.classNumStates); SECTION("Test Constructor") { REQUIRE(metrics.getScoresKBest().size() == 0); REQUIRE(metricsv.getScoresKBest().size() == 0); } SECTION("Test SelectKBestWeighted") { std::vector kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first); std::vector kBestv = metricsv.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first); REQUIRE(kBest.size() == resultsKBest.at(file_name).first); REQUIRE(kBestv.size() == resultsKBest.at(file_name).first); REQUIRE(kBest == resultsKBest.at(file_name).second); REQUIRE(kBestv == resultsKBest.at(file_name).second); } SECTION("Test Mutual Information") { auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights); auto resultv = metricsv.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights); REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon)); REQUIRE(resultv == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon)); } SECTION("Test Maximum Spanning Tree") { auto weights_matrix = metrics.conditionalEdge(raw.weights); auto weights_matrixv = metricsv.conditionalEdge(raw.weights); for (int i = 0; i < 2; ++i) { auto result = metrics.maximumSpanningTree(raw.features, weights_matrix, i); auto resultv = metricsv.maximumSpanningTree(raw.features, weights_matrixv, i); REQUIRE(result == resultsMST.at({ file_name, i })); REQUIRE(resultv == resultsMST.at({ file_name, i })); } } } TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]") { auto raw = RawDatasets("iris", true); bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0); REQUIRE(kBest.size() == raw.features.size()); REQUIRE(kBest == std::vector({ 1, 0, 3, 2 })); } TEST_CASE("Entropy Test", "[Metrics]") { auto raw = RawDatasets("iris", true); bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights); REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon)); auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32); auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32); result = metrics.entropy(data, weights); REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon)); data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32); result = metrics.entropy(data, weights); REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon)); } TEST_CASE("Conditional Entropy", "[Metrics]") { auto raw = RawDatasets("mfeat-factors", true); bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); bayesnet::Metrics metrics2(raw.dataset, raw.features, raw.className, raw.classNumStates); auto feature0 = raw.dataset.index({ 0, "..." }); auto feature1 = raw.dataset.index({ 1, "..." }); auto feature2 = raw.dataset.index({ 2, "..." }); auto feature3 = raw.dataset.index({ 3, "..." }); platform::Timer timer; double result, greatest = 0; int best_i, best_j; timer.start(); for (int i = 0; i < raw.features.size() - 1; ++i) { if (i % 50 == 0) { std::cout << "i=" << i << " Time=" << timer.getDurationString(true) << std::endl; } for (int j = i + 1; j < raw.features.size(); ++j) { result = metrics.conditionalMutualInformation(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights); if (result > greatest) { greatest = result; best_i = i; best_j = j; } } } timer.stop(); std::cout << "CMI(" << best_i << "," << best_j << ")=" << greatest << "\n"; std::cout << "Time=" << timer.getDurationString() << std::endl; // Se pueden precalcular estos valores y utilizarlos en el algoritmo como entrada }