Complete selectKPairs method & test

This commit is contained in:
Ricardo Montañana Gómez 2024-05-16 13:46:38 +02:00
parent 2e3e0e0fc2
commit cccaa6e0af
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
5 changed files with 96 additions and 35 deletions

View File

@ -34,38 +34,40 @@ namespace bayesnet {
{
// Return the K Best features
auto n = features.size();
if (k == 0) {
k = n;
}
// compute scores
scoresKPairs.clear();
pairsKBest.clear();
auto label = samples.index({ -1, "..." });
// for (int i = 0; i < n; ++i) {
// for (int j = i + 1; j < n; ++j) {
// scoresKBest.push_back(mutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), weights));
// featuresKBest.push_back(i);
// featuresKBest.push_back(j);
// }
// }
// // sort & reduce scores and features
// if (ascending) {
// sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
// { return scoresKBest[i] < scoresKBest[j]; });
// sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
// if (k < n) {
// for (int i = 0; i < n - k; ++i) {
// featuresKBest.erase(featuresKBest.begin());
// scoresKBest.erase(scoresKBest.begin());
// }
// }
// } else {
// sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
// { return scoresKBest[i] > scoresKBest[j]; });
// sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
// featuresKBest.resize(k);
// scoresKBest.resize(k);
// }
auto labels = samples.index({ -1, "..." });
for (int i = 0; i < n - 1; ++i) {
for (int j = i + 1; j < n; ++j) {
auto key = std::make_pair(i, j);
auto value = conditionalMutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), labels, weights);
scoresKPairs.push_back({ key, value });
}
}
// sort scores
if (ascending) {
sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
{ return a.second < b.second; });
} else {
sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
{ return a.second > b.second; });
}
for (auto& [pairs, score] : scoresKPairs) {
pairsKBest.push_back(pairs);
}
if (k != 0) {
if (ascending) {
for (int i = 0; i < n - k; ++i) {
pairsKBest.erase(pairsKBest.begin());
scoresKPairs.erase(scoresKPairs.begin());
}
} else {
pairsKBest.resize(k);
scoresKPairs.resize(k);
}
}
return pairsKBest;
}
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
@ -107,7 +109,10 @@ namespace bayesnet {
{
return scoresKBest;
}
std::vector<std::pair<std::pair<int, int>, double>> Metrics::getScoresKPairs() const
{
return scoresKPairs;
}
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{
auto result = std::vector<double>();

View File

@ -18,6 +18,7 @@ namespace bayesnet {
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<std::pair<int, int>> SelectKPairs(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const;
std::vector<std::pair<std::pair<int, int>, double>> getScoresKPairs() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
torch::Tensor conditionalEdge(const torch::Tensor& weights);
@ -34,7 +35,7 @@ namespace bayesnet {
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{
std::vector<std::pair<T, T>> result;
for (int i = 0; i < source.size(); ++i) {
for (int i = 0; i < source.size() - 1; ++i) {
T temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
@ -42,7 +43,7 @@ namespace bayesnet {
}
return result;
}
template <class T>
template <class T>
T pop_first(std::vector<T>& v)
{
T temp = v[0];
@ -54,7 +55,7 @@ namespace bayesnet {
std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features
std::vector<std::pair<int, int>> pairsKBest; // sorted indices of the pairs
std::map<std::pair<int, int>, double> scoresKPairs;
std::vector<std::pair<std::pair<int, int>, double>> scoresKPairs;
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
};
}

View File

@ -136,4 +136,58 @@ TEST_CASE("Conditional Mutual Information", "[Metrics]")
REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
}
}
}
TEST_CASE("Select K Pairs descending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto results = metrics.SelectKPairs(raw.weights, false);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 1, 3 }, 1.31852 },
{ { 1, 2 }, 1.17112 },
{ { 0, 3 }, 0.403749 },
{ { 0, 2 }, 0.287696 },
{ { 2, 3 }, 0.210068 },
{ { 0, 1 }, 0.0 },
};
auto scores = metrics.getScoresKPairs();
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
REQUIRE(results.size() == 6);
REQUIRE(scores.size() == 6);
}
TEST_CASE("Select K Pairs ascending", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto results = metrics.SelectKPairs(raw.weights, true);
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
{ { 0, 1 }, 0.0 },
{ { 2, 3 }, 0.210068 },
{ { 0, 2 }, 0.287696 },
{ { 0, 3 }, 0.403749 },
{ { 1, 2 }, 1.17112 },
{ { 1, 3 }, 1.31852 },
};
auto scores = metrics.getScoresKPairs();
for (int i = 0; i < results.size(); ++i) {
auto result = results[i];
auto expect = expected[i];
auto score = scores[i];
REQUIRE(result.first == expect.first.first);
REQUIRE(result.second == expect.first.second);
REQUIRE(score.first.first == expect.first.first);
REQUIRE(score.first.second == expect.first.second);
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
}
REQUIRE(results.size() == 6);
REQUIRE(scores.size() == 6);
}

View File

@ -56,14 +56,14 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
auto raw = RawDatasets(file_name, discretize);
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
auto score = clf->score(raw.Xt, raw.yt);
INFO("Classifier: " + name + " File: " + file_name);
INFO("Classifier: " << name << " File: " << file_name);
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
}
}
SECTION("Library check version")
{
INFO("Checking version of " + name + " classifier");
INFO("Checking version of " << name << " classifier");
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
}
delete clf;

View File

@ -8,6 +8,7 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h"