Implement Conditional Mutual Information

This commit is contained in:
Ricardo Montañana Gómez 2024-05-15 00:48:02 +02:00
parent 56b62a67cc
commit e2e0fb0c40
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
3 changed files with 145 additions and 1 deletions
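
The quantity implemented is the conditional mutual information of two discrete features X and Y given the class C, estimated in nats from weighted counts and computed through the conditional-entropy decomposition used in the code below:

    I(X;Y \mid C) = H(Y \mid C) - H(Y \mid X,C)
                  = \sum_{x,y,c} p(x,y,c) \log \frac{p(x,y \mid c)}{p(x \mid c)\, p(y \mid c)}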

View File

@@ -4,6 +4,10 @@
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <map>
#include <unordered_map>
#include <tuple>
#include <cassert>
#include "Mst.h"
#include "BayesMetrics.h"
namespace bayesnet {
@@ -105,6 +108,8 @@ namespace bayesnet {
        }
        return matrix;
    }
    // Measured in nats (natural logarithm (log) base e)
    // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
    double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
    {
        torch::Tensor counts = feature.bincount(weights);
@@ -143,11 +148,117 @@ namespace bayesnet {
        }
        return entropyValue;
    }
    // H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c)
    // with X = firstFeature, Y = secondFeature and C = labels
    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        // Ensure the tensors are of the same length
        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
        // Use accessors for fast elementwise access
        auto firstFeatureData = firstFeature.accessor<int, 1>();
        auto secondFeatureData = secondFeature.accessor<int, 1>();
        auto labelsData = labels.accessor<int, 1>();
        auto weightsData = weights.accessor<double, 1>();
        int numSamples = firstFeature.size(0);
        // Maps for joint and marginal weighted counts
        std::map<std::tuple<int, int, int>, double> jointCount;
        std::map<std::tuple<int, int>, double> marginalCount;
        // Accumulate weighted joint (x, c, y) and marginal (x, c) counts
        for (int i = 0; i < numSamples; ++i) {
            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
            auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
            jointCount[keyJoint] += weightsData[i];
            marginalCount[keyMarginal] += weightsData[i];
        }
        // Total weight sum
        double totalWeight = torch::sum(weights).item<double>();
        // Compute the conditional entropy H(Y|X,C) = -sum p(x,y,c) log p(y|x,c)
        double conditionalEntropy = 0.0;
        for (const auto& [keyJoint, jointFreq] : jointCount) {
            auto [x, c, y] = keyJoint;
            auto keyMarginal = std::make_tuple(x, c);
            double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
            if (p_y_given_xc > 0) {
                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
            }
        }
        return conditionalEntropy;
    }
    // Alternative implementation of H(Y|X,C) that iterates over the unique values of X and C
    double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        int numSamples = firstFeature.size(0);
        // Get the unique values of X and C (the second tensor returned holds inverse indices and is unused)
        auto [uniqueX, inverseX] = at::_unique(firstFeature);
        auto [uniqueC, inverseC] = at::_unique(labels);
        // Accumulate the weighted joint counts of (x, c), grouped by the value of Y
        std::map<int, std::map<std::pair<int, int>, double>> jointCounts;
        double totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            int x = firstFeature[i].item<int>();
            int y = secondFeature[i].item<int>();
            int c = labels[i].item<int>();
            const auto key = std::make_pair(x, c);
            jointCounts[y][key] += weights[i].item<double>();
            totalWeight += weights[i].item<double>();
        }
        if (totalWeight == 0)
            return 0;
        double entropyValue = 0;
        // Iterate over unique values of X and C
        for (int i = 0; i < uniqueX.size(0); i++) {
            int x_val = uniqueX[i].item<int>();
            for (int j = 0; j < uniqueC.size(0); j++) {
                int c_val = uniqueC[j].item<int>();
                const auto key = std::make_pair(x_val, c_val);
                double w_xc = 0; // Weighted count of (X=x, C=c)
                double entropy_f = 0;
                // Sum the joint counts over Y for this specific (X,C) combination
                for (const auto& [y, jointCount] : jointCounts) {
                    auto joint_count_xc = jointCount.find(key);
                    if (joint_count_xc != jointCount.end()) {
                        w_xc += joint_count_xc->second;
                    }
                }
                // Only calculate the inner entropy H(Y|X=x,C=c) if the count is > 0
                if (w_xc > 0) {
                    for (const auto& [y, jointCount] : jointCounts) {
                        auto joint_count_xc = jointCount.find(key);
                        if (joint_count_xc != jointCount.end() && joint_count_xc->second > 0) {
                            // p(y|x,c) is computed from the raw counts, not the normalized p(x,c)
                            double p_y_xc = joint_count_xc->second / w_xc;
                            entropy_f -= p_y_xc * std::log(p_y_xc);
                        }
                    }
                }
                entropyValue += (w_xc / totalWeight) * entropy_f;
            }
        }
        return entropyValue;
    }
    // I(X;Y) = H(X) - H(X|Y)
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
        return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
    }
    // I(X;Y|C) = H(Y|C) - H(Y|X,C)
    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        // H(Y|C) comes from the two-variable overload; the four-argument overload above computes H(Y|X,C)
        return conditionalEntropy(secondFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights);
    }
    /*
    Compute the maximum spanning tree considering the weights as distances
    and the indices of the weights as nodes of this square matrix using

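A minimal usage sketch of the new metric, assuming the Metrics(dataset, features, className, classNumStates) constructor exercised in the tests below; the data, feature names and class count are illustrative only:

#include <iostream>
#include <torch/torch.h>
#include "BayesMetrics.h"

int main()
{
    // Two discrete features and a binary class label over 8 samples (illustrative data)
    auto x = torch::tensor({ 0, 0, 1, 1, 0, 1, 0, 1 }, torch::kInt32);
    auto y = torch::tensor({ 0, 1, 0, 1, 0, 1, 1, 0 }, torch::kInt32);
    auto c = torch::tensor({ 0, 0, 0, 0, 1, 1, 1, 1 }, torch::kInt32);
    // Uniform sample weights; the accessors in conditionalEntropy expect kDouble
    auto weights = torch::full({ 8 }, 1.0 / 8.0, torch::kDouble);
    // (n+1) x m dataset layout with the labels in the last row, as Metrics expects
    auto dataset = torch::stack({ x, y, c });
    bayesnet::Metrics metrics(dataset, { "X0", "X1" }, "class", 2);
    // I(X0;X1|C) in nats
    std::cout << "I(X;Y|C) = " << metrics.conditionalMutualInformation(x, y, c, weights) << std::endl;
    return 0;
}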
View File

@@ -18,12 +18,17 @@ namespace bayesnet {
        std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
        std::vector<double> getScoresKBest() const;
        double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
        double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
        torch::Tensor conditionalEdge(const torch::Tensor& weights);
        std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
        // Measured in nats (natural logarithm (log) base e)
        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
        double conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
        std::string className;
        std::vector<std::string> features;
        template <class T>
        std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)

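Since entropy, mutualInformation and conditionalMutualInformation are public here, the standard information-theoretic bounds can be checked directly; a sketch (the helper name is hypothetical):

#include <cassert>
#include <torch/torch.h>
#include "BayesMetrics.h"

// Hypothetical helper: sanity-check the bounds behind the public API
void checkInformationBounds(bayesnet::Metrics& metrics, const torch::Tensor& x,
    const torch::Tensor& y, const torch::Tensor& c, const torch::Tensor& w)
{
    double h_x = metrics.entropy(x, w);                              // H(X) in nats
    double mi = metrics.mutualInformation(x, y, w);                  // I(X;Y)
    double cmi = metrics.conditionalMutualInformation(x, y, c, w);   // I(X;Y|C)
    // Both informations are non-negative, and I(X;Y) cannot exceed H(X)
    assert(mi >= -1e-9 && mi <= h_x + 1e-9);
    assert(cmi >= -1e-9);
}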
View File

@@ -83,4 +83,32 @@ TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]")
    auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0);
    REQUIRE(kBest.size() == raw.features.size());
    REQUIRE(kBest == std::vector<int>({ 1, 0, 3, 2 }));
}
TEST_CASE("Entropy Test", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights);
REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon));
auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32);
auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32);
result = metrics.entropy(data, weights);
REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon));
data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32);
result = metrics.entropy(data, weights);
REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon));
}
TEST_CASE("Conditional Entropy", "[Metrics]")
{
auto raw = RawDatasets("iris", true);
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
auto feature0 = raw.dataset.index({ 0, "..." });
auto feature1 = raw.dataset.index({ 1, "..." });
auto feature2 = raw.dataset.index({ 2, "..." });
auto feature3 = raw.dataset.index({ 3, "..." });
auto labels = raw.dataset.index({ 4, "..." });
auto result = metrics.conditionalEntropy(feature0, feature1, labels, raw.weights);
auto result2 = metrics.conditionalEntropy2(feature0, feature1, labels, raw.weights);
std::cout << "Result=" << result << "\n";
std::cout << "Result2=" << result2 << "\n";
} }
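
Since the test above only prints the two results, a natural follow-up is to assert that both implementations agree; a sketch reusing the same fixtures:

TEST_CASE("Conditional Entropy implementations agree", "[Metrics]")
{
    auto raw = RawDatasets("iris", true);
    bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
    auto feature0 = raw.dataset.index({ 0, "..." });
    auto feature1 = raw.dataset.index({ 1, "..." });
    auto labels = raw.dataset.index({ 4, "..." });
    // Both overloads estimate H(feature1 | feature0, labels); they should match to rounding
    auto result = metrics.conditionalEntropy(feature0, feature1, labels, raw.weights);
    auto result2 = metrics.conditionalEntropy2(feature0, feature1, labels, raw.weights);
    REQUIRE(result == Catch::Approx(result2).epsilon(raw.epsilon));
}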