Implement Conditional Mutual Information

This commit is contained in:
2024-05-15 00:48:02 +02:00
parent 56b62a67cc
commit e2e0fb0c40
3 changed files with 145 additions and 1 deletions

View File

@@ -4,6 +4,9 @@
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <map>
#include <unordered_map>
#include <tuple>
#include "Mst.h"
#include "BayesMetrics.h"
namespace bayesnet {
@@ -105,6 +108,8 @@ namespace bayesnet {
}
return matrix;
}
// Measured in nats (natural logarithm (log) base e)
// Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{
torch::Tensor counts = feature.bincount(weights);
@@ -143,11 +148,117 @@ namespace bayesnet {
}
return entropyValue;
}
// H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c)
double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
// Ensure the tensors are of the same length
assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
// Convert tensors to vectors for easier processing
auto firstFeatureData = firstFeature.accessor<int, 1>();
auto secondFeatureData = secondFeature.accessor<int, 1>();
auto labelsData = labels.accessor<int, 1>();
auto weightsData = weights.accessor<double, 1>();
int numSamples = firstFeature.size(0);
// Maps for joint and marginal probabilities
std::map<std::tuple<int, int, int>, double> jointCount;
std::map<std::tuple<int, int>, double> marginalCount;
// Compute joint and marginal counts
for (int i = 0; i < numSamples; ++i) {
auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
jointCount[keyJoint] += weightsData[i];
marginalCount[keyMarginal] += weightsData[i];
}
// Total weight sum
double totalWeight = torch::sum(weights).item<double>();
// Compute the conditional entropy
double conditionalEntropy = 0.0;
for (const auto& [keyJoint, jointFreq] : jointCount) {
auto [x, c, y] = keyJoint;
auto keyMarginal = std::make_tuple(x, c);
double p_xc = marginalCount[keyMarginal] / totalWeight;
double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
if (p_y_given_xc > 0) {
conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
}
}
return conditionalEntropy;
}
double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
int numSamples = firstFeature.size(0);
// Get unique values for each variable
auto [uniqueX, countsX] = at::_unique(firstFeature);
auto [uniqueC, countsC] = at::_unique(labels);
// Compute p(x,c) for each unique value of X and C
std::map<int, std::map<std::pair<int, int>, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
int x = firstFeature[i].item<int>();
int y = secondFeature[i].item<int>();
int c = labels[i].item<int>();
const auto key = std::make_pair(x, c);
jointCounts[y][key] += weights[i].item<double>();
totalWeight += weights[i].item<float>();
}
if (totalWeight == 0)
return 0;
double entropyValue = 0;
// Iterate over unique values of X and C
for (int i = 0; i < uniqueX.size(0); i++) {
int x_val = uniqueX[i].item<int>();
for (int j = 0; j < uniqueC.size(0); j++) {
int c_val = uniqueC[j].item<int>();
double p_xc = 0; // Probability of (X=x, C=c)
double entropy_f = 0;
// Find joint counts for this specific (X,C) combination
for (auto& [y, jointCount] : jointCounts) {
auto joint_count_xc = jointCount.find({ x_val, c_val });
if (joint_count_xc != jointCount.end()) {
p_xc += joint_count_xc->second;
}
}
// Only calculate conditional entropy if p(X=x, C=c) > 0
if (p_xc > 0) {
p_xc /= totalWeight;
for (auto& [y, jointCount] : jointCounts) {
auto key = std::make_pair(x_val, c_val);
double p_y_xc = jointCount[key] / p_xc;
if (p_y_xc > 0) {
entropy_f -= p_y_xc * log(p_y_xc);
}
}
}
entropyValue += p_xc * entropy_f;
}
}
return entropyValue;
return 0;
}
// I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{
return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
}
// I(X;Y|C) = H(Y|C) - H(Y|X,C)
double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
return conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights);
}
/*
Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using