Implement Conditional Mutual Information

2024-05-15 00:48:02 +02:00 · 2024-05-15 00:48:02 +02:00 · e2e0fb0c40
commit e2e0fb0c40
parent 56b62a67cc
3 changed files with 145 additions and 1 deletions
--- a/bayesnet/utils/BayesMetrics.cc
+++ b/bayesnet/utils/BayesMetrics.cc
@ -4,6 +4,9 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************

+#include <map>
+#include <unordered_map>
+#include <tuple>
 #include "Mst.h"
 #include "BayesMetrics.h"
 namespace bayesnet {
@ -105,6 +108,8 @@ namespace bayesnet {
        }
        return matrix;
    }
+    // Measured in nats (natural logarithm (log) base e)
+    // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
    double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
    {
        torch::Tensor counts = feature.bincount(weights);
@ -143,11 +148,117 @@ namespace bayesnet {
        }
        return entropyValue;
    }
+    // H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c)
+    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
+    {
+        // Ensure the tensors are of the same length
+        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
+
+        // Convert tensors to vectors for easier processing
+        auto firstFeatureData = firstFeature.accessor<int, 1>();
+        auto secondFeatureData = secondFeature.accessor<int, 1>();
+        auto labelsData = labels.accessor<int, 1>();
+        auto weightsData = weights.accessor<double, 1>();
+
+        int numSamples = firstFeature.size(0);
+
+        // Maps for joint and marginal probabilities
+        std::map<std::tuple<int, int, int>, double> jointCount;
+        std::map<std::tuple<int, int>, double> marginalCount;
+
+        // Compute joint and marginal counts
+        for (int i = 0; i < numSamples; ++i) {
+            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
+            auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
+
+            jointCount[keyJoint] += weightsData[i];
+            marginalCount[keyMarginal] += weightsData[i];
+        }
+
+        // Total weight sum
+        double totalWeight = torch::sum(weights).item<double>();
+
+        // Compute the conditional entropy
+        double conditionalEntropy = 0.0;
+
+        for (const auto& [keyJoint, jointFreq] : jointCount) {
+            auto [x, c, y] = keyJoint;
+            auto keyMarginal = std::make_tuple(x, c);
+
+            double p_xc = marginalCount[keyMarginal] / totalWeight;
+            double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
+
+            if (p_y_given_xc > 0) {
+                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
+            }
+        }
+
+        return conditionalEntropy;
+    }
+    double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
+    {
+        int numSamples = firstFeature.size(0);
+        // Get unique values for each variable
+        auto [uniqueX, countsX] = at::_unique(firstFeature);
+        auto [uniqueC, countsC] = at::_unique(labels);
+
+        // Compute p(x,c) for each unique value of X and C
+        std::map<int, std::map<std::pair<int, int>, double>> jointCounts;
+        double totalWeight = 0;
+        for (auto i = 0; i < numSamples; i++) {
+            int x = firstFeature[i].item<int>();
+            int y = secondFeature[i].item<int>();
+            int c = labels[i].item<int>();
+            const auto key = std::make_pair(x, c);
+            jointCounts[y][key] += weights[i].item<double>();
+            totalWeight += weights[i].item<float>();
+        }
+        if (totalWeight == 0)
+            return 0;
+        double entropyValue = 0;
+
+        // Iterate over unique values of X and C
+        for (int i = 0; i < uniqueX.size(0); i++) {
+            int x_val = uniqueX[i].item<int>();
+            for (int j = 0; j < uniqueC.size(0); j++) {
+                int c_val = uniqueC[j].item<int>();
+                double p_xc = 0; // Probability of (X=x, C=c)
+                double entropy_f = 0;
+                // Find joint counts for this specific (X,C) combination
+                for (auto& [y, jointCount] : jointCounts) {
+                    auto joint_count_xc = jointCount.find({ x_val, c_val });
+                    if (joint_count_xc != jointCount.end()) {
+                        p_xc += joint_count_xc->second;
+                    }
+                }
+                // Only calculate conditional entropy if p(X=x, C=c) > 0
+                if (p_xc > 0) {
+                    p_xc /= totalWeight;
+                    for (auto& [y, jointCount] : jointCounts) {
+                        auto key = std::make_pair(x_val, c_val);
+                        double p_y_xc = jointCount[key] / p_xc;
+
+                        if (p_y_xc > 0) {
+                            entropy_f -= p_y_xc * log(p_y_xc);
+                        }
+                    }
+                }
+                entropyValue += p_xc * entropy_f;
+            }
+        }
+        return entropyValue;
+        return 0;
+    }
    // I(X;Y) = H(Y) - H(Y|X)
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
        return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
    }
+    // I(X;Y|C) = H(Y|C) - H(Y|X,C)
+    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
+    {
+        return conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights);
+    }
    /*
    Compute the maximum spanning tree considering the weights as distances
    and the indices of the weights as nodes of this square matrix using
--- a/bayesnet/utils/BayesMetrics.h
+++ b/bayesnet/utils/BayesMetrics.h
@ -18,12 +18,17 @@ namespace bayesnet {
        std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
        std::vector<double> getScoresKBest() const;
        double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
+        double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
        torch::Tensor conditionalEdge(const torch::Tensor& weights);
        std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
+        // Measured in nats (natural logarithm (log) base e)
+        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
+        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
+        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
+        double conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
        std::string className;
-        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        std::vector<std::string> features;
        template <class T>
        std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
--- a/tests/TestBayesMetrics.cc
+++ b/tests/TestBayesMetrics.cc
@ -83,4 +83,32 @@ TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]")
    auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0);
    REQUIRE(kBest.size() == raw.features.size());
    REQUIRE(kBest == std::vector<int>({ 1, 0, 3, 2 }));
+}
+TEST_CASE("Entropy Test", "[Metrics]")
+{
+    auto raw = RawDatasets("iris", true);
+    bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
+    auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights);
+    REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon));
+    auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32);
+    auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32);
+    result = metrics.entropy(data, weights);
+    REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon));
+    data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32);
+    result = metrics.entropy(data, weights);
+    REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon));
+}
+TEST_CASE("Conditional Entropy", "[Metrics]")
+{
+    auto raw = RawDatasets("iris", true);
+    bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
+    auto feature0 = raw.dataset.index({ 0, "..." });
+    auto feature1 = raw.dataset.index({ 1, "..." });
+    auto feature2 = raw.dataset.index({ 2, "..." });
+    auto feature3 = raw.dataset.index({ 3, "..." });
+    auto labels = raw.dataset.index({ 4, "..." });
+    auto result = metrics.conditionalEntropy(feature0, feature1, labels, raw.weights);
+    auto result2 = metrics.conditionalEntropy2(feature0, feature1, labels, raw.weights);
+    std::cout << "Result=" << result << "\n";
+    std::cout << "Result2=" << result2 << "\n";
 }