Force mutual information methods to be at least 0

There were cases where a tiny negative number was returned (less than -1e-7) Fix mst glass test that is affected with this change
2024-05-17 11:15:45 +02:00
parent 291ba0fb0e
commit 2584e8294d
4 changed files with 8 additions and 18 deletions
--- a/bayesnet/ensembles/BoostA2DE.cc
+++ b/bayesnet/ensembles/BoostA2DE.cc
@@ -155,7 +155,7 @@ namespace bayesnet {
            }
        }
        if (pairSelection.size() > 0) {
-            notes.push_back("Used pairs not used in train: " + std::to_string(pairSelection.size()));
+            notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
            status = WARNING;
        }
        notes.push_back("Number of models: " + std::to_string(n_models));
--- a/bayesnet/utils/BayesMetrics.cc
+++ b/bayesnet/utils/BayesMetrics.cc
@@ -198,24 +198,20 @@ namespace bayesnet {
        }
        return entropyValue;
    }
-    // H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c)
+    // H(X|Y,C) = sum_{y in Y, c in C} p(x,c) H(X|Y=y,C=c)
    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        // Ensure the tensors are of the same length
        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
-
        // Convert tensors to vectors for easier processing
        auto firstFeatureData = firstFeature.accessor<int, 1>();
        auto secondFeatureData = secondFeature.accessor<int, 1>();
        auto labelsData = labels.accessor<int, 1>();
        auto weightsData = weights.accessor<double, 1>();
-
        int numSamples = firstFeature.size(0);
-
        // Maps for joint and marginal probabilities
        std::map<std::tuple<int, int, int>, double> jointCount;
        std::map<std::tuple<int, int>, double> marginalCount;
-
        // Compute joint and marginal counts
        for (int i = 0; i < numSamples; ++i) {
            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
@@ -224,34 +220,29 @@ namespace bayesnet {
            jointCount[keyJoint] += weightsData[i];
            marginalCount[keyMarginal] += weightsData[i];
        }
-
        // Total weight sum
        double totalWeight = torch::sum(weights).item<double>();
        if (totalWeight == 0)
            return 0;
-
        // Compute the conditional entropy
        double conditionalEntropy = 0.0;
-
        for (const auto& [keyJoint, jointFreq] : jointCount) {
            auto [x, c, y] = keyJoint;
            auto keyMarginal = std::make_tuple(x, c);
-
            //double p_xc = marginalCount[keyMarginal] / totalWeight;
            double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
-
            if (p_y_given_xc > 0) {
                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
            }
        }
        return conditionalEntropy;
    }
-    // I(X;Y) = H(Y) - H(Y|X)
+    // I(X;Y) = H(Y) - H(Y|X) ; I(X;Y) >= 0
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
-        return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
+        return std::max(entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights), 0.0);
    }
-    // I(X;Y|C) = H(Y|C) - H(Y|X,C)
+    // I(X;Y|C) = H(X|C) - H(X|Y,C) >= 0
    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);