Remove unoptimized conditionalEntropy2 implementation; return 0 from conditionalEntropy when total weight is zero

2024-05-15 01:24:27 +02:00
parent e2e0fb0c40
commit 521bfd2a8e
4 changed files with 67 additions and 62 deletions
--- a/bayesnet/utils/BayesMetrics.cc
+++ b/bayesnet/utils/BayesMetrics.cc
@@ -177,6 +177,8 @@ namespace bayesnet {

        // Total weight sum
        double totalWeight = torch::sum(weights).item<double>();
+        if (totalWeight == 0)
+            return 0;

        // Compute the conditional entropy
        double conditionalEntropy = 0.0;
@@ -192,63 +194,8 @@ namespace bayesnet {
                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
            }
        }
-
        return conditionalEntropy;
    }
-    double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
-    {
-        int numSamples = firstFeature.size(0);
-        // Get unique values for each variable
-        auto [uniqueX, countsX] = at::_unique(firstFeature);
-        auto [uniqueC, countsC] = at::_unique(labels);
-
-        // Compute p(x,c) for each unique value of X and C
-        std::map<int, std::map<std::pair<int, int>, double>> jointCounts;
-        double totalWeight = 0;
-        for (auto i = 0; i < numSamples; i++) {
-            int x = firstFeature[i].item<int>();
-            int y = secondFeature[i].item<int>();
-            int c = labels[i].item<int>();
-            const auto key = std::make_pair(x, c);
-            jointCounts[y][key] += weights[i].item<double>();
-            totalWeight += weights[i].item<float>();
-        }
-        if (totalWeight == 0)
-            return 0;
-        double entropyValue = 0;
-
-        // Iterate over unique values of X and C
-        for (int i = 0; i < uniqueX.size(0); i++) {
-            int x_val = uniqueX[i].item<int>();
-            for (int j = 0; j < uniqueC.size(0); j++) {
-                int c_val = uniqueC[j].item<int>();
-                double p_xc = 0; // Probability of (X=x, C=c)
-                double entropy_f = 0;
-                // Find joint counts for this specific (X,C) combination
-                for (auto& [y, jointCount] : jointCounts) {
-                    auto joint_count_xc = jointCount.find({ x_val, c_val });
-                    if (joint_count_xc != jointCount.end()) {
-                        p_xc += joint_count_xc->second;
-                    }
-                }
-                // Only calculate conditional entropy if p(X=x, C=c) > 0
-                if (p_xc > 0) {
-                    p_xc /= totalWeight;
-                    for (auto& [y, jointCount] : jointCounts) {
-                        auto key = std::make_pair(x_val, c_val);
-                        double p_y_xc = jointCount[key] / p_xc;
-
-                        if (p_y_xc > 0) {
-                            entropy_f -= p_y_xc * log(p_y_xc);
-                        }
-                    }
-                }
-                entropyValue += p_xc * entropy_f;
-            }
-        }
-        return entropyValue;
-        return 0;
-    }
    // I(X;Y) = H(Y) - H(Y|X)
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
--- a/bayesnet/utils/BayesMetrics.h
+++ b/bayesnet/utils/BayesMetrics.h
@@ -25,7 +25,6 @@ namespace bayesnet {
        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
-        double conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
        std::string className;