diff --git a/bayesnet/utils/BayesMetrics.cc b/bayesnet/utils/BayesMetrics.cc index 07de7cb..61e5845 100644 --- a/bayesnet/utils/BayesMetrics.cc +++ b/bayesnet/utils/BayesMetrics.cc @@ -4,6 +4,9 @@ // SPDX-License-Identifier: MIT // *************************************************************** +#include +#include +#include #include "Mst.h" #include "BayesMetrics.h" namespace bayesnet { @@ -105,6 +108,8 @@ namespace bayesnet { } return matrix; } + // Measured in nats (natural logarithm (log) base e) + // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14 double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights) { torch::Tensor counts = feature.bincount(weights); @@ -143,11 +148,117 @@ namespace bayesnet { } return entropyValue; } + // H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c) + double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights) + { + // Ensure the tensors are of the same length + assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0)); + + // Convert tensors to vectors for easier processing + auto firstFeatureData = firstFeature.accessor(); + auto secondFeatureData = secondFeature.accessor(); + auto labelsData = labels.accessor(); + auto weightsData = weights.accessor(); + + int numSamples = firstFeature.size(0); + + // Maps for joint and marginal probabilities + std::map, double> jointCount; + std::map, double> marginalCount; + + // Compute joint and marginal counts + for (int i = 0; i < numSamples; ++i) { + auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]); + auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]); + + jointCount[keyJoint] += weightsData[i]; + marginalCount[keyMarginal] += weightsData[i]; + } + + // Total weight sum + double totalWeight = torch::sum(weights).item(); + + // Compute the conditional entropy + double conditionalEntropy = 0.0; + + for (const auto& [keyJoint, jointFreq] : jointCount) { + auto [x, c, y] = keyJoint; + auto keyMarginal = std::make_tuple(x, c); + + double p_xc = marginalCount[keyMarginal] / totalWeight; + double p_y_given_xc = jointFreq / marginalCount[keyMarginal]; + + if (p_y_given_xc > 0) { + conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc); + } + } + + return conditionalEntropy; + } + double Metrics::conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights) + { + int numSamples = firstFeature.size(0); + // Get unique values for each variable + auto [uniqueX, countsX] = at::_unique(firstFeature); + auto [uniqueC, countsC] = at::_unique(labels); + + // Compute p(x,c) for each unique value of X and C + std::map, double>> jointCounts; + double totalWeight = 0; + for (auto i = 0; i < numSamples; i++) { + int x = firstFeature[i].item(); + int y = secondFeature[i].item(); + int c = labels[i].item(); + const auto key = std::make_pair(x, c); + jointCounts[y][key] += weights[i].item(); + totalWeight += weights[i].item(); + } + if (totalWeight == 0) + return 0; + double entropyValue = 0; + + // Iterate over unique values of X and C + for (int i = 0; i < uniqueX.size(0); i++) { + int x_val = uniqueX[i].item(); + for (int j = 0; j < uniqueC.size(0); j++) { + int c_val = uniqueC[j].item(); + double p_xc = 0; // Probability of (X=x, C=c) + double entropy_f = 0; + // Find joint counts for this specific (X,C) combination + for (auto& [y, jointCount] : jointCounts) { + auto joint_count_xc = jointCount.find({ x_val, c_val }); + if (joint_count_xc != jointCount.end()) { + p_xc += joint_count_xc->second; + } + } + // Only calculate conditional entropy if p(X=x, C=c) > 0 + if (p_xc > 0) { + p_xc /= totalWeight; + for (auto& [y, jointCount] : jointCounts) { + auto key = std::make_pair(x_val, c_val); + double p_y_xc = jointCount[key] / p_xc; + + if (p_y_xc > 0) { + entropy_f -= p_y_xc * log(p_y_xc); + } + } + } + entropyValue += p_xc * entropy_f; + } + } + return entropyValue; + return 0; + } // I(X;Y) = H(Y) - H(Y|X) double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) { return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights); } + // I(X;Y|C) = H(Y|C) - H(Y|X,C) + double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights) + { + return conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights); + } /* Compute the maximum spanning tree considering the weights as distances and the indices of the weights as nodes of this square matrix using diff --git a/bayesnet/utils/BayesMetrics.h b/bayesnet/utils/BayesMetrics.h index 2665e73..538e574 100644 --- a/bayesnet/utils/BayesMetrics.h +++ b/bayesnet/utils/BayesMetrics.h @@ -18,12 +18,17 @@ namespace bayesnet { std::vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); std::vector getScoresKBest() const; double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights); + double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights); torch::Tensor conditionalEdge(const torch::Tensor& weights); std::vector> maximumSpanningTree(const std::vector& features, const torch::Tensor& weights, const int root); + // Measured in nats (natural logarithm (log) base e) + // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14 + double entropy(const torch::Tensor& feature, const torch::Tensor& weights); + double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights); + double conditionalEntropy2(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights); protected: torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector std::string className; - double entropy(const torch::Tensor& feature, const torch::Tensor& weights); std::vector features; template std::vector> doCombinations(const std::vector& source) diff --git a/tests/TestBayesMetrics.cc b/tests/TestBayesMetrics.cc index 09b3545..c05b491 100644 --- a/tests/TestBayesMetrics.cc +++ b/tests/TestBayesMetrics.cc @@ -83,4 +83,32 @@ TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]") auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0); REQUIRE(kBest.size() == raw.features.size()); REQUIRE(kBest == std::vector({ 1, 0, 3, 2 })); +} +TEST_CASE("Entropy Test", "[Metrics]") +{ + auto raw = RawDatasets("iris", true); + bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); + auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights); + REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon)); + auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32); + auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32); + result = metrics.entropy(data, weights); + REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon)); + data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32); + result = metrics.entropy(data, weights); + REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon)); +} +TEST_CASE("Conditional Entropy", "[Metrics]") +{ + auto raw = RawDatasets("iris", true); + bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates); + auto feature0 = raw.dataset.index({ 0, "..." }); + auto feature1 = raw.dataset.index({ 1, "..." }); + auto feature2 = raw.dataset.index({ 2, "..." }); + auto feature3 = raw.dataset.index({ 3, "..." }); + auto labels = raw.dataset.index({ 4, "..." }); + auto result = metrics.conditionalEntropy(feature0, feature1, labels, raw.weights); + auto result2 = metrics.conditionalEntropy2(feature0, feature1, labels, raw.weights); + std::cout << "Result=" << result << "\n"; + std::cout << "Result2=" << result2 << "\n"; } \ No newline at end of file