Complete Conditional Mutual Information and test

Remove unoptimized implementation of conditionalEntropy
Implement Conditional Mutual Information
2024-05-15 11:09:23 +02:00 · 2024-05-15 01:24:27 +02:00 · 2024-05-15 00:48:02 +02:00 · 2024-05-12 20:23:05 +02:00 · 2024-05-12 19:05:36 +02:00
6 changed files with 154 additions and 13 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,8 @@
 	url = https://github.com/rmontanana/folding
 	main = main
 	update = merge
 [submodule "tests/lib/catch2"]
 	path = tests/lib/catch2
        url = https://github.com/catchorg/Catch2.git
 	main = main
 	update = merge
--- a/bayesnet/utils/BayesMetrics.cc
+++ b/bayesnet/utils/BayesMetrics.cc
@@ -4,6 +4,9 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 #include <map>
 #include <unordered_map>
 #include <tuple>
 #include "Mst.h"
 #include "BayesMetrics.h"
 namespace bayesnet {
@@ -105,6 +108,8 @@ namespace bayesnet {
        }
        return matrix;
    }
    // Measured in nats (natural logarithm (log) base e)
    // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
    double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
    {
        torch::Tensor counts = feature.bincount(weights);
@@ -143,11 +148,64 @@ namespace bayesnet {
        }
        return entropyValue;
    }
    // H(Y|X,C) = sum_{x in X, c in C} p(x,c) H(Y|X=x,C=c)
    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        // Ensure the tensors are of the same length
        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
        // Convert tensors to vectors for easier processing
        auto firstFeatureData = firstFeature.accessor<int, 1>();
        auto secondFeatureData = secondFeature.accessor<int, 1>();
        auto labelsData = labels.accessor<int, 1>();
        auto weightsData = weights.accessor<double, 1>();
        int numSamples = firstFeature.size(0);
        // Maps for joint and marginal probabilities
        std::map<std::tuple<int, int, int>, double> jointCount;
        std::map<std::tuple<int, int>, double> marginalCount;
        // Compute joint and marginal counts
        for (int i = 0; i < numSamples; ++i) {
            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
            auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
            jointCount[keyJoint] += weightsData[i];
            marginalCount[keyMarginal] += weightsData[i];
        }
        // Total weight sum
        double totalWeight = torch::sum(weights).item<double>();
        if (totalWeight == 0)
            return 0;
        // Compute the conditional entropy
        double conditionalEntropy = 0.0;
        for (const auto& [keyJoint, jointFreq] : jointCount) {
            auto [x, c, y] = keyJoint;
            auto keyMarginal = std::make_tuple(x, c);
            double p_xc = marginalCount[keyMarginal] / totalWeight;
            double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
            if (p_y_given_xc > 0) {
                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
            }
        }
        return conditionalEntropy;
    }
    // I(X;Y) = H(Y) - H(Y|X)
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
        return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
    }
    // I(X;Y|C) = H(Y|C) - H(Y|X,C)
    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);
    }
    /*
    Compute the maximum spanning tree considering the weights as distances
    and the indices of the weights as nodes of this square matrix using
--- a/bayesnet/utils/BayesMetrics.h
+++ b/bayesnet/utils/BayesMetrics.h
@@ -18,12 +18,16 @@ namespace bayesnet {
        std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
        std::vector<double> getScoresKBest() const;
        double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
        double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
        torch::Tensor conditionalEdge(const torch::Tensor& weights);
        std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
        // Measured in nats (natural logarithm (log) base e)
        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
        std::string className;
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        std::vector<std::string> features;
        template <class T>
        std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
--- a/tests/TestBayesMetrics.cc
+++ b/tests/TestBayesMetrics.cc
@@ -9,6 +9,7 @@
 #include <catch2/generators/catch_generators.hpp>
 #include "bayesnet/utils/BayesMetrics.h"
 #include "TestUtils.h"
 #include "Timer.h"
 TEST_CASE("Metrics Test", "[Metrics]")
@@ -83,4 +84,37 @@ TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]")
    auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0);
    REQUIRE(kBest.size() == raw.features.size());
    REQUIRE(kBest == std::vector<int>({ 1, 0, 3, 2 }));
 }
 TEST_CASE("Entropy Test", "[Metrics]")
 {
    auto raw = RawDatasets("iris", true);
    bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
    auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights);
    REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon));
    auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32);
    auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32);
    result = metrics.entropy(data, weights);
    REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon));
    data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32);
    result = metrics.entropy(data, weights);
    REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon));
 }
 TEST_CASE("Conditional Entropy", "[Metrics]")
 {
    auto raw = RawDatasets("iris", true);
    bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
    auto expected = std::map<std::pair<int, int>, double>{
        { { 0, 1 }, 0.0 },
        { { 0, 2 }, 0.287696 },
        { { 0, 3 }, 0.403749 },
        { { 1, 2 }, 1.17112 },
        { { 1, 3 }, 1.31852 },
        { { 2, 3 }, 0.210068 },
    };
    for (int i = 0; i < raw.features.size() - 1; ++i) {
        for (int j = i + 1; j < raw.features.size(); ++j) {
            double result = metrics.conditionalMutualInformation(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights);
            REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
        }
    }
 }
--- a/tests/TestBoostAODE.cc
+++ b/tests/TestBoostAODE.cc
@@ -45,7 +45,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]")
    REQUIRE(clf.getNumberOfNodes() == 90);
    REQUIRE(clf.getNumberOfEdges() == 153);
    REQUIRE(clf.getNotes().size() == 2);
-    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
+    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
 }
 TEST_CASE("Test used features in train note and score", "[BoostAODE]")
@@ -65,8 +65,8 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
    REQUIRE(clf.getNotes()[1] == "Number of models: 8");
    auto score = clf.score(raw.Xv, raw.yv);
    auto scoret = clf.score(raw.Xt, raw.yt);
-    REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
+    REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
-    REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
+    REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
 }
 TEST_CASE("Voting vs proba", "[BoostAODE]")
 {
@@ -149,15 +149,14 @@ TEST_CASE("Bisection Best", "[BoostAODE]")
        {"convergence_best", false},
        });
    clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
-    REQUIRE(clf.getNumberOfNodes() == 75);
+    REQUIRE(clf.getNumberOfNodes() == 210);
-    REQUIRE(clf.getNumberOfEdges() == 135);
+    REQUIRE(clf.getNumberOfEdges() == 378);
-    REQUIRE(clf.getNotes().size() == 2);
+    REQUIRE(clf.getNotes().size() == 1);
-    REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 9 models eliminated");
+    REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
    REQUIRE(clf.getNotes().at(1) == "Number of models: 5");
    auto score = clf.score(raw.X_test, raw.y_test);
    auto scoret = clf.score(raw.X_test, raw.y_test);
-    REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
+    REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
-    REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
+    REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
 }
 TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
 {
@@ -172,13 +171,13 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
    clf.setHyperparameters(hyperparameters);
    clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
    auto score_best = clf.score(raw.X_test, raw.y_test);
-    REQUIRE(score_best == Catch::Approx(0.993355f).epsilon(raw.epsilon));
+    REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
    // Now we will set the hyperparameter to use the last accuracy
    hyperparameters["convergence_best"] = false;
    clf.setHyperparameters(hyperparameters);
    clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
    auto score_last = clf.score(raw.X_test, raw.y_test);
-    REQUIRE(score_last == Catch::Approx(0.996678f).epsilon(raw.epsilon));
+    REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
 }
 TEST_CASE("Block Update", "[BoostAODE]")
--- a/tests/Timer.h
+++ b/tests/Timer.h
@@ -0,0 +1,41 @@
 #pragma once
 #include <chrono>
 #include <string>
 #include <sstream>
 namespace platform {
    class Timer {
    private:
        std::chrono::high_resolution_clock::time_point begin;
        std::chrono::high_resolution_clock::time_point end;
    public:
        Timer() = default;
        ~Timer() = default;
        void start() { begin = std::chrono::high_resolution_clock::now(); }
        void stop() { end = std::chrono::high_resolution_clock::now(); }
        double getDuration()
        {
            stop();
            std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
            return time_span.count();
        }
        double getLapse()
        {
            std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (std::chrono::high_resolution_clock::now() - begin);
            return time_span.count();
        }
        std::string getDurationString(bool lapse = false)
        {
            double duration = lapse ? getLapse() : getDuration();
            return translate2String(duration);
        }
        std::string translate2String(double duration)
        {
            double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
            std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
            std::stringstream ss;
            ss << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit;
            return ss.str();
        }
    };
 } /* namespace platform */
Author	SHA1	Message	Date
Ricardo Montañana Gómez	0e24135d46	Complete Conditional Mutual Information and test	2024-05-15 11:09:23 +02:00
Ricardo Montañana Gómez	521bfd2a8e	Remove unoptimized implementation of conditionalEntropy	2024-05-15 01:24:27 +02:00
Ricardo Montañana Gómez	e2e0fb0c40	Implement Conditional Mutual Information	2024-05-15 00:48:02 +02:00
Ricardo Montañana Gómez	56b62a67cc	Change BoostAODE tests results because folding upgrade	2024-05-12 20:23:05 +02:00
Ricardo Montañana	c0fc107abb	Fix catch2 submodule config	2024-05-12 19:05:36 +02:00