Add entropy, conditionalEntropy, mutualInformation and conditionalEdgeWeight methods

2023-07-11 17:42:20 +02:00
parent 3750662f2c
commit c7e2042c6e
8 changed files with 683 additions and 43 deletions
--- a/src/Network.cc
+++ b/src/Network.cc
@@ -98,11 +98,14 @@ namespace bayesnet {
        this->className = className;
        dataset.clear();

-        // Build dataset
+        // Build dataset & tensor of samples
+        samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
        for (int i = 0; i < featureNames.size(); ++i) {
            dataset[featureNames[i]] = input_data[i];
+            samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
        }
        dataset[className] = labels;
+        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
        classNumStates = *max_element(labels.begin(), labels.end()) + 1;
        int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
        if (maxThreadsRunning < 1) {
@@ -150,14 +153,14 @@ namespace bayesnet {
        }
    }

-    vector<int> Network::predict(const vector<vector<int>>& samples)
+    vector<int> Network::predict(const vector<vector<int>>& tsamples)
    {
        vector<int> predictions;
        vector<int> sample;
-        for (int row = 0; row < samples[0].size(); ++row) {
+        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
-            for (int col = 0; col < samples.size(); ++col) {
-                sample.push_back(samples[col][row]);
+            for (int col = 0; col < tsamples.size(); ++col) {
+                sample.push_back(tsamples[col][row]);
            }
            vector<double> classProbabilities = predict_sample(sample);
            // Find the class with the maximum posterior probability
@@ -167,22 +170,22 @@ namespace bayesnet {
        }
        return predictions;
    }
-    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& samples)
+    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
    {
        vector<vector<double>> predictions;
        vector<int> sample;
-        for (int row = 0; row < samples[0].size(); ++row) {
+        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
-            for (int col = 0; col < samples.size(); ++col) {
-                sample.push_back(samples[col][row]);
+            for (int col = 0; col < tsamples.size(); ++col) {
+                sample.push_back(tsamples[col][row]);
            }
            predictions.push_back(predict_sample(sample));
        }
        return predictions;
    }
-    double Network::score(const vector<vector<int>>& samples, const vector<int>& labels)
+    double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
    {
-        vector<int> y_pred = predict(samples);
+        vector<int> y_pred = predict(tsamples);
        int correct = 0;
        for (int i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == labels[i]) {
@@ -238,4 +241,83 @@ namespace bayesnet {
        }
        return result;
    }
+    double Network::mutual_info(torch::Tensor& first, torch::Tensor& second)
+    {
+        return 1;
+    }
+    torch::Tensor Network::conditionalEdgeWeight()
+    {
+        auto result = vector<double>();
+        auto source = vector<string>(features);
+        source.push_back(className);
+        auto combinations = nodes[className]->combinations(source);
+        auto margin = nodes[className]->getCPT();
+        for (auto [first, second] : combinations) {
+            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
+            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
+            double accumulated = 0;
+            for (int value = 0; value < classNumStates; ++value) {
+                auto mask = samples.index({ "...", -1 }) == value;
+                auto first_dataset = samples.index({ mask, index_first });
+                auto second_dataset = samples.index({ mask, index_second });
+                auto mi = mutualInformation(first_dataset, second_dataset);
+                auto pb = margin[value].item<float>();
+                accumulated += pb * mi;
+            }
+            result.push_back(accumulated);
+        }
+        long n_vars = source.size();
+        auto matrix = torch::zeros({ n_vars, n_vars });
+        auto indices = torch::triu_indices(n_vars, n_vars, 1);
+        for (auto i = 0; i < result.size(); ++i) {
+            auto x = indices[0][i];
+            auto y = indices[1][i];
+            matrix[x][y] = result[i];
+            matrix[y][x] = result[i];
+        }
+        return matrix;
+    }
+    double Network::entropy(torch::Tensor& feature)
+    {
+        torch::Tensor counts = feature.bincount();
+        int totalWeight = counts.sum().item<int>();
+        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
+        torch::Tensor logProbs = torch::log(probs);
+        torch::Tensor entropy = -probs * logProbs;
+        return entropy.nansum().item<double>();
+    }
+    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
+    double Network::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        int numSamples = firstFeature.sizes()[0];
+        torch::Tensor featureCounts = secondFeature.bincount();
+        unordered_map<int, unordered_map<int, double>> jointCounts;
+        double totalWeight = 0;
+        for (auto i = 0; i < numSamples; i++) {
+            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
+            totalWeight += 1;
+        }
+        if (totalWeight == 0)
+            throw invalid_argument("Total weight should not be zero");
+        double entropyValue = 0;
+        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
+            double p_f = featureCounts[value].item<double>() / totalWeight;
+            double entropy_f = 0;
+            for (auto& [label, jointCount] : jointCounts[value]) {
+                double p_l_f = jointCount / featureCounts[value].item<double>();
+                if (p_l_f > 0) {
+                    entropy_f -= p_l_f * log(p_l_f);
+                } else {
+                    entropy_f = 0;
+                }
+            }
+            entropyValue += p_f * entropy_f;
+        }
+        return entropyValue;
+    }
+    // I(X;Y) = H(Y) - H(Y|X)
+    double Network::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
+    }
 }
--- a/src/Network.h
+++ b/src/Network.h
@@ -19,7 +19,12 @@ namespace bayesnet {
        vector<double> predict_sample(const vector<int>&);
        vector<double> exactInference(map<string, int>&);
        double computeFactor(map<string, int>&);
+        double mutual_info(torch::Tensor&, torch::Tensor&);
+        double entropy(torch::Tensor&);
+        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
+        double mutualInformation(torch::Tensor&, torch::Tensor&);
    public:
+        torch::Tensor samples;
        Network();
        Network(float, int);
        Network(float);
@@ -35,6 +40,8 @@ namespace bayesnet {
        string getClassName();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
        vector<int> predict(const vector<vector<int>>&);
+        //Computes the conditional edge weight of variable index u and v conditioned on class_node
+        torch::Tensor conditionalEdgeWeight();
        vector<vector<double>> predict_proba(const vector<vector<int>>&);
        double score(const vector<vector<int>>&, const vector<int>&);
        inline string version() { return "0.1.0"; }
--- a/src/Node.cc
+++ b/src/Node.cc
@@ -57,23 +57,23 @@ namespace bayesnet {
    */
    unsigned Node::minFill()
    {
-        set<string> neighbors;
+        unordered_set<string> neighbors;
        for (auto child : children) {
            neighbors.emplace(child->getName());
        }
        for (auto parent : parents) {
            neighbors.emplace(parent->getName());
        }
-        return combinations(neighbors).size();
+        auto source = vector<string>(neighbors.begin(), neighbors.end());
+        return combinations(source).size();
    }
-    vector<string> Node::combinations(const set<string>& neighbors)
+    vector<pair<string, string>> Node::combinations(const vector<string>& source)
    {
-        vector<string> source(neighbors.begin(), neighbors.end());
-        vector<string> result;
+        vector<pair<string, string>> result;
        for (int i = 0; i < source.size(); ++i) {
            string temp = source[i];
            for (int j = i + 1; j < source.size(); ++j) {
-                result.push_back(temp + source[j]);
+                result.push_back({ temp, source[j] });
            }
        }
        return result;
--- a/src/Node.h
+++ b/src/Node.h
@@ -1,6 +1,7 @@
 #ifndef NODE_H
 #define NODE_H
 #include <torch/torch.h>
+#include <unordered_set>
 #include <vector>
 #include <string>
 namespace bayesnet {
@@ -13,8 +14,8 @@ namespace bayesnet {
        int numStates; // number of states of the variable
        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
        vector<int64_t> dimensions; // dimensions of the cpTable
-        vector<string> combinations(const set<string>&);
    public:
+        vector<pair<string, string>> combinations(const vector<string>&);
        Node(const std::string&, int);
        void addParent(Node*);
        void addChild(Node*);