From d1eaab6408d514235fd95129234ebbb61bd0b525 Mon Sep 17 00:00:00 2001
From: Ricardo Montañana
Date: Tue, 11 Jul 2023 22:23:49 +0200
Subject: [PATCH] Refactor Network and create Metrics class

---
 README.md          | 109 +-------------------------------------------
 sample/main.cc     |   4 +-
 src/CMakeLists.txt |   2 +-
 src/Metrics.cc     | 102 ++++++++++++++++++++++++++++++++++++++++++
 src/Metrics.hpp    |  23 ++++++++++
 src/Network.cc     |  84 +++-------------------------------------
 src/Network.h      |   3 +-
 7 files changed, 137 insertions(+), 190 deletions(-)
 create mode 100644 src/Metrics.cc
 create mode 100644 src/Metrics.hpp

diff --git a/README.md b/README.md
index 98a7732..c95d141 100644
--- a/README.md
+++ b/README.md
@@ -2,111 +2,4 @@
 
 Bayesian Network Classifier with libtorch from scratch
 
-## Variable Elimination
-
-To decide the first variable to eliminate we use the MinFill criterion, that is,
-the variable that minimizes the number of edges that need to be added to the
-graph to make it triangulated.
-This is done by counting the number of edges that would have to be added to the
-graph if the variable were eliminated; the variable with the minimum number of
-edges is chosen.
-In pgmpy this is done by computing the number of combinations of the neighbors
-taken two at a time.
-
-Once the variable to eliminate is chosen, we compute the factors that need to
-be multiplied to get the new factor.
-This is done by multiplying all the factors that contain the variable to
-eliminate and then marginalizing the variable out.
-
-The new factor is then added to the list of factors, and the variable to
-eliminate is removed from the list of variables.
-
-The process is repeated until there are no more variables to eliminate.
-
-## Code for combination
-
-```cpp
-// All combinations of length 2 of the elements of source
-vector<string> combinations(vector<string> source)
-{
-    vector<string> result;
-    for (int i = 0; i < source.size(); ++i) {
-        string temp = source[i];
-        for (int j = i + 1; j < source.size(); ++j) {
-            result.push_back(temp + source[j]);
-        }
-    }
-    return result;
-}
-```
-
-## Code for Variable Elimination
-
-```cpp
-// Variable Elimination
-vector<string> variableElimination(vector<string> source, map<string, vector<string>> graph)
-{
-    vector<string> variables = source;
-    vector<string> factors = source;
-    while (variables.size() > 0) {
-        string variable = minFill(variables, graph);
-        vector<string> neighbors = graph[variable];
-        vector<string> pairs = combinations(neighbors);
-        vector<string> factorsToMultiply;
-        for (int i = 0; i < factors.size(); ++i) {
-            string factor = factors[i];
-            for (int j = 0; j < pairs.size(); ++j) {
-                if (factor.find(pairs[j]) != string::npos) {
-                    factorsToMultiply.push_back(factor);
-                    break;
-                }
-            }
-        }
-        string newFactor = multiplyFactors(factorsToMultiply);
-        factors.push_back(newFactor);
-        variables.erase(remove(variables.begin(), variables.end(), variable), variables.end());
-    }
-    return factors;
-}
-```
-
-## Network copy constructor
-
-```cpp
-// Network copy constructor
-Network::Network(const Network& network)
-{
-    this->variables = network.variables;
-    this->factors = network.factors;
-    this->graph = network.graph;
-}
-```
-
-## Code for MinFill
-
-```cpp
-// MinFill: choose the variable whose elimination adds the fewest edges
-string minFill(vector<string> source, map<string, vector<string>> graph)
-{
-    string result;
-    int min = INT_MAX;
-    for (int i = 0; i < source.size(); ++i) {
-        string temp = source[i];
-        int count = 0;
-        vector<string> neighbors = graph[temp];
-        vector<string> pairs = combinations(neighbors);
-        for (int j = 0; j < pairs.size(); ++j) {
-            if (graph[pairs[j]].size() == 0) {
-                count++;
-            }
-        }
-        if (count < min) {
-            min = count;
-            result = temp;
-        }
-    }
-    return result;
-}
-```
+## 1. Introduction
diff --git a/sample/main.cc b/sample/main.cc
index 4f79a9b..c1b1e1f 100644
--- a/sample/main.cc
+++ b/sample/main.cc
@@ -5,6 +5,7 @@
 #include 
 #include "ArffFiles.h"
 #include "Network.h"
+#include "Metrics.hpp"
 #include "CPPFImdlp.h"
@@ -230,6 +231,7 @@ int main(int argc, char** argv)
     cout << "BayesNet version: " << network.version() << endl;
     unsigned int nthreads = std::thread::hardware_concurrency();
     cout << "Computer has " << nthreads << " cores." << endl;
-    cout << "conditionalEdgeWeight " << endl << network.conditionalEdgeWeight() << endl;
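+    // Information-theoretic metrics now live in bayesnet::Metrics, built from the network's samples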
+    auto metrics = bayesnet::Metrics(network.getSamples(), features, className, network.getClassNumStates());
+    cout << "conditionalEdgeWeight " << endl << metrics.conditionalEdgeWeight() << endl;
     return 0;
 }
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e2b3ce9..b923ad7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,2 +1,2 @@
-add_library(BayesNet Network.cc Node.cc)
+add_library(BayesNet Network.cc Node.cc Metrics.cc)
 target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
\ No newline at end of file
diff --git a/src/Metrics.cc b/src/Metrics.cc
new file mode 100644
index 0000000..40e485f
--- /dev/null
+++ b/src/Metrics.cc
@@ -0,0 +1,102 @@
+#include "Metrics.hpp"
+using namespace std;
+namespace bayesnet {
+    Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
+        : samples(samples)
+        , features(features)
+        , className(className)
+        , classNumStates(classNumStates)
+    {
+    }
+    // Build every unordered pair of the names in source
+    vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
+    {
+        vector<pair<string, string>> result;
+        for (int i = 0; i < source.size(); ++i) {
+            string temp = source[i];
+            for (int j = i + 1; j < source.size(); ++j) {
+                result.push_back({ temp, source[j] });
+            }
+        }
+        return result;
+    }
+    torch::Tensor Metrics::conditionalEdgeWeight()
+    {
+        auto result = vector<double>();
+        auto source = vector<string>(features);
+        source.push_back(className);
+        auto combinations = doCombinations(source);
+        // Compute class prior p(c) from the last column of samples
+        auto margin = torch::zeros({ classNumStates });
+        for (int value = 0; value < classNumStates; ++value) {
+            auto mask = samples.index({ "...", -1 }) == value;
+            margin[value] = mask.sum().item<float>() / samples.sizes()[0];
+        }
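+        // Conditional mutual information of each pair given the class:
+        // I(X;Y|C) = sum_{c in C} p(c) I(X;Y|C=c)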
+        for (auto [first, second] : combinations) {
+            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
+            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
+            double accumulated = 0;
+            for (int value = 0; value < classNumStates; ++value) {
+                auto mask = samples.index({ "...", -1 }) == value;
+                auto first_dataset = samples.index({ mask, index_first });
+                auto second_dataset = samples.index({ mask, index_second });
+                auto mi = mutualInformation(first_dataset, second_dataset);
+                auto pb = margin[value].item<float>();
+                accumulated += pb * mi;
+            }
+            result.push_back(accumulated);
+        }
+        long n_vars = source.size();
+        auto matrix = torch::zeros({ n_vars, n_vars });
+        auto indices = torch::triu_indices(n_vars, n_vars, 1);
+        // Fill both triangles of the symmetric weight matrix
+        for (auto i = 0; i < result.size(); ++i) {
+            auto x = indices[0][i];
+            auto y = indices[1][i];
+            matrix[x][y] = result[i];
+            matrix[y][x] = result[i];
+        }
+        return matrix;
+    }
+    double Metrics::entropy(torch::Tensor& feature)
+    {
+        torch::Tensor counts = feature.bincount();
+        int totalWeight = counts.sum().item<int>();
+        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
+        torch::Tensor logProbs = torch::log(probs);
+        torch::Tensor entropy = -probs * logProbs;
+        // nansum treats the NaN terms produced by 0 * log(0) as zero
+        return entropy.nansum().item<double>();
+    }
+    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
+    double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        int numSamples = firstFeature.sizes()[0];
+        torch::Tensor featureCounts = secondFeature.bincount();
+        unordered_map<int, unordered_map<int, double>> jointCounts;
+        double totalWeight = 0;
+        for (auto i = 0; i < numSamples; i++) {
+            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
+            totalWeight += 1;
+        }
+        if (totalWeight == 0)
+            throw invalid_argument("Total weight should not be zero");
+        double entropyValue = 0;
+        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
+            double p_f = featureCounts[value].item<double>() / totalWeight;
+            double entropy_f = 0;
+            for (auto& [label, jointCount] : jointCounts[value]) {
+                double p_l_f = jointCount / featureCounts[value].item<double>();
+                if (p_l_f > 0) {
+                    entropy_f -= p_l_f * log(p_l_f);
+                }
+            }
+            entropyValue += p_f * entropy_f;
+        }
+        return entropyValue;
+    }
+    // I(X;Y) = H(Y) - H(Y|X)
+    double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
+    {
+        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
+    }
+}
\ No newline at end of file
diff --git a/src/Metrics.hpp b/src/Metrics.hpp
new file mode 100644
index 0000000..c37e951
--- /dev/null
+++ b/src/Metrics.hpp
@@ -0,0 +1,23 @@
+#ifndef BAYESNET_METRICS_H
+#define BAYESNET_METRICS_H
+#include <torch/torch.h>
+#include <vector>
+#include <string>
+using namespace std;
+namespace bayesnet {
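+    // Information-theoretic metrics (entropy, conditional entropy, mutual
+    // information, class-conditional edge weights) over a discrete sample
+    // matrix whose last column holds the class labels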
+    class Metrics {
+    private:
+        torch::Tensor& samples;
+        vector<string>& features;
+        string& className;
+        int classNumStates;
+        vector<pair<string, string>> doCombinations(const vector<string>&);
+        double entropy(torch::Tensor&);
+        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
+        double mutualInformation(torch::Tensor&, torch::Tensor&);
+    public:
+        Metrics(torch::Tensor&, vector<string>&, string&, int);
+        torch::Tensor conditionalEdgeWeight();
+    };
+}
+#endif
\ No newline at end of file
diff --git a/src/Network.cc b/src/Network.cc
index 8d336ac..b1094c3 100644
--- a/src/Network.cc
+++ b/src/Network.cc
@@ -21,6 +21,10 @@ namespace bayesnet {
     {
         return maxThreads;
     }
+    torch::Tensor& Network::getSamples()
+    {
+        return samples;
+    }
     void Network::addNode(string name, int numStates)
     {
         if (nodes.find(name) != nodes.end()) {
@@ -241,83 +245,5 @@ namespace bayesnet {
         }
         return result;
     }
-    double Network::mutual_info(torch::Tensor& first, torch::Tensor& second)
-    {
-        return 1;
-    }
-    torch::Tensor Network::conditionalEdgeWeight()
-    {
-        auto result = vector<double>();
-        auto source = vector<string>(features);
-        source.push_back(className);
-        auto combinations = nodes[className]->combinations(source);
-        auto margin = nodes[className]->getCPT();
-        for (auto [first, second] : combinations) {
-            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
-            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
-            double accumulated = 0;
-            for (int value = 0; value < classNumStates; ++value) {
-                auto mask = samples.index({ "...", -1 }) == value;
-                auto first_dataset = samples.index({ mask, index_first });
-                auto second_dataset = samples.index({ mask, index_second });
-                auto mi = mutualInformation(first_dataset, second_dataset);
-                auto pb = margin[value].item<float>();
-                accumulated += pb * mi;
-            }
-            result.push_back(accumulated);
-        }
-        long n_vars = source.size();
-        auto matrix = torch::zeros({ n_vars, n_vars });
-        auto indices = torch::triu_indices(n_vars, n_vars, 1);
-        for (auto i = 0; i < result.size(); ++i) {
-            auto x = indices[0][i];
-            auto y = indices[1][i];
-            matrix[x][y] = result[i];
-            matrix[y][x] = result[i];
-        }
-        return matrix;
-    }
-    double Network::entropy(torch::Tensor& feature)
-    {
-        torch::Tensor counts = feature.bincount();
-        int totalWeight = counts.sum().item<int>();
-        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
-        torch::Tensor logProbs = torch::log(probs);
-        torch::Tensor entropy = -probs * logProbs;
-        return entropy.nansum().item<double>();
-    }
-    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
-    double Network::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
-    {
-        int numSamples = firstFeature.sizes()[0];
-        torch::Tensor featureCounts = secondFeature.bincount();
-        unordered_map<int, unordered_map<int, double>> jointCounts;
-        double totalWeight = 0;
-        for (auto i = 0; i < numSamples; i++) {
-            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
-            totalWeight += 1;
-        }
-        if (totalWeight == 0)
-            throw invalid_argument("Total weight should not be zero");
-        double entropyValue = 0;
-        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
-            double p_f = featureCounts[value].item<double>() / totalWeight;
-            double entropy_f = 0;
-            for (auto& [label, jointCount] : jointCounts[value]) {
-                double p_l_f = jointCount / featureCounts[value].item<double>();
-                if (p_l_f > 0) {
-                    entropy_f -= p_l_f * log(p_l_f);
-                } else {
-                    entropy_f = 0;
-                }
-            }
-            entropyValue += p_f * entropy_f;
-        }
-        return entropyValue;
-    }
-    // I(X;Y) = H(Y) - H(Y|X)
-    double Network::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
-    {
-        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
-    }
+
 }
diff --git a/src/Network.h b/src/Network.h
index 0ba6783..42403a7 100644
--- a/src/Network.h
+++ b/src/Network.h
@@ -15,6 +15,7 @@ namespace bayesnet {
     vector<string> features;
     string className;
     int laplaceSmoothing;
+    torch::Tensor samples;
     bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
     vector<int> predict_sample(const vector<int>&);
     vector<double> exactInference(map<string, int>&);
@@ -24,12 +25,12 @@ namespace bayesnet {
     double conditionalEntropy(torch::Tensor&, torch::Tensor&);
     double mutualInformation(torch::Tensor&, torch::Tensor&);
 public:
-    torch::Tensor samples;
     Network();
     Network(float, int);
     Network(float);
     Network(Network&);
     ~Network();
+    torch::Tensor& getSamples();
     float getmaxThreads();
     void addNode(string, int);
     void addEdge(const string, const string);