23 Commits

Author SHA1 Message Date
7f5ea1ab1e Refactor library 2023-07-19 16:16:15 +02:00
168cc368ee Complete CPP model integration 2023-07-18 23:39:50 +02:00
d1cafc230b Fix some small mistakes 2023-07-13 17:11:08 +02:00
99083ceede Fix KDB algorithm argsort 2023-07-13 16:59:37 +02:00
64f1500176 Refactor cpp library methods 2023-07-12 12:59:02 +02:00
aef22306ef Complete refactor of KDB with BayesNet library 2023-07-12 12:07:01 +02:00
2ff38f73e7 refactor conditionalEdgeWeights 2023-07-12 11:20:05 +02:00
1af3edd050 Adding Metrics 2023-07-12 03:24:40 +02:00
8b6624e08a Add getStates 2023-07-11 21:28:29 +02:00
36cc875615 Refator kdb with new BayesNetwork 2023-07-08 10:40:33 +02:00
260997c872 transpose dimensions of X in BayesNetwork 2023-07-08 01:13:29 +02:00
8a9c86a22d Update BayesNetwork class 2023-07-08 00:39:10 +02:00
4bad5ccfee Complete integration with BayesNet 2023-07-07 19:19:52 +02:00
5866e19fae Add predict_proba 2023-07-07 00:36:14 +02:00
61e4c176eb First try to link with bayesnet 2023-07-07 00:23:47 +02:00
ea473fc604 First complete boostAODE 2023-06-26 10:09:28 +02:00
9d7e787f6c Finish cppSelectFeatures 2023-06-23 20:07:26 +02:00
d7425e5af0 Remove unneeded small value added to logs 2023-06-23 01:25:23 +02:00
30cc744033 Chcked mutual_info with sklearn 2023-06-23 01:21:24 +02:00
0094d500d4 Begin cython structure 2023-06-22 17:56:34 +02:00
99321043ec Complete feature_selection with weighted entropy 2023-06-21 16:40:29 +02:00
fbaa5eb7d3 continue feature selection 2023-06-21 14:42:33 +02:00
0b27d9d9b0 Begin implementation 2023-06-21 11:27:14 +02:00
36 changed files with 10356 additions and 70 deletions

7
CMakeLists.txt Normal file

@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(feature)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_BUILD_TYPE Debug)
add_executable(feature bayesclass/cpp/FeatureSelect.cpp)

MANIFEST.in

@@ -1 +1,13 @@
 include README.md LICENSE
+include bayesclass/cpp/FeatureSelect.h
+include bayesclass/cpp/Node.h
+include bayesclass/cpp/Mst.h
+include bayesclass/cpp/Network.h
+include bayesclass/cpp/Metrics.hpp
+include bayesclass/cpp/BaseClassifier.h
+include bayesclass/cpp/Ensemble.h
+include bayesclass/cpp/TAN.h
+include bayesclass/cpp/KDB.h
+include bayesclass/cpp/SPODE.h
+include bayesclass/cpp/AODE.h
+include bayesclass/cpp/utils.h

Makefile

@@ -16,6 +16,10 @@ lint: ## Lint and static-check
	flake8 bayesclass
	mypy bayesclass

+feature: ## compile FeatureSelect
+	cmake -B build feature

 push: ## Push code with tags
	git push && git push --tags


@@ -0,0 +1 @@
#error Do not use this file, it is the result of a failed Cython compilation.

177
bayesclass/BayesNetwork.pyx Normal file

@@ -0,0 +1,177 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np

cdef extern from "cpp/Network.h" namespace "bayesnet":
    cdef cppclass Network:
        Network(float, float) except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        void addNode(string, int)
        void addEdge(string, string) except +
        vector[string] getFeatures()
        int getClassNumStates()
        int getStates()
        string getClassName()
        string version()
        void show()

cdef class BayesNetwork:
    cdef Network *thisptr
    def __cinit__(self, maxThreads=0.8, laplaceSmooth=1.0):
        self.thisptr = new Network(maxThreads, laplaceSmooth)
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr.fit(X_, y, features_bytes, className.encode())
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def predict_proba(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict_proba(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def addNode(self, name, states):
        self.thisptr.addNode(str.encode(name), states)
    def addEdge(self, source, destination):
        self.thisptr.addEdge(str.encode(source), str.encode(destination))
    def getFeatures(self):
        res = self.thisptr.getFeatures()
        return [x.decode() for x in res]
    def getStates(self):
        return self.thisptr.getStates()
    def getClassName(self):
        return self.thisptr.getClassName().decode()
    def getClassNumStates(self):
        return self.thisptr.getClassNumStates()
    def show(self):
        return self.thisptr.show()
    def __reduce__(self):
        return (BayesNetwork, ())

cdef extern from "cpp/Metrics.hpp" namespace "bayesnet":
    cdef cppclass Metrics:
        Metrics(vector[vector[int]], vector[int], vector[string]&, string&, int) except +
        vector[float] conditionalEdgeWeights()

cdef class CMetrics:
    cdef Metrics *thisptr
    def __cinit__(self, X, y, features, className, classStates):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr = new Metrics(X_, y, features_bytes, className.encode(), classStates)
    def __dealloc__(self):
        del self.thisptr
    def conditionalEdgeWeights(self, n_vars):
        return np.reshape(self.thisptr.conditionalEdgeWeights(), (n_vars, n_vars))
    def __reduce__(self):
        return (CMetrics, ())

cdef extern from "cpp/TAN.h" namespace "bayesnet":
    cdef cppclass CTAN:
        CTAN() except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef extern from "cpp/KDB.h" namespace "bayesnet":
    cdef cppclass CKDB:
        CKDB(int) except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef extern from "cpp/AODE.h" namespace "bayesnet":
    cdef cppclass CAODE:
        CAODE() except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef class TAN:
    cdef CTAN *thisptr
    def __cinit__(self):
        self.thisptr = new CTAN()
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (TAN, ())
cdef class KDB:
    cdef CKDB *thisptr
    def __cinit__(self, k):
        self.thisptr = new CKDB(k)
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (KDB, ())

cdef class AODE:
    cdef CAODE *thisptr
    def __cinit__(self):
        self.thisptr = new CAODE()
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (AODE, ())
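
A minimal usage sketch of the BayesNetwork wrapper above (not part of the diff): it assumes discretized integer inputs, takes the module path from the "from .BayesNet import BayesNetwork" line added to the classifier module below, and uses made-up data.

import numpy as np
from bayesclass.BayesNet import BayesNetwork

# four samples of two discrete features, binary class
X = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
y = np.array([0, 1, 0, 1])

net = BayesNetwork()
net.addNode("f0", 2)         # feature node with 2 states
net.addNode("f1", 2)
net.addNode("class", 2)      # class node with 2 states
net.addEdge("class", "f0")   # naive-Bayes-style arcs
net.addEdge("class", "f1")
net.fit(X, y, ["f0", "f1"], "class")
print(net.predict(X))        # hard class labels
print(net.predict_proba(X))  # per-class posteriors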

bayesclass/_version.py

@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"

File diff suppressed because it is too large

33
bayesclass/cppSelectFeatures.pyx Normal file

@@ -0,0 +1,33 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp cimport bool

cdef extern from "cpp/FeatureSelect.h" namespace "features":
    ctypedef float precision_t
    cdef cppclass SelectKBestWeighted:
        SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
        void fit()
        string version()
        vector[precision_t] getScores()
        vector[int] getFeatures()

cdef class CSelectKBestWeighted:
    cdef SelectKBestWeighted *thisptr
    def __cinit__(self, X, y, weights, k, natural=False):  # natural log or log2
        self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
    def __dealloc__(self):
        del self.thisptr
    def fit(self):
        self.thisptr.fit()
        return self
    def get_scores(self):
        return self.thisptr.getScores()
    def get_features(self):
        return self.thisptr.getFeatures()
    def get_version(self):
        return self.thisptr.version()
    def __reduce__(self):
        return (CSelectKBestWeighted, ())
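
The wrapper is used much like sklearn's SelectKBest, except that each sample carries a weight. A sketch with uniform weights, where the weighted criterion reduces to plain mutual information ranking (data made up for illustration):

import numpy as np
from bayesclass.cppSelectFeatures import CSelectKBestWeighted

X = np.array([[0, 1, 1], [1, 0, 1], [0, 0, 0], [1, 1, 0]])
y = [0, 1, 0, 1]
weights = [0.25] * 4  # one weight per sample

selector = CSelectKBestWeighted(X, y, weights, 2).fit()
print(selector.get_features())  # indices of the 2 best features
print(selector.get_scores())    # their weighted MI scores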

bayesclass/clfs.py

@@ -4,8 +4,8 @@ import numpy as np
import pandas as pd
from scipy.stats import mode
from sklearn.base import clone, ClassifierMixin, BaseEstimator
-from sklearn.feature_selection import SelectKBest
from sklearn.ensemble import BaseEnsemble
+from sklearn.feature_selection import mutual_info_classif
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.feature_selection import mutual_info_classif
@@ -15,6 +15,8 @@ from pgmpy.models import BayesianNetwork
from pgmpy.base import DAG
import matplotlib.pyplot as plt
from fimdlp.mdlp import FImdlp
+from .cppSelectFeatures import CSelectKBestWeighted
+from .BayesNet import BayesNetwork, CMetrics
from ._version import __version__
@@ -93,7 +95,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
    @property
    def states_(self):
        if hasattr(self, "fitted_"):
-            return sum([len(item) for _, item in self.model_.states.items()])
+            return self.states_computed_
        return 0

    @property
@@ -142,7 +144,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        # Store the information needed to build the model
        self.build_dataset()
        # Build the DAG
-        self._build()
+        self._build(kwargs)
        # Train the model
        self._train(kwargs)
        self.fitted_ = True
@@ -151,11 +153,14 @@
        # Return the classifier
        return self

-    def _build(self):
-        """This method should be implemented by the subclasses to
-        build the DAG
-        """
-        ...
+    def _build(self, kwargs):
+        self.model_ = BayesNetwork()
+        features = kwargs["features"]
+        states = kwargs["state_names"]
+        for feature in features:
+            self.model_.addNode(feature, len(states[feature]))
+        class_name = kwargs["class_name"]
+        self.model_.addNode(class_name, max(self.y_) + 1)

    def _train(self, kwargs):
        """Build and train a BayesianNetwork from the DAG and the dataset
@@ -165,17 +170,24 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        kwargs : dict
            fit parameters
        """
-        self.model_ = BayesianNetwork(
-            self.dag_.edges(), show_progress=self.show_progress
-        )
-        states = dict(state_names=kwargs.pop("state_names", []))
-        self.model_.fit(
-            self.dataset_,
-            estimator=BayesianEstimator,
-            prior_type="K2",
-            weighted=self.weighted_,
-            **states,
-        )
+        # self.model_ = BayesianNetwork(
+        #     self.dag_.edges(), show_progress=self.show_progress
+        # )
+        # states = dict(state_names=kwargs.pop("state_names", []))
+        # self.model_.fit(
+        #     self.dataset_,
+        #     estimator=BayesianEstimator,
+        #     prior_type="K2",
+        #     weighted=self.weighted_,
+        #     **states,
+        # )
+        features = kwargs["features"]
+        class_name = kwargs["class_name"]
+        for source, destination in self.edges_:
+            self.model_.addEdge(source, destination)
+        self.model_.fit(self.X_, self.y_, features, class_name)
+        self.states_computed_ = self.model_.getStates()

    def predict(self, X):
        """A reference implementation of a prediction for a classifier.
@@ -227,10 +239,11 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        check_is_fitted(self, ["X_", "y_", "fitted_"])
        # Input validation
        X = check_array(X)
-        dataset = pd.DataFrame(
-            X, columns=self.feature_names_in_, dtype=np.int32
-        )
-        return self.model_.predict(dataset).values.ravel()
+        # dataset = pd.DataFrame(
+        #     X, columns=self.feature_names_in_, dtype=np.int32
+        # )
+        # return self.model_.predict(dataset).values.ravel()
+        return self.model_.predict(X)

    def plot(self, title="", node_size=800):
        warnings.simplefilter("ignore", UserWarning)
@@ -293,7 +306,7 @@ class TAN(BayesBase):
            raise ValueError("Head index out of range")
        return X, y

-    def _build(self):
+    def _build(self, kwargs):
        est = TreeSearch(
            self.dataset_, root_node=self.feature_names_in_[self.head_]
        )
@@ -346,7 +359,7 @@ class KDB(BayesBase):
        ]
        return self._check_params_fit(X, y, expected_args, kwargs)

-    def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
+    def _add_m_edges(self, idx, S_nodes, conditional_weights):
        n_edges = min(self.k, len(S_nodes))
        cond_w = conditional_weights.copy()
        exit_cond = self.k == 0
@@ -355,7 +368,7 @@ class KDB(BayesBase):
            max_minfo = np.argmax(cond_w[idx, :])
            if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
                try:
-                    dag.add_edge(
+                    self.model_.addEdge(
                        self.feature_names_in_[max_minfo],
                        self.feature_names_in_[idx],
                    )
@@ -366,9 +379,9 @@
            cond_w[idx, max_minfo] = -1
            exit_cond = num == n_edges or np.all(cond_w[idx, :] <= self.theta)

-    def _build(self):
+    def _build(self, kwargs):
        """
-        1. For each feature Xi, compute mutual information, I(X;;C),
+        1. For each feature Xi, compute mutual information, I(X;C),
           where C is the class.
        2. Compute class conditional mutual information I(Xi;Xj|C), for each
           pair of features Xi and Xj, where i != j.
@@ -389,29 +402,38 @@ class KDB(BayesBase):
        # 1. get the mutual information between each feature and the class
        mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
        # 2. symmetric matrix where each element represents I(X, Y| class_node)
-        conditional_weights = TreeSearch(
-            self.dataset_
-        )._get_conditional_weights(
-            self.dataset_, self.class_name_, show_progress=self.show_progress
-        )
+        metrics = CMetrics(
+            self.X_,
+            self.y_,
+            self.features_,
+            self.class_name_,
+            self.n_classes_,
+        )
+        conditional_weights = metrics.conditionalEdgeWeights(
+            self.n_features_in_ + 1
+        )
        # 3. Let the used variable list, S, be empty.
        S_nodes = []
+        num_states = {
+            feature: len(states)
+            for feature, states in kwargs["state_names"].items()
+        }
        # 4. Let the DAG being constructed, BN, begin with a single class node
-        dag = BayesianNetwork(show_progress=self.show_progress)
-        dag.add_node(self.class_name_)  # , state_names=self.classes_)
+        self.model_ = BayesNetwork()
+        self.model_.addNode(self.class_name_, self.n_classes_)
        # 5. Repeat until S includes all domain features
        # 5.1 Select feature Xmax which is not in S and has the largest value
-        for idx in np.argsort(mutual):
+        for idx in np.argsort(-mutual):
            # 5.2 Add a node to BN representing Xmax.
            feature = self.feature_names_in_[idx]
-            dag.add_node(feature)
+            self.model_.addNode(feature, num_states[feature])
            # 5.3 Add an arc from C to Xmax in BN.
-            dag.add_edge(self.class_name_, feature)
+            self.model_.addEdge(self.class_name_, feature)
            # 5.4 Add m = min(|S|, k) arcs from m distinct features Xj in S
-            self._add_m_edges(dag, idx, S_nodes, conditional_weights)
+            self._add_m_edges(idx, S_nodes, conditional_weights)
            # 5.5 Add Xmax to S.
            S_nodes.append(idx)
-        self.dag_ = dag
+        self.edges_ = []
def build_spodes(features, class_name): def build_spodes(features, class_name):
@@ -806,7 +828,7 @@ class BoostSPODE(BayesBase):
        ]
        return self._check_params_fit(X, y, expected_args, kwargs)

-    def _build(self):
+    def _build(self, _):
        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
        feature_edges = [
            (self.sparent_, f)
@@ -818,7 +840,6 @@ class BoostSPODE(BayesBase):
    def _train(self, kwargs):
        states = dict(state_names=kwargs.get("state_names", []))
-        breakpoint()
        self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
        self.model_.fit(
            self.dataset_,
@@ -835,11 +856,9 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
        show_progress=False,
        random_state=None,
        estimator=None,
-        n_estimators=10,
    ):
        self.show_progress = show_progress
        self.random_state = random_state
-        self.n_estimators = n_estimators
        super().__init__(estimator=estimator)

    def _validate_estimator(self) -> None:
@@ -868,26 +887,67 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
        self.nodes_leaves = self.nodes_edges
        return self

+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
+
    def _train(self, kwargs):
        """Build boosted SPODEs"""
        weights = [1 / self.n_samples_] * self.n_samples_
+        selected_features = []
        # Step 0: Set the finish condition
-        for num in range(self.n_estimators):
+        for _ in range(self.n_features_in_):
            # Step 1: Build ranking with mutual information
-            # WRONG: this does not update the ranking with the weights,
-            # it will always return the same result
-            feature = (
-                SelectKBest(k=1)
-                .fit(self.X_, self.y_)
-                .get_feature_names_out(self.feature_names_in_)
-                .tolist()[0]
-            )
+            features = (
+                CSelectKBestWeighted(
+                    self.X_, self.y_, weights, k=self.n_features_in_
+                )
+                .fit()
+                .get_features()
+            )
+            # Step 1.1: Select the feature to become the sparent
+            for n_feature in features:
+                if n_feature not in selected_features:
+                    selected_features.append(n_feature)
+                    break
+            feature = self.feature_names_in_[n_feature]
            # Step 2: Build & train spode with the first feature as sparent
            estimator = clone(self.estimator_)
            _args = kwargs.copy()
            _args["sparent"] = feature
            _args["sample_weight"] = weights
            _args["weighted"] = True
-            # print("I'm gonna build a spode with", feature)
            # Step 2.1: build dataset
            # Step 2.2: Train the model
            estimator.fit(self.X_, self.y_, **_args)
@@ -898,18 +958,17 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
            am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
            # Step 3.2: Update weights for next classifier
            weights = [
-                wm * np.exp(am * (ym != y_pred))
-                for wm, ym in zip(weights, self.y_)
+                wm * np.exp(am * (ym != yp))
+                for wm, ym, yp in zip(weights, self.y_, y_pred)
            ]
            # Step 4: Add the new model
            self.estimators_.append(estimator)
-        """
-        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
-        feature_edges = [
-            (sparent, f) for f in self.feature_names_in_ if f != sparent
-        ]
-        self.weights_ = weights.copy() if weights is not None else None
-        feature_edges.extend(class_edges)
-        self.model_ = BayesianNetwork(feature_edges, show_progress=False)
-        return self.model_
-        """
+        self.weights_ = weights
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        n_samples = X.shape[0]
+        n_estimators = len(self.estimators_)
+        result = np.empty((n_samples, n_estimators))
+        for index, estimator in enumerate(self.estimators_):
+            result[:, index] = estimator.predict(X)
+        return mode(result, axis=1, keepdims=False).mode.ravel()

16
bayesclass/cpp/AODE.cc Normal file

@@ -0,0 +1,16 @@
#include "AODE.h"
namespace bayesnet {
AODE::AODE() : Ensemble() {}
void AODE::train()
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
}
vector<string> AODE::graph(string title)
{
return Ensemble::graph(title);
}
}

14
bayesclass/cpp/AODE.h Normal file

@@ -0,0 +1,14 @@
#ifndef AODE_H
#define AODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
class AODE : public Ensemble {
protected:
void train() override;
public:
AODE();
vector<string> graph(string title = "AODE");
};
}
#endif

127
bayesclass/cpp/BaseClassifier.cc Normal file

@@ -0,0 +1,127 @@
#include "BaseClassifier.h"
#include "utils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
this->states = states;
checkFitParameters();
auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes);
train();
model.fit(Xv, yv, features, className);
fitted = true;
return *this;
}
BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
Xv = X;
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
}
this->y = torch::tensor(y, kInt64);
yv = y;
return build(features, className, states);
}
void BaseClassifier::checkFitParameters()
{
auto sizes = X.sizes();
m = sizes[0];
n = sizes[1];
if (m != y.size(0)) {
throw invalid_argument("X and y must have the same number of samples");
}
if (n != features.size()) {
throw invalid_argument("X and features must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
}
for (auto feature : features) {
if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states");
}
}
}
Tensor BaseClassifier::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto m_ = X.size(0);
auto n_ = X.size(1);
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
auto temp = X.index({ "...", i });
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
}
auto yp = model.predict(Xd);
auto ypred = torch::tensor(yp, torch::kInt64);
return ypred;
}
vector<int> BaseClassifier::predict(vector<vector<int>>& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto m_ = X[0].size();
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
auto yp = model.predict(Xd);
return yp;
}
float BaseClassifier::score(Tensor& X, Tensor& y)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
float BaseClassifier::score(vector<vector<int>>& X, vector<int>& y)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto m_ = X[0].size();
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
return model.score(Xd, y);
}
vector<string> BaseClassifier::show()
{
return model.show();
}
void BaseClassifier::addNodes()
{
// Add all nodes to the network
for (auto feature : features) {
model.addNode(feature, states[feature].size());
}
model.addNode(className, states[className].size());
}
int BaseClassifier::getNumberOfNodes()
{
// Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0;
}
int BaseClassifier::getNumberOfEdges()
{
return fitted ? model.getEdges().size() : 0;
}
}

48
bayesclass/cpp/BaseClassifier.h Normal file

@@ -0,0 +1,48 @@
#ifndef CLASSIFIERS_H
#define CLASSIFIERS_H
#include <torch/torch.h>
#include "Network.h"
#include "Metrics.hpp"
using namespace std;
using namespace torch;
namespace bayesnet {
class BaseClassifier {
private:
bool fitted;
BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected:
Network model;
int m, n; // m: number of samples, n: number of features
Tensor X;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
void checkFitParameters();
virtual void train() = 0;
public:
BaseClassifier(Network model);
virtual ~BaseClassifier() = default;
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
void addNodes();
int getNumberOfNodes();
int getNumberOfEdges();
Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y);
float score(vector<vector<int>>& X, vector<int>& y);
vector<string> show();
virtual vector<string> graph(string title) = 0;
};
}
#endif

112
bayesclass/cpp/Ensemble.cc Normal file

@@ -0,0 +1,112 @@
#include "Ensemble.h"
namespace bayesnet {
using namespace std;
using namespace torch;
Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
dataset = cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
this->states = states;
auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes);
// Build models
train();
// Train models
n_models = models.size();
for (auto i = 0; i < n_models; ++i) {
models[i]->fit(Xv, yv, features, className, states);
}
fitted = true;
return *this;
}
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
Xv = X;
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
}
this->y = torch::tensor(y, kInt64);
yv = y;
return build(features, className, states);
}
Tensor Ensemble::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, models[i]->predict(X));
}
return torch::tensor(voting(y_pred));
}
vector<int> Ensemble::voting(Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int64_t, 2>();
vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(states[className].size(), 0);
for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1;
}
auto indices = argsort(votes);
y_pred_final.push_back(indices[0]);
}
return y_pred_final;
}
vector<int> Ensemble::predict(vector<vector<int>>& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
long m_ = X[0].size();
long n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64));
}
return voting(y_pred);
}
float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == y[i]) {
correct++;
}
}
return (double)correct / y_pred.size();
}
vector<string> Ensemble::show()
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->show();
result.insert(result.end(), res.begin(), res.end());
}
return result;
}
vector<string> Ensemble::graph(string title)
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->graph(title + "_" + to_string(i));
result.insert(result.end(), res.begin(), res.end());
}
return result;
}
}
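
Ensemble::voting gives each model one unweighted vote per sample and returns the most voted class; ties fall to whichever index the sort yields first (argsort comes from utils.h and is assumed here to sort descending). The same logic in NumPy:

import numpy as np

y_pred = np.array([[0, 1, 1],
                   [2, 2, 0]])  # 2 samples x 3 models
votes = np.apply_along_axis(np.bincount, 1, y_pred, minlength=3)
print(votes.argmax(axis=1))     # -> [1 2]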

42
bayesclass/cpp/Ensemble.h Normal file

@@ -0,0 +1,42 @@
#ifndef ENSEMBLE_H
#define ENSEMBLE_H
#include <torch/torch.h>
#include "BaseClassifier.h"
#include "Metrics.hpp"
#include "utils.h"
using namespace std;
using namespace torch;
namespace bayesnet {
class Ensemble {
private:
bool fitted;
long n_models;
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected:
vector<unique_ptr<BaseClassifier>> models;
int m, n; // m: number of samples, n: number of features
Tensor X;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
void virtual train() = 0;
vector<int> voting(Tensor& y_pred);
public:
Ensemble();
virtual ~Ensemble() = default;
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y);
float score(vector<vector<int>>& X, vector<int>& y);
vector<string> show();
vector<string> graph(string title);
};
}
#endif

118
bayesclass/cpp/FeatureSelect.cc Normal file

@@ -0,0 +1,118 @@
#include "FeatureSelect.h"
namespace features {
SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat)
: samples(samples), labels(labels), weights(weights), k(k), nat(nat)
{
if (samples.size() == 0 || samples[0].size() == 0)
throw invalid_argument("features must be a non-empty matrix");
if (samples.size() != labels.size())
throw invalid_argument("number of samples and labels must be equal");
if (samples.size() != weights.size())
throw invalid_argument("number of samples and weights must be equal");
if (k < 1 || k > static_cast<int>(samples[0].size()))
throw invalid_argument("k must be between 1 and number of features");
numFeatures = 0;
numClasses = 0;
numSamples = 0;
fitted = false;
}
void SelectKBestWeighted::fit()
{
auto labelsCopy = labels;
numFeatures = samples[0].size();
numSamples = samples.size();
// compute number of classes
sort(labelsCopy.begin(), labelsCopy.end());
auto last = unique(labelsCopy.begin(), labelsCopy.end());
labelsCopy.erase(last, labelsCopy.end());
numClasses = labelsCopy.size();
// compute scores
scores.reserve(numFeatures);
for (int i = 0; i < numFeatures; ++i) {
scores.push_back(MutualInformation(i));
features.push_back(i);
}
// sort & reduce scores and features
sort(features.begin(), features.end(), [&](int i, int j)
{ return scores[i] > scores[j]; });
sort(scores.begin(), scores.end(), greater<precision_t>());
features.resize(k);
scores.resize(k);
fitted = true;
}
precision_t SelectKBestWeighted::entropyLabel()
{
return entropy(labels);
}
precision_t SelectKBestWeighted::entropy(const sample_t& data)
{
precision_t ventropy = 0, totalWeight = 0;
score_t counts(numClasses + 1, 0);
for (auto i = 0; i < static_cast<int>(data.size()); ++i) {
counts[data[i]] += weights[i];
totalWeight += weights[i];
}
for (auto count : counts) {
precision_t p = count / totalWeight;
if (p > 0) {
if (nat) {
ventropy -= p * log(p);
} else {
ventropy -= p * log2(p);
}
}
}
return ventropy;
}
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
precision_t SelectKBestWeighted::conditionalEntropy(const int feature)
{
unordered_map<value_t, precision_t> featureCounts;
unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
featureCounts.clear();
jointCounts.clear();
precision_t totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
featureCounts[samples[i][feature]] += weights[i];
jointCounts[samples[i][feature]][labels[i]] += weights[i];
totalWeight += weights[i];
}
if (totalWeight == 0)
throw invalid_argument("Total weight should not be zero");
precision_t entropy = 0;
for (auto& [feat, count] : featureCounts) {
auto p_f = count / totalWeight;
precision_t entropy_f = 0;
for (auto& [label, jointCount] : jointCounts[feat]) {
auto p_l_f = jointCount / count;
if (p_l_f > 0) {
if (nat) {
entropy_f -= p_l_f * log(p_l_f);
} else {
entropy_f -= p_l_f * log2(p_l_f);
}
}
}
entropy += p_f * entropy_f;
}
return entropy;
}
// I(X;Y) = H(Y) - H(Y|X)
precision_t SelectKBestWeighted::MutualInformation(const int i)
{
return entropyLabel() - conditionalEntropy(i);
}
score_t SelectKBestWeighted::getScores() const
{
if (!fitted)
throw logic_error("score not fitted");
return scores;
}
//Return the indices of the selected features
labels_t SelectKBestWeighted::getFeatures() const
{
if (!fitted)
throw logic_error("score not fitted");
return features;
}
}
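
In symbols, the selector scores each feature X with the weighted mutual information I(X;Y) = H(Y) - H(Y|X), where every count is a sum of sample weights rather than 1. A NumPy restatement of the same computation (helper names are illustrative, log2 variant):

import numpy as np

def w_entropy(values, w):
    p = np.bincount(values, weights=w) / w.sum()
    p = p[p > 0]
    return -(p * np.log2(p)).sum()

def w_conditional_entropy(x, y, w):
    # H(Y|X) = sum_x p(x) H(Y|X=x), with weighted counts
    h = 0.0
    for v in np.unique(x):
        mask = x == v
        h += w[mask].sum() / w.sum() * w_entropy(y[mask], w[mask])
    return h

x = np.array([0, 0, 1, 1])
y = np.array([0, 0, 1, 0])
w = np.ones(4)
print(w_entropy(y, w) - w_conditional_entropy(x, y, w))  # weighted I(X;Y)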

38
bayesclass/cpp/FeatureSelect.h Normal file

@@ -0,0 +1,38 @@
#ifndef SELECT_K_BEST_WEIGHTED_H
#define SELECT_K_BEST_WEIGHTED_H
#include <map>
#include <vector>
#include <string>
using namespace std;
namespace features {
typedef float precision_t;
typedef int value_t;
typedef vector<value_t> sample_t;
typedef vector<sample_t> samples_t;
typedef vector<value_t> labels_t;
typedef vector<precision_t> score_t, weights_t;
class SelectKBestWeighted {
private:
const samples_t samples;
const labels_t labels;
const weights_t weights;
const int k;
bool nat; // use natural log or log2
int numFeatures, numClasses, numSamples;
bool fitted;
score_t scores; // scores of the features
labels_t features; // indices of the selected features
precision_t entropyLabel();
precision_t entropy(const sample_t&);
precision_t conditionalEntropy(const int);
precision_t MutualInformation(const int);
public:
SelectKBestWeighted(samples_t&, labels_t&, weights_t&, int, bool);
void fit();
score_t getScores() const;
labels_t getFeatures() const; //Return the indices of the selected features
static inline string version() { return "0.1.0"; };
};
}
#endif

90
bayesclass/cpp/KDB.cc Normal file

@@ -0,0 +1,90 @@
#include "KDB.h"
namespace bayesnet {
using namespace std;
using namespace torch;
KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {}
void KDB::train()
{
/*
1. For each feature Xi, compute mutual information, I(X;C),
where C is the class.
2. Compute class conditional mutual information I(Xi;Xj|C), for each
pair of features Xi and Xj, where i != j.
3. Let the used variable list, S, be empty.
4. Let the DAG network being constructed, BN, begin with a single
class node, C.
5. Repeat until S includes all domain features
5.1. Select feature Xmax which is not in S and has the largest value
I(Xmax;C).
5.2. Add a node to BN representing Xmax.
5.3. Add an arc from C to Xmax in BN.
5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
the highest value for I(Xmax;Xj|C).
5.5. Add Xmax to S.
Compute the conditional probability inferred by the structure of BN by
using counts from DB, and output BN.
*/
// 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class.
vector <float> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = X.index({ "...", i });
mi.push_back(metrics.mutualInformation(firstFeature, y));
}
// 2. Compute class conditional mutual information I(Xi;Xj|C), for each
auto conditionalEdgeWeights = metrics.conditionalEdge();
// 3. Let the used variable list, S, be empty.
vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
model.addNode(className, states[className].size());
// 5. Repeat until S includes all domain features
// 5.1. Select feature Xmax which is not in S and has the largest value
// I(Xmax;C).
auto order = argsort(mi);
for (auto idx : order) {
// 5.2. Add a node to BN representing Xmax.
model.addNode(features[idx], states[features[idx]].size());
// 5.3. Add an arc from C to Xmax in BN.
model.addEdge(className, features[idx]);
// 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
// the highest value for I(Xmax;X,jC).
add_m_edges(idx, S, conditionalEdgeWeights);
// 5.5. Add Xmax to S.
S.push_back(idx);
}
}
void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
{
auto n_edges = min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights);
bool exit_cond = k == 0;
int num = 0;
while (!exit_cond) {
auto max_minfo = argmax(cond_w.index({ idx, "..." })).item<int>();
auto belongs = find(S.begin(), S.end(), max_minfo) != S.end();
if (belongs && cond_w.index({ idx, max_minfo }).item<float>() > theta) {
try {
model.addEdge(features[max_minfo], features[idx]);
num++;
}
catch (const invalid_argument& e) {
// Loops are not allowed
}
}
cond_w.index_put_({ idx, max_minfo }, -1);
auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta);
auto candidates = candidates_mask.nonzero();
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
vector<string> KDB::graph(string title)
{
if (title == "KDB") {
title += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
}
return model.graph(title);
}
}

20
bayesclass/cpp/KDB.h Normal file

@@ -0,0 +1,20 @@
#ifndef KDB_H
#define KDB_H
#include "BaseClassifier.h"
#include "utils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class KDB : public BaseClassifier {
private:
int k;
float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights);
protected:
void train() override;
public:
KDB(int k, float theta = 0.03);
vector<string> graph(string name = "KDB") override;
};
}
#endif

131
bayesclass/cpp/Metrics.cc Normal file

@@ -0,0 +1,131 @@
#include "Metrics.hpp"
#include "Mst.h"
using namespace std;
namespace bayesnet {
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
: samples(samples)
, features(features)
, className(className)
, classNumStates(classNumStates)
{
}
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
: features(features)
, className(className)
, classNumStates(classNumStates)
{
samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
}
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{
vector<pair<string, string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
torch::Tensor Metrics::conditionalEdge()
{
auto result = vector<double>();
auto source = vector<string>(features);
source.push_back(className);
auto combinations = doCombinations(source);
// Compute class prior
auto margin = torch::zeros({ classNumStates });
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value;
margin[value] = mask.sum().item<float>() / samples.sizes()[0];
}
for (auto [first, second] : combinations) {
int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
double accumulated = 0;
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value;
auto first_dataset = samples.index({ mask, index_first });
auto second_dataset = samples.index({ mask, index_second });
auto mi = mutualInformation(first_dataset, second_dataset);
auto pb = margin[value].item<float>();
accumulated += pb * mi;
}
result.push_back(accumulated);
}
long n_vars = source.size();
auto matrix = torch::zeros({ n_vars, n_vars });
auto indices = torch::triu_indices(n_vars, n_vars, 1);
for (auto i = 0; i < result.size(); ++i) {
auto x = indices[0][i];
auto y = indices[1][i];
matrix[x][y] = result[i];
matrix[y][x] = result[i];
}
return matrix;
}
vector<float> Metrics::conditionalEdgeWeights()
{
auto matrix = conditionalEdge();
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v;
}
double Metrics::entropy(torch::Tensor& feature)
{
torch::Tensor counts = feature.bincount();
int totalWeight = counts.sum().item<int>();
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
torch::Tensor logProbs = torch::log(probs);
torch::Tensor entropy = -probs * logProbs;
return entropy.nansum().item<double>();
}
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
{
int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount();
unordered_map<int, unordered_map<int, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
totalWeight += 1;
}
if (totalWeight == 0)
throw invalid_argument("Total weight should not be zero");
double entropyValue = 0;
for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
double p_f = featureCounts[value].item<double>() / totalWeight;
double entropy_f = 0;
for (auto& [label, jointCount] : jointCounts[value]) {
double p_l_f = jointCount / featureCounts[value].item<double>();
if (p_l_f > 0) {
entropy_f -= p_l_f * log(p_l_f);
} else {
entropy_f = 0;
}
}
entropyValue += p_f * entropy_f;
}
return entropyValue;
}
// I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
{
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
}
/*
Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using
Kruskal algorithm
*/
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root)
{
auto result = vector<pair<int, int>>();
auto mst = MST(features, weights, root);
return mst.maximumSpanningTree();
}
}
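
conditionalEdge computes, for every pair of variables, the class-conditional mutual information I(Xi;Xj|C) = sum over c of P(c) * I(Xi;Xj|C=c): the samples are sliced per class value, the plain MI of each slice is taken, and the results are mixed by the class prior. An equivalent sketch using sklearn's mutual_info_score for the per-slice MI (natural log, matching the entropy above; the helper name is illustrative):

import numpy as np
from sklearn.metrics import mutual_info_score

def conditional_edge_weight(xi, xj, c):
    acc = 0.0
    for value in np.unique(c):
        mask = c == value
        acc += mask.mean() * mutual_info_score(xi[mask], xj[mask])
    return acc

xi = np.array([0, 1, 0, 1])
xj = np.array([0, 1, 1, 0])
c = np.array([0, 0, 1, 1])
print(conditional_edge_weight(xi, xj, c))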

28
bayesclass/cpp/Metrics.hpp Normal file

@@ -0,0 +1,28 @@
#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class Metrics {
private:
Tensor samples;
vector<string> features;
string className;
int classNumStates;
public:
Metrics() = default;
Metrics(Tensor&, vector<string>&, string&, int);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
double entropy(Tensor&);
double conditionalEntropy(Tensor&, Tensor&);
double mutualInformation(Tensor&, Tensor&);
vector<float> conditionalEdgeWeights();
Tensor conditionalEdge();
vector<pair<string, string>> doCombinations(const vector<string>&);
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
};
}
#endif

115
bayesclass/cpp/Mst.cc Normal file

@@ -0,0 +1,115 @@
#include "Mst.h"
#include <vector>
/*
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/
namespace bayesnet {
using namespace std;
Graph::Graph(int V)
{
parent = vector<int>(V);
for (int i = 0; i < V; i++)
parent[i] = i;
G.clear();
T.clear();
}
void Graph::addEdge(int u, int v, float wt)
{
G.push_back({ wt, { u, v } });
}
int Graph::find_set(int i)
{
// If i is the parent of itself
if (i == parent[i])
return i;
else
//else recursively find the parent of i
return find_set(parent[i]);
}
void Graph::union_set(int u, int v)
{
parent[u] = parent[v];
}
void Graph::kruskal_algorithm()
{
int i, uSt, vEd;
// sort the edges ordered on decreasing weight
sort(G.begin(), G.end(), [](auto& left, auto& right) {return left.first > right.first;});
for (i = 0; i < G.size(); i++) {
uSt = find_set(G[i].second.first);
vEd = find_set(G[i].second.second);
if (uSt != vEd) {
T.push_back(G[i]); // add to mst vector
union_set(uSt, vEd);
}
}
}
void Graph::display_mst()
{
cout << "Edge :" << " Weight" << endl;
for (int i = 0; i < T.size(); i++) {
cout << T[i].second.first << " - " << T[i].second.second << " : "
<< T[i].first;
cout << endl;
}
}
vector<pair<int, int>> reorder(vector<pair<float, pair<int, int>>> T, int root_original)
{
auto result = vector<pair<int, int>>();
auto visited = vector<int>();
auto nextVariables = unordered_set<int>();
nextVariables.emplace(root_original);
while (nextVariables.size() > 0) {
int root = *nextVariables.begin();
nextVariables.erase(nextVariables.begin());
for (int i = 0; i < T.size(); ++i) {
auto [weight, edge] = T[i];
auto [from, to] = edge;
if (from == root || to == root) {
visited.insert(visited.begin(), i);
if (from == root) {
result.push_back({ from, to });
nextVariables.emplace(to);
} else {
result.push_back({ to, from });
nextVariables.emplace(from);
}
}
}
// Remove visited
for (int i = 0; i < visited.size(); ++i) {
T.erase(T.begin() + visited[i]);
}
visited.clear();
}
if (T.size() > 0) {
for (int i = 0; i < T.size(); ++i) {
auto [weight, edge] = T[i];
auto [from, to] = edge;
result.push_back({ from, to });
}
}
return result;
}
MST::MST(vector<string>& features, Tensor& weights, int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree()
{
auto num_features = features.size();
Graph g(num_features);
// Make a complete graph
for (int i = 0; i < num_features - 1; ++i) {
for (int j = i; j < num_features; ++j) {
g.addEdge(i, j, weights[i][j].item<float>());
}
}
g.kruskal_algorithm();
auto mst = g.get_mst();
return reorder(mst, root);
}
}
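
reorder turns Kruskal's undirected tree into arcs directed away from the chosen root, so the result can feed a DAG. The same idea as a plain BFS (hypothetical helper, not the class above):

from collections import deque

def direct_from_root(edges, root):
    # build an undirected adjacency list, then orient edges outward from root
    adj = {}
    for u, v in edges:
        adj.setdefault(u, []).append(v)
        adj.setdefault(v, []).append(u)
    result, seen, queue = [], {root}, deque([root])
    while queue:
        u = queue.popleft()
        for v in adj.get(u, []):
            if v not in seen:
                seen.add(v)
                result.append((u, v))
                queue.append(v)
    return result

print(direct_from_root([(0, 1), (1, 2), (0, 3)], root=1))
# -> [(1, 0), (1, 2), (0, 3)]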

35
bayesclass/cpp/Mst.h Normal file

@@ -0,0 +1,35 @@
#ifndef MST_H
#define MST_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class MST {
private:
Tensor weights;
vector<string> features;
int root;
public:
MST() = default;
MST(vector<string>& features, Tensor& weights, int root);
vector<pair<int, int>> maximumSpanningTree();
};
class Graph {
private:
int V; // number of nodes in graph
vector <pair<float, pair<int, int>>> G; // vector for graph
vector <pair<float, pair<int, int>>> T; // vector for mst
vector<int> parent;
public:
Graph(int V);
void addEdge(int u, int v, float wt);
int find_set(int i);
void union_set(int u, int v);
void kruskal_algorithm();
void display_mst();
vector <pair<float, pair<int, int>>> get_mst() { return T; }
};
}
#endif

291
bayesclass/cpp/Network.cc Normal file

@@ -0,0 +1,291 @@
#include <thread>
#include <mutex>
#include "Network.h"
namespace bayesnet {
Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {}
Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
{
for (auto& pair : other.nodes) {
nodes[pair.first] = make_unique<Node>(*pair.second);
}
}
float Network::getmaxThreads()
{
return maxThreads;
}
torch::Tensor& Network::getSamples()
{
return samples;
}
void Network::addNode(string name, int numStates)
{
if (find(features.begin(), features.end(), name) == features.end()) {
features.push_back(name);
}
if (nodes.find(name) != nodes.end()) {
// if node exists update its number of states
nodes[name]->setNumStates(numStates);
return;
}
nodes[name] = make_unique<Node>(name, numStates);
}
vector<string> Network::getFeatures()
{
return features;
}
int Network::getClassNumStates()
{
return classNumStates;
}
int Network::getStates()
{
int result = 0;
for (auto& node : nodes) {
result += node.second->getNumStates();
}
return result;
}
string Network::getClassName()
{
return className;
}
bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack)
{
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{
visited.insert(nodeId);
recStack.insert(nodeId);
for (Node* child : nodes[nodeId]->getChildren()) {
if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
return true;
else if (recStack.find(child->getName()) != recStack.end())
return true;
}
}
recStack.erase(nodeId); // remove node from recursion stack before function ends
return false;
}
void Network::addEdge(const string parent, const string child)
{
if (nodes.find(parent) == nodes.end()) {
throw invalid_argument("Parent node " + parent + " does not exist");
}
if (nodes.find(child) == nodes.end()) {
throw invalid_argument("Child node " + child + " does not exist");
}
// Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get());
unordered_set<string> visited;
unordered_set<string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{
// remove problematic edge
nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
map<string, std::unique_ptr<Node>>& Network::getNodes()
{
return nodes;
}
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
{
features = featureNames;
this->className = className;
dataset.clear();
// Build dataset & tensor of samples
samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
for (int i = 0; i < featureNames.size(); ++i) {
dataset[featureNames[i]] = input_data[i];
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
}
dataset[className] = labels;
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
classNumStates = *max_element(labels.begin(), labels.end()) + 1;
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) {
maxThreadsRunning = 1;
}
vector<thread> threads;
mutex mtx;
condition_variable cv;
int activeThreads = 0;
int nextNodeIndex = 0;
while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
while (true) {
unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex;
lock.unlock();
pair.second->computeCPT(dataset, laplaceSmoothing);
lock.lock();
nodes[pair.first] = std::move(pair.second);
lock.unlock();
}
lock_guard<mutex> lock(mtx);
--activeThreads;
cv.notify_one();
});
++activeThreads;
}
for (auto& thread : threads) {
thread.join();
}
fitted = true;
}
vector<int> Network::predict(const vector<vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
}
vector<int> predictions;
vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
vector<double> classProbabilities = predict_sample(sample);
// Find the class with the maximum posterior probability
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
predictions.push_back(predictedClass);
}
return predictions;
}
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict_proba()");
}
vector<vector<double>> predictions;
vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
predictions.push_back(predict_sample(sample));
}
return predictions;
}
double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
{
vector<int> y_pred = predict(tsamples);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) {
correct++;
}
}
return (double)correct / y_pred.size();
}
vector<double> Network::predict_sample(const vector<int>& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size()) {
throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size()) + ")");
}
map<string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
result *= node.second->getFactorValue(completeEvidence);
}
return result;
}
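// Exact inference by enumeration (descriptive note): for each class value c the
// evidence is completed with class = c, the product of every node's factor
// P(x | parents(x)) is taken via computeFactor(), and the results are normalized:
//   P(c | evidence) = factor(evidence, c) / sum over c' of factor(evidence, c')
// Each class value is evaluated in its own thread below.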
vector<double> Network::exactInference(map<string, int>& evidence)
{
vector<double> result(classNumStates, 0.0);
vector<thread> threads;
mutex mtx;
for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = map<string, int>(evidence);
completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence);
lock_guard<mutex> lock(mtx);
result[i] = factor;
});
}
for (auto& thread : threads) {
thread.join();
}
// Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0);
for (double& value : result) {
value /= sum;
}
return result;
}
vector<string> Network::show()
{
vector<string> result;
// Draw the network
for (auto& node : nodes) {
string line = node.first + " -> ";
for (auto child : node.second->getChildren()) {
line += child->getName() + ", ";
}
result.push_back(line);
}
return result;
}
vector<string> Network::graph(string title)
{
auto output = vector<string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
string header = prefix + title + suffix;
output.push_back(header);
for (auto& node : nodes) {
auto result = node.second->graph(className);
output.insert(output.end(), result.begin(), result.end());
}
output.push_back("}\n");
return output;
}
vector<pair<string, string>> Network::getEdges()
{
auto edges = vector<pair<string, string>>();
for (const auto& node : nodes) {
auto head = node.first;
for (const auto& child : node.second->getChildren()) {
auto tail = child->getName();
edges.push_back({ head, tail });
}
}
return edges;
}
}
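A minimal usage sketch of the Network API above (not part of the commit; the node names, data and layout shown are illustrative assumptions only):

#include "Network.h"
using namespace bayesnet;

int main()
{
    Network net(0.8); // use up to 80% of the hardware threads while fitting
    // Declare each variable together with its number of discrete states
    net.addNode("A", 2);
    net.addNode("B", 2);
    net.addNode("class", 2);
    net.addEdge("class", "A");
    net.addEdge("class", "B");
    // fit() expects one vector per feature (feature-major layout), plus the labels
    std::vector<std::vector<int>> X = { { 0, 0, 1, 1 },   // feature A
                                        { 0, 1, 0, 1 } }; // feature B
    std::vector<int> y = { 0, 1, 1, 0 };
    net.fit(X, y, { "A", "B" }, "class");
    auto proba = net.predict_proba(X); // one vector of classNumStates probabilities per sample
    return 0;
}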

53
bayesclass/cpp/Network.h Normal file
View File

@@ -0,0 +1,53 @@
#ifndef NETWORK_H
#define NETWORK_H
#include "Node.h"
#include <map>
#include <vector>
namespace bayesnet {
class Network {
private:
map<string, std::unique_ptr<Node>> nodes;
map<string, vector<int>> dataset;
bool fitted;
float maxThreads;
int classNumStates;
vector<string> features;
string className;
int laplaceSmoothing;
torch::Tensor samples;
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&);
vector<double> exactInference(map<string, int>&);
double computeFactor(map<string, int>&);
double mutual_info(torch::Tensor&, torch::Tensor&);
double entropy(torch::Tensor&);
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
double mutualInformation(torch::Tensor&, torch::Tensor&);
public:
Network();
Network(float, int);
Network(float);
Network(Network&);
torch::Tensor& getSamples();
float getmaxThreads();
void addNode(string, int);
void addEdge(const string, const string);
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures();
int getStates();
vector<pair<string, string>> getEdges();
int getClassNumStates();
string getClassName();
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
vector<int> predict(const vector<vector<int>>&);
// Computes the matrix of conditional edge weights, i.e. the mutual information between every pair of features conditioned on the class
torch::Tensor conditionalEdgeWeight();
vector<vector<double>> predict_proba(const vector<vector<int>>&);
double score(const vector<vector<int>>&, const vector<int>&);
vector<string> show();
vector<string> graph(string title); // Returns a vector of strings representing the graph in graphviz format
inline string version() { return "0.1.0"; }
};
}
#endif

122
bayesclass/cpp/Node.cc Normal file
View File

@@ -0,0 +1,122 @@
#include "Node.h"
namespace bayesnet {
Node::Node(const std::string& name, int numStates)
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
{
}
string Node::getName() const
{
return name;
}
void Node::addParent(Node* parent)
{
parents.push_back(parent);
}
void Node::removeParent(Node* parent)
{
parents.erase(std::remove(parents.begin(), parents.end(), parent), parents.end());
}
void Node::removeChild(Node* child)
{
children.erase(std::remove(children.begin(), children.end(), child), children.end());
}
void Node::addChild(Node* child)
{
children.push_back(child);
}
vector<Node*>& Node::getParents()
{
return parents;
}
vector<Node*>& Node::getChildren()
{
return children;
}
int Node::getNumStates() const
{
return numStates;
}
void Node::setNumStates(int numStates)
{
this->numStates = numStates;
}
torch::Tensor& Node::getCPT()
{
return cpTable;
}
/*
The MinFill criterion is a heuristic for variable elimination: the variable
chosen is the one whose elimination adds the fewest edges to the graph,
i.e. the fewest edges needed to keep the graph triangulated once the
variable is removed. That number is counted for every candidate and the
variable with the minimum count is picked. Here the count is obtained as
the number of 2-element combinations of the node's neighbors.
*/
unsigned Node::minFill()
{
unordered_set<string> neighbors;
for (auto child : children) {
neighbors.emplace(child->getName());
}
for (auto parent : parents) {
neighbors.emplace(parent->getName());
}
auto source = vector<string>(neighbors.begin(), neighbors.end());
return combinations(source).size();
}
vector<pair<string, string>> Node::combinations(const vector<string>& source)
{
vector<pair<string, string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing)
{
// Get the dimensions of the CPT: the node's own states first, then one dimension per parent
dimensions.clear();
dimensions.push_back(numStates);
for (auto father : getParents()) {
dimensions.push_back(father->getNumStates());
}
// Create a tensor initialized with the Laplace smoothing value as the prior count
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
// Fill table with counts
for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates;
coordinates.push_back(torch::tensor(dataset[name][n_sample]));
for (auto father : getParents()) {
coordinates.push_back(torch::tensor(dataset[father->getName()][n_sample]));
}
// Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1);
}
// Normalize the counts
cpTable = cpTable / cpTable.sum(0);
}
float Node::getFactorValue(map<string, int>& evidence)
{
torch::List<c10::optional<torch::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(torch::tensor(evidence[name]));
for (auto parent : getParents()) {
coordinates.push_back(torch::tensor(evidence[parent->getName()]));
}
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(string className)
{
auto output = vector<string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n");
for (auto& child : children) {
output.push_back(name + " -> " + child->getName());
}
return output;
}
}
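A note on the CPT layout computeCPT() produces (an illustrative sketch, not part of the commit): for a node X with 2 states and a single parent P with 3 states, cpTable is a 2x3 tensor in which cpTable[x][p] == P(X = x | P = p), and every column sums to 1 because the counts are normalized over dim 0. getFactorValue() then simply indexes that tensor with the values found in the evidence map:

// given some Node node for the hypothetical variable "X" with parent "P"
std::map<std::string, int> evidence = { { "X", 1 }, { "P", 2 } };
float f = node.getFactorValue(evidence); // reads cpTable[1][2] == P(X = 1 | P = 2)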

36
bayesclass/cpp/Node.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef NODE_H
#define NODE_H
#include <torch/torch.h>
#include <map>
#include <unordered_set>
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
class Node {
private:
string name;
vector<Node*> parents;
vector<Node*> children;
int numStates; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
vector<int64_t> dimensions; // dimensions of the cpTable
public:
vector<pair<string, string>> combinations(const vector<string>&);
Node(const std::string&, int);
void addParent(Node*);
void addChild(Node*);
void removeParent(Node*);
void removeChild(Node*);
string getName() const;
vector<Node*>& getParents();
vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(map<string, vector<int>>&, const int);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();
vector<string> graph(string className); // Returns a vector of strings representing the graph in graphviz format
float getFactorValue(map<string, int>&);
};
}
#endif

25
bayesclass/cpp/SPODE.cc Normal file
View File

@@ -0,0 +1,25 @@
#include "SPODE.h"
namespace bayesnet {
SPODE::SPODE(int root) : BaseClassifier(Network()), root(root) {}
void SPODE::train()
{
// 0. Add all nodes to the model
addNodes();
// 1. Add an edge from the class node to every feature
// 2. Add an edge from the root feature to every other feature
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
model.addEdge(className, features[i]);
if (i != root) {
model.addEdge(features[root], features[i]);
}
}
}
vector<string> SPODE::graph(string name)
{
return model.graph(name);
}
}
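For concreteness, this is the structure SPODE(1) would build over three features f0, f1, f2 (a sketch; the feature names are illustrative):

class -> f0, class -> f1, class -> f2   (naive Bayes backbone)
f1 -> f0, f1 -> f2                      (super-parent edges from the root feature)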

15
bayesclass/cpp/SPODE.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef SPODE_H
#define SPODE_H
#include "BaseClassifier.h"
namespace bayesnet {
class SPODE : public BaseClassifier {
private:
int root;
protected:
void train() override;
public:
SPODE(int root);
vector<string> graph(string name = "SPODE") override;
};
}
#endif

42
bayesclass/cpp/TAN.cc Normal file
View File

@@ -0,0 +1,42 @@
#include "TAN.h"
namespace bayesnet {
using namespace std;
using namespace torch;
TAN::TAN() : BaseClassifier(Network()) {}
void TAN::train()
{
// 0. Add all nodes to the model
addNodes();
// 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class
auto mi = vector <pair<int, float >>();
Tensor class_dataset = dataset.index({ "...", -1 });
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = dataset.index({ "...", i });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset);
mi.push_back({ i, mi_value });
}
sort(mi.begin(), mi.end(), [](auto& left, auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first;
// 2. Compute the conditional mutual information between every pair of features, given the class
auto weights = metrics.conditionalEdge();
// 3. Compute the maximum spanning tree
auto mst = metrics.maximumSpanningTree(features, weights, root);
// 4. Add edges from the maximum spanning tree to the model
for (auto i = 0; i < mst.size(); ++i) {
auto [from, to] = mst[i];
model.addEdge(features[from], features[to]);
}
// 5. Add edges from the class to all features
for (auto feature : features) {
model.addEdge(className, feature);
}
}
vector<string> TAN::graph(string title)
{
return model.graph(title);
}
}
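And the structure TAN::train() would build over the same three features, assuming the maximum spanning tree came out as f0 -> f1 -> f2 (illustrative only):

class -> f0, class -> f1, class -> f2   (edges from the class to every feature)
f0 -> f1, f1 -> f2                      (tree edges among the features)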

16
bayesclass/cpp/TAN.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef TAN_H
#define TAN_H
#include "BaseClassifier.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class TAN : public BaseClassifier {
private:
protected:
void train() override;
public:
TAN();
vector<string> graph(string name = "TAN") override;
};
}
#endif

31
bayesclass/cpp/utils.cc Normal file
View File

@@ -0,0 +1,31 @@
#include <torch/torch.h>
#include <algorithm>
#include <numeric>
#include <vector>
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums)
{
int n = nums.size();
vector<int> indices(n);
iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices;
}
vector<vector<int>> tensorToVector(const Tensor& tensor)
{
// convert mxn tensor to nxm vector
vector<vector<int>> result;
auto tensor_accessor = tensor.accessor<int, 2>();
// Iterate over columns and rows of the tensor
for (int j = 0; j < tensor.size(1); ++j) {
vector<int> column;
for (int i = 0; i < tensor.size(0); ++i) {
column.push_back(tensor_accessor[i][j]);
}
result.push_back(column);
}
return result;
}
}
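Usage sketch for the helpers above (not part of the commit):

std::vector<float> v = { 0.2f, 0.9f, 0.5f };
auto idx = bayesnet::argsort(v); // idx == { 1, 2, 0 }: indices sorted by descending value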

8
bayesclass/cpp/utils.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef UTILS_H
#define UTILS_H
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums);
vector<vector<int>> tensorToVector(const Tensor& tensor);
}
#endif

View File

@@ -0,0 +1,93 @@
# import numpy as np
# from sklearn.feature_selection import mutual_info_classif
# from sklearn.utils.validation import check_X_y, check_is_fitted
# from sklearn.feature_selection._univariate_selection import (
# _BaseFilter,
# _clean_nans,
# )
# """
# Compute the weighted mutual information between each feature and the
# target.
# Based on
# Silviu Guiaşu,
# Weighted entropy,
# Reports on Mathematical Physics,
# Volume 2, Issue 3,
# 1971,
# Pages 165-179,
# ISSN 0034-4877,
# https://doi.org/10.1016/0034-4877(71)90002-4.
# (https://www.sciencedirect.com/science/article/pii/0034487771900024)
# Abstract: Weighted entropy is the measure of information supplied by a
# probabilistic experiment whose elementary events are characterized both by their
# objective probabilities and by some qualitative (objective or subjective)
# weights. The properties, the axiomatics and the maximum value of the weighted
# entropy are given.
# """
# class SelectKBestWeighted(_BaseFilter):
# def __init__(self, *, k=10):
# super().__init__(score_func=mutual_info_classif)
# self.k = k
# def _check_params(self, X, y):
# if self.k > X.shape[1] or self.k < 1:
# raise ValueError(
# f"k must be between 1 and {X.shape[1]} got {self.k}."
# )
# def _get_support_mask(self):
# check_is_fitted(self)
# if self.k == "all":
# return np.ones(self.scores_.shape, dtype=bool)
# elif self.k == 0:
# return np.zeros(self.scores_.shape, dtype=bool)
# else:
# scores = _clean_nans(self.scores_)
# mask = np.zeros(scores.shape, dtype=bool)
# # Request a stable sort. Mergesort takes more memory (~40MB per
# # megafeature on x86-64).
# mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
# return mask
# def fit(self, X, y, sample_weight):
# self.X_, self.y_ = check_X_y(X, y)
# self._check_params(X, y)
# self.n_features_in_ = X.shape[1]
# self.sample_weight_ = sample_weight
# # Compute the entropy of the target variable
# entropy_y = -np.sum(
# np.multiply(
# np.bincount(y, weights=sample_weight),
# np.log(np.bincount(y, weights=sample_weight)),
# )
# )
# # Compute the mutual information between each feature and the target
# mi = self.score_func(X, y)
# # Compute the weighted entropy of each feature
# entropy_weighted = []
# for i in range(X.shape[1]):
# # Compute the weighted frequency of each unique value of the
# # feature
# freq_weighted = np.bincount(X[:, i], weights=sample_weight)
# freq_weighted = freq_weighted[freq_weighted != 0]
# # Compute the weighted entropy of the feature
# entropy_weighted.append(
# -np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
# / np.sum(sample_weight)
# )
# # Compute the weighted mutual information between each feature and
# # the target
# mi_weighted = mi * entropy_weighted / entropy_y
# # Return the weighted mutual information scores
# self.scores_ = mi_weighted
# return self
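# For reference, the weighted entropy the abstract refers to is, assuming
# Guiaşu's standard definition, H_w(p) = -sum_i w_i * p_i * log(p_i); the
# commented-out fit() above rescales each feature's mutual information by its
# weighted entropy relative to the class entropy: mi_weighted = mi * H_w / H(y).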

pyproject.toml
View File

@@ -1,5 +1,5 @@
[build-system]
-requires = ["setuptools", "setuptools-scm", "wheel"]
+requires = ["setuptools", "setuptools-scm", "cython", "wheel", "torch"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
@@ -39,9 +39,7 @@ classifiers = [
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python",
-"Programming Language :: Python :: 3.8",
-"Programming Language :: Python :: 3.9",
-"Programming Language :: Python :: 3.10",
+"Programming Language :: Python :: 3.11",
]

[project.optional-dependencies]
@@ -61,7 +59,7 @@ show_missing = true
[tool.black]
line-length = 79
-target_version = ['py38', 'py39', 'py310']
+target_version = ['py311']
include = '\.pyi?$'
exclude = '''
/(

49
setup.py Normal file
View File

@@ -0,0 +1,49 @@
"""
Calling
$ python setup.py build_ext --inplace
will build the extension libraries in place, alongside the sources.
"""
from setuptools import Extension, setup
from torch.utils.cpp_extension import (
BuildExtension,
CppExtension,
include_paths,
)
setup(
ext_modules=[
Extension(
name="bayesclass.cppSelectFeatures",
sources=[
"bayesclass/cSelectFeatures.pyx",
"bayesclass/cpp/FeatureSelect.cpp",
],
language="c++",
include_dirs=["bayesclass"],
extra_compile_args=[
"-std=c++17",
],
),
CppExtension(
name="bayesclass.BayesNet",
sources=[
"bayesclass/BayesNetwork.pyx",
"bayesclass/cpp/Network.cc",
"bayesclass/cpp/Node.cc",
"bayesclass/cpp/Metrics.cc",
"bayesclass/cpp/utils.cc",
"bayesclass/cpp/Mst.cc",
"bayesclass/cpp/BaseClassifier.cc",
"bayesclass/cpp/Ensemble.cc",
"bayesclass/cpp/TAN.cc",
"bayesclass/cpp/KDB.cc",
"bayesclass/cpp/SPODE.cc",
"bayesclass/cpp/AODE.cc",
],
include_dirs=include_paths(),
),
],
cmdclass={"build_ext": BuildExtension},
)