Mirror of https://github.com/Doctorado-ML/bayesclass.git (synced 2025-08-18 09:05:55 +00:00)
Compare commits: main...complete_c (23 commits)
Commits (SHA1; author and date columns were not captured by the mirror):
7f5ea1ab1e
168cc368ee
d1cafc230b
99083ceede
64f1500176
aef22306ef
2ff38f73e7
1af3edd050
8b6624e08a
36cc875615
260997c872
8a9c86a22d
4bad5ccfee
5866e19fae
61e4c176eb
ea473fc604
9d7e787f6c
d7425e5af0
30cc744033
0094d500d4
99321043ec
fbaa5eb7d3
0b27d9d9b0
CMakeLists.txt (new file, +7)
@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(feature)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_BUILD_TYPE Debug)

add_executable(feature bayesclass/cpp/FeatureSelect.cpp)
MANIFEST.in (+12)
@@ -1 +1,13 @@
 include README.md LICENSE
+include bayesclass/cpp/FeatureSelect.h
+include bayesclass/cpp/Node.h
+include bayesclass/cpp/Mst.h
+include bayesclass/cpp/Network.h
+include bayesclass/cpp/Metrics.hpp
+include bayesclass/cpp/BaseClassifier.h
+include bayesclass/cpp/Ensemble.h
+include bayesclass/cpp/TAN.h
+include bayesclass/cpp/KDB.h
+include bayesclass/cpp/SPODE.h
+include bayesclass/cpp/AODE.h
+include bayesclass/cpp/utils.h
Makefile (+4)
@@ -16,6 +16,10 @@ lint: ## Lint and static-check
 	flake8 bayesclass
 	mypy bayesclass
 
+feature: ## compile FeatureSelect
+	cmake -B build feature
+
+
 push: ## Push code with tags
 	git push && git push --tags
bayesclass/BayesNetwork.cpp (new file, +1)
@@ -0,0 +1 @@
#error Do not use this file, it is the result of a failed Cython compilation.
bayesclass/BayesNetwork.pyx (new file, +177)
@@ -0,0 +1,177 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np

cdef extern from "cpp/Network.h" namespace "bayesnet":
    cdef cppclass Network:
        # the C++ constructor is Network(float maxThreads, int smoothing)
        Network(float, int) except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        void addNode(string, int)
        void addEdge(string, string) except +
        vector[string] getFeatures()
        int getClassNumStates()
        int getStates()
        string getClassName()
        string version()
        vector[string] show()

cdef class BayesNetwork:
    cdef Network *thisptr
    def __cinit__(self, maxThreads=0.8, laplaceSmooth=1):
        self.thisptr = new Network(maxThreads, laplaceSmooth)
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr.fit(X_, y, features_bytes, className.encode())
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def predict_proba(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict_proba(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def addNode(self, name, states):
        self.thisptr.addNode(str.encode(name), states)
    def addEdge(self, source, destination):
        self.thisptr.addEdge(str.encode(source), str.encode(destination))
    def getFeatures(self):
        res = self.thisptr.getFeatures()
        return [x.decode() for x in res]
    def getStates(self):
        return self.thisptr.getStates()
    def getClassName(self):
        return self.thisptr.getClassName().decode()
    def getClassNumStates(self):
        return self.thisptr.getClassNumStates()
    def show(self):
        return self.thisptr.show()
    def __reduce__(self):
        return (BayesNetwork, ())


cdef extern from "cpp/Metrics.hpp" namespace "bayesnet":
    cdef cppclass Metrics:
        Metrics(vector[vector[int]], vector[int], vector[string]&, string&, int) except +
        vector[float] conditionalEdgeWeights()

cdef class CMetrics:
    cdef Metrics *thisptr
    def __cinit__(self, X, y, features, className, classStates):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr = new Metrics(X_, y, features_bytes, className.encode(), classStates)
    def __dealloc__(self):
        del self.thisptr
    def conditionalEdgeWeights(self, n_vars):
        return np.reshape(self.thisptr.conditionalEdgeWeights(), (n_vars, n_vars))
    def __reduce__(self):
        return (CMetrics, ())


# The C++ classes are bayesnet::TAN, bayesnet::KDB and bayesnet::AODE; they get
# the aliases CTAN, CKDB and CAODE here so the Python wrapper classes below can
# keep the plain names without clashing.
cdef extern from "cpp/TAN.h":
    cdef cppclass CTAN "bayesnet::TAN":
        CTAN() except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef extern from "cpp/KDB.h":
    cdef cppclass CKDB "bayesnet::KDB":
        CKDB(int) except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef extern from "cpp/AODE.h":
    cdef cppclass CAODE "bayesnet::AODE":
        CAODE() except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()

cdef class TAN:
    cdef CTAN *thisptr
    def __cinit__(self):
        self.thisptr = new CTAN()
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (TAN, ())


cdef class KDB:
    cdef CKDB *thisptr
    def __cinit__(self, k):
        self.thisptr = new CKDB(k)
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (KDB, ())


cdef class AODE:
    cdef CAODE *thisptr
    def __cinit__(self):
        self.thisptr = new CAODE()
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className, states):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def graph(self):
        return self.thisptr.graph()
    def __reduce__(self):
        return (AODE, ())
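For orientation, here is a minimal C++ sketch of driving bayesnet::Network directly, using only the methods declared in the extern block above. It is illustrative, not library code: the constructor arguments (a maxThreads fraction and an integer Laplace smoothing) follow the constructors in Network.cc further down, the node names and data values are made up, and the one-vector-per-feature layout mirrors what the wrapper builds from X[:, i].

#include "cpp/Network.h"
#include <string>
#include <vector>

int main()
{
    using namespace std;
    // (0.8, 1) = maxThreads fraction and Laplace smoothing, as in Network.cc
    bayesnet::Network net(0.8, 1);
    net.addNode("class", 2);
    net.addNode("f0", 2);
    net.addNode("f1", 3);
    net.addEdge("class", "f0"); // naive-Bayes style arcs from the class
    net.addEdge("class", "f1");
    // One inner vector per feature (column layout), as the wrapper builds them
    vector<vector<int>> X = { { 0, 1, 0, 1 }, { 0, 1, 2, 2 } };
    vector<int> y = { 0, 1, 0, 1 };
    vector<string> features = { "f0", "f1" };
    net.fit(X, y, features, "class");
    auto y_pred = net.predict(X); // one predicted class label per sample
    return 0;
}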
bayesclass/_version.py
@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"
bayesclass/cSelectFeatures.cpp (new file, 8280 lines)
File diff suppressed because it is too large
bayesclass/cSelectFeatures.pyx (new file, +33)
@@ -0,0 +1,33 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp cimport bool


cdef extern from "cpp/FeatureSelect.h" namespace "features":
    ctypedef float precision_t
    cdef cppclass SelectKBestWeighted:
        SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
        void fit()
        string version()
        vector[precision_t] getScores()
        vector[int] getFeatures()

cdef class CSelectKBestWeighted:
    cdef SelectKBestWeighted *thisptr
    def __cinit__(self, X, y, weights, k, natural=False):  # natural log or log2
        self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
    def __dealloc__(self):
        del self.thisptr
    def fit(self):
        self.thisptr.fit()
        return self
    def get_scores(self):
        return self.thisptr.getScores()
    def get_features(self):
        return self.thisptr.getFeatures()
    def get_version(self):
        return self.thisptr.version()
    def __reduce__(self):
        return (CSelectKBestWeighted, ())
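A small C++ sketch of the underlying features::SelectKBestWeighted (declared in bayesclass/cpp/FeatureSelect.h further down): rows are samples, each sample carries a weight, and fit() ranks features by weighted mutual information with the labels. The data values here are invented for illustration; in this toy set feature 0 equals the label, so it ranks first.

#include "bayesclass/cpp/FeatureSelect.h"
#include <iostream>

int main()
{
    using namespace features;
    samples_t X = { { 0, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 } }; // rows are samples
    labels_t y = { 0, 1, 0, 1 };
    weights_t w(4, 0.25);                            // uniform sample weights
    SelectKBestWeighted selector(X, y, w, 2, false); // k = 2, log2 entropy
    selector.fit();
    for (int f : selector.getFeatures())             // indices, best score first
        std::cout << f << " ";
    std::cout << std::endl;
    return 0;
}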
(modified file)
@@ -4,8 +4,8 @@ import numpy as np
 import pandas as pd
 from scipy.stats import mode
 from sklearn.base import clone, ClassifierMixin, BaseEstimator
+from sklearn.feature_selection import SelectKBest
 from sklearn.ensemble import BaseEnsemble
 from sklearn.feature_selection import mutual_info_classif
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from sklearn.utils.multiclass import unique_labels
-from sklearn.feature_selection import mutual_info_classif
@@ -15,6 +15,8 @@ from pgmpy.models import BayesianNetwork
 from pgmpy.base import DAG
 import matplotlib.pyplot as plt
 from fimdlp.mdlp import FImdlp
+from .cppSelectFeatures import CSelectKBestWeighted
+from .BayesNet import BayesNetwork, CMetrics
 from ._version import __version__
 
 
@@ -93,7 +95,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
     @property
     def states_(self):
         if hasattr(self, "fitted_"):
-            return sum([len(item) for _, item in self.model_.states.items()])
+            return self.states_computed_
         return 0
 
     @property
@@ -142,7 +144,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         # Store the information needed to build the model
         self.build_dataset()
         # Build the DAG
-        self._build()
+        self._build(kwargs)
         # Train the model
         self._train(kwargs)
         self.fitted_ = True
@@ -151,11 +153,14 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         # Return the classifier
         return self
 
-    def _build(self):
-        """This method should be implemented by the subclasses to
-        build the DAG
-        """
-        ...
+    def _build(self, kwargs):
+        self.model_ = BayesNetwork()
+        features = kwargs["features"]
+        states = kwargs["state_names"]
+        for feature in features:
+            self.model_.addNode(feature, len(states[feature]))
+        class_name = kwargs["class_name"]
+        self.model_.addNode(class_name, max(self.y_) + 1)
 
     def _train(self, kwargs):
         """Build and train a BayesianNetwork from the DAG and the dataset
@@ -165,17 +170,24 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         kwargs : dict
             fit parameters
         """
-        self.model_ = BayesianNetwork(
-            self.dag_.edges(), show_progress=self.show_progress
-        )
-        states = dict(state_names=kwargs.pop("state_names", []))
-        self.model_.fit(
-            self.dataset_,
-            estimator=BayesianEstimator,
-            prior_type="K2",
-            weighted=self.weighted_,
-            **states,
-        )
+        # self.model_ = BayesianNetwork(
+        #     self.dag_.edges(), show_progress=self.show_progress
+        # )
+        # states = dict(state_names=kwargs.pop("state_names", []))
+        # self.model_.fit(
+        #     self.dataset_,
+        #     estimator=BayesianEstimator,
+        #     prior_type="K2",
+        #     weighted=self.weighted_,
+        #     **states,
+        # )
+        features = kwargs["features"]
+        class_name = kwargs["class_name"]
+        for source, destination in self.edges_:
+            self.model_.addEdge(source, destination)
+        self.model_.fit(self.X_, self.y_, features, class_name)
+        self.states_computed_ = self.model_.getStates()
 
     def predict(self, X):
         """A reference implementation of a prediction for a classifier.
@@ -227,10 +239,11 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         check_is_fitted(self, ["X_", "y_", "fitted_"])
         # Input validation
         X = check_array(X)
-        dataset = pd.DataFrame(
-            X, columns=self.feature_names_in_, dtype=np.int32
-        )
-        return self.model_.predict(dataset).values.ravel()
+        # dataset = pd.DataFrame(
+        #     X, columns=self.feature_names_in_, dtype=np.int32
+        # )
+        # return self.model_.predict(dataset).values.ravel()
+        return self.model_.predict(X)
 
     def plot(self, title="", node_size=800):
         warnings.simplefilter("ignore", UserWarning)
@@ -293,7 +306,7 @@ class TAN(BayesBase):
             raise ValueError("Head index out of range")
         return X, y
 
-    def _build(self):
+    def _build(self, kwargs):
         est = TreeSearch(
             self.dataset_, root_node=self.feature_names_in_[self.head_]
         )
@@ -346,7 +359,7 @@ class KDB(BayesBase):
         ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
-    def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
+    def _add_m_edges(self, idx, S_nodes, conditional_weights):
         n_edges = min(self.k, len(S_nodes))
         cond_w = conditional_weights.copy()
         exit_cond = self.k == 0
@@ -355,7 +368,7 @@ class KDB(BayesBase):
             max_minfo = np.argmax(cond_w[idx, :])
             if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
                 try:
-                    dag.add_edge(
+                    self.model_.addEdge(
                         self.feature_names_in_[max_minfo],
                         self.feature_names_in_[idx],
                     )
@@ -366,9 +379,9 @@ class KDB(BayesBase):
             cond_w[idx, max_minfo] = -1
             exit_cond = num == n_edges or np.all(cond_w[idx, :] <= self.theta)
 
-    def _build(self):
+    def _build(self, kwargs):
         """
-        1. For each feature Xi, compute mutual information, I(X;;C),
+        1. For each feature Xi, compute mutual information, I(X;C),
         where C is the class.
         2. Compute class conditional mutual information I(Xi;Xj|C), for each
         pair of features Xi and Xj, where i != j.
@@ -389,29 +402,38 @@ class KDB(BayesBase):
         # 1. get the mutual information between each feature and the class
         mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
         # 2. symmetric matrix where each element represents I(X, Y| class_node)
-        conditional_weights = TreeSearch(
-            self.dataset_
-        )._get_conditional_weights(
-            self.dataset_, self.class_name_, show_progress=self.show_progress
+        metrics = CMetrics(
+            self.X_,
+            self.y_,
+            self.features_,
+            self.class_name_,
+            self.n_classes_,
         )
+        conditional_weights = metrics.conditionalEdgeWeights(
+            self.n_features_in_ + 1
+        )
         # 3. Let the used variable list, S, be empty.
         S_nodes = []
+        num_states = {
+            feature: len(states)
+            for feature, states in kwargs["state_names"].items()
+        }
         # 4. Let the DAG being constructed, BN, begin with a single class node
-        dag = BayesianNetwork(show_progress=self.show_progress)
-        dag.add_node(self.class_name_)  # , state_names=self.classes_)
+        self.model_ = BayesNetwork()
+        self.model_.addNode(self.class_name_, self.n_classes_)
         # 5. Repeat until S includes all domain features
         # 5.1 Select feature Xmax which is not in S and has the largest value
-        for idx in np.argsort(mutual):
+        for idx in np.argsort(-mutual):
             # 5.2 Add a node to BN representing Xmax.
             feature = self.feature_names_in_[idx]
-            dag.add_node(feature)
+            self.model_.addNode(feature, num_states[feature])
             # 5.3 Add an arc from C to Xmax in BN.
-            dag.add_edge(self.class_name_, feature)
+            self.model_.addEdge(self.class_name_, feature)
             # 5.4 Add m = min(|S|, k) arcs from m distinct features Xj in S
-            self._add_m_edges(dag, idx, S_nodes, conditional_weights)
+            self._add_m_edges(idx, S_nodes, conditional_weights)
             # 5.5 Add Xmax to S.
             S_nodes.append(idx)
-        self.dag_ = dag
+        self.edges_ = []
 
 
 def build_spodes(features, class_name):
@@ -806,7 +828,7 @@ class BoostSPODE(BayesBase):
         ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
-    def _build(self):
+    def _build(self, _):
         class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
         feature_edges = [
             (self.sparent_, f)
@@ -818,7 +840,6 @@ class BoostSPODE(BayesBase):
 
     def _train(self, kwargs):
         states = dict(state_names=kwargs.get("state_names", []))
-        breakpoint()
         self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
         self.model_.fit(
             self.dataset_,
@@ -835,11 +856,9 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         show_progress=False,
         random_state=None,
         estimator=None,
-        n_estimators=10,
     ):
         self.show_progress = show_progress
         self.random_state = random_state
-        self.n_estimators = n_estimators
         super().__init__(estimator=estimator)
 
     def _validate_estimator(self) -> None:
@@ -868,26 +887,67 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         self.nodes_leaves = self.nodes_edges
         return self
 
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
+
     def _train(self, kwargs):
         """Build boosted SPODEs"""
         weights = [1 / self.n_samples_] * self.n_samples_
         selected_features = []
         # Step 0: Set the finish condition
-        for num in range(self.n_estimators):
+        for _ in range(self.n_features_in_):
             # Step 1: Build ranking with mutual information
-            feature = (
-                SelectKBest(k=1)
-                .fit(self.X_, self.y_)
-                .get_feature_names_out(self.feature_names_in_)
-                .tolist()[0]
-            )
+            # WATCH OUT, THIS IS WRONG: IT DOES NOT UPDATE THE RANKING WITH
+            # THE WEIGHTS, IT WILL ALWAYS RETURN THE SAME RESULT
+            features = (
+                CSelectKBestWeighted(
+                    self.X_, self.y_, weights, k=self.n_features_in_
+                )
+                .fit()
+                .get_features()
+            )
+            # Step 1.1: Select the feature to become the sparent
+            for n_feature in features:
+                if n_feature not in selected_features:
+                    selected_features.append(n_feature)
+                    break
+            feature = self.feature_names_in_[n_feature]
             # Step 2: Build & train spode with the first feature as sparent
             estimator = clone(self.estimator_)
             _args = kwargs.copy()
             _args["sparent"] = feature
             _args["sample_weight"] = weights
             _args["weighted"] = True
             # print("I'm gonna build a spode with", feature)
             # Step 2.1: build dataset
             # Step 2.2: Train the model
             estimator.fit(self.X_, self.y_, **_args)
@@ -898,18 +958,17 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
             am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
             # Step 3.2: Update weights for next classifier
             weights = [
-                wm * np.exp(am * (ym != y_pred))
-                for wm, ym in zip(weights, self.y_)
+                wm * np.exp(am * (ym != yp))
+                for wm, ym, yp in zip(weights, self.y_, y_pred)
             ]
             # Step 4: Add the new model
             self.estimators_.append(estimator)
         """
         class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
         feature_edges = [
             (sparent, f) for f in self.feature_names_in_ if f != sparent
         ]
         self.weights_ = weights.copy() if weights is not None else None
         feature_edges.extend(class_edges)
         self.model_ = BayesianNetwork(feature_edges, show_progress=False)
         return self.model_
         """
         self.weights_ = weights
 
     def predict(self, X: np.ndarray) -> np.ndarray:
         n_samples = X.shape[0]
         n_estimators = len(self.estimators_)
         result = np.empty((n_samples, n_estimators))
         for index, estimator in enumerate(self.estimators_):
             result[:, index] = estimator.predict(X)
         return mode(result, axis=1, keepdims=False).mode.ravel()
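Step 3.2 above applies the multi-class AdaBoost (SAMME) weight update. A standalone C++ sketch of the same arithmetic follows; updateWeights is a hypothetical helper, and the weighted error em is assumed to be computed the standard SAMME way, since that step falls outside the hunk shown.

#include <cmath>
#include <vector>

// SAMME update mirrored from the Python loop above:
// am = log((1 - em) / em) + log(n_classes - 1); w_i *= exp(am) when y_i != yhat_i
std::vector<double> updateWeights(const std::vector<double>& w,
    const std::vector<int>& y, const std::vector<int>& y_pred, int n_classes)
{
    double em = 0, total = 0;
    for (size_t i = 0; i < w.size(); ++i) {
        em += w[i] * (y[i] != y_pred[i]); // weighted error mass
        total += w[i];
    }
    em /= total;
    double am = std::log((1 - em) / em) + std::log(n_classes - 1.0);
    std::vector<double> result(w);
    for (size_t i = 0; i < w.size(); ++i) {
        if (y[i] != y_pred[i]) {
            result[i] *= std::exp(am); // boost misclassified samples
        }
    }
    return result;
}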
bayesclass/cpp/AODE.cc (new file, +16)
@@ -0,0 +1,16 @@
#include "AODE.h"

namespace bayesnet {
    AODE::AODE() : Ensemble() {}
    void AODE::train()
    {
        models.clear();
        for (int i = 0; i < features.size(); ++i) {
            models.push_back(std::make_unique<SPODE>(i));
        }
    }
    vector<string> AODE::graph(string title)
    {
        return Ensemble::graph(title);
    }
}
bayesclass/cpp/AODE.h (new file, +14)
@@ -0,0 +1,14 @@
#ifndef AODE_H
#define AODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
    class AODE : public Ensemble {
    protected:
        void train() override;
    public:
        AODE();
        vector<string> graph(string title = "AODE");
    };
}
#endif
bayesclass/cpp/BaseClassifier.cc (new file, +127)
@@ -0,0 +1,127 @@
#include "BaseClassifier.h"
#include "utils.h"

namespace bayesnet {
    using namespace std;
    using namespace torch;

    BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
    BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
    {
        dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
        this->features = features;
        this->className = className;
        this->states = states;
        checkFitParameters();
        auto n_classes = states[className].size();
        metrics = Metrics(dataset, features, className, n_classes);
        train();
        model.fit(Xv, yv, features, className);
        fitted = true;
        return *this;
    }
    BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
        Xv = X;
        for (int i = 0; i < X.size(); ++i) {
            this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
        }
        this->y = torch::tensor(y, kInt64);
        yv = y;
        return build(features, className, states);
    }
    void BaseClassifier::checkFitParameters()
    {
        auto sizes = X.sizes();
        m = sizes[0];
        n = sizes[1];
        if (m != y.size(0)) {
            throw invalid_argument("X and y must have the same number of samples");
        }
        if (n != features.size()) {
            throw invalid_argument("X and features must have the same number of features");
        }
        if (states.find(className) == states.end()) {
            throw invalid_argument("className not found in states");
        }
        for (auto feature : features) {
            if (states.find(feature) == states.end()) {
                throw invalid_argument("feature [" + feature + "] not found in states");
            }
        }
    }

    Tensor BaseClassifier::predict(Tensor& X)
    {
        if (!fitted) {
            throw logic_error("Classifier has not been fitted");
        }
        auto m_ = X.size(0);
        auto n_ = X.size(1);
        vector<vector<int>> Xd(n_, vector<int>(m_, 0));
        for (auto i = 0; i < n_; i++) {
            // X is stored as kInt64 (see fit above), so convert the column to a
            // contiguous kInt32 tensor before reading it through data_ptr<int>
            auto temp = X.index({ "...", i }).to(kInt32).contiguous();
            Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
        }
        auto yp = model.predict(Xd);
        auto ypred = torch::tensor(yp, torch::kInt64);
        return ypred;
    }
    vector<int> BaseClassifier::predict(vector<vector<int>>& X)
    {
        if (!fitted) {
            throw logic_error("Classifier has not been fitted");
        }
        auto m_ = X[0].size();
        auto n_ = X.size();
        vector<vector<int>> Xd(n_, vector<int>(m_, 0));
        for (auto i = 0; i < n_; i++) {
            Xd[i] = vector<int>(X[i].begin(), X[i].end());
        }
        auto yp = model.predict(Xd);
        return yp;
    }
    float BaseClassifier::score(Tensor& X, Tensor& y)
    {
        if (!fitted) {
            throw logic_error("Classifier has not been fitted");
        }
        Tensor y_pred = predict(X);
        return (y_pred == y).sum().item<float>() / y.size(0);
    }
    float BaseClassifier::score(vector<vector<int>>& X, vector<int>& y)
    {
        if (!fitted) {
            throw logic_error("Classifier has not been fitted");
        }
        auto m_ = X[0].size();
        auto n_ = X.size();
        vector<vector<int>> Xd(n_, vector<int>(m_, 0));
        for (auto i = 0; i < n_; i++) {
            Xd[i] = vector<int>(X[i].begin(), X[i].end());
        }
        return model.score(Xd, y);
    }
    vector<string> BaseClassifier::show()
    {
        return model.show();
    }
    void BaseClassifier::addNodes()
    {
        // Add all nodes to the network
        for (auto feature : features) {
            model.addNode(feature, states[feature].size());
        }
        model.addNode(className, states[className].size());
    }
    int BaseClassifier::getNumberOfNodes()
    {
        // Features does not include class
        return fitted ? model.getFeatures().size() + 1 : 0;
    }
    int BaseClassifier::getNumberOfEdges()
    {
        return fitted ? model.getEdges().size() : 0;
    }
}
bayesclass/cpp/BaseClassifier.h (new file, +48)
@@ -0,0 +1,48 @@
#ifndef CLASSIFIERS_H
#define CLASSIFIERS_H
#include <torch/torch.h>
#include "Network.h"
#include "Metrics.hpp"
using namespace std;
using namespace torch;

namespace bayesnet {
    class BaseClassifier {
    private:
        bool fitted;
        BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
    protected:
        Network model;
        int m, n; // m: number of samples, n: number of features
        Tensor X;
        vector<vector<int>> Xv;
        Tensor y;
        vector<int> yv;
        Tensor dataset;
        Metrics metrics;
        vector<string> features;
        string className;
        map<string, vector<int>> states;
        void checkFitParameters();
        virtual void train() = 0;
    public:
        BaseClassifier(Network model);
        virtual ~BaseClassifier() = default;
        BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
        void addNodes();
        int getNumberOfNodes();
        int getNumberOfEdges();
        Tensor predict(Tensor& X);
        vector<int> predict(vector<vector<int>>& X);
        float score(Tensor& X, Tensor& y);
        float score(vector<vector<int>>& X, vector<int>& y);
        vector<string> show();
        virtual vector<string> graph(string title) = 0;
    };
}
#endif
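To illustrate the contract this base class imposes (subclasses only supply train() and graph()), here is a hypothetical minimal subclass that builds a naive Bayes structure, following the same pattern KDB and SPODE use. NaiveBayes is not part of the library; it is a sketch under the declarations above.

#include "BaseClassifier.h"

namespace bayesnet {
    class NaiveBayes : public BaseClassifier {
    protected:
        void train() override
        {
            // The class node plus one node per feature, each feature
            // with the class as its only parent
            model.addNode(className, states[className].size());
            for (auto& feature : features) {
                model.addNode(feature, states[feature].size());
                model.addEdge(className, feature);
            }
        }
    public:
        NaiveBayes() : BaseClassifier(Network()) {}
        vector<string> graph(string title = "NaiveBayes") override
        {
            return model.graph(title);
        }
    };
}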
bayesclass/cpp/Ensemble.cc (new file, +112)
@@ -0,0 +1,112 @@
#include "Ensemble.h"

namespace bayesnet {
    using namespace std;
    using namespace torch;

    Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {}
    Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
    {
        dataset = cat({ X, y.view({y.size(0), 1}) }, 1);
        this->features = features;
        this->className = className;
        this->states = states;
        auto n_classes = states[className].size();
        metrics = Metrics(dataset, features, className, n_classes);
        // Build models
        train();
        // Train models
        n_models = models.size();
        for (auto i = 0; i < n_models; ++i) {
            models[i]->fit(Xv, yv, features, className, states);
        }
        fitted = true;
        return *this;
    }
    Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
        Xv = X;
        for (int i = 0; i < X.size(); ++i) {
            this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
        }
        this->y = torch::tensor(y, kInt64);
        yv = y;
        return build(features, className, states);
    }
    Tensor Ensemble::predict(Tensor& X)
    {
        if (!fitted) {
            throw logic_error("Ensemble has not been fitted");
        }
        Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64);
        for (auto i = 0; i < n_models; ++i) {
            y_pred.index_put_({ "...", i }, models[i]->predict(X));
        }
        return torch::tensor(voting(y_pred));
    }
    vector<int> Ensemble::voting(Tensor& y_pred)
    {
        auto y_pred_ = y_pred.accessor<int64_t, 2>();
        vector<int> y_pred_final;
        for (int i = 0; i < y_pred.size(0); ++i) {
            vector<float> votes(states[className].size(), 0);
            for (int j = 0; j < y_pred.size(1); ++j) {
                votes[y_pred_[i][j]] += 1;
            }
            auto indices = argsort(votes);
            y_pred_final.push_back(indices[0]);
        }
        return y_pred_final;
    }
    vector<int> Ensemble::predict(vector<vector<int>>& X)
    {
        if (!fitted) {
            throw logic_error("Ensemble has not been fitted");
        }
        long m_ = X[0].size();
        long n_ = X.size();
        vector<vector<int>> Xd(n_, vector<int>(m_, 0));
        for (auto i = 0; i < n_; i++) {
            Xd[i] = vector<int>(X[i].begin(), X[i].end());
        }
        Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
        for (auto i = 0; i < n_models; ++i) {
            y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64));
        }
        return voting(y_pred);
    }
    float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
    {
        if (!fitted) {
            throw logic_error("Ensemble has not been fitted");
        }
        auto y_pred = predict(X);
        int correct = 0;
        for (int i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == y[i]) {
                correct++;
            }
        }
        return (double)correct / y_pred.size();
    }
    vector<string> Ensemble::show()
    {
        auto result = vector<string>();
        for (auto i = 0; i < n_models; ++i) {
            auto res = models[i]->show();
            result.insert(result.end(), res.begin(), res.end());
        }
        return result;
    }
    vector<string> Ensemble::graph(string title)
    {
        auto result = vector<string>();
        for (auto i = 0; i < n_models; ++i) {
            auto res = models[i]->graph(title + "_" + to_string(i));
            result.insert(result.end(), res.begin(), res.end());
        }
        return result;
    }
}
bayesclass/cpp/Ensemble.h (new file, +42)
@@ -0,0 +1,42 @@
#ifndef ENSEMBLE_H
#define ENSEMBLE_H
#include <torch/torch.h>
#include "BaseClassifier.h"
#include "Metrics.hpp"
#include "utils.h"
using namespace std;
using namespace torch;

namespace bayesnet {
    class Ensemble {
    private:
        bool fitted;
        long n_models;
        Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
    protected:
        vector<unique_ptr<BaseClassifier>> models;
        int m, n; // m: number of samples, n: number of features
        Tensor X;
        vector<vector<int>> Xv;
        Tensor y;
        vector<int> yv;
        Tensor dataset;
        Metrics metrics;
        vector<string> features;
        string className;
        map<string, vector<int>> states;
        void virtual train() = 0;
        vector<int> voting(Tensor& y_pred);
    public:
        Ensemble();
        virtual ~Ensemble() = default;
        Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
        Tensor predict(Tensor& X);
        vector<int> predict(vector<vector<int>>& X);
        float score(Tensor& X, Tensor& y);
        float score(vector<vector<int>>& X, vector<int>& y);
        vector<string> show();
        vector<string> graph(string title);
    };
}
#endif
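Ensemble::voting above tallies one hard vote per model and returns the class with the most votes (argsort comes from utils.h, which is not shown; it is assumed to sort descending, so indices[0] is the top class). The same rule in a standalone form, with a hypothetical majorityVote helper:

#include <algorithm>
#include <vector>

// Majority vote over one row of per-model predictions
int majorityVote(const std::vector<int>& predictions, int n_classes)
{
    std::vector<int> votes(n_classes, 0);
    for (int p : predictions) {
        votes[p]++; // one vote per model
    }
    // index of the largest tally (first one wins ties)
    return std::max_element(votes.begin(), votes.end()) - votes.begin();
}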
bayesclass/cpp/FeatureSelect.cpp (new file, +118)
@@ -0,0 +1,118 @@
#include "FeatureSelect.h"
#include <algorithm>
#include <cmath>
#include <functional>
#include <stdexcept>
#include <unordered_map>
namespace features {
    SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat)
        : samples(samples), labels(labels), weights(weights), k(k), nat(nat)
    {
        if (samples.size() == 0 || samples[0].size() == 0)
            throw invalid_argument("features must be a non-empty matrix");
        if (samples.size() != labels.size())
            throw invalid_argument("number of samples and labels must be equal");
        if (samples.size() != weights.size())
            throw invalid_argument("number of samples and weights must be equal");
        if (k < 1 || k > static_cast<int>(samples[0].size()))
            throw invalid_argument("k must be between 1 and number of features");
        numFeatures = 0;
        numClasses = 0;
        numSamples = 0;
        fitted = false;
    }
    void SelectKBestWeighted::fit()
    {
        auto labelsCopy = labels;
        numFeatures = samples[0].size();
        numSamples = samples.size();
        // compute number of classes
        sort(labelsCopy.begin(), labelsCopy.end());
        auto last = unique(labelsCopy.begin(), labelsCopy.end());
        labelsCopy.erase(last, labelsCopy.end());
        numClasses = labelsCopy.size();
        // compute scores
        scores.reserve(numFeatures);
        for (int i = 0; i < numFeatures; ++i) {
            scores.push_back(MutualInformation(i));
            features.push_back(i);
        }
        // sort & reduce scores and features
        sort(features.begin(), features.end(), [&](int i, int j)
            { return scores[i] > scores[j]; });
        sort(scores.begin(), scores.end(), greater<precision_t>());
        features.resize(k);
        scores.resize(k);
        fitted = true;
    }
    precision_t SelectKBestWeighted::entropyLabel()
    {
        return entropy(labels);
    }
    precision_t SelectKBestWeighted::entropy(const sample_t& data)
    {
        precision_t ventropy = 0, totalWeight = 0;
        score_t counts(numClasses + 1, 0);
        for (auto i = 0; i < static_cast<int>(data.size()); ++i) {
            counts[data[i]] += weights[i];
            totalWeight += weights[i];
        }
        for (auto count : counts) {
            precision_t p = count / totalWeight;
            if (p > 0) {
                if (nat) {
                    ventropy -= p * log(p);
                } else {
                    ventropy -= p * log2(p);
                }
            }
        }
        return ventropy;
    }
    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
    precision_t SelectKBestWeighted::conditionalEntropy(const int feature)
    {
        unordered_map<value_t, precision_t> featureCounts;
        unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
        featureCounts.clear();
        jointCounts.clear();
        precision_t totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            featureCounts[samples[i][feature]] += weights[i];
            jointCounts[samples[i][feature]][labels[i]] += weights[i];
            totalWeight += weights[i];
        }
        if (totalWeight == 0)
            throw invalid_argument("Total weight should not be zero");
        precision_t entropy = 0;
        for (auto& [feat, count] : featureCounts) {
            auto p_f = count / totalWeight;
            precision_t entropy_f = 0;
            for (auto& [label, jointCount] : jointCounts[feat]) {
                auto p_l_f = jointCount / count;
                if (p_l_f > 0) {
                    if (nat) {
                        entropy_f -= p_l_f * log(p_l_f);
                    } else {
                        entropy_f -= p_l_f * log2(p_l_f);
                    }
                }
            }
            entropy += p_f * entropy_f;
        }
        return entropy;
    }
    // I(X;Y) = H(Y) - H(Y|X)
    precision_t SelectKBestWeighted::MutualInformation(const int i)
    {
        return entropyLabel() - conditionalEntropy(i);
    }
    score_t SelectKBestWeighted::getScores() const
    {
        if (!fitted)
            throw logic_error("score not fitted");
        return scores;
    }
    // Return the indices of the selected features
    labels_t SelectKBestWeighted::getFeatures() const
    {
        if (!fitted)
            throw logic_error("score not fitted");
        return features;
    }
}
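As a worked example of the weighted entropy above: for labels {0, 1, 0, 1} with uniform weights of 0.25 each, the weighted class distribution is p = (0.5, 0.5), so H(Y) = -(0.5 log2 0.5 + 0.5 log2 0.5) = 1 bit. With weights (0.4, 0.1, 0.4, 0.1) the same labels give p = (0.8, 0.2) and H(Y) = -(0.8 log2 0.8 + 0.2 log2 0.2), which is about 0.72 bits: re-weighting skews the distribution and lowers the entropy, which is exactly how the boosting weights influence the feature ranking.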
bayesclass/cpp/FeatureSelect.h (new file, +38)
@@ -0,0 +1,38 @@
#ifndef SELECT_K_BEST_WEIGHTED_H
#define SELECT_K_BEST_WEIGHTED_H
#include <map>
#include <vector>
#include <string>
using namespace std;
namespace features {
    typedef float precision_t;
    typedef int value_t;
    typedef vector<value_t> sample_t;
    typedef vector<sample_t> samples_t;
    typedef vector<value_t> labels_t;
    typedef vector<precision_t> score_t, weights_t;

    class SelectKBestWeighted {
    private:
        const samples_t samples;
        const labels_t labels;
        const weights_t weights;
        const int k;
        bool nat; // use natural log or log2
        int numFeatures, numClasses, numSamples;
        bool fitted;
        score_t scores; // scores of the features
        labels_t features; // indices of the selected features
        precision_t entropyLabel();
        precision_t entropy(const sample_t&);
        precision_t conditionalEntropy(const int);
        precision_t MutualInformation(const int);
    public:
        SelectKBestWeighted(samples_t&, labels_t&, weights_t&, int, bool);
        void fit();
        score_t getScores() const;
        labels_t getFeatures() const; // Return the indices of the selected features
        static inline string version() { return "0.1.0"; };
    };
}
#endif
bayesclass/cpp/KDB.cc (new file, +90)
@@ -0,0 +1,90 @@
#include "KDB.h"

namespace bayesnet {
    using namespace std;
    using namespace torch;

    KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {}
    void KDB::train()
    {
        /*
        1. For each feature Xi, compute mutual information, I(X;C),
        where C is the class.
        2. Compute class conditional mutual information I(Xi;Xj|C), for each
        pair of features Xi and Xj, where i != j.
        3. Let the used variable list, S, be empty.
        4. Let the DAG network being constructed, BN, begin with a single
        class node, C.
        5. Repeat until S includes all domain features
        5.1. Select feature Xmax which is not in S and has the largest value
        I(Xmax;C).
        5.2. Add a node to BN representing Xmax.
        5.3. Add an arc from C to Xmax in BN.
        5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
        the highest value for I(Xmax;Xj|C).
        5.5. Add Xmax to S.
        Compute the conditional probability inferred by the structure of BN by
        using counts from DB, and output BN.
        */
        // 1. For each feature Xi, compute mutual information, I(X;C),
        // where C is the class.
        vector<float> mi;
        for (auto i = 0; i < features.size(); i++) {
            Tensor firstFeature = X.index({ "...", i });
            mi.push_back(metrics.mutualInformation(firstFeature, y));
        }
        // 2. Compute class conditional mutual information I(Xi;Xj|C) for each pair
        auto conditionalEdgeWeights = metrics.conditionalEdge();
        // 3. Let the used variable list, S, be empty.
        vector<int> S;
        // 4. Let the DAG network being constructed, BN, begin with a single
        // class node, C.
        model.addNode(className, states[className].size());
        // 5. Repeat until S includes all domain features
        // 5.1. Select feature Xmax which is not in S and has the largest value
        // I(Xmax;C).
        auto order = argsort(mi);
        for (auto idx : order) {
            // 5.2. Add a node to BN representing Xmax.
            model.addNode(features[idx], states[features[idx]].size());
            // 5.3. Add an arc from C to Xmax in BN.
            model.addEdge(className, features[idx]);
            // 5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
            // the highest value for I(Xmax;Xj|C).
            add_m_edges(idx, S, conditionalEdgeWeights);
            // 5.5. Add Xmax to S.
            S.push_back(idx);
        }
    }
    void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
    {
        auto n_edges = min(k, static_cast<int>(S.size()));
        auto cond_w = clone(weights);
        bool exit_cond = k == 0;
        int num = 0;
        while (!exit_cond) {
            auto max_minfo = argmax(cond_w.index({ idx, "..." })).item<int>();
            auto belongs = find(S.begin(), S.end(), max_minfo) != S.end();
            if (belongs && cond_w.index({ idx, max_minfo }).item<float>() > theta) {
                try {
                    model.addEdge(features[max_minfo], features[idx]);
                    num++;
                }
                catch (const invalid_argument& e) {
                    // Loops are not allowed
                }
            }
            cond_w.index_put_({ idx, max_minfo }, -1);
            auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta);
            auto candidates = candidates_mask.nonzero();
            exit_cond = num == n_edges || candidates.size(0) == 0;
        }
    }
    vector<string> KDB::graph(string title)
    {
        if (title == "KDB") {
            title += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
        }
        return model.graph(title);
    }
}
bayesclass/cpp/KDB.h (new file, +20)
@@ -0,0 +1,20 @@
#ifndef KDB_H
#define KDB_H
#include "BaseClassifier.h"
#include "utils.h"
namespace bayesnet {
    using namespace std;
    using namespace torch;
    class KDB : public BaseClassifier {
    private:
        int k;
        float theta;
        void add_m_edges(int idx, vector<int>& S, Tensor& weights);
    protected:
        void train() override;
    public:
        KDB(int k, float theta = 0.03);
        vector<string> graph(string name = "KDB") override;
    };
}
#endif
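A hypothetical fit/score round-trip for the C++ KDB classifier, with the signatures taken from BaseClassifier.h above; the feature names, states and data values are invented for illustration.

#include "KDB.h"
#include <map>

int main()
{
    using namespace std;
    // One inner vector per feature, as expected by BaseClassifier::fit
    vector<vector<int>> X = { { 0, 1, 0, 1 }, { 1, 1, 0, 0 }, { 0, 0, 1, 1 } };
    vector<int> y = { 0, 1, 0, 1 };
    vector<string> features = { "f0", "f1", "f2" };
    map<string, vector<int>> states = {
        { "f0", { 0, 1 } }, { "f1", { 0, 1 } },
        { "f2", { 0, 1 } }, { "class", { 0, 1 } }
    };
    bayesnet::KDB clf(2); // at most k = 2 feature parents, default theta = 0.03
    clf.fit(X, y, features, "class", states);
    float accuracy = clf.score(X, y); // training accuracy
    return (accuracy >= 0) ? 0 : 1;
}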
bayesclass/cpp/Metrics.cc (new file, +131)
@@ -0,0 +1,131 @@
#include "Metrics.hpp"
#include "Mst.h"
using namespace std;
namespace bayesnet {
    Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
        : samples(samples)
        , features(features)
        , className(className)
        , classNumStates(classNumStates)
    {
    }
    Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
        : features(features)
        , className(className)
        , classNumStates(classNumStates)
    {
        samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
        for (int i = 0; i < vsamples.size(); ++i) {
            samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
        }
        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
    }
    vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
    {
        vector<pair<string, string>> result;
        for (int i = 0; i < source.size(); ++i) {
            string temp = source[i];
            for (int j = i + 1; j < source.size(); ++j) {
                result.push_back({ temp, source[j] });
            }
        }
        return result;
    }
    torch::Tensor Metrics::conditionalEdge()
    {
        auto result = vector<double>();
        auto source = vector<string>(features);
        source.push_back(className);
        auto combinations = doCombinations(source);
        // Compute class prior
        auto margin = torch::zeros({ classNumStates });
        for (int value = 0; value < classNumStates; ++value) {
            auto mask = samples.index({ "...", -1 }) == value;
            margin[value] = mask.sum().item<float>() / samples.sizes()[0];
        }
        for (auto [first, second] : combinations) {
            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
            double accumulated = 0;
            for (int value = 0; value < classNumStates; ++value) {
                auto mask = samples.index({ "...", -1 }) == value;
                auto first_dataset = samples.index({ mask, index_first });
                auto second_dataset = samples.index({ mask, index_second });
                auto mi = mutualInformation(first_dataset, second_dataset);
                auto pb = margin[value].item<float>();
                accumulated += pb * mi;
            }
            result.push_back(accumulated);
        }
        long n_vars = source.size();
        auto matrix = torch::zeros({ n_vars, n_vars });
        auto indices = torch::triu_indices(n_vars, n_vars, 1);
        for (auto i = 0; i < result.size(); ++i) {
            auto x = indices[0][i];
            auto y = indices[1][i];
            matrix[x][y] = result[i];
            matrix[y][x] = result[i];
        }
        return matrix;
    }
    vector<float> Metrics::conditionalEdgeWeights()
    {
        auto matrix = conditionalEdge();
        std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
        return v;
    }
    double Metrics::entropy(torch::Tensor& feature)
    {
        torch::Tensor counts = feature.bincount();
        int totalWeight = counts.sum().item<int>();
        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
        torch::Tensor logProbs = torch::log(probs);
        torch::Tensor entropy = -probs * logProbs;
        return entropy.nansum().item<double>();
    }
    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
    double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
    {
        int numSamples = firstFeature.sizes()[0];
        torch::Tensor featureCounts = secondFeature.bincount();
        unordered_map<int, unordered_map<int, double>> jointCounts;
        double totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
            totalWeight += 1;
        }
        if (totalWeight == 0)
            throw invalid_argument("Total weight should not be zero");
        double entropyValue = 0;
        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
            double p_f = featureCounts[value].item<double>() / totalWeight;
            double entropy_f = 0;
            for (auto& [label, jointCount] : jointCounts[value]) {
                double p_l_f = jointCount / featureCounts[value].item<double>();
                if (p_l_f > 0) {
                    entropy_f -= p_l_f * log(p_l_f);
                }
            }
            entropyValue += p_f * entropy_f;
        }
        return entropyValue;
    }
    // I(X;Y) = H(Y) - H(Y|X)
    double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
    {
        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
    }
    /*
    Compute the maximum spanning tree considering the weights as distances
    and the indices of the weights as nodes of this square matrix using
    the Kruskal algorithm
    */
    vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root)
    {
        auto result = vector<pair<int, int>>();
        auto mst = MST(features, weights, root);
        return mst.maximumSpanningTree();
    }
}
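In formula form, the weight that conditionalEdge() assigns to a pair of features is the class-conditional mutual information, estimated as

I(Xi;Xj|C) = sum_{c} p(C=c) I(Xi;Xj|C=c)

where each I(Xi;Xj|C=c) is mutualInformation() evaluated on the rows whose class value is c, and p(C=c) is the class prior held in margin. These pairwise weights fill the symmetric matrix that KDB uses to pick extra parents and that maximumSpanningTree() turns into a TAN structure.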
bayesclass/cpp/Metrics.hpp (new file, +28)
@@ -0,0 +1,28 @@
#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    class Metrics {
    private:
        Tensor samples;
        vector<string> features;
        string className;
        int classNumStates;
    public:
        Metrics() = default;
        Metrics(Tensor&, vector<string>&, string&, int);
        Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
        double entropy(Tensor&);
        double conditionalEntropy(Tensor&, Tensor&);
        double mutualInformation(Tensor&, Tensor&);
        vector<float> conditionalEdgeWeights();
        Tensor conditionalEdge();
        vector<pair<string, string>> doCombinations(const vector<string>&);
        vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
    };
}
#endif
bayesclass/cpp/Mst.cc (new file, +115)
@@ -0,0 +1,115 @@
#include "Mst.h"
#include <vector>
#include <iostream>
#include <unordered_set>
/*
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/

namespace bayesnet {
    using namespace std;
    Graph::Graph(int V)
    {
        parent = vector<int>(V);
        for (int i = 0; i < V; i++)
            parent[i] = i;
        G.clear();
        T.clear();
    }
    void Graph::addEdge(int u, int v, float wt)
    {
        G.push_back({ wt, { u, v } });
    }
    int Graph::find_set(int i)
    {
        // If i is the parent of itself
        if (i == parent[i])
            return i;
        else
            // else recursively find the parent of i
            return find_set(parent[i]);
    }
    void Graph::union_set(int u, int v)
    {
        parent[u] = parent[v];
    }
    void Graph::kruskal_algorithm()
    {
        int i, uSt, vEd;
        // sort the edges ordered on decreasing weight
        sort(G.begin(), G.end(), [](auto& left, auto& right) {return left.first > right.first;});
        for (i = 0; i < G.size(); i++) {
            uSt = find_set(G[i].second.first);
            vEd = find_set(G[i].second.second);
            if (uSt != vEd) {
                T.push_back(G[i]); // add to mst vector
                union_set(uSt, vEd);
            }
        }
    }
    void Graph::display_mst()
    {
        cout << "Edge :" << " Weight" << endl;
        for (int i = 0; i < T.size(); i++) {
            cout << T[i].second.first << " - " << T[i].second.second << " : "
                << T[i].first;
            cout << endl;
        }
    }

    vector<pair<int, int>> reorder(vector<pair<float, pair<int, int>>> T, int root_original)
    {
        auto result = vector<pair<int, int>>();
        auto visited = vector<int>();
        auto nextVariables = unordered_set<int>();
        nextVariables.emplace(root_original);
        while (nextVariables.size() > 0) {
            int root = *nextVariables.begin();
            nextVariables.erase(nextVariables.begin());
            for (int i = 0; i < T.size(); ++i) {
                auto [weight, edge] = T[i];
                auto [from, to] = edge;
                if (from == root || to == root) {
                    visited.insert(visited.begin(), i);
                    if (from == root) {
                        result.push_back({ from, to });
                        nextVariables.emplace(to);
                    } else {
                        result.push_back({ to, from });
                        nextVariables.emplace(from);
                    }
                }
            }
            // Remove visited
            for (int i = 0; i < visited.size(); ++i) {
                T.erase(T.begin() + visited[i]);
            }
            visited.clear();
        }
        if (T.size() > 0) {
            for (int i = 0; i < T.size(); ++i) {
                auto [weight, edge] = T[i];
                auto [from, to] = edge;
                result.push_back({ from, to });
            }
        }
        return result;
    }

    MST::MST(vector<string>& features, Tensor& weights, int root) : features(features), weights(weights), root(root) {}
    vector<pair<int, int>> MST::maximumSpanningTree()
    {
        auto num_features = features.size();
        Graph g(num_features);

        // Make a complete graph (start j at i + 1 to avoid self-loops)
        for (int i = 0; i < num_features - 1; ++i) {
            for (int j = i + 1; j < num_features; ++j) {
                g.addEdge(i, j, weights[i][j].item<float>());
            }
        }
        g.kruskal_algorithm();
        auto mst = g.get_mst();
        return reorder(mst, root);
    }

}
bayesclass/cpp/Mst.h (new file, +35)
@@ -0,0 +1,35 @@
#ifndef MST_H
#define MST_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    class MST {
    private:
        Tensor weights;
        vector<string> features;
        int root;
    public:
        MST() = default;
        MST(vector<string>& features, Tensor& weights, int root);
        vector<pair<int, int>> maximumSpanningTree();
    };
    class Graph {
    private:
        int V; // number of nodes in graph
        vector<pair<float, pair<int, int>>> G; // vector for graph
        vector<pair<float, pair<int, int>>> T; // vector for mst
        vector<int> parent;
    public:
        Graph(int V);
        void addEdge(int u, int v, float wt);
        int find_set(int i);
        void union_set(int u, int v);
        void kruskal_algorithm();
        void display_mst();
        vector<pair<float, pair<int, int>>> get_mst() { return T; }
    };
}
#endif
291 bayesclass/cpp/Network.cc Normal file
@@ -0,0 +1,291 @@
#include <thread>
#include <mutex>
#include <condition_variable>
#include <numeric>
#include "Network.h"
namespace bayesnet {
    Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {}
    Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
    Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
    Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
    {
        for (auto& pair : other.nodes) {
            nodes[pair.first] = make_unique<Node>(*pair.second);
        }
    }
    float Network::getmaxThreads()
    {
        return maxThreads;
    }
    torch::Tensor& Network::getSamples()
    {
        return samples;
    }
    void Network::addNode(string name, int numStates)
    {
        if (find(features.begin(), features.end(), name) == features.end()) {
            features.push_back(name);
        }
        if (nodes.find(name) != nodes.end()) {
            // if the node already exists, update its number of states
            nodes[name]->setNumStates(numStates);
            return;
        }
        nodes[name] = make_unique<Node>(name, numStates);
    }
    vector<string> Network::getFeatures()
    {
        return features;
    }
    int Network::getClassNumStates()
    {
        return classNumStates;
    }
    int Network::getStates()
    {
        int result = 0;
        for (auto& node : nodes) {
            result += node.second->getNumStates();
        }
        return result;
    }
    string Network::getClassName()
    {
        return className;
    }
    bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack)
    {
        if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
        {
            visited.insert(nodeId);
            recStack.insert(nodeId);
            for (Node* child : nodes[nodeId]->getChildren()) {
                if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
                    return true;
                else if (recStack.find(child->getName()) != recStack.end())
                    return true;
            }
        }
        recStack.erase(nodeId); // remove node from recursion stack before function ends
        return false;
    }
    void Network::addEdge(const string parent, const string child)
    {
        if (nodes.find(parent) == nodes.end()) {
            throw invalid_argument("Parent node " + parent + " does not exist");
        }
        if (nodes.find(child) == nodes.end()) {
            throw invalid_argument("Child node " + child + " does not exist");
        }
        // Temporarily add the edge to check for cycles
        nodes[parent]->addChild(nodes[child].get());
        nodes[child]->addParent(nodes[parent].get());
        unordered_set<string> visited;
        unordered_set<string> recStack;
        if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
        {
            // remove the problematic edge
            nodes[parent]->removeChild(nodes[child].get());
            nodes[child]->removeParent(nodes[parent].get());
            throw invalid_argument("Adding this edge forms a cycle in the graph.");
        }
    }
    map<string, std::unique_ptr<Node>>& Network::getNodes()
    {
        return nodes;
    }
    void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
    {
        features = featureNames;
        this->className = className;
        dataset.clear();
        // Build the dataset map and the tensor of samples
        samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
        for (int i = 0; i < featureNames.size(); ++i) {
            dataset[featureNames[i]] = input_data[i];
            samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
        }
        dataset[className] = labels;
        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
        classNumStates = *max_element(labels.begin(), labels.end()) + 1;
        int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
        if (maxThreadsRunning < 1) {
            maxThreadsRunning = 1;
        }
        vector<thread> threads;
        mutex mtx;
        condition_variable cv;
        int activeThreads = 0;
        int nextNodeIndex = 0;
        // Spawn up to maxThreadsRunning workers; each worker keeps taking
        // nodes until none remain, computing one CPT per node
        while (nextNodeIndex < nodes.size()) {
            unique_lock<mutex> lock(mtx);
            cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
            if (nextNodeIndex >= nodes.size()) {
                break; // No more work remaining
            }
            threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
                while (true) {
                    unique_lock<mutex> lock(mtx);
                    if (nextNodeIndex >= nodes.size()) {
                        break; // No more work remaining
                    }
                    auto& pair = *std::next(nodes.begin(), nextNodeIndex);
                    ++nextNodeIndex;
                    lock.unlock();
                    pair.second->computeCPT(dataset, laplaceSmoothing);
                    lock.lock();
                    nodes[pair.first] = std::move(pair.second);
                    lock.unlock();
                }
                lock_guard<mutex> lock(mtx);
                --activeThreads;
                cv.notify_one();
            });
            ++activeThreads;
        }
        for (auto& thread : threads) {
            thread.join();
        }
        fitted = true;
    }

    vector<int> Network::predict(const vector<vector<int>>& tsamples)
    {
        if (!fitted) {
            throw logic_error("You must call fit() before calling predict()");
        }
        vector<int> predictions;
        vector<int> sample;
        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
            for (int col = 0; col < tsamples.size(); ++col) {
                sample.push_back(tsamples[col][row]);
            }
            vector<double> classProbabilities = predict_sample(sample);
            // Find the class with the maximum posterior probability
            auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
            int predictedClass = distance(classProbabilities.begin(), maxElem);
            predictions.push_back(predictedClass);
        }
        return predictions;
    }
    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
    {
        if (!fitted) {
            throw logic_error("You must call fit() before calling predict_proba()");
        }
        vector<vector<double>> predictions;
        vector<int> sample;
        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
            for (int col = 0; col < tsamples.size(); ++col) {
                sample.push_back(tsamples[col][row]);
            }
            predictions.push_back(predict_sample(sample));
        }
        return predictions;
    }
    double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
    {
        vector<int> y_pred = predict(tsamples);
        int correct = 0;
        for (int i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == labels[i]) {
                correct++;
            }
        }
        return (double)correct / y_pred.size();
    }
    vector<double> Network::predict_sample(const vector<int>& sample)
    {
        // Ensure the sample size equals the number of features
        if (sample.size() != features.size()) {
            throw invalid_argument("Sample size (" + to_string(sample.size()) +
                ") does not match the number of features (" + to_string(features.size()) + ")");
        }
        map<string, int> evidence;
        for (int i = 0; i < sample.size(); ++i) {
            evidence[features[i]] = sample[i];
        }
        return exactInference(evidence);
    }
    double Network::computeFactor(map<string, int>& completeEvidence)
    {
        double result = 1.0;
        for (auto& node : getNodes()) {
            result *= node.second->getFactorValue(completeEvidence);
        }
        return result;
    }
    vector<double> Network::exactInference(map<string, int>& evidence)
    {
        vector<double> result(classNumStates, 0.0);
        vector<thread> threads;
        mutex mtx;
        for (int i = 0; i < classNumStates; ++i) {
            threads.emplace_back([this, &result, &evidence, i, &mtx]() {
                auto completeEvidence = map<string, int>(evidence);
                completeEvidence[getClassName()] = i;
                double factor = computeFactor(completeEvidence);
                lock_guard<mutex> lock(mtx);
                result[i] = factor;
            });
        }
        for (auto& thread : threads) {
            thread.join();
        }
        // Normalize the result
        double sum = accumulate(result.begin(), result.end(), 0.0);
        for (double& value : result) {
            value /= sum;
        }
        return result;
    }
    vector<string> Network::show()
    {
        vector<string> result;
        // Draw the network
        for (auto& node : nodes) {
            string line = node.first + " -> ";
            for (auto child : node.second->getChildren()) {
                line += child->getName() + ", ";
            }
            result.push_back(line);
        }
        return result;
    }
    vector<string> Network::graph(string title)
    {
        auto output = vector<string>();
        auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
        auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
        string header = prefix + title + suffix;
        output.push_back(header);
        for (auto& node : nodes) {
            auto result = node.second->graph(className);
            output.insert(output.end(), result.begin(), result.end());
        }
        output.push_back("}\n");
        return output;
    }
    vector<pair<string, string>> Network::getEdges()
    {
        auto edges = vector<pair<string, string>>();
        for (const auto& node : nodes) {
            auto head = node.first;
            for (const auto& child : node.second->getChildren()) {
                auto tail = child->getName();
                edges.push_back({ head, tail });
            }
        }
        return edges;
    }

}
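exactInference above evaluates, for each class state c, the unnormalized joint product P(x_1 | parents(x_1)) * ... * P(x_n | parents(x_n)) with the class fixed to c, then normalizes; predict simply takes the argmax of that posterior. A minimal usage sketch of the Network API defined in this file, on invented binary data:

    #include <iostream>
    #include "Network.h"
    int main()
    {
        bayesnet::Network net; // defaults: maxThreads = 0.8, laplaceSmoothing = 1
        net.addNode("class", 2);
        net.addNode("f1", 2);
        net.addNode("f2", 2);
        net.addEdge("class", "f1"); // a two-feature naive Bayes structure
        net.addEdge("class", "f2");
        // One row per feature, one column per sample
        std::vector<std::vector<int>> X = { { 0, 0, 1, 1 }, { 0, 1, 0, 1 } };
        std::vector<int> y = { 0, 0, 1, 1 };
        net.fit(X, y, { "f1", "f2" }, "class");
        std::cout << "accuracy: " << net.score(X, y) << std::endl;
        for (const auto& line : net.show())
            std::cout << line << std::endl; // e.g. "class -> f1, f2, "
    }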
53 bayesclass/cpp/Network.h Normal file
@@ -0,0 +1,53 @@
#ifndef NETWORK_H
#define NETWORK_H
#include "Node.h"
#include <map>
#include <vector>

namespace bayesnet {
    class Network {
    private:
        map<string, std::unique_ptr<Node>> nodes;
        map<string, vector<int>> dataset;
        bool fitted;
        float maxThreads;
        int classNumStates;
        vector<string> features;
        string className;
        int laplaceSmoothing;
        torch::Tensor samples;
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
        vector<double> predict_sample(const vector<int>&);
        vector<double> exactInference(map<string, int>&);
        double computeFactor(map<string, int>&);
        double mutual_info(torch::Tensor&, torch::Tensor&);
        double entropy(torch::Tensor&);
        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
        double mutualInformation(torch::Tensor&, torch::Tensor&);
    public:
        Network();
        Network(float, int);
        Network(float);
        Network(Network&);
        torch::Tensor& getSamples();
        float getmaxThreads();
        void addNode(string, int);
        void addEdge(const string, const string);
        map<string, std::unique_ptr<Node>>& getNodes();
        vector<string> getFeatures();
        int getStates();
        vector<pair<string, string>> getEdges();
        int getClassNumStates();
        string getClassName();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
        vector<int> predict(const vector<vector<int>>&);
        // Computes the conditional edge weight of variable index u and v conditioned on class_node
        torch::Tensor conditionalEdgeWeight();
        vector<vector<double>> predict_proba(const vector<vector<int>>&);
        double score(const vector<vector<int>>&, const vector<int>&);
        vector<string> show();
        vector<string> graph(string title); // Returns a vector of strings representing the graph in graphviz format
        inline string version() { return "0.1.0"; }
    };
}
#endif
122 bayesclass/cpp/Node.cc Normal file
@@ -0,0 +1,122 @@
#include "Node.h"

namespace bayesnet {

    Node::Node(const std::string& name, int numStates)
        : name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
    {
    }
    string Node::getName() const
    {
        return name;
    }
    void Node::addParent(Node* parent)
    {
        parents.push_back(parent);
    }
    void Node::removeParent(Node* parent)
    {
        parents.erase(std::remove(parents.begin(), parents.end(), parent), parents.end());
    }
    void Node::removeChild(Node* child)
    {
        children.erase(std::remove(children.begin(), children.end(), child), children.end());
    }
    void Node::addChild(Node* child)
    {
        children.push_back(child);
    }
    vector<Node*>& Node::getParents()
    {
        return parents;
    }
    vector<Node*>& Node::getChildren()
    {
        return children;
    }
    int Node::getNumStates() const
    {
        return numStates;
    }
    void Node::setNumStates(int numStates)
    {
        this->numStates = numStates;
    }
    torch::Tensor& Node::getCPT()
    {
        return cpTable;
    }
    /*
     The MinFill criterion is a heuristic for variable elimination.
     It chooses the variable that minimizes the number of edges that need to
     be added to the graph to make it triangulated, measured by counting the
     edges that would have to be added if the variable were eliminated.
     Here that count is the number of pairwise combinations of the node's
     neighbors, i.e. the neighbors taken two at a time.
    */
    unsigned Node::minFill()
    {
        unordered_set<string> neighbors;
        for (auto child : children) {
            neighbors.emplace(child->getName());
        }
        for (auto parent : parents) {
            neighbors.emplace(parent->getName());
        }
        auto source = vector<string>(neighbors.begin(), neighbors.end());
        return combinations(source).size();
    }
    vector<pair<string, string>> Node::combinations(const vector<string>& source)
    {
        vector<pair<string, string>> result;
        for (int i = 0; i < source.size(); ++i) {
            string temp = source[i];
            for (int j = i + 1; j < source.size(); ++j) {
                result.push_back({ temp, source[j] });
            }
        }
        return result;
    }
    void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing)
    {
        // Get the dimensions of the CPT: the node's own states first, then
        // one axis per parent (reset in case the CPT is recomputed)
        dimensions.clear();
        dimensions.push_back(numStates);
        for (auto father : getParents()) {
            dimensions.push_back(father->getNumStates());
        }
        // Create a tensor initialized with the Laplace smoothing term
        cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
        // Fill the table with counts
        for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
            torch::List<c10::optional<torch::Tensor>> coordinates;
            coordinates.push_back(torch::tensor(dataset[name][n_sample]));
            for (auto father : getParents()) {
                coordinates.push_back(torch::tensor(dataset[father->getName()][n_sample]));
            }
            // Increment the count of the corresponding coordinate
            cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1);
        }
        // Normalize the counts over the node's own axis
        cpTable = cpTable / cpTable.sum(0);
    }
    float Node::getFactorValue(map<string, int>& evidence)
    {
        torch::List<c10::optional<torch::Tensor>> coordinates;
        // following the predetermined order of indices in the cpTable (see Node.h)
        coordinates.push_back(torch::tensor(evidence[name]));
        for (auto parent : getParents()) {
            coordinates.push_back(torch::tensor(evidence[parent->getName()]));
        }
        return cpTable.index({ coordinates }).item<float>();
    }
    vector<string> Node::graph(string className)
    {
        auto output = vector<string>();
        auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
        output.push_back(name + " [shape=circle" + suffix + "] \n");
        for (auto& child : children) {
            output.push_back(name + " -> " + child->getName());
        }
        return output;
    }
}
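A small worked sketch of computeCPT and getFactorValue, for a binary node with one binary parent and smoothing 1 (the data is invented): counts are seeded with the smoothing term and then normalized down the node's own axis, so each column of the CPT sums to 1.

    #include <iostream>
    #include "Node.h"
    int main()
    {
        bayesnet::Node parent("p", 2), child("c", 2);
        child.addParent(&parent);
        std::map<std::string, std::vector<int>> data = {
            { "p", { 0, 0, 0, 0, 1, 1 } },
            { "c", { 0, 0, 0, 1, 1, 1 } },
        };
        child.computeCPT(data, 1);
        std::map<std::string, int> evidence = { { "p", 0 }, { "c", 0 } };
        // count(c=0, p=0) = 3, so P(c=0 | p=0) = (3+1) / ((3+1)+(1+1)) = 4/6
        std::cout << child.getFactorValue(evidence) << std::endl; // ~0.667
    }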
36 bayesclass/cpp/Node.h Normal file
@@ -0,0 +1,36 @@
#ifndef NODE_H
#define NODE_H
#include <torch/torch.h>
#include <unordered_set>
#include <map>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    class Node {
    private:
        string name;
        vector<Node*> parents;
        vector<Node*> children;
        int numStates; // number of states of the variable
        torch::Tensor cpTable; // Order of indices is 0 -> node variable, 1 -> 1st parent, 2 -> 2nd parent, ...
        vector<int64_t> dimensions; // dimensions of the cpTable
    public:
        vector<pair<string, string>> combinations(const vector<string>&);
        Node(const std::string&, int);
        void addParent(Node*);
        void addChild(Node*);
        void removeParent(Node*);
        void removeChild(Node*);
        string getName() const;
        vector<Node*>& getParents();
        vector<Node*>& getChildren();
        torch::Tensor& getCPT();
        void computeCPT(map<string, vector<int>>&, const int);
        int getNumStates() const;
        void setNumStates(int);
        unsigned minFill();
        vector<string> graph(string className); // Returns a vector of strings representing the graph in graphviz format
        float getFactorValue(map<string, int>&);
    };
}
#endif
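As the index-order comment above implies, for a node with two parents the entry cpTable[i][j][k] holds P(node = i | parent1 = j, parent2 = k); getFactorValue assembles its coordinates in exactly that order.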
25 bayesclass/cpp/SPODE.cc Normal file
@@ -0,0 +1,25 @@
#include "SPODE.h"

namespace bayesnet {

    SPODE::SPODE(int root) : BaseClassifier(Network()), root(root) {}

    void SPODE::train()
    {
        // 0. Add all nodes to the model
        addNodes();
        // 1. Add edges from the class node to all other nodes
        // 2. Add edges from the root node to all other nodes
        for (int i = 0; i < static_cast<int>(features.size()); ++i) {
            model.addEdge(className, features[i]);
            if (i != root) {
                model.addEdge(features[root], features[i]);
            }
        }
    }
    vector<string> SPODE::graph(string name)
    {
        return model.graph(name);
    }

}
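For illustration, with three features and root 0, train above produces these arcs (the class is a parent of every feature, and the super-parent f0 is a parent of every other feature):

    class -> f0    class -> f1    class -> f2
    f0 -> f1       f0 -> f2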
15 bayesclass/cpp/SPODE.h Normal file
@@ -0,0 +1,15 @@
#ifndef SPODE_H
#define SPODE_H
#include "BaseClassifier.h"
namespace bayesnet {
    class SPODE : public BaseClassifier {
    private:
        int root;
    protected:
        void train() override;
    public:
        SPODE(int root);
        vector<string> graph(string name = "SPODE") override;
    };
}
#endif
42 bayesclass/cpp/TAN.cc Normal file
@@ -0,0 +1,42 @@
#include "TAN.h"

namespace bayesnet {
    using namespace std;
    using namespace torch;

    TAN::TAN() : BaseClassifier(Network()) {}

    void TAN::train()
    {
        // 0. Add all nodes to the model
        addNodes();
        // 1. Compute the mutual information between each feature and the class,
        // and pick as root the feature with the highest value
        auto mi = vector<pair<int, float>>();
        Tensor class_dataset = dataset.index({ "...", -1 });
        for (int i = 0; i < static_cast<int>(features.size()); ++i) {
            Tensor feature_dataset = dataset.index({ "...", i });
            auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset);
            mi.push_back({ i, mi_value });
        }
        sort(mi.begin(), mi.end(), [](auto& left, auto& right) {return left.second < right.second;});
        auto root = mi[mi.size() - 1].first;
        // 2. Compute the conditional edge weights between every pair of features given the class
        auto weights = metrics.conditionalEdge();
        // 3. Compute the maximum spanning tree
        auto mst = metrics.maximumSpanningTree(features, weights, root);
        // 4. Add the edges of the maximum spanning tree to the model
        for (auto i = 0; i < mst.size(); ++i) {
            auto [from, to] = mst[i];
            model.addEdge(features[from], features[to]);
        }
        // 5. Add edges from the class to every feature
        for (auto feature : features) {
            model.addEdge(className, feature);
        }
    }
    vector<string> TAN::graph(string title)
    {
        return model.graph(title);
    }
}
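A toy trace of train above, with invented numbers: suppose three features get mutual information {f0: 0.2, f1: 0.5, f2: 0.3} with the class. After the ascending sort the root is f1; if the maximum spanning tree then yields the directed edges f1 -> f0 and f0 -> f2, the final model contains:

    f1 -> f0, f0 -> f2                       (spanning-tree arcs, step 4)
    class -> f0, class -> f1, class -> f2    (class arcs, step 5)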
16 bayesclass/cpp/TAN.h Normal file
@@ -0,0 +1,16 @@
#ifndef TAN_H
#define TAN_H
#include "BaseClassifier.h"
namespace bayesnet {
    using namespace std;
    using namespace torch;
    class TAN : public BaseClassifier {
    private:
    protected:
        void train() override;
    public:
        TAN();
        vector<string> graph(string name = "TAN") override;
    };
}
#endif
31 bayesclass/cpp/utils.cc Normal file
@@ -0,0 +1,31 @@
#include <torch/torch.h>
#include <vector>
#include <numeric>
#include <algorithm>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    vector<int> argsort(vector<float>& nums)
    {
        int n = nums.size();
        vector<int> indices(n);
        iota(indices.begin(), indices.end(), 0);
        sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
        return indices;
    }
    vector<vector<int>> tensorToVector(const Tensor& tensor)
    {
        // convert an m x n tensor to an n x m vector (one inner vector per column)
        vector<vector<int>> result;
        auto tensor_accessor = tensor.accessor<int, 2>();

        // Iterate over the columns and rows of the tensor
        for (int j = 0; j < tensor.size(1); ++j) {
            vector<int> column;
            for (int i = 0; i < tensor.size(0); ++i) {
                column.push_back(tensor_accessor[i][j]);
            }
            result.push_back(column);
        }

        return result;
    }
}
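A minimal sketch of both helpers with invented values: argsort returns indices ordered by descending value, and tensorToVector transposes an m x n integer tensor into one vector per column.

    #include <torch/torch.h>
    #include <vector>
    #include <iostream>
    namespace bayesnet { // declarations for the helpers defined above
        std::vector<int> argsort(std::vector<float>&);
        std::vector<std::vector<int>> tensorToVector(const torch::Tensor&);
    }
    int main()
    {
        std::vector<float> w = { 0.1f, 0.9f, 0.4f };
        auto idx = bayesnet::argsort(w); // {1, 2, 0}
        auto t = torch::arange(6, torch::kInt32).reshape({ 2, 3 }); // [[0,1,2],[3,4,5]]
        auto cols = bayesnet::tensorToVector(t); // {{0,3}, {1,4}, {2,5}}
        std::cout << idx[0] << " " << cols[0][1] << std::endl; // prints "1 3"
    }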
8 bayesclass/cpp/utils.h Normal file
@@ -0,0 +1,8 @@
#ifndef UTILS_H
#define UTILS_H
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    vector<int> argsort(vector<float>& nums);
    vector<vector<int>> tensorToVector(const Tensor& tensor);
}
#endif
93 bayesclass/feature_selection.py Normal file
@@ -0,0 +1,93 @@
# import numpy as np
# from sklearn.feature_selection import mutual_info_classif
# from sklearn.utils.validation import check_X_y, check_is_fitted
# from sklearn.feature_selection._univariate_selection import (
#     _BaseFilter,
#     _clean_nans,
# )

# """
# Compute the weighted mutual information between each feature and the
# target.
# Based on
# Silviu Guiaşu,
# Weighted entropy,
# Reports on Mathematical Physics,
# Volume 2, Issue 3,
# 1971,
# Pages 165-179,
# ISSN 0034-4877,
# https://doi.org/10.1016/0034-4877(71)90002-4.
# (https://www.sciencedirect.com/science/article/pii/0034487771900024)
# Abstract: Weighted entropy is the measure of information supplied by a
# probabilistic experiment whose elementary events are characterized both by
# their objective probabilities and by some qualitative (objective or
# subjective) weights. The properties, the axiomatics and the maximum value
# of the weighted entropy are given.
# """


# class SelectKBestWeighted(_BaseFilter):
#     def __init__(self, *, k=10):
#         super().__init__(score_func=mutual_info_classif)
#         self.k = k

#     def _check_params(self, X, y):
#         if self.k > X.shape[1] or self.k < 1:
#             raise ValueError(
#                 f"k must be between 1 and {X.shape[1]} got {self.k}."
#             )

#     def _get_support_mask(self):
#         check_is_fitted(self)

#         if self.k == "all":
#             return np.ones(self.scores_.shape, dtype=bool)
#         elif self.k == 0:
#             return np.zeros(self.scores_.shape, dtype=bool)
#         else:
#             scores = _clean_nans(self.scores_)
#             mask = np.zeros(scores.shape, dtype=bool)

#             # Request a stable sort. Mergesort takes more memory (~40MB per
#             # megafeature on x86-64).
#             mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
#             return mask

#     def fit(self, X, y, sample_weight):
#         self.X_, self.y_ = check_X_y(X, y)
#         self._check_params(X, y)
#         self.n_features_in_ = X.shape[1]
#         self.sample_weight_ = sample_weight
#         # Compute the entropy of the target variable
#         entropy_y = -np.sum(
#             np.multiply(
#                 np.bincount(y, weights=sample_weight),
#                 np.log(np.bincount(y, weights=sample_weight)),
#             )
#         )

#         # Compute the mutual information between each feature and the target
#         mi = self.score_func(X, y)

#         # Compute the weighted entropy of each feature
#         entropy_weighted = []
#         for i in range(X.shape[1]):
#             # Compute the weighted frequency of each unique value of the
#             # feature
#             freq_weighted = np.bincount(X[:, i], weights=sample_weight)
#             freq_weighted = freq_weighted[freq_weighted != 0]

#             # Compute the weighted entropy of the feature
#             entropy_weighted.append(
#                 -np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
#                 / np.sum(sample_weight)
#             )

#         # Compute the weighted mutual information between each feature and
#         # the target
#         mi_weighted = mi * entropy_weighted / entropy_y

#         # Return the weighted mutual information scores
#         self.scores_ = mi_weighted
#         return self
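A standalone sketch of the score the commented-out class computes, mirroring its formulas; the function name and layout are made up for illustration:

    import numpy as np
    from sklearn.feature_selection import mutual_info_classif


    def weighted_mi_scores(X, y, sample_weight):
        # Unnormalized weighted entropy of the target, as in fit() above
        freq_y = np.bincount(y, weights=sample_weight)
        entropy_y = -np.sum(freq_y * np.log(freq_y))
        # Plain mutual information between each feature and the target
        mi = mutual_info_classif(X, y)
        # Weighted entropy of each feature
        entropy_weighted = []
        for i in range(X.shape[1]):
            freq = np.bincount(X[:, i], weights=sample_weight)
            freq = freq[freq != 0]
            entropy_weighted.append(
                -np.sum(freq * np.log(freq)) / np.sum(sample_weight)
            )
        # Scale MI by the ratio of weighted entropies
        return mi * np.array(entropy_weighted) / entropy_y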
pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "setuptools-scm", "wheel"]
+requires = ["setuptools", "setuptools-scm", "cython", "wheel", "torch"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
@@ -39,9 +39,7 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
 
 [project.optional-dependencies]
@@ -61,7 +59,7 @@ show_missing = true
 
 [tool.black]
 line-length = 79
-target_version = ['py38', 'py39', 'py310']
+target_version = ['py311']
 include = '\.pyi?$'
 exclude = '''
 /(
49 setup.py Normal file
@@ -0,0 +1,49 @@
"""
Calling
$ python setup.py build_ext --inplace
will build the extension library in the current directory.
"""

from setuptools import Extension, setup
from torch.utils.cpp_extension import (
    BuildExtension,
    CppExtension,
    include_paths,
)


setup(
    ext_modules=[
        Extension(
            name="bayesclass.cppSelectFeatures",
            sources=[
                "bayesclass/cSelectFeatures.pyx",
                "bayesclass/cpp/FeatureSelect.cpp",
            ],
            language="c++",
            include_dirs=["bayesclass"],
            extra_compile_args=[
                "-std=c++17",
            ],
        ),
        CppExtension(
            name="bayesclass.BayesNet",
            sources=[
                "bayesclass/BayesNetwork.pyx",
                "bayesclass/cpp/Network.cc",
                "bayesclass/cpp/Node.cc",
                "bayesclass/cpp/Metrics.cc",
                "bayesclass/cpp/utils.cc",
                "bayesclass/cpp/Mst.cc",
                "bayesclass/cpp/BaseClassifier.cc",
                "bayesclass/cpp/Ensemble.cc",
                "bayesclass/cpp/TAN.cc",
                "bayesclass/cpp/KDB.cc",
                "bayesclass/cpp/SPODE.cc",
                "bayesclass/cpp/AODE.cc",
            ],
            include_dirs=include_paths(),
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
)
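With this setup.py in place, the extensions build in place as the module docstring notes; a typical invocation, assuming cython and torch are already installed (pyproject.toml now lists both among the build requirements):

    $ pip install cython torch
    $ python setup.py build_ext --inplace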