Add KDBNew estimator

This commit is contained in:
2023-02-04 17:39:32 +01:00
parent 9019b878f0
commit de45a94c9b
2 changed files with 70 additions and 32 deletions

View File

@@ -16,4 +16,5 @@ __all__ = [
"TAN", "TAN",
"KDB", "KDB",
"AODE", "AODE",
"KDBNew",
] ]

View File

@@ -12,6 +12,7 @@ import networkx as nx
from pgmpy.estimators import TreeSearch, BayesianEstimator from pgmpy.estimators import TreeSearch, BayesianEstimator
from pgmpy.models import BayesianNetwork from pgmpy.models import BayesianNetwork
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from fimdlp.mdlp import MultiDiscretizer
from ._version import __version__ from ._version import __version__
@@ -75,7 +76,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
return self.states_ return self.states_
def fit(self, X, y, **kwargs): def fit(self, X, y, **kwargs):
"""A reference implementation of a fitting function for a classifier. """Fit classifier
Parameters Parameters
---------- ----------
@@ -130,6 +131,9 @@ class BayesBase(BaseEstimator, ClassifierMixin):
# Return the classifier # Return the classifier
return self return self
def _build(self):
pass
def _train(self, kwargs): def _train(self, kwargs):
self.model_ = BayesianNetwork( self.model_ = BayesianNetwork(
self.dag_.edges(), show_progress=self.show_progress self.dag_.edges(), show_progress=self.show_progress
@@ -260,37 +264,38 @@ class TAN(BayesBase):
return X, y return X, y
def _build(self): def _build(self):
# est = TreeSearch(self.dataset_, est = TreeSearch(
# root_node=self.feature_names_in_[self.head_]) self.dataset_, root_node=self.feature_names_in_[self.head_]
# self.dag_ = est.estimate( )
# estimator_type="tan", self.dag_ = est.estimate(
# class_node=self.class_name_, estimator_type="tan",
# show_progress=self.show_progress, class_node=self.class_name_,
# ) show_progress=self.show_progress,
)
# Code taken from pgmpy # Code taken from pgmpy
n_jobs = -1 # n_jobs = -1
weights = TreeSearch._get_conditional_weights( # weights = TreeSearch._get_conditional_weights(
self.dataset_, # self.dataset_,
self.class_name_, # self.class_name_,
"mutual_info", # "mutual_info",
n_jobs, # n_jobs,
self.show_progress, # self.show_progress,
) # )
# Step 4.2: Construct chow-liu DAG on {data.columns - class_node} # # Step 4.2: Construct chow-liu DAG on {data.columns - class_node}
class_node_idx = np.where(self.dataset_.columns == self.class_name_)[ # class_node_idx = np.where(self.dataset_.columns == self.class_name_)[
0 # 0
][0] # ][0]
weights = np.delete(weights, class_node_idx, axis=0) # weights = np.delete(weights, class_node_idx, axis=0)
weights = np.delete(weights, class_node_idx, axis=1) # weights = np.delete(weights, class_node_idx, axis=1)
reduced_columns = np.delete(self.dataset_.columns, class_node_idx) # reduced_columns = np.delete(self.dataset_.columns, class_node_idx)
D = TreeSearch._create_tree_and_dag( # D = TreeSearch._create_tree_and_dag(
weights, reduced_columns, self.feature_names_in_[self.head_] # weights, reduced_columns, self.feature_names_in_[self.head_]
) # )
# Step 4.3: Add edges from class_node to all other nodes. # # Step 4.3: Add edges from class_node to all other nodes.
D.add_edges_from( # D.add_edges_from(
[(self.class_name_, node) for node in reduced_columns] # [(self.class_name_, node) for node in reduced_columns]
) # )
self.dag_ = D # self.dag_ = D
class KDB(BayesBase): class KDB(BayesBase):
@@ -345,7 +350,6 @@ class KDB(BayesBase):
Compute the conditional probabilility infered by the structure of BN by Compute the conditional probabilility infered by the structure of BN by
using counts from DB, and output BN. using counts from DB, and output BN.
""" """
# 1. get the mutual information between each feature and the class # 1. get the mutual information between each feature and the class
mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True) mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
# 2. symmetric matrix where each element represents I(X, Y| class_node) # 2. symmetric matrix where each element represents I(X, Y| class_node)
@@ -449,3 +453,36 @@ class AODE(BayesBase, BaseEnsemble):
for index, model in enumerate(self.models_): for index, model in enumerate(self.models_):
result[:, index] = model.predict(dataset).values.ravel() result[:, index] = model.predict(dataset).values.ravel()
return mode(result, axis=1, keepdims=False).mode.ravel() return mode(result, axis=1, keepdims=False).mode.ravel()
class KDBNew(KDB):
    """KDB classifier that discretizes continuous features before fitting.

    Wraps :class:`KDB` with a supervised MDLP ``MultiDiscretizer``
    (fimdlp): ``fit`` discretizes X using y, records the discrete states
    of every feature, then delegates to ``KDB.fit``; ``predict`` applies
    the already-fitted discretizer to X before delegating.
    """

    def fit(self, X, y, **kwargs):
        """Discretize X and fit the underlying KDB model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Continuous training data.
        y : array-like of shape (n_samples,)
            Class labels used to supervise the discretization.
        **kwargs : dict
            Must include ``features`` (sequence of feature names); a
            ``state_names`` entry is added/overwritten with the discrete
            states observed for each feature.

        Returns
        -------
        self : KDBNew
            The fitted estimator (returned by ``KDB.fit``).
        """
        # n_jobs=1 keeps the per-column discretization order deterministic
        # so the states computed below line up with `features`.
        self.discretizer_ = MultiDiscretizer(n_jobs=1)
        Xd = self.discretizer_.fit_transform(X, y)
        features = kwargs["features"]
        # Declare the full set of discrete states per feature up front so
        # the downstream network knows every value a column can take.
        kwargs["state_names"] = {
            features[i]: np.unique(Xd[:, i]).tolist()
            for i in range(Xd.shape[1])
        }
        return super().fit(Xd, y, **kwargs)

    def predict(self, X, **kwargs):
        """Discretize X with the fitted discretizer, then predict via KDB.

        Fix: forward ``**kwargs`` to the parent ``predict`` — the original
        accepted them but silently dropped them.
        """
        return super().predict(self.discretizer_.transform(X), **kwargs)

    def check_integrity(self, X, state_names, features):
        """Verify each column of X uses exactly the recorded states.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Discretized data to validate.
        state_names : dict
            Mapping of feature name to its expected list of states.
        features : sequence of str
            Feature name for each column of X.

        Raises
        ------
        ValueError
            If any column's observed values differ from the states stored
            for its feature; diagnostics are printed before raising.
        """
        for i in range(X.shape[1]):
            observed = np.unique(X[:, i])
            expected = np.array(state_names[features[i]])
            if not np.array_equal(observed, expected):
                print(
                    "i",
                    i,
                    "features[i]",
                    features[i],
                    "np.unique(X[:, i])",
                    observed,
                    "np.array(state_names[features[i]])",
                    expected,
                )
                raise ValueError("Discretization error")