From 80b1ab3699c5a72da560495d17e6369c515cfbb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?=
Date: Wed, 29 Mar 2023 19:05:55 +0200
Subject: [PATCH] Refactor AODE

---
 bayesclass/clfs.py | 71 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/bayesclass/clfs.py b/bayesclass/clfs.py
index 852f127..de78df6 100644
--- a/bayesclass/clfs.py
+++ b/bayesclass/clfs.py
@@ -382,7 +382,7 @@ class KDB(BayesBase):
         self.dag_ = dag
 
 
-def build_spode(features, class_name):
+def build_spodes(features, class_name):
     """Build SPODE estimators (Super Parent One Dependent Estimator)"""
     class_edges = [(class_name, f) for f in features]
     for idx in range(len(features)):
@@ -394,15 +394,17 @@ def build_spode(features, class_name):
         yield model
 
 
-class AODE(BayesBase, BaseEnsemble):
+class AODE(ClassifierMixin, BaseEnsemble):
     def __init__(self, show_progress=False, random_state=None):
-        super().__init__(
+        self.base_model = BayesBase(
             show_progress=show_progress, random_state=random_state
         )
+        self.show_progress = show_progress
+        self.random_state = random_state
 
     def _check_params(self, X, y, kwargs):
         expected_args = ["class_name", "features", "state_names"]
-        return self._check_params_fit(X, y, expected_args, kwargs)
+        return self.base_model._check_params_fit(X, y, expected_args, kwargs)
 
     def nodes_edges(self):
         nodes = 0
@@ -412,6 +414,30 @@ class AODE(BayesBase, BaseEnsemble):
             edges = sum([len(x.edges()) for x in self.models_])
         return nodes, edges
 
+    def version(self):
+        return self.base_model.version()
+
+    def fit(self, X, y, **kwargs):
+        X_, y_ = self._check_params(X, y, kwargs)
+        self.class_name_ = self.base_model.class_name_
+        self.feature_names_in_ = self.base_model.feature_names_in_
+        self.classes_ = self.base_model.classes_
+        self.n_features_in_ = self.base_model.n_features_in_
+        # Store the information needed to build the model
+        self.X_ = X_
+        self.y_ = y_
+        self.dataset_ = pd.DataFrame(
+            self.X_, columns=self.feature_names_in_, dtype=np.int32
+        )
+        self.dataset_[self.class_name_] = self.y_
+        # Train the model
+        self._train(kwargs)
+        self.fitted_ = True
+        # To keep compatibility with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
+        # Return the classifier
+        return self
+
     @property
     def states_(self):
         if hasattr(self, "fitted_"):
@@ -424,13 +450,14 @@ class AODE(BayesBase, BaseEnsemble):
             ) / len(self.models_)
         return 0
 
-    def _build(self):
-        self.dag_ = None
+    @property
+    def depth_(self):
+        return self.states_
 
     def _train(self, kwargs):
         self.models_ = []
         states = dict(state_names=kwargs.pop("state_names", []))
-        for model in build_spode(self.feature_names_in_, self.class_name_):
+        for model in build_spodes(self.feature_names_in_, self.class_name_):
             model.fit(
                 self.dataset_,
                 estimator=BayesianEstimator,
@@ -442,8 +469,8 @@ class AODE(BayesBase, BaseEnsemble):
     def plot(self, title=""):
         warnings.simplefilter("ignore", UserWarning)
         for idx, model in enumerate(self.models_):
-            self.model_ = model
-            super().plot(title=f"{idx} {title}")
+            self.base_model.model_ = model
+            self.base_model.plot(title=f"{idx} {title}")
 
     def predict(self, X: np.ndarray) -> np.ndarray:
         check_is_fitted(self, ["X_", "y_", "fitted_"])
@@ -481,7 +508,6 @@ class TANNew(TAN):
         return self.estimator.fit(X, y, **kwargs)
 
     def predict(self, X):
-        self.plot()
         return self.estimator.predict(X)
 
 
@@ -510,13 +536,28 @@ class KDBNew(KDB):
         return self.estimator.predict(X)
 
 
-class AODENew(AODE):
+class SpodeNew(BayesBase):
+    """This class implements a classifier for the SPODE
+    algorithm similar to TANNew and KDBNew"""
+
+    def __init__(self, random_state, show_progress, structure):
+        super().__init__(
+            random_state=random_state, show_progress=show_progress
+        )
+        self.structure = structure
+
+
+class AODENew:
+    def __init__(self, show_progress=False, random_state=None):
+        self.show_progress = show_progress
+        self.random_state = random_state
+
     def _train(self, kwargs):
         self.estimators_ = []
         states = dict(state_names=kwargs.pop("state_names", []))
         kwargs["states"] = states
-        for model in build_spode(self.feature_names_in_, self.class_name_):
-            estimator = Proposal(model)
+        for spode in build_spodes(self.feature_names_in_, self.class_name_):
+            model = SpodeNew(self.random_state, self.show_progress, spode)
+            estimator = Proposal(model)
             self.estimators_.append(estimator.fit(self.X_, self.y_, **kwargs))
         return self
 
@@ -551,7 +592,8 @@ class Proposal:
 
     def fit(self, X, y, **kwargs):
         # Check parameters
-        super(self.class_type, self.estimator)._check_params(X, y, kwargs)
+        self.estimator._check_params(X, y, kwargs)
+
         # Discretize train data
         self.discretizer = FImdlp(
             n_jobs=1,
@@ -563,7 +605,6 @@ class Proposal:
         kwargs = self.update_kwargs(y, kwargs)
         # Build the model
         super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
-        self.check_integrity("fit", self.Xd)
         # Local discretization based on the model
         features = kwargs["features"]
         # assign indices to feature names
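
Note: after this refactor AODE no longer inherits from BayesBase; it keeps an internal BayesBase helper (self.base_model) for parameter checking, versioning and plotting, and trains one SPODE network per feature via build_spodes. Below is a minimal usage sketch of the refactored ensemble; the toy data, the feature and class names, and the dict shape passed as state_names are assumptions made for illustration only, not part of the patch:

    import numpy as np

    from bayesclass.clfs import AODE

    # Toy discrete dataset: three binary features and a binary class,
    # invented only to exercise the fit/predict API shown in the patch.
    X = np.array([[0, 1, 0], [1, 1, 0], [0, 0, 1], [1, 0, 1]], dtype=np.int32)
    y = np.array([0, 0, 1, 1], dtype=np.int32)
    features = ["f1", "f2", "f3"]

    clf = AODE(show_progress=False, random_state=17)
    clf.fit(
        X,
        y,
        features=features,
        class_name="class",
        state_names={name: [0, 1] for name in features + ["class"]},
    )
    print(clf.nodes_edges())  # (nodes, edges) aggregated over the SPODE models
    print(clf.predict(X))     # ensemble prediction for each row

AODENew follows the same pattern but wraps each SpodeNew estimator in a Proposal, which discretizes the training data with FImdlp before fitting, mirroring the TANNew and KDBNew classifiers.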