From 42ac57eb79bc0cd5600a2b98035e77c280b9334d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Tue, 7 Feb 2023 18:02:35 +0100
Subject: [PATCH] Continue with New estimators

---
 bayesclass/clfs.py | 163 +++++++++++++++++----------------------------
 1 file changed, 61 insertions(+), 102 deletions(-)

diff --git a/bayesclass/clfs.py b/bayesclass/clfs.py
index 4e5808e..b9cf8de 100644
--- a/bayesclass/clfs.py
+++ b/bayesclass/clfs.py
@@ -139,9 +139,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         # Return the classifier
         return self
 
-    def _build(self):
-        pass
-
     def _train(self, kwargs):
         self.model_ = BayesianNetwork(
             self.dag_.edges(), show_progress=self.show_progress
@@ -463,116 +460,54 @@ class AODE(BayesBase, BaseEnsemble):
 
 
 class TANNew(TAN):
-    pass
+    def fit(self, X, y, **kwargs):
+        self.estimator = Proposal(self)
+        return self.estimator.fit(X, y, **kwargs)
+
+    def predict(self, X):
+        self.plot()
+        return self.estimator.predict(X)
 
 
 class KDBNew(KDB):
-    def __init__(self, k, theta=0.3, show_progress=False, random_state=None):
-        super().__init__(
-            k, theta, show_progress=show_progress, random_state=random_state
-        )
-        self.estimator = Proposal(self)
-
     def fit(self, X, y, **kwargs):
-        Xd, kwargs = self.estimator.discretize_train(X, y, kwargs)
-        super().fit(Xd, y, **kwargs)
-        upgraded, Xd, kwargs = self.estimator.local_discretization(kwargs)
-        if upgraded:
-            super().fit(Xd, y, **kwargs)
+        self.estimator = Proposal(self)
+        return self.estimator.fit(X, y, **kwargs)
 
     def predict(self, X):
+        self.plot()
         return self.estimator.predict(X)
 
-    # def fit(self, X, y, **kwargs):
-    #     self.discretizer_ = FImdlp(n_jobs=1)
-    #     Xd = self.discretizer_.fit_transform(X, y)
-    #     features = (
-    #         kwargs["features"]
-    #         if "features" in kwargs
-    #         else self.default_feature_names(Xd.shape[1])
-    #     )
-    #     self.compute_kwargs(Xd, y, kwargs)
-    #     # Build the model
-    #     super().fit(Xd, y, **kwargs)
-    #     self.idx_features_ = dict(list(zip(features, range(len(features)))))
-    #     self.proposal(Xd)
-    #     return self
-
-    # def predict(self, X):
-    #     return super().predict(self.discretizer_.transform(X))
-
-    # def compute_kwargs(self, Xd, y, kwargs):
-    #     features = kwargs["features"]
-    #     states = {
-    #         features[i]: np.unique(Xd[:, i]).tolist()
-    #         for i in range(Xd.shape[1])
-    #     }
-    #     class_name = (
-    #         kwargs["class_name"]
-    #         if "class_name" in kwargs
-    #         else self.default_class_name()
-    #     )
-    #     states[class_name] = np.unique(y).tolist()
-    #     kwargs["state_names"] = states
-    #     self.kwargs_ = kwargs
-
-    # def check_integrity(self, X, state_names, features):
-    #     for i in range(X.shape[1]):
-    #         if not np.array_equal(
-    #             np.unique(X[:, i]), np.array(state_names[features[i]])
-    #         ):
-    #             print(
-    #                 "i",
-    #                 i,
-    #                 "features[i]",
-    #                 features[i],
-    #                 "np.unique(X[:, i])",
-    #                 np.unique(X[:, i]),
-    #                 "np.array(state_names[features[i]])",
-    #                 np.array(state_names[features[i]]),
-    #             )
-    #             raise ValueError("Discretization error")
-
-    # def proposal(self, Xd):
-    #     """Discretize each feature with its fathers and the class"""
-    #     res = Xd.copy()
-    #     upgraded = False
-    #     for idx, feature in enumerate(self.feature_names_in_):
-    #         fathers = self.dag_.get_parents(feature)
-    #         if len(fathers) > 1:
-    #             # First remove the class name as it will be added later
-    #             fathers.remove(self.class_name_)
-    #             # Get the fathers indices
-    #             features = [self.idx_features_[f] for f in fathers]
-    #             # Update the discretization of the feature
-    #             res[:, idx] = self.discretizer_.join_fit(
-    #                 target=idx, features=features, data=Xd
-    #             )
-    #             upgraded = True
-    #     if upgraded:
-    #         self.compute_kwargs(res, self.y_, self.kwargs_)
-    #         super().fit(res, self.y_, **self.kwargs_)
-
 
 class Proposal:
     def __init__(self, estimator):
         self.estimator = estimator
+        self.class_type = estimator.__class__
 
-    def discretize_train(self, X, y, kwargs):
-        self.discretizer_ = FImdlp(n_jobs=1)
-        self.Xd = self.discretizer_.fit_transform(X, y)
-        kwargs = self.compute_kwargs(y, kwargs)
-        return self.Xd, kwargs
-
-    def local_discretization(self, kwargs):
-        features = kwargs["features"]
-        self.idx_features_ = dict(list(zip(features, range(len(features)))))
-        return self._local_discretization(kwargs)
+    def fit(self, X, y, **kwargs):
+        """Discretize X, fit the wrapped estimator on it and return it."""
+        self.discretizer = FImdlp(n_jobs=1)
+        self.Xd = self.discretizer.fit_transform(X, y)
+        kwargs = self.update_kwargs(y, kwargs)
+        # Parent class fit: the estimator's own fit delegates back to here
+        super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
+        self.check_integrity("f", self.Xd)
+        # TODO: local discretization based on the model, disabled for now
+        # features = kwargs["features"]
+        # self.idx_features_ = dict(list(zip(features, range(len(features)))))
+        # upgraded, self.Xd = self._local_discretization()
+        # if upgraded:
+        #     kwargs = self.update_kwargs(y, kwargs)
+        #     super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
+        return self.estimator
 
     def predict(self, X):
-        return self.estimator.predict(self.discretizer_.transform(X))
+        # Discretize X only once; it feeds both the check and the prediction
+        Xd = self.discretizer.transform(X)
+        self.check_integrity("p", Xd)
+        return super(self.class_type, self.estimator).predict(Xd)
 
-    def compute_kwargs(self, y, kwargs):
+    def update_kwargs(self, y, kwargs):
         features = (
             kwargs["features"]
             if "features" in kwargs
@@ -589,26 +524,50 @@ class Proposal:
         )
         states[class_name] = np.unique(y).tolist()
         kwargs["state_names"] = states
+        self.state_names_ = states
+        self.features_ = features
         kwargs["features"] = features
         kwargs["class_name"] = class_name
         return kwargs
 
-    def _local_discretization(self, kwargs):
+    def _local_discretization(self):
         """Discretize each feature with its fathers and the class"""
         res = self.Xd.copy()
         upgraded = False
+        print("-" * 80)
         for idx, feature in enumerate(self.estimator.feature_names_in_):
             fathers = self.estimator.dag_.get_parents(feature)
             if len(fathers) > 1:
+                print(
+                    "Discretizing " + feature + " with " + str(fathers),
+                    end=" ",
+                )
                 # First remove the class name as it will be added later
                 fathers.remove(self.estimator.class_name_)
                 # Get the fathers indices
                 features = [self.idx_features_[f] for f in fathers]
                 # Update the discretization of the feature
-                res[:, idx] = self.discretizer_.join_fit(
+                res[:, idx] = self.discretizer.join_fit(
                     target=idx, features=features, data=self.Xd
                 )
+                print(self.discretizer.y_join[:5])
                 upgraded = True
-        if upgraded:
-            kwargs = self.compute_kwargs(res, self.estimator.y_, kwargs)
-        return upgraded, res, kwargs
+        return upgraded, res
+
+    def check_integrity(self, source, X):
+        """Raise ValueError if a column of X holds a value not in its states.
+
+        source is only used to label the progress message that is printed.
+        """
+        print(f"Checking integrity of {source} data")
+        for i in range(X.shape[1]):
+            found = np.unique(X[:, i])
+            feature = self.features_[i]
+            known = self.state_names_[feature]
+            if set(found.tolist()).issubset(set(known)):
+                continue
+            # Dump the offending column and its allowed states, then fail
+            print("i", i, "features[i]", feature, end=" ")
+            print("np.unique(X[:, i])", found, end=" ")
+            print("np.array(state_names[features[i]])", np.array(known))
+            raise ValueError("Discretization error")