Continue with New estimators

2025-08-17 00:26:10 +00:00 · 2023-02-07 18:02:35 +01:00
parent 63a2feef3a
commit 42ac57eb79
1 changed files with 61 additions and 102 deletions
--- a/bayesclass/clfs.py
+++ b/bayesclass/clfs.py
@@ -139,9 +139,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        # Return the classifier
        return self
    def _build(self):
        pass
    def _train(self, kwargs):
        self.model_ = BayesianNetwork(
            self.dag_.edges(), show_progress=self.show_progress
@@ -463,116 +460,54 @@ class AODE(BayesBase, BaseEnsemble):
 class TANNew(TAN):
-    pass
+    def fit(self, X, y, **kwargs):
        self.estimator = Proposal(self)
        return self.estimator.fit(X, y, **kwargs)
    def predict(self, X):
        self.plot()
        return self.estimator.predict(X)
 class KDBNew(KDB):
    def __init__(self, k, theta=0.3, show_progress=False, random_state=None):
        super().__init__(
            k, theta, show_progress=show_progress, random_state=random_state
        )
        self.estimator = Proposal(self)
    def fit(self, X, y, **kwargs):
-        Xd, kwargs = self.estimator.discretize_train(X, y, kwargs)
+        self.estimator = Proposal(self)
-        super().fit(Xd, y, **kwargs)
+        return self.estimator.fit(X, y, **kwargs)
        upgraded, Xd, kwargs = self.estimator.local_discretization(kwargs)
        if upgraded:
            super().fit(Xd, y, **kwargs)
    def predict(self, X):
        self.plot()
        return self.estimator.predict(X)
    # def fit(self, X, y, **kwargs):
    #     self.discretizer_ = FImdlp(n_jobs=1)
    #     Xd = self.discretizer_.fit_transform(X, y)
    #     features = (
    #         kwargs["features"]
    #         if "features" in kwargs
    #         else self.default_feature_names(Xd.shape[1])
    #     )
    #     self.compute_kwargs(Xd, y, kwargs)
    #     # Build the model
    #     super().fit(Xd, y, **kwargs)
    #     self.idx_features_ = dict(list(zip(features, range(len(features)))))
    #     self.proposal(Xd)
    #     return self
    # def predict(self, X):
    #     return super().predict(self.discretizer_.transform(X))
    # def compute_kwargs(self, Xd, y, kwargs):
    #     features = kwargs["features"]
    #     states = {
    #         features[i]: np.unique(Xd[:, i]).tolist()
    #         for i in range(Xd.shape[1])
    #     }
    #     class_name = (
    #         kwargs["class_name"]
    #         if "class_name" in kwargs
    #         else self.default_class_name()
    #     )
    #     states[class_name] = np.unique(y).tolist()
    #     kwargs["state_names"] = states
    #     self.kwargs_ = kwargs
    # def check_integrity(self, X, state_names, features):
    #     for i in range(X.shape[1]):
    #         if not np.array_equal(
    #             np.unique(X[:, i]), np.array(state_names[features[i]])
    #         ):
    #             print(
    #                 "i",
    #                 i,
    #                 "features[i]",
    #                 features[i],
    #                 "np.unique(X[:, i])",
    #                 np.unique(X[:, i]),
    #                 "np.array(state_names[features[i]])",
    #                 np.array(state_names[features[i]]),
    #             )
    #             raise ValueError("Discretization error")
    # def proposal(self, Xd):
    #     """Discretize each feature with its fathers and the class"""
    #     res = Xd.copy()
    #     upgraded = False
    #     for idx, feature in enumerate(self.feature_names_in_):
    #         fathers = self.dag_.get_parents(feature)
    #         if len(fathers) > 1:
    #             # First remove the class name as it will be added later
    #             fathers.remove(self.class_name_)
    #             # Get the fathers indices
    #             features = [self.idx_features_[f] for f in fathers]
    #             # Update the discretization of the feature
    #             res[:, idx] = self.discretizer_.join_fit(
    #                 target=idx, features=features, data=Xd
    #             )
    #             upgraded = True
    #     if upgraded:
    #         self.compute_kwargs(res, self.y_, self.kwargs_)
    #         super().fit(res, self.y_, **self.kwargs_)
 class Proposal:
    def __init__(self, estimator):
        self.estimator = estimator
        self.class_type = estimator.__class__
-    def discretize_train(self, X, y, kwargs):
+    def fit(self, X, y, **kwargs):
-        self.discretizer_ = FImdlp(n_jobs=1)
+        # Discretize train data
-        self.Xd = self.discretizer_.fit_transform(X, y)
+        self.discretizer = FImdlp(n_jobs=1)
-        kwargs = self.compute_kwargs(y, kwargs)
+        self.Xd = self.discretizer.fit_transform(X, y)
-        return self.Xd, kwargs
+        kwargs = self.update_kwargs(y, kwargs)
-
+        # Build the model
-    def local_discretization(self, kwargs):
+        super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
-        features = kwargs["features"]
+        self.check_integrity("f", self.Xd)
-        self.idx_features_ = dict(list(zip(features, range(len(features)))))
+        # # Local discretization based on the model
-        return self._local_discretization(kwargs)
+        # features = kwargs["features"]
        # # assign indices to feature names
        # self.idx_features_ = dict(list(zip(features, range(len(features)))))
        # upgraded, self.Xd = self._local_discretization()
        # if upgraded:
        #     kwargs = self.update_kwargs(y, kwargs)
        #     super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
    def predict(self, X):
-        return self.estimator.predict(self.discretizer_.transform(X))
+        self.check_integrity("p", self.discretizer.transform(X))
        return super(self.class_type, self.estimator).predict(
            self.discretizer.transform(X)
        )
-    def compute_kwargs(self, y, kwargs):
+    def update_kwargs(self, y, kwargs):
        features = (
            kwargs["features"]
            if "features" in kwargs
@@ -589,26 +524,50 @@ class Proposal:
        )
        states[class_name] = np.unique(y).tolist()
        kwargs["state_names"] = states
        self.state_names_ = states
        self.features_ = features
        kwargs["features"] = features
        kwargs["class_name"] = class_name
        return kwargs
-    def _local_discretization(self, kwargs):
+    def _local_discretization(self):
        """Discretize each feature with its fathers and the class"""
        res = self.Xd.copy()
        upgraded = False
        print("-" * 80)
        for idx, feature in enumerate(self.estimator.feature_names_in_):
            fathers = self.estimator.dag_.get_parents(feature)
            if len(fathers) > 1:
                print(
                    "Discretizing " + feature + " with " + str(fathers),
                    end=" ",
                )
                # First remove the class name as it will be added later
                fathers.remove(self.estimator.class_name_)
                # Get the fathers indices
                features = [self.idx_features_[f] for f in fathers]
                # Update the discretization of the feature
-                res[:, idx] = self.discretizer_.join_fit(
+                res[:, idx] = self.discretizer.join_fit(
                    target=idx, features=features, data=self.Xd
                )
                print(self.discretizer.y_join[:5])
                upgraded = True
-        if upgraded:
+        return upgraded, res
-            kwargs = self.compute_kwargs(res, self.estimator.y_, kwargs)
+
-        return upgraded, res, kwargs
+    def check_integrity(self, source, X):
        print(f"Checking integrity of {source} data")
        for i in range(X.shape[1]):
            if not set(np.unique(X[:, i]).tolist()).issubset(
                set(self.state_names_[self.features_[i]])
            ):
                print(
                    "i",
                    i,
                    "features[i]",
                    self.features_[i],
                    "np.unique(X[:, i])",
                    np.unique(X[:, i]),
                    "np.array(state_names[features[i]])",
                    np.array(self.state_names_[self.features_[i]]),
                )
                raise ValueError("Discretization error")