Add test_BoostAODE

2025-08-15 15:45:54 +00:00 · 2023-06-18 16:51:38 +02:00
parent a797381c00
commit 212f7e5584
3 changed files with 111 additions and 13 deletions
--- a/bayesclass/clfs.py
+++ b/bayesclass/clfs.py
@@ -818,6 +818,7 @@ class BoostSPODE(BayesBase):

    def _train(self, kwargs):
        states = dict(state_names=kwargs.get("state_names", []))
+        breakpoint()
        self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
        self.model_.fit(
            self.dataset_,
@@ -834,9 +835,11 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
        show_progress=False,
        random_state=None,
        estimator=None,
+        n_estimators=10,
    ):
        self.show_progress = show_progress
        self.random_state = random_state
+        self.n_estimators = n_estimators
        super().__init__(estimator=estimator)

    def _validate_estimator(self) -> None:
@@ -858,6 +861,7 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
        self.y_ = y
        self.n_samples_ = X.shape[0]
        self.estimators_ = []
+        self._validate_estimator()
        self._train(kwargs)
        self.fitted_ = True
        # To keep compatiblity with the benchmark platform
@@ -868,44 +872,37 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
        """Build boosted SPODEs"""
        weights = [1 / self.n_samples_] * self.n_samples_
        # Step 0: Set the finish condition
-        pending_features = self.feature_names_in_.copy()
-        exit_condition = len(pending_features) == 0
-        while not exit_condition:
+        for num in range(self.n_estimators):
            # Step 1: Build ranking with mutual information
+            # FIXME: this ranking ignores the boosting sample weights, so
+            # it selects the same feature on every iteration
            feature = (
-                SelectKBest(k="all")
+                SelectKBest(k=1)
                .fit(self.X_, self.y_)
                .get_feature_names_out(self.feature_names_in_)
                .tolist()[0]
            )
            # Step 2: Build & train spode with the first feature as sparent
-            self._validate_estimator()
            estimator = clone(self.estimator_)
            _args = kwargs.copy()
            _args["sparent"] = feature
            _args["sample_weight"] = weights
            _args["weighted"] = True
-            _args["X"] = self.X_
-            _args["y"] = self.y_
            # Step 2.1: build dataset
            # Step 2.2: Train the model
-            estimator.fit(**_args)
+            estimator.fit(self.X_, self.y_, **_args)
            # Step 3: Compute errors (epsilon sub m & alpha sub m)
            # Explanation in https://medium.datadriveninvestor.com/understanding-adaboost-and-scikit-learns-algorithm-c8d8af5ace10
            y_pred = estimator.predict(self.X_)
            em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
-            am = np.log((1 - em) / em) + np.log(self.n_classes_ - 1)
+            am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
            # Step 3.2: Update weights for next classifier
            weights = [
                wm * np.exp(am * (ym != y_pred))
                for wm, ym in zip(weights, self.y_)
            ]
-            print(weights)
            # Step 4: Add the new model
            self.estimators_.append(estimator)
-            # Final step: Update the finish condition
-            pending_features.remove(feature)
-            exit_condition = len(pending_features) == 0
        """
        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
        feature_edges = [
--- a/bayesclass/tests/test_BoostAODE.py
+++ b/bayesclass/tests/test_BoostAODE.py
@@ -0,0 +1,100 @@
+import pytest
+import numpy as np
+from sklearn.preprocessing import KBinsDiscretizer
+from matplotlib.testing.decorators import image_comparison
+from matplotlib.testing.conftest import mpl_test_settings
+
+
+from bayesclass.clfs import BoostAODE
+from .._version import __version__
+
+
+@pytest.fixture
+def clf():
+    return BoostAODE(random_state=17)
+
+
+def test_BoostAODE_default_hyperparameters(data_disc, clf):
+    # Test default values of hyperparameters
+    assert not clf.show_progress
+    assert clf.random_state == 17
+    clf = BoostAODE(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
+    assert clf.class_name_ == "class"
+    assert clf.feature_names_in_ == [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+
+
+# @image_comparison(
+#     baseline_images=["line_dashes_AODE"], remove_text=True, extensions=["png"]
+# )
+# def test_BoostAODE_plot(data_disc, features, clf):
+#     # mpl_test_settings will automatically clean these internal side effects
+#     mpl_test_settings
+#     clf.fit(*data_disc, features=features)
+#     clf.plot("AODE Iris")
+
+
+# def test_BoostAODE_version(clf, features, data_disc):
+#     """Check AODE version."""
+#     assert __version__ == clf.version()
+#     clf.fit(*data_disc, features=features)
+#     assert __version__ == clf.version()
+
+
+# def test_BoostAODE_nodes_edges(clf, data_disc):
+#     assert clf.nodes_edges() == (0, 0)
+#     clf.fit(*data_disc)
+#     assert clf.nodes_leaves() == (20, 28)
+
+
+# def test_BoostAODE_states(clf, data_disc):
+#     assert clf.states_ == 0
+#     clf.fit(*data_disc)
+#     assert clf.states_ == 19
+#     assert clf.depth_ == clf.states_
+
+
+# def test_BoostAODE_classifier(data_disc, clf):
+#     clf.fit(*data_disc)
+#     attribs = [
+#         "feature_names_in_",
+#         "class_name_",
+#         "n_features_in_",
+#         "X_",
+#         "y_",
+#     ]
+#     for attr in attribs:
+#         assert hasattr(clf, attr)
+#     X = data_disc[0]
+#     y = data_disc[1]
+#     y_pred = clf.predict(X)
+#     assert y_pred.shape == (X.shape[0],)
+#     assert sum(y == y_pred) == 146
+
+
+# def test_BoostAODE_wrong_num_features(data_disc, clf):
+#     with pytest.raises(
+#         ValueError,
+#         match="Number of features does not match the number of columns in X",
+#     ):
+#         clf.fit(*data_disc, features=["feature_1", "feature_2"])
+
+
+# def test_BoostAODE_wrong_hyperparam(data_disc, clf):
+#     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
+#         clf.fit(*data_disc, wrong_param="wrong_param")
+
+
+# def test_BoostAODE_error_size_predict(data_disc, clf):
+#     X, y = data_disc
+#     clf.fit(X, y)
+#     with pytest.raises(ValueError):
+#         X_diff_size = np.ones((10, X.shape[1] + 1))
+#         clf.predict(X_diff_size)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 numpy
 scipy
+pandas
 scikit-learn
 matplotlib
 networkx