mirror of https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-16 08:05:57 +00:00
Add test_BoostAODE
@@ -818,6 +818,7 @@ class BoostSPODE(BayesBase):
     def _train(self, kwargs):
         states = dict(state_names=kwargs.get("state_names", []))
+        breakpoint()
         self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
         self.model_.fit(
             self.dataset_,
@@ -834,9 +835,11 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         show_progress=False,
         random_state=None,
         estimator=None,
+        n_estimators=10,
     ):
         self.show_progress = show_progress
         self.random_state = random_state
+        self.n_estimators = n_estimators
         super().__init__(estimator=estimator)

     def _validate_estimator(self) -> None:
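The added `n_estimators` hyperparameter bounds the ensemble size explicitly; the training hunk further down iterates over `range(self.n_estimators)` instead of exhausting the feature list. A hedged usage sketch on synthetic discrete data (it assumes `fit` returns `self` and accepts bare integer arrays, as the new tests below suggest):

```python
import numpy as np
from bayesclass.clfs import BoostAODE

# Synthetic integer-coded features; real usage would pass discretized data
rng = np.random.default_rng(17)
X = rng.integers(0, 3, size=(100, 4))
y = rng.integers(0, 2, size=100)

# n_estimators now bounds the number of boosted SPODEs explicitly
clf = BoostAODE(random_state=17, n_estimators=5).fit(X, y)
print(len(clf.estimators_))  # expected: 5 base learners
```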
@@ -858,6 +861,7 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         self.y_ = y
         self.n_samples_ = X.shape[0]
         self.estimators_ = []
+        self._validate_estimator()
         self._train(kwargs)
         self.fitted_ = True
         # To keep compatiblity with the benchmark platform
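The `self._validate_estimator()` call now runs once per `fit`, before any base learner is cloned, following scikit-learn's `BaseEnsemble` hook: it resolves the `estimator` constructor argument into `self.estimator_`. A minimal sketch of that pattern; `DecisionTreeClassifier` is only a stand-in, since this diff does not show bayesclass's actual default (presumably a BoostSPODE):

```python
from sklearn.ensemble import BaseEnsemble
from sklearn.tree import DecisionTreeClassifier

class MiniEnsemble(BaseEnsemble):
    """Toy ensemble showing the _validate_estimator hook."""

    def __init__(self, estimator=None, n_estimators=10):
        super().__init__(estimator=estimator, n_estimators=n_estimators)

    def _validate_estimator(self) -> None:
        # BaseEnsemble sets self.estimator_ to the user's estimator, or to
        # the default when none was passed (stand-in default here)
        super()._validate_estimator(default=DecisionTreeClassifier())

ens = MiniEnsemble()
ens._validate_estimator()
print(ens.estimator_)  # DecisionTreeClassifier() -- ready for clone() in fit
```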
@@ -868,44 +872,37 @@ class BoostAODE(ClassifierMixin, BaseEnsemble):
         """Build boosted SPODEs"""
         weights = [1 / self.n_samples_] * self.n_samples_
         # Step 0: Set the finish condition
-        pending_features = self.feature_names_in_.copy()
-        exit_condition = len(pending_features) == 0
-        while not exit_condition:
+        for num in range(self.n_estimators):
             # Step 1: Build ranking with mutual information
+            # WARNING, THIS IS WRONG: IT DOES NOT UPDATE THE RANKING WITH THE WEIGHTS
+            # IT WILL ALWAYS RETURN THE SAME FEATURE
             feature = (
-                SelectKBest(k="all")
+                SelectKBest(k=1)
                 .fit(self.X_, self.y_)
                 .get_feature_names_out(self.feature_names_in_)
                 .tolist()[0]
             )
             # Step 2: Build & train spode with the first feature as sparent
-            self._validate_estimator()
             estimator = clone(self.estimator_)
             _args = kwargs.copy()
             _args["sparent"] = feature
             _args["sample_weight"] = weights
             _args["weighted"] = True
-            _args["X"] = self.X_
-            _args["y"] = self.y_
             # Step 2.1: build dataset
             # Step 2.2: Train the model
-            estimator.fit(**_args)
+            estimator.fit(self.X_, self.y_, **_args)
             # Step 3: Compute errors (epsilon sub m & alpha sub m)
             # Explanation in https://medium.datadriveninvestor.com/understanding-adaboost-and-scikit-learns-algorithm-c8d8af5ace10
             y_pred = estimator.predict(self.X_)
             em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
-            am = np.log((1 - em) / em) + np.log(self.n_classes_ - 1)
+            am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
             # Step 3.2: Update weights for next classifier
             weights = [
                 wm * np.exp(am * (ym != y_pred))
                 for wm, ym in zip(weights, self.y_)
             ]
-            print(weights)
             # Step 4: Add the new model
             self.estimators_.append(estimator)
-            # Final step: Update the finish condition
-            pending_features.remove(feature)
-            exit_condition = len(pending_features) == 0
         """
         class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
         feature_edges = [
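For reference, Step 3 and Step 3.2 above implement the multi-class AdaBoost (SAMME) update described in the linked article: epsilon_m is the weighted error of the current SPODE and alpha_m its vote weight. A self-contained sketch with toy values (all data below is illustrative, not from the repository):

```python
import numpy as np

# Toy stand-ins for one boosting round (values illustrative only)
y_true = np.array([0, 1, 2, 1, 0])
y_pred = np.array([0, 2, 2, 1, 0])               # one misclassified sample
weights = np.full(len(y_true), 1 / len(y_true))  # uniform start, as in fit()
n_classes = 3

# Step 3: epsilon_m, the weighted error rate of the current learner
em = np.sum(weights * (y_pred != y_true)) / np.sum(weights)

# alpha_m: the log(n_classes - 1) term is the SAMME correction that keeps
# alpha_m positive whenever the learner beats random guessing
am = np.log((1 - em) / em) + np.log(n_classes - 1)

# Step 3.2: up-weight the misclassified samples for the next round
weights = weights * np.exp(am * (y_pred != y_true))

print(round(em, 3), round(am, 3), np.round(weights, 3))
```

The committed loop keeps the weights unnormalized, which is harmless because `em` divides by `np.sum(weights)`. Note also that this vectorized form pairs each `y_true` element with its own prediction, whereas the hunk's list comprehension compares the scalar `ym` against the whole `y_pred` vector.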
bayesclass/tests/test_BoostAODE.py  (new file, 100 lines)
@@ -0,0 +1,100 @@
+import pytest
+import numpy as np
+from sklearn.preprocessing import KBinsDiscretizer
+from matplotlib.testing.decorators import image_comparison
+from matplotlib.testing.conftest import mpl_test_settings
+
+
+from bayesclass.clfs import BoostAODE
+from .._version import __version__
+
+
+@pytest.fixture
+def clf():
+    return BoostAODE(random_state=17)
+
+
+def test_BoostAODE_default_hyperparameters(data_disc, clf):
+    # Test default values of hyperparameters
+    assert not clf.show_progress
+    assert clf.random_state == 17
+    clf = BoostAODE(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
+    assert clf.class_name_ == "class"
+    assert clf.feature_names_in_ == [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+
+
+# @image_comparison(
+#     baseline_images=["line_dashes_AODE"], remove_text=True, extensions=["png"]
+# )
+# def test_BoostAODE_plot(data_disc, features, clf):
+#     # mpl_test_settings will automatically clean these internal side effects
+#     mpl_test_settings
+#     clf.fit(*data_disc, features=features)
+#     clf.plot("AODE Iris")
+
+
+# def test_BoostAODE_version(clf, features, data_disc):
+#     """Check AODE version."""
+#     assert __version__ == clf.version()
+#     clf.fit(*data_disc, features=features)
+#     assert __version__ == clf.version()
+
+
+# def test_BoostAODE_nodes_edges(clf, data_disc):
+#     assert clf.nodes_edges() == (0, 0)
+#     clf.fit(*data_disc)
+#     assert clf.nodes_leaves() == (20, 28)
+
+
+# def test_BoostAODE_states(clf, data_disc):
+#     assert clf.states_ == 0
+#     clf.fit(*data_disc)
+#     assert clf.states_ == 19
+#     assert clf.depth_ == clf.states_
+
+
+# def test_BoostAODE_classifier(data_disc, clf):
+#     clf.fit(*data_disc)
+#     attribs = [
+#         "feature_names_in_",
+#         "class_name_",
+#         "n_features_in_",
+#         "X_",
+#         "y_",
+#     ]
+#     for attr in attribs:
+#         assert hasattr(clf, attr)
+#     X = data_disc[0]
+#     y = data_disc[1]
+#     y_pred = clf.predict(X)
+#     assert y_pred.shape == (X.shape[0],)
+#     assert sum(y == y_pred) == 146
+
+
+# def test_BoostAODE_wrong_num_features(data_disc, clf):
+#     with pytest.raises(
+#         ValueError,
+#         match="Number of features does not match the number of columns in X",
+#     ):
+#         clf.fit(*data_disc, features=["feature_1", "feature_2"])
+
+
+# def test_BoostAODE_wrong_hyperparam(data_disc, clf):
+#     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
+#         clf.fit(*data_disc, wrong_param="wrong_param")
+
+
+# def test_BoostAODE_error_size_predict(data_disc, clf):
+#     X, y = data_disc
+#     clf.fit(X, y)
+#     with pytest.raises(ValueError):
+#         X_diff_size = np.ones((10, X.shape[1] + 1))
+#         clf.predict(X_diff_size)
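The `data_disc` fixture these tests depend on is not part of this diff; it presumably lives in the suite's conftest.py. Given the `KBinsDiscretizer` import, the generic `feature_0..feature_3` names, and the 146-of-150 match in the commented classifier test, a plausible reconstruction is a discretized Iris dataset. A hypothetical sketch (every detail here is an assumption, not taken from the repository):

```python
import pytest
from sklearn.datasets import load_iris
from sklearn.preprocessing import KBinsDiscretizer

@pytest.fixture
def data_disc():
    # Hypothetical reconstruction: discretize Iris so every feature has a
    # small, finite set of states (what a Bayesian-network CPT needs)
    X, y = load_iris(return_X_y=True)
    disc = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="quantile")
    return disc.fit_transform(X).astype(int), y
```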
@@ -1,5 +1,6 @@
 numpy
 scipy
+pandas
 scikit-learn
 matplotlib
 networkx