Update state_names hyperparameter to fit tests

Add computed nodes to classifiers
This commit is contained in:
Ricardo Montañana Gómez
2023-01-12 12:04:54 +01:00
parent 65d41488cb
commit dd2e0a3b7e
6 changed files with 32 additions and 13 deletions

View File

@@ -1 +1 @@
__version__ = "0.1.0" __version__ = "0.1.1"

View File

@@ -34,7 +34,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
def nodes_leaves(self): def nodes_leaves(self):
"""To keep compatiblity with the benchmark platform""" """To keep compatiblity with the benchmark platform"""
return 0, 0 nodes = len(self.dag_) if hasattr(self, "dag_") else 0
return nodes, 0
def _check_params_fit(self, X, y, expected_args, kwargs): def _check_params_fit(self, X, y, expected_args, kwargs):
"""Check the common parameters passed to fit""" """Check the common parameters passed to fit"""
@@ -57,6 +58,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
raise ValueError( raise ValueError(
"Number of features does not match the number of columns in X" "Number of features does not match the number of columns in X"
) )
self.n_features_in_ = X.shape[1]
return X, y return X, y
def fit(self, X, y, **kwargs): def fit(self, X, y, **kwargs):
@@ -115,11 +117,12 @@ class BayesBase(BaseEstimator, ClassifierMixin):
self.model_ = BayesianNetwork( self.model_ = BayesianNetwork(
self.dag_.edges(), show_progress=self.show_progress self.dag_.edges(), show_progress=self.show_progress
) )
states = dict(state_names=kwargs.pop("state_names", []))
self.model_.fit( self.model_.fit(
self.dataset_, self.dataset_,
estimator=BayesianEstimator, estimator=BayesianEstimator,
prior_type="K2", prior_type="K2",
state_names=kwargs["state_names"], **states,
) )
def predict(self, X): def predict(self, X):
@@ -267,7 +270,7 @@ class KDB(BayesBase):
5.1. Select feature Xmax which is not in S and has the largest value I(Xmax;C). 5.1. Select feature Xmax which is not in S and has the largest value I(Xmax;C).
5.2. Add a node to BN representing Xmax. 5.2. Add a node to BN representing Xmax.
5.3. Add an arc from C to Xmax in BN. 5.3. Add an arc from C to Xmax in BN.
5.4. Add m =min(|S|, k) arcs from m distinct features Xj in S with the highest value for I(Xmax;Xj|C). 5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with the highest value for I(Xmax;Xj|C).
5.5. Add Xmax to S. 5.5. Add Xmax to S.
Compute the conditional probability inferred by the structure of BN by using counts from DB, and output BN. Compute the conditional probability inferred by the structure of BN by using counts from DB, and output BN.
""" """
@@ -331,8 +334,14 @@ class AODE(BayesBase, BaseEnsemble):
expected_args = ["class_name", "features", "state_names"] expected_args = ["class_name", "features", "state_names"]
return self._check_params_fit(X, y, expected_args, kwargs) return self._check_params_fit(X, y, expected_args, kwargs)
def _build(self): def nodes_leaves(self):
"""To keep compatiblity with the benchmark platform"""
nodes = 0
if hasattr(self, "fitted_"):
nodes = sum([len(x) for x in self.models_])
return nodes, 0
def _build(self):
self.dag_ = None self.dag_ = None
def _train(self, kwargs): def _train(self, kwargs):
@@ -349,11 +358,12 @@ class AODE(BayesBase, BaseEnsemble):
model = BayesianNetwork( model = BayesianNetwork(
feature_edges, show_progress=self.show_progress feature_edges, show_progress=self.show_progress
) )
states = dict(state_names=kwargs.pop("state_names", []))
model.fit( model.fit(
self.dataset_, self.dataset_,
estimator=BayesianEstimator, estimator=BayesianEstimator,
prior_type="K2", prior_type="K2",
state_names=kwargs["state_names"], **states,
) )
self.models_.append(model) self.models_.append(model)

View File

@@ -55,8 +55,10 @@ def test_AODE_version(clf):
assert __version__ == clf.version() assert __version__ == clf.version()
def test_AODE_nodes_leaves(clf): def test_AODE_nodes_leaves(clf, data):
assert clf.nodes_leaves() == (0, 0) assert clf.nodes_leaves() == (0, 0)
clf.fit(*data)
assert clf.nodes_leaves() == (20, 0)
def test_AODE_classifier(data, clf): def test_AODE_classifier(data, clf):

View File

@@ -46,8 +46,10 @@ def test_KDB_version(clf):
assert __version__ == clf.version() assert __version__ == clf.version()
def test_KDB_nodes_leaves(clf): def test_KDB_nodes_leaves(clf, data):
assert clf.nodes_leaves() == (0, 0) assert clf.nodes_leaves() == (0, 0)
clf.fit(*data)
assert clf.nodes_leaves() == (5, 0)
def test_KDB_classifier(data, clf): def test_KDB_classifier(data, clf):

View File

@@ -45,8 +45,11 @@ def test_TAN_version(clf):
assert __version__ == clf.version() assert __version__ == clf.version()
def test_TAN_nodes_leaves(clf): def test_TAN_nodes_leaves(clf, data):
assert clf.nodes_leaves() == (0, 0) assert clf.nodes_leaves() == (0, 0)
clf = TAN(random_state=17)
clf.fit(*data, head="random")
assert clf.nodes_leaves() == (5, 0)
def test_TAN_random_head(data): def test_TAN_random_head(data):

View File

@@ -5,10 +5,12 @@ from sklearn.utils.estimator_checks import check_estimator
from bayesclass.clfs import TAN, KDB, AODE from bayesclass.clfs import TAN, KDB, AODE
@pytest.mark.parametrize("estimator", [TAN(), KDB(k=2), AODE()]) @pytest.mark.parametrize("estimators", [TAN(), KDB(k=2), AODE()])
# @pytest.mark.parametrize("estimator", [AODE()]) # @pytest.mark.parametrize("estimators", [TAN()])
def test_all_estimators(estimator):
def test_all_estimators(estimators):
i = 0 i = 0
for estimator, test in check_estimator(estimator, generate_only=True): for estimator, test in check_estimator(estimators, generate_only=True):
print(i := i + 1, test) print(i := i + 1, test)
# test(estimator) # test(estimator)