mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-15 15:45:54 +00:00
Add KDBNew and TANNew tests
This commit is contained in:
@@ -17,4 +17,5 @@ __all__ = [
|
|||||||
"KDB",
|
"KDB",
|
||||||
"AODE",
|
"AODE",
|
||||||
"KDBNew",
|
"KDBNew",
|
||||||
|
"AODENew",
|
||||||
]
|
]
|
||||||
|
@@ -460,6 +460,21 @@ class AODE(BayesBase, BaseEnsemble):
|
|||||||
|
|
||||||
|
|
||||||
class TANNew(TAN):
|
class TANNew(TAN):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
show_progress=False,
|
||||||
|
random_state=None,
|
||||||
|
discretizer_depth=1e6,
|
||||||
|
discretizer_length=3,
|
||||||
|
discretizer_cuts=0,
|
||||||
|
):
|
||||||
|
self.discretizer_depth = discretizer_depth
|
||||||
|
self.discretizer_length = discretizer_length
|
||||||
|
self.discretizer_cuts = discretizer_cuts
|
||||||
|
super().__init__(
|
||||||
|
show_progress=show_progress, random_state=random_state
|
||||||
|
)
|
||||||
|
|
||||||
def fit(self, X, y, **kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
self.estimator = Proposal(self)
|
self.estimator = Proposal(self)
|
||||||
return self.estimator.fit(X, y, **kwargs)
|
return self.estimator.fit(X, y, **kwargs)
|
||||||
@@ -470,6 +485,22 @@ class TANNew(TAN):
|
|||||||
|
|
||||||
|
|
||||||
class KDBNew(KDB):
|
class KDBNew(KDB):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
k=2,
|
||||||
|
show_progress=False,
|
||||||
|
random_state=None,
|
||||||
|
discretizer_depth=1e6,
|
||||||
|
discretizer_length=3,
|
||||||
|
discretizer_cuts=0,
|
||||||
|
):
|
||||||
|
self.discretizer_depth = discretizer_depth
|
||||||
|
self.discretizer_length = discretizer_length
|
||||||
|
self.discretizer_cuts = discretizer_cuts
|
||||||
|
super().__init__(
|
||||||
|
k=k, show_progress=show_progress, random_state=random_state
|
||||||
|
)
|
||||||
|
|
||||||
def fit(self, X, y, **kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
self.estimator = Proposal(self)
|
self.estimator = Proposal(self)
|
||||||
return self.estimator.fit(X, y, **kwargs)
|
return self.estimator.fit(X, y, **kwargs)
|
||||||
@@ -478,14 +509,25 @@ class KDBNew(KDB):
|
|||||||
return self.estimator.predict(X)
|
return self.estimator.predict(X)
|
||||||
|
|
||||||
|
|
||||||
|
class AODENew(AODE):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Proposal:
|
class Proposal:
|
||||||
def __init__(self, estimator):
|
def __init__(self, estimator):
|
||||||
self.estimator = estimator
|
self.estimator = estimator
|
||||||
self.class_type = estimator.__class__
|
self.class_type = estimator.__class__
|
||||||
|
|
||||||
def fit(self, X, y, **kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
|
# Check parameters
|
||||||
|
super(self.class_type, self.estimator)._check_params(X, y, kwargs)
|
||||||
# Discretize train data
|
# Discretize train data
|
||||||
self.discretizer = FImdlp(n_jobs=1)
|
self.discretizer = FImdlp(
|
||||||
|
n_jobs=1,
|
||||||
|
max_depth=self.estimator.discretizer_depth,
|
||||||
|
min_length=self.estimator.discretizer_length,
|
||||||
|
max_cuts=self.estimator.discretizer_cuts,
|
||||||
|
)
|
||||||
self.Xd = self.discretizer.fit_transform(X, y)
|
self.Xd = self.discretizer.fit_transform(X, y)
|
||||||
kwargs = self.update_kwargs(y, kwargs)
|
kwargs = self.update_kwargs(y, kwargs)
|
||||||
# Build the model
|
# Build the model
|
||||||
|
Binary file not shown.
After Width: | Height: | Size: 50 KiB |
Binary file not shown.
After Width: | Height: | Size: 41 KiB |
@@ -55,7 +55,6 @@ def test_KDB_nodes_edges(clf, data):
|
|||||||
|
|
||||||
def test_KDB_states(clf, data):
|
def test_KDB_states(clf, data):
|
||||||
assert clf.states_ == 0
|
assert clf.states_ == 0
|
||||||
clf = KDB(k=3, random_state=17)
|
|
||||||
clf.fit(*data)
|
clf.fit(*data)
|
||||||
assert clf.states_ == 23
|
assert clf.states_ == 23
|
||||||
assert clf.depth_ == clf.states_
|
assert clf.depth_ == clf.states_
|
||||||
|
127
bayesclass/tests/test_KDBNew.py
Normal file
127
bayesclass/tests/test_KDBNew.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
import pytest
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.datasets import load_iris
|
||||||
|
from sklearn.preprocessing import KBinsDiscretizer
|
||||||
|
from matplotlib.testing.decorators import image_comparison
|
||||||
|
from matplotlib.testing.conftest import mpl_test_settings
|
||||||
|
from pgmpy.models import BayesianNetwork
|
||||||
|
|
||||||
|
|
||||||
|
from bayesclass.clfs import KDBNew
|
||||||
|
from .._version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def data():
|
||||||
|
X, y = load_iris(return_X_y=True)
|
||||||
|
enc = KBinsDiscretizer(encode="ordinal")
|
||||||
|
return enc.fit_transform(X), y
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def clf():
|
||||||
|
return KDBNew(k=3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_default_hyperparameters(data, clf):
|
||||||
|
# Test default values of hyperparameters
|
||||||
|
assert not clf.show_progress
|
||||||
|
assert clf.random_state is None
|
||||||
|
assert clf.theta == 0.03
|
||||||
|
clf = KDBNew(show_progress=True, random_state=17, k=3)
|
||||||
|
assert clf.show_progress
|
||||||
|
assert clf.random_state == 17
|
||||||
|
assert clf.k == 3
|
||||||
|
clf.fit(*data)
|
||||||
|
assert clf.class_name_ == "class"
|
||||||
|
assert clf.feature_names_in_ == [
|
||||||
|
"feature_0",
|
||||||
|
"feature_1",
|
||||||
|
"feature_2",
|
||||||
|
"feature_3",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_version(clf):
|
||||||
|
"""Check KDBNew version."""
|
||||||
|
assert __version__ == clf.version()
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_nodes_edges(clf, data):
|
||||||
|
assert clf.nodes_edges() == (0, 0)
|
||||||
|
clf.fit(*data)
|
||||||
|
assert clf.nodes_leaves() == (5, 10)
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_states(clf, data):
|
||||||
|
assert clf.states_ == 0
|
||||||
|
clf.fit(*data)
|
||||||
|
assert clf.states_ == 23
|
||||||
|
assert clf.depth_ == clf.states_
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_classifier(data, clf):
|
||||||
|
clf.fit(*data)
|
||||||
|
attribs = ["classes_", "X_", "y_", "feature_names_in_", "class_name_"]
|
||||||
|
for attr in attribs:
|
||||||
|
assert hasattr(clf, attr)
|
||||||
|
X = data[0]
|
||||||
|
y = data[1]
|
||||||
|
y_pred = clf.predict(X)
|
||||||
|
assert y_pred.shape == (X.shape[0],)
|
||||||
|
assert sum(y == y_pred) == 148
|
||||||
|
|
||||||
|
|
||||||
|
@image_comparison(
|
||||||
|
baseline_images=["line_dashes_KDBNew"],
|
||||||
|
remove_text=True,
|
||||||
|
extensions=["png"],
|
||||||
|
)
|
||||||
|
def test_KDBNew_plot(data, clf):
|
||||||
|
# mpl_test_settings will automatically clean these internal side effects
|
||||||
|
mpl_test_settings
|
||||||
|
dataset = load_iris(as_frame=True)
|
||||||
|
clf.fit(*data, features=dataset["feature_names"])
|
||||||
|
clf.plot("KDBNew Iris")
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_wrong_num_features(data, clf):
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError,
|
||||||
|
match="Number of features does not match the number of columns in X",
|
||||||
|
):
|
||||||
|
clf.fit(*data, features=["feature_1", "feature_2"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_wrong_hyperparam(data, clf):
|
||||||
|
with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
|
||||||
|
clf.fit(*data, wrong_param="wrong_param")
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_error_size_predict(data, clf):
|
||||||
|
X, y = data
|
||||||
|
clf.fit(X, y)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
X_diff_size = np.ones((10, X.shape[1] + 1))
|
||||||
|
clf.predict(X_diff_size)
|
||||||
|
|
||||||
|
|
||||||
|
def test_KDBNew_dont_do_cycles():
|
||||||
|
clf = KDBNew(k=4)
|
||||||
|
dag = BayesianNetwork()
|
||||||
|
clf.feature_names_in_ = [
|
||||||
|
"feature_0",
|
||||||
|
"feature_1",
|
||||||
|
"feature_2",
|
||||||
|
"feature_3",
|
||||||
|
]
|
||||||
|
nodes = list(range(4))
|
||||||
|
weights = np.ones((4, 4))
|
||||||
|
for idx in range(1, 4):
|
||||||
|
dag.add_edge(clf.feature_names_in_[0], clf.feature_names_in_[idx])
|
||||||
|
dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[2])
|
||||||
|
dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[3])
|
||||||
|
dag.add_edge(clf.feature_names_in_[2], clf.feature_names_in_[3])
|
||||||
|
for idx in range(4):
|
||||||
|
clf._add_m_edges(dag, idx, nodes, weights)
|
||||||
|
assert len(dag.edges()) == 6
|
@@ -19,16 +19,16 @@ def data():
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def clf():
|
def clf():
|
||||||
return TAN()
|
return TAN(random_state=17)
|
||||||
|
|
||||||
|
|
||||||
def test_TAN_default_hyperparameters(data, clf):
|
def test_TAN_default_hyperparameters(data, clf):
|
||||||
# Test default values of hyperparameters
|
# Test default values of hyperparameters
|
||||||
assert not clf.show_progress
|
assert not clf.show_progress
|
||||||
assert clf.random_state is None
|
|
||||||
clf = TAN(show_progress=True, random_state=17)
|
|
||||||
assert clf.show_progress
|
|
||||||
assert clf.random_state == 17
|
assert clf.random_state == 17
|
||||||
|
clf = TAN(show_progress=True)
|
||||||
|
assert clf.show_progress
|
||||||
|
assert clf.random_state is None
|
||||||
clf.fit(*data)
|
clf.fit(*data)
|
||||||
assert clf.head_ == 0
|
assert clf.head_ == 0
|
||||||
assert clf.class_name_ == "class"
|
assert clf.class_name_ == "class"
|
||||||
@@ -47,21 +47,18 @@ def test_TAN_version(clf):
|
|||||||
|
|
||||||
def test_TAN_nodes_edges(clf, data):
|
def test_TAN_nodes_edges(clf, data):
|
||||||
assert clf.nodes_edges() == (0, 0)
|
assert clf.nodes_edges() == (0, 0)
|
||||||
clf = TAN(random_state=17)
|
|
||||||
clf.fit(*data, head="random")
|
clf.fit(*data, head="random")
|
||||||
assert clf.nodes_leaves() == (5, 7)
|
assert clf.nodes_leaves() == (5, 7)
|
||||||
|
|
||||||
|
|
||||||
def test_TAN_states(clf, data):
|
def test_TAN_states(clf, data):
|
||||||
assert clf.states_ == 0
|
assert clf.states_ == 0
|
||||||
clf = TAN(random_state=17)
|
|
||||||
clf.fit(*data)
|
clf.fit(*data)
|
||||||
assert clf.states_ == 23
|
assert clf.states_ == 23
|
||||||
assert clf.depth_ == clf.states_
|
assert clf.depth_ == clf.states_
|
||||||
|
|
||||||
|
|
||||||
def test_TAN_random_head(data):
|
def test_TAN_random_head(clf, data):
|
||||||
clf = TAN(random_state=17)
|
|
||||||
clf.fit(*data, head="random")
|
clf.fit(*data, head="random")
|
||||||
assert clf.head_ == 3
|
assert clf.head_ == 3
|
||||||
|
|
||||||
|
121
bayesclass/tests/test_TANNew.py
Normal file
121
bayesclass/tests/test_TANNew.py
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
import pytest
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.datasets import load_iris
|
||||||
|
from sklearn.preprocessing import KBinsDiscretizer
|
||||||
|
from matplotlib.testing.decorators import image_comparison
|
||||||
|
from matplotlib.testing.conftest import mpl_test_settings
|
||||||
|
|
||||||
|
|
||||||
|
from bayesclass.clfs import TANNew
|
||||||
|
from .._version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def data():
|
||||||
|
X, y = load_iris(return_X_y=True)
|
||||||
|
enc = KBinsDiscretizer(encode="ordinal")
|
||||||
|
return enc.fit_transform(X), y
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def clf():
|
||||||
|
return TANNew(random_state=17)
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_default_hyperparameters(data, clf):
|
||||||
|
# Test default values of hyperparameters
|
||||||
|
assert not clf.show_progress
|
||||||
|
assert clf.random_state == 17
|
||||||
|
clf = TANNew(show_progress=True)
|
||||||
|
assert clf.show_progress
|
||||||
|
assert clf.random_state is None
|
||||||
|
clf.fit(*data)
|
||||||
|
assert clf.head_ == 0
|
||||||
|
assert clf.class_name_ == "class"
|
||||||
|
assert clf.feature_names_in_ == [
|
||||||
|
"feature_0",
|
||||||
|
"feature_1",
|
||||||
|
"feature_2",
|
||||||
|
"feature_3",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_version(clf):
|
||||||
|
"""Check TANNew version."""
|
||||||
|
assert __version__ == clf.version()
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_nodes_edges(clf, data):
|
||||||
|
assert clf.nodes_edges() == (0, 0)
|
||||||
|
clf.fit(*data, head="random")
|
||||||
|
assert clf.nodes_leaves() == (5, 7)
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_states(clf, data):
|
||||||
|
assert clf.states_ == 0
|
||||||
|
clf.fit(*data)
|
||||||
|
assert clf.states_ == 22
|
||||||
|
assert clf.depth_ == clf.states_
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_random_head(clf, data):
|
||||||
|
clf.fit(*data, head="random")
|
||||||
|
assert clf.head_ == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_classifier(data, clf):
|
||||||
|
clf.fit(*data)
|
||||||
|
attribs = [
|
||||||
|
"classes_",
|
||||||
|
"X_",
|
||||||
|
"y_",
|
||||||
|
"head_",
|
||||||
|
"feature_names_in_",
|
||||||
|
"class_name_",
|
||||||
|
]
|
||||||
|
for attr in attribs:
|
||||||
|
assert hasattr(clf, attr)
|
||||||
|
X = data[0]
|
||||||
|
y = data[1]
|
||||||
|
y_pred = clf.predict(X)
|
||||||
|
assert y_pred.shape == (X.shape[0],)
|
||||||
|
assert sum(y == y_pred) == 145
|
||||||
|
|
||||||
|
|
||||||
|
@image_comparison(
|
||||||
|
baseline_images=["line_dashes_TANNew"],
|
||||||
|
remove_text=True,
|
||||||
|
extensions=["png"],
|
||||||
|
)
|
||||||
|
def test_TANNew_plot(data, clf):
|
||||||
|
# mpl_test_settings will automatically clean these internal side effects
|
||||||
|
mpl_test_settings
|
||||||
|
dataset = load_iris(as_frame=True)
|
||||||
|
clf.fit(*data, features=dataset["feature_names"], head=0)
|
||||||
|
clf.plot("TANNew Iris head=0")
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_wrong_num_features(data, clf):
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError,
|
||||||
|
match="Number of features does not match the number of columns in X",
|
||||||
|
):
|
||||||
|
clf.fit(*data, features=["feature_1", "feature_2"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_wrong_hyperparam(data, clf):
|
||||||
|
with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
|
||||||
|
clf.fit(*data, wrong_param="wrong_param")
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_head_out_of_range(data, clf):
|
||||||
|
with pytest.raises(ValueError, match="Head index out of range"):
|
||||||
|
clf.fit(*data, head=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_TANNew_error_size_predict(data, clf):
|
||||||
|
X, y = data
|
||||||
|
clf.fit(X, y)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
X_diff_size = np.ones((10, X.shape[1] + 1))
|
||||||
|
clf.predict(X_diff_size)
|
@@ -25,6 +25,7 @@ dependencies = [
|
|||||||
"pgmpy",
|
"pgmpy",
|
||||||
"networkx",
|
"networkx",
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
|
"fimdlp",
|
||||||
]
|
]
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
|
Reference in New Issue
Block a user