test: 🧪 Add cycle test in KDB to get 100% coverage

Ricardo Montañana Gómez
2023-01-17 11:33:55 +01:00
parent d08aea4681
commit 8fd796155d
2 changed files with 37 additions and 23 deletions


@@ -260,6 +260,25 @@ class KDB(BayesBase):
        expected_args = ["class_name", "features", "state_names"]
        return self._check_params_fit(X, y, expected_args, kwargs)
    def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
        # Add at most k edges from the already-processed nodes in S_nodes
        # to feature idx, picking parents by decreasing conditional weight.
        n_edges = min(self.k, len(S_nodes))
        cond_w = conditional_weights.copy()
        exit_cond = self.k == 0
        num = 0
        while not exit_cond:
            max_minfo = np.argmax(cond_w[idx, :])
            if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
                try:
                    dag.add_edge(
                        self.features_[max_minfo], self.features_[idx]
                    )
                    num += 1
                except ValueError:
                    # Loops are not allowed
                    pass
            # Exhaust this candidate so argmax moves on to the next one
            cond_w[idx, max_minfo] = -1
            exit_cond = num == n_edges or np.all(cond_w[idx, :] <= 0)
    def _build(self):
        """
        1. For each feature Xi, compute mutual information, I(Xi;C), where C is the class.
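
The `except ValueError` branch in `_add_m_edges` leans on pgmpy refusing any edge that would close a cycle. A minimal sketch of that behavior, with illustrative node names:

    from pgmpy.models import BayesianNetwork

    dag = BayesianNetwork()
    dag.add_edge("a", "b")
    dag.add_edge("b", "c")
    try:
        dag.add_edge("c", "a")  # would close the cycle a -> b -> c -> a
    except ValueError:
        pass  # pgmpy rejects the edge and the DAG stays acyclic
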
@@ -275,28 +294,6 @@ class KDB(BayesBase):
        Compute the conditional probability inferred by the structure of the BN using counts from the DB, and output the BN.
        """
        def add_m_edges(dag, idx, S_nodes, conditional_weights):
            n_edges = min(self.k, len(S_nodes))
            cond_w = conditional_weights.copy()
            exit_cond = self.k == 0
            num = 0
            while not exit_cond:
                max_minfo = np.argmax(cond_w[idx, :])
                if (
                    max_minfo in S_nodes
                    and cond_w[idx, max_minfo] > self.theta
                ):
                    try:
                        dag.add_edge(
                            self.features_[max_minfo], self.features_[idx]
                        )
                        num += 1
                    except ValueError:
                        # Loops are not allowed
                        pass
                cond_w[idx, max_minfo] = -1
                exit_cond = num == n_edges or np.all(cond_w[idx, :] <= 0)
        # 1. get the mutual information between each feature and the class
        mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
        # 2. symmetric matrix where each element represents I(X, Y| class_node)
@@ -318,7 +315,7 @@ class KDB(BayesBase):
            # 5.3
            dag.add_edge(self.class_name_, feature)
            # 5.4
            self._add_m_edges(dag, idx, S_nodes, conditional_weights)
            # 5.5
            S_nodes.append(idx)
        self.dag_ = dag
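
For context, step 1 of `_build` ranks features by their mutual information with the class before `_add_m_edges` is called in step 5.4. A minimal sketch of that ranking on discretized iris data, using the same sklearn helpers the project already imports (variable names here are illustrative):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import mutual_info_classif
    from sklearn.preprocessing import KBinsDiscretizer

    X, y = load_iris(return_X_y=True)
    X = KBinsDiscretizer(n_bins=3, encode="ordinal").fit_transform(X)
    mutual = mutual_info_classif(X, y, discrete_features=True)
    # feature indices, in descending relevance to the class
    print(np.argsort(-mutual))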


@@ -4,6 +4,7 @@ from sklearn.datasets import load_iris
from sklearn.preprocessing import KBinsDiscretizer
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings
from pgmpy.models import BayesianNetwork
from bayesclass.clfs import KDB
@@ -94,3 +95,19 @@ def test_KDB_error_size_predict(data, clf):
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)
def test_KDB_dont_do_cycles():
    clf = KDB(k=4)
    dag = BayesianNetwork()
    clf.features_ = ["feature_0", "feature_1", "feature_2", "feature_3"]
    nodes = list(range(4))
    weights = np.ones((4, 4))
    # Build the complete DAG on four nodes: 6 edges, no room for more
    for idx in range(1, 4):
        dag.add_edge(clf.features_[0], clf.features_[idx])
    dag.add_edge(clf.features_[1], clf.features_[2])
    dag.add_edge(clf.features_[1], clf.features_[3])
    dag.add_edge(clf.features_[2], clf.features_[3])
    # Every edge _add_m_edges proposes would close a cycle, so none is added
    for idx in range(4):
        clf._add_m_edges(dag, idx, nodes, weights)
    assert len(dag.edges()) == 6
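
The expected count follows from the setup: a DAG on four nodes holds at most 4 * 3 / 2 = 6 edges, and the test wires all six up front, so every edge `_add_m_edges` proposes must close a cycle and be swallowed by the `except ValueError` branch. Assuming a standard pytest layout (the test file path is not shown here), the new test can be run on its own with:

    python -m pytest -k test_KDB_dont_do_cycles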