mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-17 16:45:54 +00:00
Compare commits
39 Commits
Author | SHA1 | Date
---|---|---
| 212f7e5584 |
| a797381c00 |
| 3812d271e5 |
| 923a06b3be |
| c906d6a361 |
| f0f7c43944 |
| f9b35f61f0 |
| 74cd8a6aa2 |
| 9843f5f8db |
| c6390d9da9 |
| c9afafbf60 |
| 3af05c9511 |
| 80b1ab3699 |
| 5a772b0bca |
| ea251aca05 |
| 7b66097728 |
| ea8c5b805e |
| 2ffc06b232 |
| a5244f1c7f |
| 42ac57eb79 |
| 63a2feef3a |
| 3e049ac89d |
| 2a6547c71d |
| de45a94c9b |
| 9019b878f0 |
| bba9255605 |
| 41ca6fad5e |
| c88591dd64 |
| 8089e4fd57 |
| 6f9488f281 |
| e837c6cef7 |
| a4edc74e8d |
| 4d416959ad |
| bdd3f483d9 |
| 8fd796155d |
| d08aea4681 |
| dd2e0a3b7e |
| 65d41488cb |
| e7300366ca |
Makefile (6 additions)

@@ -37,6 +37,12 @@ doc-clean: ## Update documentation
 
 audit: ## Audit pip
 	pip-audit
 
+version:
+	@echo "Current Python version .....: $(shell python --version)"
+	@echo "Current Bayesclass version .: $(shell python -c "from bayesclass import _version; print(_version.__version__)")"
+	@echo "Installed Bayesclass version: $(shell pip show bayesclass | grep Version | cut -d' ' -f2)"
+	@echo "Installed pgmpy version ....: $(shell pip show pgmpy | grep Version | cut -d' ' -f2)"
+
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
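The new target is run like any other make goal; it simply reports the interpreter and package versions found in the current environment:

    make version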
bayesclass/__init__.py

@@ -16,4 +16,8 @@ __all__ = [
     "TAN",
     "KDB",
     "AODE",
+    "KDBNew",
+    "AODENew",
+    "BoostAODE",
+    "BoostSPODE",
 ]
bayesclass/_version.py

@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.1.1"
bayesclass/clfs.py

@@ -1,8 +1,10 @@
 import random
+import warnings
 import numpy as np
 import pandas as pd
 from scipy.stats import mode
-from sklearn.base import ClassifierMixin, BaseEstimator
+from sklearn.base import clone, ClassifierMixin, BaseEstimator
+from sklearn.feature_selection import SelectKBest
 from sklearn.ensemble import BaseEnsemble
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from sklearn.utils.multiclass import unique_labels
@@ -10,10 +12,16 @@ from sklearn.feature_selection import mutual_info_classif
 import networkx as nx
 from pgmpy.estimators import TreeSearch, BayesianEstimator
 from pgmpy.models import BayesianNetwork
+from pgmpy.base import DAG
 import matplotlib.pyplot as plt
+from fimdlp.mdlp import FImdlp
 from ._version import __version__
 
 
+def default_feature_names(num_features):
+    return [f"feature_{i}" for i in range(num_features)]
+
+
 class BayesBase(BaseEstimator, ClassifierMixin):
     def __init__(self, random_state, show_progress):
         self.random_state = random_state
@@ -23,7 +31,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         return {
             "requires_positive_X": True,
             "requires_positive_y": True,
-            "preserve_dtype": [np.int64, np.int32],
+            "preserve_dtype": [np.int32, np.int64],
             "requires_y": True,
         }
 
@@ -32,35 +40,68 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         """Return the version of the package."""
         return __version__
 
-    def nodes_leaves(self):
-        """To keep compatiblity with the benchmark platform"""
+    def nodes_edges(self):
+        if hasattr(self, "dag_"):
+            return len(self.dag_), len(self.dag_.edges())
         return 0, 0
 
+    @staticmethod
+    def default_class_name():
+        return "class"
+
+    def build_dataset(self):
+        self.dataset_ = pd.DataFrame(
+            self.X_, columns=self.feature_names_in_, dtype=np.int32
+        )
+        self.dataset_[self.class_name_] = self.y_
+        if self.sample_weight_ is not None:
+            self.dataset_["_weight"] = self.sample_weight_
+
     def _check_params_fit(self, X, y, expected_args, kwargs):
         """Check the common parameters passed to fit"""
         # Check that X and y have correct shape
         X, y = check_X_y(X, y)
+        X = self._validate_data(X, reset=True)
         # Store the classes seen during fit
         self.classes_ = unique_labels(y)
         self.n_classes_ = self.classes_.shape[0]
         # Default values
-        self.class_name_ = "class"
-        self.features_ = [f"feature_{i}" for i in range(X.shape[1])]
+        self.weighted_ = False
+        self.sample_weight_ = None
+        self.class_name_ = self.default_class_name()
+        self.features_ = default_feature_names(X.shape[1])
         for key, value in kwargs.items():
             if key in expected_args:
                 setattr(self, f"{key}_", value)
             else:
                 raise ValueError(f"Unexpected argument: {key}")
+        self.feature_names_in_ = self.features_
+        # used for local discretization
+        self.indexed_features_ = {
+            feature: i for i, feature in enumerate(self.features_)
+        }
         if self.random_state is not None:
             random.seed(self.random_state)
-        if len(self.features_) != X.shape[1]:
+        if len(self.feature_names_in_) != X.shape[1]:
             raise ValueError(
                 "Number of features does not match the number of columns in X"
             )
+        self.n_features_in_ = X.shape[1]
         return X, y
 
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum([len(item) for _, item in self.model_.states.items()])
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
     def fit(self, X, y, **kwargs):
-        """A reference implementation of a fitting function for a classifier.
+        """Fit classifier
 
         Parameters
         ----------
@@ -97,28 +138,43 @@
         >>> model.fit(train_data, train_y, features=features, class_name='E')
         TAN(random_state=17)
         """
-        X_, y_ = self._check_params(X, y, kwargs)
+        self.X_, self.y_ = self._check_params(X, y, kwargs)
         # Store the information needed to build the model
-        self.X_ = X_
-        self.y_ = y_
-        self.dataset_ = pd.DataFrame(self.X_, columns=self.features_)
-        self.dataset_[self.class_name_] = self.y_
+        self.build_dataset()
         # Build the DAG
         self._build()
         # Train the model
-        self._train()
+        self._train(kwargs)
         self.fitted_ = True
+        # To keep compatiblity with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
         # Return the classifier
         return self
 
-    def _train(self):
+    def _build(self):
+        """This method should be implemented by the subclasses to
+        build the DAG
+        """
+        ...
+
+    def _train(self, kwargs):
+        """Build and train a BayesianNetwork from the DAG and the dataset
+
+        Parameters
+        ----------
+        kwargs : dict
+            fit parameters
+        """
         self.model_ = BayesianNetwork(
             self.dag_.edges(), show_progress=self.show_progress
         )
+        states = dict(state_names=kwargs.pop("state_names", []))
         self.model_.fit(
             self.dataset_,
             estimator=BayesianEstimator,
             prior_type="K2",
+            weighted=self.weighted_,
+            **states,
         )
 
     def predict(self, X):
@@ -169,13 +225,15 @@
         """
         # Check is fit had been called
         check_is_fitted(self, ["X_", "y_", "fitted_"])
-
         # Input validation
         X = check_array(X)
-        dataset = pd.DataFrame(X, columns=self.features_, dtype="int16")
+        dataset = pd.DataFrame(
+            X, columns=self.feature_names_in_, dtype=np.int32
+        )
         return self.model_.predict(dataset).values.ravel()
 
     def plot(self, title="", node_size=800):
+        warnings.simplefilter("ignore", UserWarning)
         nx.draw_circular(
             self.model_,
             with_labels=True,
@@ -208,7 +266,7 @@ class TAN(BayesBase):
         The classes seen at :meth:`fit`.
     class_name_ : str
         The name of the class column
-    features_ : list
+    feature_names_in_ : list
         The list of features names
     head_ : int
         The index of the node used as head for the initial DAG
@@ -227,21 +285,47 @@
 
     def _check_params(self, X, y, kwargs):
         self.head_ = 0
-        expected_args = ["class_name", "features", "head"]
+        expected_args = ["class_name", "features", "head", "state_names"]
         X, y = self._check_params_fit(X, y, expected_args, kwargs)
         if self.head_ == "random":
-            self.head_ = random.randint(0, len(self.features_) - 1)
-        if self.head_ is not None and self.head_ >= len(self.features_):
+            self.head_ = random.randint(0, self.n_features_in_ - 1)
+        if self.head_ is not None and self.head_ >= self.n_features_in_:
             raise ValueError("Head index out of range")
         return X, y
 
     def _build(self):
-        est = TreeSearch(self.dataset_, root_node=self.features_[self.head_])
+        est = TreeSearch(
+            self.dataset_, root_node=self.feature_names_in_[self.head_]
+        )
         self.dag_ = est.estimate(
             estimator_type="tan",
             class_node=self.class_name_,
             show_progress=self.show_progress,
         )
+        # Code taken from pgmpy
+        # n_jobs = -1
+        # weights = TreeSearch._get_conditional_weights(
+        #     self.dataset_,
+        #     self.class_name_,
+        #     "mutual_info",
+        #     n_jobs,
+        #     self.show_progress,
+        # )
+        # # Step 4.2: Construct chow-liu DAG on {data.columns - class_node}
+        # class_node_idx = np.where(self.dataset_.columns == self.class_name_)[
+        #     0
+        # ][0]
+        # weights = np.delete(weights, class_node_idx, axis=0)
+        # weights = np.delete(weights, class_node_idx, axis=1)
+        # reduced_columns = np.delete(self.dataset_.columns, class_node_idx)
+        # D = TreeSearch._create_tree_and_dag(
+        #     weights, reduced_columns, self.feature_names_in_[self.head_]
+        # )
+        # # Step 4.3: Add edges from class_node to all other nodes.
+        # D.add_edges_from(
+        #     [(self.class_name_, node) for node in reduced_columns]
+        # )
+        # self.dag_ = D
 
 
 class KDB(BayesBase):
@@ -253,46 +337,55 @@ class KDB(BayesBase):
         )
 
     def _check_params(self, X, y, kwargs):
-        expected_args = ["class_name", "features"]
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
+    def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
+        n_edges = min(self.k, len(S_nodes))
+        cond_w = conditional_weights.copy()
+        exit_cond = self.k == 0
+        num = 0
+        while not exit_cond:
+            max_minfo = np.argmax(cond_w[idx, :])
+            if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
+                try:
+                    dag.add_edge(
+                        self.feature_names_in_[max_minfo],
+                        self.feature_names_in_[idx],
+                    )
+                    num += 1
+                except ValueError:
+                    # Loops are not allowed
+                    pass
+            cond_w[idx, max_minfo] = -1
+            exit_cond = num == n_edges or np.all(cond_w[idx, :] <= self.theta)
+
     def _build(self):
         """
-        1. For each feature Xi, compute mutual information, I(X;;C), where C is the class.
-        2. Compute class conditional mutual information I(Xi;XjIC), f or each pair of features Xi and Xj, where i#j.
+        1. For each feature Xi, compute mutual information, I(X;;C),
+        where C is the class.
+        2. Compute class conditional mutual information I(Xi;XjIC), f or each
+        pair of features Xi and Xj, where i#j.
         3. Let the used variable list, S, be empty.
-        4. Let the Bayesian network being constructed, BN, begin with a single class node, C.
+        4. Let the DAG network being constructed, BN, begin with a single
+        class node, C.
         5. Repeat until S includes all domain features
-        5.1. Select feature Xmax which is not in S and has the largest value I(Xmax;C).
+        5.1. Select feature Xmax which is not in S and has the largest value
+        I(Xmax;C).
         5.2. Add a node to BN representing Xmax.
         5.3. Add an arc from C to Xmax in BN.
-        5.4. Add m =min(lSl,/c) arcs from m distinct features Xj in S with the highest value for I(Xmax;X,jC).
+        5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
+        the highest value for I(Xmax;X,jC).
         5.5. Add Xmax to S.
-        Compute the conditional probabilility infered by the structure of BN by using counts from DB, and output BN.
+        Compute the conditional probabilility infered by the structure of BN by
+        using counts from DB, and output BN.
         """
-
-        def add_m_edges(dag, idx, S_nodes, conditional_weights):
-            n_edges = min(self.k, len(S_nodes))
-            cond_w = conditional_weights.copy()
-            exit_cond = self.k == 0
-            num = 0
-            while not exit_cond:
-                max_minfo = np.argmax(cond_w[idx, :])
-                if (
-                    max_minfo in S_nodes
-                    and cond_w[idx, max_minfo] > self.theta
-                ):
-                    try:
-                        dag.add_edge(
-                            self.features_[max_minfo], self.features_[idx]
-                        )
-                        num += 1
-                    except ValueError:
-                        # Loops are not allowed
-                        pass
-                cond_w[idx, max_minfo] = -1
-                exit_cond = num == n_edges or np.all(cond_w[idx, :] <= 0)
-
         # 1. get the mutual information between each feature and the class
         mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
         # 2. symmetric matrix where each element represents I(X, Y| class_node)
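The numbered docstring above is the whole KDB recipe. As a rough, self-contained sketch of those steps (not the package's implementation: it uses a plain networkx DiGraph, a made-up conditional-mutual-information matrix, and hypothetical names such as kdb_structure), the construction loop looks like this:

    import numpy as np
    import networkx as nx
    from sklearn.feature_selection import mutual_info_classif


    def kdb_structure(X, y, feature_names, class_name="class", k=2, theta=0.03):
        # 1. mutual information of every feature with the class
        mutual = mutual_info_classif(X, y, discrete_features=True)
        # 2. class-conditional mutual information I(Xi; Xj | C); pgmpy derives
        #    this from the data, a random symmetric matrix keeps the sketch short
        rng = np.random.default_rng(0)
        cond = rng.random((X.shape[1], X.shape[1]))
        cond = (cond + cond.T) / 2
        # 3. and 4. empty used-variable list S, DAG with only the class node
        S, dag = [], nx.DiGraph()
        dag.add_node(class_name)
        # 5. take features from the largest I(Xi; C) down
        for idx in np.argsort(mutual)[::-1]:
            feature = feature_names[idx]
            dag.add_node(feature)                 # 5.2
            dag.add_edge(class_name, feature)     # 5.3
            # 5.4 up to k arcs from already used features with the highest CMI
            for j in sorted(S, key=lambda s: cond[idx, s], reverse=True)[:k]:
                if cond[idx, j] > theta:
                    dag.add_edge(feature_names[j], feature)
            S.append(idx)                         # 5.5
        return dag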
@@ -301,73 +394,522 @@
         )._get_conditional_weights(
             self.dataset_, self.class_name_, show_progress=self.show_progress
         )
-        # 3.
+        # 3. Let the used variable list, S, be empty.
         S_nodes = []
-        # 4.
-        dag = BayesianNetwork()
+        # 4. Let the DAG being constructed, BN, begin with a single class node
+        dag = BayesianNetwork(show_progress=self.show_progress)
         dag.add_node(self.class_name_)  # , state_names=self.classes_)
-        # 5. 5.1
+        # 5. Repeat until S includes all domain features
+        # 5.1 Select feature Xmax which is not in S and has the largest value
         for idx in np.argsort(mutual):
-            # 5.2
-            feature = self.features_[idx]
+            # 5.2 Add a node to BN representing Xmax.
+            feature = self.feature_names_in_[idx]
             dag.add_node(feature)
-            # 5.3
+            # 5.3 Add an arc from C to Xmax in BN.
             dag.add_edge(self.class_name_, feature)
-            # 5.4
-            add_m_edges(dag, idx, S_nodes, conditional_weights)
-            # 5.5
+            # 5.4 Add m = min(lSl,/c) arcs from m distinct features Xj in S
+            self._add_m_edges(dag, idx, S_nodes, conditional_weights)
+            # 5.5 Add Xmax to S.
             S_nodes.append(idx)
         self.dag_ = dag
 
 
-class AODE(BayesBase, BaseEnsemble):
-    def __init__(self, show_progress=False, random_state=None):
-        super().__init__(
-            show_progress=show_progress, random_state=random_state
-        )
-
-    def _check_params(self, X, y, kwargs):
-        expected_args = ["class_name", "features"]
-        return self._check_params_fit(X, y, expected_args, kwargs)
-
-    def _build(self):
-        self.dag_ = None
-
-    def _train(self):
-        """Build SPODE estimators (Super Parent One Dependent Estimator)"""
-        self.models_ = []
-        class_edges = [(self.class_name_, f) for f in self.features_]
-        for idx in range(len(self.features_)):
-            feature_edges = [
-                (self.features_[idx], f)
-                for f in self.features_
-                if f != self.features_[idx]
-            ]
-            feature_edges.extend(class_edges)
-            model = BayesianNetwork(
-                feature_edges, show_progress=self.show_progress
-            )
-            model.fit(
-                self.dataset_,
-                estimator=BayesianEstimator,
-                prior_type="K2",
-            )
-            self.models_.append(model)
-
-    def plot(self, title=""):
-        for idx, model in enumerate(self.models_):
-            self.model_ = model
-            super().plot(title=f"{idx} {title}")
-
-    def predict(self, X: np.ndarray) -> np.ndarray:
-        check_is_fitted(self, ["X_", "y_", "fitted_"])
-        # Input validation
-        X = self._validate_data(X, reset=False)
-        n_samples = X.shape[0]
-        n_estimators = len(self.models_)
-        result = np.empty((n_samples, n_estimators))
-        dataset = pd.DataFrame(X, columns=self.features_, dtype="int16")
-        for index, model in enumerate(self.models_):
-            result[:, index] = model.predict(dataset).values.ravel()
-        return mode(result, axis=1, keepdims=False).mode.ravel()
+def build_spodes(features, class_name):
+    """Build SPODE estimators (Super Parent One Dependent Estimator)"""
+    class_edges = [(class_name, f) for f in features]
+    for idx in range(len(features)):
+        feature_edges = [
+            (features[idx], f) for f in features if f != features[idx]
+        ]
+        feature_edges.extend(class_edges)
+        model = BayesianNetwork(feature_edges, show_progress=False)
+        yield model
+
+
+class SPODE(BayesBase):
+    def _check_params(self, X, y, kwargs):
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
+        return self._check_params_fit(X, y, expected_args, kwargs)
+
+
+class AODE(ClassifierMixin, BaseEnsemble):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        estimator=None,
+    ):
+        self.show_progress = show_progress
+        self.random_state = random_state
+        super().__init__(estimator=estimator)
+
+    def _validate_estimator(self) -> None:
+        """Check the estimator and set the estimator_ attribute."""
+        super()._validate_estimator(
+            default=SPODE(
+                random_state=self.random_state,
+                show_progress=self.show_progress,
+            )
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.n_features_in_ = X.shape[1]
+        self.feature_names_in_ = kwargs.get(
+            "features", default_feature_names(self.n_features_in_)
+        )
+        self.class_name_ = kwargs.get("class_name", "class")
+        # build estimator
+        self._validate_estimator()
+        self.X_ = X
+        self.y_ = y
+        self.n_samples_ = X.shape[0]
+        self.estimators_ = []
+        self._train(kwargs)
+        self.fitted_ = True
+        # To keep compatiblity with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
+        return self
+
+    def _train(self, kwargs):
+        for dag in build_spodes(self.feature_names_in_, self.class_name_):
+            estimator = clone(self.estimator_)
+            estimator.dag_ = estimator.model_ = dag
+            estimator.fit(self.X_, self.y_, **kwargs)
+            self.estimators_.append(estimator)
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        n_samples = X.shape[0]
+        n_estimators = len(self.estimators_)
+        result = np.empty((n_samples, n_estimators))
+        for index, estimator in enumerate(self.estimators_):
+            result[:, index] = estimator.predict(X)
+        return mode(result, axis=1, keepdims=False).mode.ravel()
+
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
+
+
+class TANNew(TAN):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+        super().__init__(
+            show_progress=show_progress, random_state=random_state
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.estimator_ = Proposal(self)
+        self.estimator_.fit(X, y, **kwargs)
+        return self
+
+    def predict(self, X):
+        return self.estimator_.predict(X)
+
+
+class KDBNew(KDB):
+    def __init__(
+        self,
+        k=2,
+        show_progress=False,
+        random_state=None,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+        super().__init__(
+            k=k, show_progress=show_progress, random_state=random_state
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.estimator_ = Proposal(self)
+        self.estimator_.fit(X, y, **kwargs)
+        return self
+
+    def predict(self, X):
+        return self.estimator_.predict(X)
+
+
+class SPODENew(SPODE):
+    """This class implements a classifier for the SPODE algorithm similar to
+    TANNew and KDBNew"""
+
+    def __init__(
+        self,
+        random_state,
+        show_progress,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        super().__init__(
+            random_state=random_state, show_progress=show_progress
+        )
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+
+
+class AODENew(AODE):
+    def __init__(
+        self,
+        random_state=None,
+        show_progress=False,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+        super().__init__(
+            random_state=random_state,
+            show_progress=show_progress,
+            estimator=Proposal(
+                SPODENew(
+                    random_state=random_state,
+                    show_progress=show_progress,
+                    discretizer_depth=discretizer_depth,
+                    discretizer_length=discretizer_length,
+                    discretizer_cuts=discretizer_cuts,
+                )
+            ),
+        )
+
+    def _train(self, kwargs):
+        for dag in build_spodes(self.feature_names_in_, self.class_name_):
+            proposal = clone(self.estimator_)
+            proposal.estimator.dag_ = proposal.estimator.model_ = dag
+            self.estimators_.append(proposal.fit(self.X_, self.y_, **kwargs))
+        self.n_estimators_ = len(self.estimators_)
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        check_is_fitted(self, ["X_", "y_", "fitted_"])
+        # Input validation
+        X = check_array(X)
+        result = np.empty((X.shape[0], self.n_estimators_))
+        for index, model in enumerate(self.estimators_):
+            result[:, index] = model.predict(X)
+        return mode(result, axis=1, keepdims=False).mode.ravel()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.estimator.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.estimator.dag_) for x in self.estimators_])
+            edges = sum(
+                [len(x.estimator.dag_.edges()) for x in self.estimators_]
+            )
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.estimator.plot(title=f"{idx} {title}")
+
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.estimator.version()
+        return SPODENew(None, False).version()
+
+
+class Proposal(BaseEstimator):
+    def __init__(self, estimator):
+        self.estimator = estimator
+        self.class_type = estimator.__class__
+
+    def fit(self, X, y, **kwargs):
+        # Check parameters
+        self.estimator._check_params(X, y, kwargs)
+        # Discretize train data
+        self.discretizer_ = FImdlp(
+            n_jobs=1,
+            max_depth=self.estimator.discretizer_depth,
+            min_length=self.estimator.discretizer_length,
+            max_cuts=self.estimator.discretizer_cuts,
+        )
+        self.Xd = self.discretizer_.fit_transform(X, y)
+        kwargs = self.update_kwargs(y, kwargs)
+        # Build the model
+        super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
+        # Local discretization based on the model
+        self._local_discretization()
+        # self.check_integrity("fit", self.Xd)
+        self.fitted_ = True
+        return self
+
+    def predict(self, X):
+        # Check is fit had been called
+        check_is_fitted(self, ["fitted_"])
+        # Input validation
+        X = check_array(X)
+        Xd = self.discretizer_.transform(X)
+        # self.check_integrity("predict", Xd)
+        return super(self.class_type, self.estimator).predict(Xd)
+
+    def update_kwargs(self, y, kwargs):
+        features = (
+            kwargs["features"]
+            if "features" in kwargs
+            else default_feature_names(self.Xd.shape[1])
+        )
+        states = {
+            features[i]: self.discretizer_.get_states_feature(i)
+            for i in range(self.Xd.shape[1])
+        }
+        class_name = (
+            kwargs["class_name"]
+            if "class_name" in kwargs
+            else self.estimator.default_class_name()
+        )
+        states[class_name] = np.unique(y).tolist()
+        kwargs["state_names"] = states
+        self.state_names_ = states
+        self.features_ = features
+        kwargs["features"] = features
+        kwargs["class_name"] = class_name
+        return kwargs
+
+    def _local_discretization(self):
+        """Discretize each feature with its fathers and the class"""
+        upgrade = False
+        # order of local discretization is important. no good 0, 1, 2...
+        ancestral_order = list(nx.topological_sort(self.estimator.dag_))
+        for feature in ancestral_order:
+            if feature == self.estimator.class_name_:
+                continue
+            idx = self.estimator.indexed_features_[feature]
+            fathers = self.estimator.dag_.get_parents(feature)
+            if len(fathers) > 1:
+                # First remove the class name as it will be added later
+                fathers.remove(self.estimator.class_name_)
+                # Get the fathers indices
+                features = [
+                    self.estimator.indexed_features_[f] for f in fathers
+                ]
+                # Update the discretization of the feature
+                self.Xd[:, idx] = self.discretizer_.join_fit(
+                    # each feature has to use previous discretization data=res
+                    target=idx,
+                    features=features,
+                    data=self.Xd,
+                )
+                upgrade = True
+        if upgrade:
+            # Update the dataset
+            self.estimator.X_ = self.Xd
+            self.estimator.build_dataset()
+            self.state_names_ = {
+                key: self.discretizer_.get_states_feature(value)
+                for key, value in self.estimator.indexed_features_.items()
+            }
+            states = {"state_names": self.state_names_}
+            # Update the model
+            self.estimator.model_.fit(
+                self.estimator.dataset_,
+                estimator=BayesianEstimator,
+                prior_type="K2",
+                **states,
+            )
+
+    # def check_integrity(self, source, X):
+    #     # print(f"Checking integrity of {source} data")
+    #     for i in range(X.shape[1]):
+    #         if not set(np.unique(X[:, i]).tolist()).issubset(
+    #             set(self.state_names_[self.features_[i]])
+    #         ):
+    #             print(
+    #                 "i",
+    #                 i,
+    #                 "features[i]",
+    #                 self.features_[i],
+    #                 "np.unique(X[:, i])",
+    #                 np.unique(X[:, i]),
+    #                 "np.array(state_names[features[i]])",
+    #                 np.array(self.state_names_[self.features_[i]]),
+    #             )
+    #             raise ValueError("Discretization error")
+
+
+class BoostSPODE(BayesBase):
+    def _check_params(self, X, y, kwargs):
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+            "sparent",
+        ]
+        return self._check_params_fit(X, y, expected_args, kwargs)
+
+    def _build(self):
+        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
+        feature_edges = [
+            (self.sparent_, f)
+            for f in self.feature_names_in_
+            if f != self.sparent_
+        ]
+        feature_edges.extend(class_edges)
+        self.dag_ = DAG(feature_edges)
+
+    def _train(self, kwargs):
+        states = dict(state_names=kwargs.get("state_names", []))
+        breakpoint()
+        self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
+        self.model_.fit(
+            self.dataset_,
+            estimator=BayesianEstimator,
+            prior_type="K2",
+            weighted=self.weighted_,
+            **states,
+        )
+
+
+class BoostAODE(ClassifierMixin, BaseEnsemble):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        estimator=None,
+        n_estimators=10,
+    ):
+        self.show_progress = show_progress
+        self.random_state = random_state
+        self.n_estimators = n_estimators
+        super().__init__(estimator=estimator)
+
+    def _validate_estimator(self) -> None:
+        """Check the estimator and set the estimator_ attribute."""
+        super()._validate_estimator(
+            default=BoostSPODE(
+                random_state=self.random_state,
+                show_progress=self.show_progress,
+            )
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.n_features_in_ = X.shape[1]
+        self.feature_names_in_ = kwargs.get(
+            "features", default_feature_names(self.n_features_in_)
+        )
+        self.class_name_ = kwargs.get("class_name", "class")
+        self.X_ = X
+        self.y_ = y
+        self.n_samples_ = X.shape[0]
+        self.estimators_ = []
+        self._validate_estimator()
+        self._train(kwargs)
+        self.fitted_ = True
+        # To keep compatiblity with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
+        return self
+
+    def _train(self, kwargs):
+        """Build boosted SPODEs"""
+        weights = [1 / self.n_samples_] * self.n_samples_
+        # Step 0: Set the finish condition
+        for num in range(self.n_estimators):
+            # Step 1: Build ranking with mutual information
+            # OJO MAL, ESTO NO ACTUALIZA EL RANKING CON LOS PESOS
+            # SIEMPRE VA A SACAR LO MISMO
+            feature = (
+                SelectKBest(k=1)
+                .fit(self.X_, self.y_)
+                .get_feature_names_out(self.feature_names_in_)
+                .tolist()[0]
+            )
+            # Step 2: Build & train spode with the first feature as sparent
+            estimator = clone(self.estimator_)
+            _args = kwargs.copy()
+            _args["sparent"] = feature
+            _args["sample_weight"] = weights
+            _args["weighted"] = True
+            # Step 2.1: build dataset
+            # Step 2.2: Train the model
+            estimator.fit(self.X_, self.y_, **_args)
+            # Step 3: Compute errors (epsilon sub m & alpha sub m)
+            # Explanation in https://medium.datadriveninvestor.com/understanding-adaboost-and-scikit-learns-algorithm-c8d8af5ace10
+            y_pred = estimator.predict(self.X_)
+            em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
+            am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
+            # Step 3.2: Update weights for next classifier
+            weights = [
+                wm * np.exp(am * (ym != y_pred))
+                for wm, ym in zip(weights, self.y_)
+            ]
+            # Step 4: Add the new model
+            self.estimators_.append(estimator)
+        """
+        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
+        feature_edges = [
+            (sparent, f) for f in self.feature_names_in_ if f != sparent
+        ]
+        self.weights_ = weights.copy() if weights is not None else None
+        feature_edges.extend(class_edges)
+        self.model_ = BayesianNetwork(feature_edges, show_progress=False)
+        return self.model_
+        """
Binary files not shown: four baseline images added (55 KiB, 55 KiB, 49 KiB, 44 KiB) and one updated (50 KiB → 49 KiB).
bayesclass/tests/conftest.py — new file (38 lines)

@@ -0,0 +1,38 @@
+import pytest
+from sklearn.datasets import load_iris
+from fimdlp.mdlp import FImdlp
+
+
+@pytest.fixture
+def iris():
+    dataset = load_iris()
+    X = dataset["data"]
+    y = dataset["target"]
+    features = dataset["feature_names"]
+    # To make iris dataset has the same values as our iris.arff dataset
+    patch = {(34, 3): (0.2, 0.1), (37, 1): (3.6, 3.1), (37, 2): (1.4, 1.5)}
+    for key, value in patch.items():
+        X[key] = value[1]
+    return X, y, features
+
+
+@pytest.fixture
+def data(iris):
+    return iris[0], iris[1]
+
+
+@pytest.fixture
+def features(iris):
+    return iris[2]
+
+
+@pytest.fixture
+def class_name():
+    return "class"
+
+
+@pytest.fixture
+def data_disc(data):
+    clf = FImdlp()
+    X, y = data
+    return clf.fit_transform(X, y), y
bayesclass/tests/test_AODE.py

@@ -1,6 +1,5 @@
 import pytest
 import numpy as np
-from sklearn.datasets import load_iris
 from sklearn.preprocessing import KBinsDiscretizer
 from matplotlib.testing.decorators import image_comparison
 from matplotlib.testing.conftest import mpl_test_settings
@@ -10,28 +9,21 @@ from bayesclass.clfs import AODE
 from .._version import __version__
 
 
-@pytest.fixture
-def data():
-    X, y = load_iris(return_X_y=True)
-    enc = KBinsDiscretizer(encode="ordinal")
-    return enc.fit_transform(X), y
-
-
 @pytest.fixture
 def clf():
-    return AODE()
+    return AODE(random_state=17)
 
 
-def test_AODE_default_hyperparameters(data, clf):
+def test_AODE_default_hyperparameters(data_disc, clf):
     # Test default values of hyperparameters
     assert not clf.show_progress
-    assert clf.random_state is None
-    clf = AODE(show_progress=True, random_state=17)
-    assert clf.show_progress
     assert clf.random_state == 17
-    clf.fit(*data)
+    clf = AODE(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
     assert clf.class_name_ == "class"
-    assert clf.features_ == [
+    assert clf.feature_names_in_ == [
         "feature_0",
         "feature_1",
         "feature_2",
@@ -42,50 +34,66 @@ def test_AODE_default_hyperparameters(data, clf):
 @image_comparison(
     baseline_images=["line_dashes_AODE"], remove_text=True, extensions=["png"]
 )
-def test_AODE_plot(data, clf):
+def test_AODE_plot(data_disc, features, clf):
     # mpl_test_settings will automatically clean these internal side effects
     mpl_test_settings
-    dataset = load_iris(as_frame=True)
-    clf.fit(*data, features=dataset["feature_names"])
+    clf.fit(*data_disc, features=features)
     clf.plot("AODE Iris")
 
 
-def test_AODE_version(clf):
+def test_AODE_version(clf, features, data_disc):
     """Check AODE version."""
     assert __version__ == clf.version()
+    clf.fit(*data_disc, features=features)
+    assert __version__ == clf.version()
 
 
-def test_AODE_nodes_leaves(clf):
-    assert clf.nodes_leaves() == (0, 0)
+def test_AODE_nodes_edges(clf, data_disc):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data_disc)
+    assert clf.nodes_leaves() == (20, 28)
 
 
-def test_AODE_classifier(data, clf):
-    clf.fit(*data)
-    attribs = ["classes_", "X_", "y_", "features_", "class_name_"]
+def test_AODE_states(clf, data_disc):
+    assert clf.states_ == 0
+    clf.fit(*data_disc)
+    assert clf.states_ == 19
+    assert clf.depth_ == clf.states_
+
+
+def test_AODE_classifier(data_disc, clf):
+    clf.fit(*data_disc)
+    attribs = [
+        "feature_names_in_",
+        "class_name_",
+        "n_features_in_",
+        "X_",
+        "y_",
+    ]
     for attr in attribs:
         assert hasattr(clf, attr)
-    X = data[0]
-    y = data[1]
+    X = data_disc[0]
+    y = data_disc[1]
     y_pred = clf.predict(X)
     assert y_pred.shape == (X.shape[0],)
-    assert sum(y == y_pred) == 147
+    assert sum(y == y_pred) == 146
 
 
-def test_AODE_wrong_num_features(data, clf):
+def test_AODE_wrong_num_features(data_disc, clf):
     with pytest.raises(
         ValueError,
         match="Number of features does not match the number of columns in X",
     ):
-        clf.fit(*data, features=["feature_1", "feature_2"])
+        clf.fit(*data_disc, features=["feature_1", "feature_2"])
 
 
-def test_AODE_wrong_hyperparam(data, clf):
+def test_AODE_wrong_hyperparam(data_disc, clf):
     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-        clf.fit(*data, wrong_param="wrong_param")
+        clf.fit(*data_disc, wrong_param="wrong_param")
 
 
-def test_AODE_error_size_predict(data, clf):
-    X, y = data
+def test_AODE_error_size_predict(data_disc, clf):
+    X, y = data_disc
     clf.fit(X, y)
     with pytest.raises(ValueError):
         X_diff_size = np.ones((10, X.shape[1] + 1))
bayesclass/tests/test_AODENew.py — new file (123 lines)

@@ -0,0 +1,123 @@
+import pytest
+import numpy as np
+from matplotlib.testing.decorators import image_comparison
+from matplotlib.testing.conftest import mpl_test_settings
+
+
+from bayesclass.clfs import AODENew
+from .._version import __version__
+
+
+@pytest.fixture
+def clf():
+    return AODENew(random_state=17)
+
+
+def test_AODENew_default_hyperparameters(data, clf):
+    # Test default values of hyperparameters
+    assert not clf.show_progress
+    assert clf.random_state == 17
+    clf = AODENew(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data)
+    assert clf.class_name_ == "class"
+    assert clf.feature_names_in_ == [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+
+
+@image_comparison(
+    baseline_images=["line_dashes_AODENew"],
+    remove_text=True,
+    extensions=["png"],
+)
+def test_AODENew_plot(data, features, clf):
+    # mpl_test_settings will automatically clean these internal side effects
+    mpl_test_settings
+    clf.fit(*data, features=features)
+    clf.plot("AODE Iris")
+
+
+def test_AODENew_version(clf, data):
+    """Check AODENew version."""
+    assert __version__ == clf.version()
+    clf.fit(*data)
+    assert __version__ == clf.version()
+
+
+def test_AODENew_nodes_edges(clf, data):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data)
+    assert clf.nodes_leaves() == (20, 28)
+
+
+def test_AODENew_states(clf, data):
+    assert clf.states_ == 0
+    clf.fit(*data)
+    assert clf.states_ == 17.75
+    assert clf.depth_ == clf.states_
+
+
+def test_AODENew_classifier(data, clf):
+    clf.fit(*data)
+    attribs = [
+        "feature_names_in_",
+        "class_name_",
+        "n_features_in_",
+        "X_",
+        "y_",
+    ]
+    for attr in attribs:
+        assert hasattr(clf, attr)
+    X = data[0]
+    y = data[1]
+    y_pred = clf.predict(X)
+    assert y_pred.shape == (X.shape[0],)
+    assert sum(y == y_pred) == 146
+
+
+def test_AODENew_local_discretization(clf, data_disc):
+    expected_data = [
+        [-1, [0, -1], [0, -1], [0, -1]],
+        [[1, -1], -1, [1, -1], [1, -1]],
+        [[2, -1], [2, -1], -1, [2, -1]],
+        [[3, -1], [3, -1], [3, -1], -1],
+    ]
+    clf.fit(*data_disc)
+    for idx, estimator in enumerate(clf.estimators_):
+        expected = expected_data[idx]
+        for feature in range(4):
+            computed = estimator.discretizer_.target_[feature]
+            if type(computed) == list:
+                for j, k in zip(expected[feature], computed):
+                    assert j == k
+            else:
+                assert (
+                    expected[feature]
+                    == estimator.discretizer_.target_[feature]
+                )
+
+
+def test_AODENew_wrong_num_features(data, clf):
+    with pytest.raises(
+        ValueError,
+        match="Number of features does not match the number of columns in X",
+    ):
+        clf.fit(*data, features=["feature_1", "feature_2"])
+
+
+def test_AODENew_wrong_hyperparam(data, clf):
+    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
+        clf.fit(*data, wrong_param="wrong_param")
+
+
+def test_AODENew_error_size_predict(data, clf):
+    X, y = data
+    clf.fit(X, y)
+    with pytest.raises(ValueError):
+        X_diff_size = np.ones((10, X.shape[1] + 1))
+        clf.predict(X_diff_size)
bayesclass/tests/test_BoostAODE.py — new file (100 lines)

@@ -0,0 +1,100 @@
+import pytest
+import numpy as np
+from sklearn.preprocessing import KBinsDiscretizer
+from matplotlib.testing.decorators import image_comparison
+from matplotlib.testing.conftest import mpl_test_settings
+
+
+from bayesclass.clfs import BoostAODE
+from .._version import __version__
+
+
+@pytest.fixture
+def clf():
+    return BoostAODE(random_state=17)
+
+
+def test_BoostAODE_default_hyperparameters(data_disc, clf):
+    # Test default values of hyperparameters
+    assert not clf.show_progress
+    assert clf.random_state == 17
+    clf = BoostAODE(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
+    assert clf.class_name_ == "class"
+    assert clf.feature_names_in_ == [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+
+
+# @image_comparison(
+#     baseline_images=["line_dashes_AODE"], remove_text=True, extensions=["png"]
+# )
+# def test_BoostAODE_plot(data_disc, features, clf):
+#     # mpl_test_settings will automatically clean these internal side effects
+#     mpl_test_settings
+#     clf.fit(*data_disc, features=features)
+#     clf.plot("AODE Iris")
+
+
+# def test_BoostAODE_version(clf, features, data_disc):
+#     """Check AODE version."""
+#     assert __version__ == clf.version()
+#     clf.fit(*data_disc, features=features)
+#     assert __version__ == clf.version()
+
+
+# def test_BoostAODE_nodes_edges(clf, data_disc):
+#     assert clf.nodes_edges() == (0, 0)
+#     clf.fit(*data_disc)
+#     assert clf.nodes_leaves() == (20, 28)
+
+
+# def test_BoostAODE_states(clf, data_disc):
+#     assert clf.states_ == 0
+#     clf.fit(*data_disc)
+#     assert clf.states_ == 19
+#     assert clf.depth_ == clf.states_
+
+
+# def test_BoostAODE_classifier(data_disc, clf):
+#     clf.fit(*data_disc)
+#     attribs = [
+#         "feature_names_in_",
+#         "class_name_",
+#         "n_features_in_",
+#         "X_",
+#         "y_",
+#     ]
+#     for attr in attribs:
+#         assert hasattr(clf, attr)
+#     X = data_disc[0]
+#     y = data_disc[1]
+#     y_pred = clf.predict(X)
+#     assert y_pred.shape == (X.shape[0],)
+#     assert sum(y == y_pred) == 146
+
+
+# def test_BoostAODE_wrong_num_features(data_disc, clf):
+#     with pytest.raises(
+#         ValueError,
+#         match="Number of features does not match the number of columns in X",
+#     ):
+#         clf.fit(*data_disc, features=["feature_1", "feature_2"])
+
+
+# def test_BoostAODE_wrong_hyperparam(data_disc, clf):
+#     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
+#         clf.fit(*data_disc, wrong_param="wrong_param")
+
+
+# def test_BoostAODE_error_size_predict(data_disc, clf):
+#     X, y = data_disc
+#     clf.fit(X, y)
+#     with pytest.raises(ValueError):
+#         X_diff_size = np.ones((10, X.shape[1] + 1))
+#         clf.predict(X_diff_size)
@@ -1,28 +1,21 @@
 import pytest
 import numpy as np
-from sklearn.datasets import load_iris
 from sklearn.preprocessing import KBinsDiscretizer
 from matplotlib.testing.decorators import image_comparison
 from matplotlib.testing.conftest import mpl_test_settings
+from pgmpy.models import BayesianNetwork


 from bayesclass.clfs import KDB
 from .._version import __version__


-@pytest.fixture
-def data():
-    X, y = load_iris(return_X_y=True)
-    enc = KBinsDiscretizer(encode="ordinal")
-    return enc.fit_transform(X), y
-
-
 @pytest.fixture
 def clf():
-    return KDB(k=3)
+    return KDB(k=3, show_progress=False)


-def test_KDB_default_hyperparameters(data, clf):
+def test_KDB_default_hyperparameters(data_disc, clf):
     # Test default values of hyperparameters
     assert not clf.show_progress
     assert clf.random_state is None
@@ -31,9 +24,9 @@ def test_KDB_default_hyperparameters(data, clf):
     assert clf.show_progress
     assert clf.random_state == 17
     assert clf.k == 3
-    clf.fit(*data)
+    clf.fit(*data_disc)
     assert clf.class_name_ == "class"
-    assert clf.features_ == [
+    assert clf.feature_names_in_ == [
         "feature_0",
         "feature_1",
         "feature_2",
@@ -46,49 +39,85 @@ def test_KDB_version(clf):
     assert __version__ == clf.version()


-def test_KDB_nodes_leaves(clf):
-    assert clf.nodes_leaves() == (0, 0)
+def test_KDB_nodes_edges(clf, data_disc):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data_disc)
+    assert clf.nodes_leaves() == (5, 9)


-def test_KDB_classifier(data, clf):
-    clf.fit(*data)
-    attribs = ["classes_", "X_", "y_", "features_", "class_name_"]
+def test_KDB_states(clf, data_disc):
+    assert clf.states_ == 0
+    clf.fit(*data_disc)
+    assert clf.states_ == 19
+    assert clf.depth_ == clf.states_
+
+
+def test_KDB_classifier(data_disc, clf):
+    clf.fit(*data_disc)
+    attribs = ["classes_", "X_", "y_", "feature_names_in_", "class_name_"]
     for attr in attribs:
         assert hasattr(clf, attr)
-    X = data[0]
-    y = data[1]
+    X = data_disc[0]
+    y = data_disc[1]
     y_pred = clf.predict(X)
     assert y_pred.shape == (X.shape[0],)
-    assert sum(y == y_pred) == 148
+    assert sum(y == y_pred) == 146
+
+
+def test_KDB_classifier_weighted(data_disc, clf):
+    sample_weight = [1] * data_disc[0].shape[0]
+    sample_weight[:50] = [0] * 50
+    clf.fit(*data_disc, sample_weight=sample_weight, weighted=True)
+    assert clf.score(*data_disc) == 0.64


 @image_comparison(
     baseline_images=["line_dashes_KDB"], remove_text=True, extensions=["png"]
 )
-def test_KDB_plot(data, clf):
+def test_KDB_plot(data_disc, features, clf):
     # mpl_test_settings will automatically clean these internal side effects
     mpl_test_settings
-    dataset = load_iris(as_frame=True)
-    clf.fit(*data, features=dataset["feature_names"])
+    clf.fit(*data_disc, features=features)
     clf.plot("KDB Iris")


-def test_KDB_wrong_num_features(data, clf):
+def test_KDB_wrong_num_features(data_disc, clf):
     with pytest.raises(
         ValueError,
         match="Number of features does not match the number of columns in X",
     ):
-        clf.fit(*data, features=["feature_1", "feature_2"])
+        clf.fit(*data_disc, features=["feature_1", "feature_2"])


-def test_KDB_wrong_hyperparam(data, clf):
+def test_KDB_wrong_hyperparam(data_disc, clf):
     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-        clf.fit(*data, wrong_param="wrong_param")
+        clf.fit(*data_disc, wrong_param="wrong_param")


-def test_KDB_error_size_predict(data, clf):
-    X, y = data
+def test_KDB_error_size_predict(data_disc, clf):
+    X, y = data_disc
     clf.fit(X, y)
     with pytest.raises(ValueError):
         X_diff_size = np.ones((10, X.shape[1] + 1))
         clf.predict(X_diff_size)
+
+
+def test_KDB_dont_do_cycles():
+    clf = KDB(k=4)
+    dag = BayesianNetwork(show_progress=False)
+    clf.feature_names_in_ = [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+    nodes = list(range(4))
+    weights = np.ones((4, 4))
+    for idx in range(1, 4):
+        dag.add_edge(clf.feature_names_in_[0], clf.feature_names_in_[idx])
+    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[2])
+    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[3])
+    dag.add_edge(clf.feature_names_in_[2], clf.feature_names_in_[3])
+    for idx in range(4):
+        clf._add_m_edges(dag, idx, nodes, weights)
+    assert len(dag.edges()) == 6
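The per-file data fixture removed above is replaced by shared fixtures (data, data_disc, features, class_name) that the rewritten tests now request but that are not shown in this diff, most likely a conftest.py. A minimal sketch of what such a conftest could look like, assuming it simply mirrors the removed fixture; every definition here is an assumption, not part of the change set:

import pytest
from sklearn.datasets import load_iris
from sklearn.preprocessing import KBinsDiscretizer


@pytest.fixture
def features():
    return ["feature_0", "feature_1", "feature_2", "feature_3"]


@pytest.fixture
def class_name():
    return "class"


@pytest.fixture
def data():
    # Assumed to be the raw iris data, since the *New estimators below
    # discretize internally.
    return load_iris(return_X_y=True)


@pytest.fixture
def data_disc():
    # Mirrors the removed per-file fixture: ordinal-discretized iris.
    X, y = load_iris(return_X_y=True)
    enc = KBinsDiscretizer(encode="ordinal")
    return enc.fit_transform(X), y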
bayesclass/tests/test_KDBNew.py (new file, 132 lines)
@@ -0,0 +1,132 @@
import pytest
import numpy as np
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings
from pgmpy.models import BayesianNetwork


from bayesclass.clfs import KDBNew
from .._version import __version__


@pytest.fixture
def clf():
    return KDBNew(k=3, show_progress=False)


def test_KDBNew_default_hyperparameters(data, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state is None
    assert clf.theta == 0.03
    clf = KDBNew(show_progress=True, random_state=17, k=3)
    assert clf.show_progress
    assert clf.random_state == 17
    assert clf.k == 3
    clf.fit(*data)
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


def test_KDBNew_version(clf):
    """Check KDBNew version."""
    assert __version__ == clf.version()


def test_KDBNew_nodes_edges(clf, data):
    assert clf.nodes_edges() == (0, 0)
    clf.fit(*data)
    assert clf.nodes_leaves() == (5, 9)


def test_KDBNew_states(clf, data):
    assert clf.states_ == 0
    clf.fit(*data)
    assert clf.states_ == 22
    assert clf.depth_ == clf.states_


def test_KDBNew_classifier(data, clf):
    clf.fit(*data)
    attribs = ["classes_", "X_", "y_", "feature_names_in_", "class_name_"]
    for attr in attribs:
        assert hasattr(clf, attr)
    X = data[0]
    y = data[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
    assert sum(y == y_pred) == 145


def test_KDBNew_local_discretization(clf, data):
    expected = [[1, -1], -1, [0, 1, 3, -1], [1, -1]]
    clf.fit(*data)
    for feature in range(4):
        computed = clf.estimator_.discretizer_.target_[feature]
        if type(computed) == list:
            for j, k in zip(expected[feature], computed):
                assert j == k
        else:
            assert (
                expected[feature]
                == clf.estimator_.discretizer_.target_[feature]
            )


@image_comparison(
    baseline_images=["line_dashes_KDBNew"],
    remove_text=True,
    extensions=["png"],
)
def test_KDBNew_plot(data, features, class_name, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
    clf.fit(*data, features=features, class_name=class_name)
    clf.plot("KDBNew Iris")


def test_KDBNew_wrong_num_features(data, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
        clf.fit(*data, features=["feature_1", "feature_2"])


def test_KDBNew_wrong_hyperparam(data, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
        clf.fit(*data, wrong_param="wrong_param")


def test_KDBNew_error_size_predict(data, clf):
    X, y = data
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)


def test_KDBNew_dont_do_cycles():
    clf = KDBNew(k=4)
    dag = BayesianNetwork(show_progress=False)
    clf.feature_names_in_ = [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]
    nodes = list(range(4))
    weights = np.ones((4, 4))
    for idx in range(1, 4):
        dag.add_edge(clf.feature_names_in_[0], clf.feature_names_in_[idx])
    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[2])
    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[3])
    dag.add_edge(clf.feature_names_in_[2], clf.feature_names_in_[3])
    for idx in range(4):
        clf._add_m_edges(dag, idx, nodes, weights)
    assert len(dag.edges()) == 6
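For orientation, a minimal usage sketch of the new KDBNew estimator, assuming (as the tests suggest) that it is fed the raw iris features and discretizes them internally; the figures asserted above (for example 145 correct predictions) are expectations of the test suite, not guarantees of this sketch:

from sklearn.datasets import load_iris
from bayesclass.clfs import KDBNew

X, y = load_iris(return_X_y=True)
clf = KDBNew(k=3, show_progress=False).fit(X, y)
print(clf.nodes_leaves())        # the tests above expect (5, 9)
print(sum(y == clf.predict(X)))  # the tests above expect 145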
@@ -1,7 +1,5 @@
 import pytest
 import numpy as np
-from sklearn.datasets import load_iris
-from sklearn.preprocessing import KBinsDiscretizer
 from matplotlib.testing.decorators import image_comparison
 from matplotlib.testing.conftest import mpl_test_settings

@@ -10,29 +8,22 @@ from bayesclass.clfs import TAN
 from .._version import __version__


-@pytest.fixture
-def data():
-    X, y = load_iris(return_X_y=True)
-    enc = KBinsDiscretizer(encode="ordinal")
-    return enc.fit_transform(X), y
-
-
 @pytest.fixture
 def clf():
-    return TAN()
+    return TAN(random_state=17, show_progress=False)


-def test_TAN_default_hyperparameters(data, clf):
+def test_TAN_default_hyperparameters(data_disc, clf):
     # Test default values of hyperparameters
     assert not clf.show_progress
-    assert clf.random_state is None
-    clf = TAN(show_progress=True, random_state=17)
-    assert clf.show_progress
     assert clf.random_state == 17
-    clf.fit(*data)
+    clf = TAN(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
     assert clf.head_ == 0
     assert clf.class_name_ == "class"
-    assert clf.features_ == [
+    assert clf.feature_names_in_ == [
         "feature_0",
         "feature_1",
         "feature_2",
@@ -45,59 +36,73 @@ def test_TAN_version(clf):
     assert __version__ == clf.version()


-def test_TAN_nodes_leaves(clf):
-    assert clf.nodes_leaves() == (0, 0)
+def test_TAN_nodes_edges(clf, data_disc):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data_disc, head="random")
+    assert clf.nodes_leaves() == (5, 7)


-def test_TAN_random_head(data):
-    clf = TAN(random_state=17)
-    clf.fit(*data, head="random")
+def test_TAN_states(clf, data_disc):
+    assert clf.states_ == 0
+    clf.fit(*data_disc)
+    assert clf.states_ == 19
+    assert clf.depth_ == clf.states_
+
+
+def test_TAN_random_head(clf, data_disc):
+    clf.fit(*data_disc, head="random")
     assert clf.head_ == 3


-def test_TAN_classifier(data, clf):
-    clf.fit(*data)
-    attribs = ["classes_", "X_", "y_", "head_", "features_", "class_name_"]
+def test_TAN_classifier(data_disc, clf):
+    clf.fit(*data_disc)
+    attribs = [
+        "classes_",
+        "X_",
+        "y_",
+        "head_",
+        "feature_names_in_",
+        "class_name_",
+    ]
     for attr in attribs:
         assert hasattr(clf, attr)
-    X = data[0]
-    y = data[1]
+    X = data_disc[0]
+    y = data_disc[1]
     y_pred = clf.predict(X)
     assert y_pred.shape == (X.shape[0],)
-    assert sum(y == y_pred) == 147
+    assert sum(y == y_pred) == 146


 @image_comparison(
     baseline_images=["line_dashes_TAN"], remove_text=True, extensions=["png"]
 )
-def test_TAN_plot(data, clf):
+def test_TAN_plot(data_disc, features, clf):
     # mpl_test_settings will automatically clean these internal side effects
     mpl_test_settings
-    dataset = load_iris(as_frame=True)
-    clf.fit(*data, features=dataset["feature_names"], head=0)
+    clf.fit(*data_disc, features=features, head=0)
     clf.plot("TAN Iris head=0")


-def test_KDB_wrong_num_features(data, clf):
+def test_TAN_wrong_num_features(data_disc, clf):
     with pytest.raises(
         ValueError,
         match="Number of features does not match the number of columns in X",
     ):
-        clf.fit(*data, features=["feature_1", "feature_2"])
+        clf.fit(*data_disc, features=["feature_1", "feature_2"])


-def test_TAN_wrong_hyperparam(data, clf):
+def test_TAN_wrong_hyperparam(data_disc, clf):
     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-        clf.fit(*data, wrong_param="wrong_param")
+        clf.fit(*data_disc, wrong_param="wrong_param")


-def test_TAN_head_out_of_range(data, clf):
+def test_TAN_head_out_of_range(data_disc, clf):
     with pytest.raises(ValueError, match="Head index out of range"):
-        clf.fit(*data, head=4)
+        clf.fit(*data_disc, head=4)


-def test_TAN_error_size_predict(data, clf):
-    X, y = data
+def test_TAN_error_size_predict(data_disc, clf):
+    X, y = data_disc
     clf.fit(X, y)
     with pytest.raises(ValueError):
         X_diff_size = np.ones((10, X.shape[1] + 1))
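A brief usage sketch for the updated TAN fixture and the head="random" selection exercised above; it assumes ordinal-discretized iris as in the fixture sketch earlier, and the head_ value of 3 is simply what the tests expect for random_state=17:

from sklearn.datasets import load_iris
from sklearn.preprocessing import KBinsDiscretizer
from bayesclass.clfs import TAN

X, y = load_iris(return_X_y=True)
X_disc = KBinsDiscretizer(encode="ordinal").fit_transform(X)
clf = TAN(random_state=17, show_progress=False)
clf.fit(X_disc, y, head="random")
print(clf.head_)           # the tests above expect 3
print(clf.nodes_leaves())  # the tests above expect (5, 7)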
bayesclass/tests/test_TANNew.py (new file, 120 lines)
@@ -0,0 +1,120 @@
import pytest
import numpy as np
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings


from bayesclass.clfs import TANNew
from .._version import __version__


@pytest.fixture
def clf():
    return TANNew(random_state=17)


def test_TANNew_default_hyperparameters(data, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state == 17
    clf = TANNew(show_progress=True)
    assert clf.show_progress
    assert clf.random_state is None
    clf.fit(*data)
    assert clf.head_ == 0
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


def test_TANNew_version(clf):
    """Check TANNew version."""
    assert __version__ == clf.version()


def test_TANNew_nodes_edges(clf, data):
    assert clf.nodes_edges() == (0, 0)
    clf.fit(*data, head="random")
    assert clf.nodes_leaves() == (5, 7)


def test_TANNew_states(clf, data):
    assert clf.states_ == 0
    clf.fit(*data)
    assert clf.states_ == 18
    assert clf.depth_ == clf.states_


def test_TANNew_random_head(clf, data):
    clf.fit(*data, head="random")
    assert clf.head_ == 3


def test_TANNew_local_discretization(clf, data):
    expected = [-1, [0, -1], [0, -1], [1, -1]]
    clf.fit(*data)
    for feature in range(4):
        assert (
            expected[feature] == clf.estimator_.discretizer_.target_[feature]
        )


def test_TANNew_classifier(data, clf):
    clf.fit(*data)
    attribs = [
        "classes_",
        "X_",
        "y_",
        "head_",
        "feature_names_in_",
        "class_name_",
    ]
    for attr in attribs:
        assert hasattr(clf, attr)
    X = data[0]
    y = data[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
    assert sum(y == y_pred) == 146


@image_comparison(
    baseline_images=["line_dashes_TANNew"],
    remove_text=True,
    extensions=["png"],
)
def test_TANNew_plot(data, features, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
    clf.fit(*data, features=features, head=0)
    clf.plot("TANNew Iris head=0")


def test_TANNew_wrong_num_features(data, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
        clf.fit(*data, features=["feature_1", "feature_2"])


def test_TANNew_wrong_hyperparam(data, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
        clf.fit(*data, wrong_param="wrong_param")


def test_TANNew_head_out_of_range(data, clf):
    with pytest.raises(ValueError, match="Head index out of range"):
        clf.fit(*data, head=4)


def test_TANNew_error_size_predict(data, clf):
    X, y = data
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)
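The local-discretization test above inspects clf.estimator_.discretizer_.target_. As a hedged inspection sketch (again assuming raw iris input), the same attribute can be printed directly after fitting:

from sklearn.datasets import load_iris
from bayesclass.clfs import TANNew

X, y = load_iris(return_X_y=True)
clf = TANNew(random_state=17).fit(X, y)
# Per-feature local-discretization targets; the test above expects
# [-1, [0, -1], [0, -1], [1, -1]] on iris.
print(clf.estimator_.discretizer_.target_)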
@@ -1,14 +1,29 @@
 import pytest
+import numpy as np

 from sklearn.utils.estimator_checks import check_estimator

-from bayesclass.clfs import TAN, KDB, AODE
+from bayesclass.clfs import BayesBase, TAN, KDB, AODE


-@pytest.mark.parametrize("estimator", [TAN(), KDB(k=2), AODE()])
-# @pytest.mark.parametrize("estimator", [AODE()])
-def test_all_estimators(estimator):
+def test_more_tags():
+    expected = {
+        "requires_positive_X": True,
+        "requires_positive_y": True,
+        "preserve_dtype": [np.int32, np.int64],
+        "requires_y": True,
+    }
+    clf = BayesBase(None, True)
+    computed = clf._more_tags()
+    for key, value in expected.items():
+        assert key in computed
+        assert computed[key] == value
+
+
+# @pytest.mark.parametrize("estimators", [TAN(), KDB(k=2), AODE()])
+@pytest.mark.parametrize("estimators", [AODE()])
+def test_all_estimators(estimators):
     i = 0
-    for estimator, test in check_estimator(estimator, generate_only=True):
+    for estimator, test in check_estimator(estimators, generate_only=True):
         print(i := i + 1, test)
         # test(estimator)
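The new test_more_tags checks the scikit-learn estimator tags that BayesBase declares. As a rough illustration only (SketchEstimator is hypothetical and not part of the code base), an estimator satisfying those expectations would override _more_tags along these lines:

import numpy as np
from sklearn.base import BaseEstimator


class SketchEstimator(BaseEstimator):
    """Hypothetical estimator used only to illustrate the tag override."""

    def _more_tags(self):
        return {
            "requires_positive_X": True,
            "requires_positive_y": True,
            "preserve_dtype": [np.int32, np.int64],
            "requires_y": True,
        }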
patch_pgmpy_0.1.22.diff (new file, 32 lines)
@@ -0,0 +1,32 @@
diff --git a/pgmpy/models/BayesianNetwork.py b/pgmpy/models/BayesianNetwork.py
index bd90122d..70ae38f7 100644
--- a/pgmpy/models/BayesianNetwork.py
+++ b/pgmpy/models/BayesianNetwork.py
@@ -27,7 +27,7 @@ class BayesianNetwork(DAG):
     Base class for Bayesian Models.
     """

-    def __init__(self, ebunch=None, latents=set()):
+    def __init__(self, ebunch=None, latents=set(), show_progress=False):
         """
         Initializes a Bayesian Model.
         A models stores nodes and edges with conditional probability
@@ -95,6 +95,7 @@ class BayesianNetwork(DAG):
         >>> len(G)  # number of nodes in graph
         3
         """
+        self.show_progress = show_progress
         super(BayesianNetwork, self).__init__(ebunch=ebunch, latents=latents)
         self.cpds = []
         self.cardinalities = defaultdict(int)
@@ -738,7 +739,9 @@ class BayesianNetwork(DAG):
                 show_progress=False,
             )
             for index, data_point in tqdm(
-                data_unique.iterrows(), total=data_unique.shape[0]
+                data_unique.iterrows(),
+                total=data_unique.shape[0],
+                disable=not self.show_progress,
             )
         )
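This vendored patch gives pgmpy's BayesianNetwork an opt-in show_progress flag, which is what the updated tests rely on when they build networks quietly. A minimal sketch of the effect, assuming the patch has been applied to the installed pgmpy 0.1.22:

from pgmpy.models import BayesianNetwork

# With the patch applied, the constructor accepts show_progress and the
# tqdm progress bar used during fitting is disabled when it is False.
model = BayesianNetwork(show_progress=False)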
@@ -25,6 +25,7 @@ dependencies = [
     "pgmpy",
     "networkx",
     "matplotlib",
+    "fimdlp",
 ]
 requires-python = ">=3.8"
 classifiers = [
@@ -1,5 +1,6 @@
 numpy
 scipy
+pandas
 scikit-learn
 matplotlib
 networkx
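The dependency changes above add "fimdlp" to pyproject.toml and pandas to requirements.txt, presumably in support of the local-discretization estimators introduced in this change set. A quick, hedged import check (the module name fimdlp is assumed to match the distribution name; nothing about versions is implied):

import pandas  # noqa: F401  (newly listed in requirements.txt)
import fimdlp  # noqa: F401  (newly listed in pyproject.toml)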