mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-18 09:05:55 +00:00
Add KDBNew estimator
This commit is contained in:
@@ -16,4 +16,5 @@ __all__ = [
|
|||||||
"TAN",
|
"TAN",
|
||||||
"KDB",
|
"KDB",
|
||||||
"AODE",
|
"AODE",
|
||||||
|
"KDBNew",
|
||||||
]
|
]
|
||||||
|
@@ -12,6 +12,7 @@ import networkx as nx
|
|||||||
from pgmpy.estimators import TreeSearch, BayesianEstimator
|
from pgmpy.estimators import TreeSearch, BayesianEstimator
|
||||||
from pgmpy.models import BayesianNetwork
|
from pgmpy.models import BayesianNetwork
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from fimdlp.mdlp import MultiDiscretizer
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
|
|
||||||
@@ -75,7 +76,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
|||||||
return self.states_
|
return self.states_
|
||||||
|
|
||||||
def fit(self, X, y, **kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
"""A reference implementation of a fitting function for a classifier.
|
"""Fit classifier
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -130,6 +131,9 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
|||||||
# Return the classifier
|
# Return the classifier
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def _build(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def _train(self, kwargs):
|
def _train(self, kwargs):
|
||||||
self.model_ = BayesianNetwork(
|
self.model_ = BayesianNetwork(
|
||||||
self.dag_.edges(), show_progress=self.show_progress
|
self.dag_.edges(), show_progress=self.show_progress
|
||||||
@@ -260,37 +264,38 @@ class TAN(BayesBase):
|
|||||||
return X, y
|
return X, y
|
||||||
|
|
||||||
def _build(self):
    """Build the TAN (Tree Augmented Naive Bayes) DAG.

    Delegates structure learning to pgmpy's TreeSearch estimator with
    ``estimator_type="tan"``, rooted at the feature selected by
    ``self.head_``. Stores the resulting DAG in ``self.dag_``.

    NOTE(review): the previous hand-rolled implementation (calling
    pgmpy private helpers ``_get_conditional_weights`` /
    ``_create_tree_and_dag``) was removed here as dead code; the public
    ``estimate`` API below is the supported equivalent.
    """
    # Root the tree at the configured head feature; TreeSearch needs the
    # column name, not the index.
    est = TreeSearch(
        self.dataset_, root_node=self.feature_names_in_[self.head_]
    )
    self.dag_ = est.estimate(
        estimator_type="tan",
        class_node=self.class_name_,
        show_progress=self.show_progress,
    )
|
|
||||||
|
|
||||||
class KDB(BayesBase):
|
class KDB(BayesBase):
|
||||||
@@ -345,7 +350,6 @@ class KDB(BayesBase):
|
|||||||
Compute the conditional probabilility infered by the structure of BN by
|
Compute the conditional probabilility infered by the structure of BN by
|
||||||
using counts from DB, and output BN.
|
using counts from DB, and output BN.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 1. get the mutual information between each feature and the class
|
# 1. get the mutual information between each feature and the class
|
||||||
mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
|
mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
|
||||||
# 2. symmetric matrix where each element represents I(X, Y| class_node)
|
# 2. symmetric matrix where each element represents I(X, Y| class_node)
|
||||||
@@ -449,3 +453,36 @@ class AODE(BayesBase, BaseEnsemble):
|
|||||||
for index, model in enumerate(self.models_):
|
for index, model in enumerate(self.models_):
|
||||||
result[:, index] = model.predict(dataset).values.ravel()
|
result[:, index] = model.predict(dataset).values.ravel()
|
||||||
return mode(result, axis=1, keepdims=False).mode.ravel()
|
return mode(result, axis=1, keepdims=False).mode.ravel()
|
||||||
|
|
||||||
|
|
||||||
|
class KDBNew(KDB):
    """KDB classifier that discretizes continuous input internally.

    Wraps :class:`KDB`: fits an MDLP-based discretizer on the training
    data, transforms ``X`` to discrete values, and rebuilds the
    ``state_names`` kwarg from the discretized columns before delegating
    to the parent estimator.
    """

    def fit(self, X, y, **kwargs):
        """Discretize ``X`` and fit the underlying KDB model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Continuous training data; discretized before fitting.
        y : array-like of shape (n_samples,)
            Target values.
        **kwargs : dict
            Must contain ``"features"`` (list of feature names, one per
            column of ``X``). ``"state_names"`` is overwritten here with
            the states observed in the discretized data.

        Returns
        -------
        self : KDBNew
            The fitted estimator.
        """
        self.discretizer_ = MultiDiscretizer(n_jobs=1)
        Xd = self.discretizer_.fit_transform(X, y)
        features = kwargs["features"]  # required; KeyError if missing
        # One state list per column: the unique discretized values,
        # keyed by feature name as pgmpy expects.
        states = {
            features[i]: np.unique(Xd[:, i]).tolist()
            for i in range(Xd.shape[1])
        }
        kwargs["state_names"] = states
        return super().fit(Xd, y, **kwargs)

    def predict(self, X, **kwargs):
        """Predict class labels, discretizing ``X`` with the fitted cuts.

        NOTE(review): ``**kwargs`` is accepted for signature
        compatibility but intentionally not forwarded, matching the
        parent call with positional data only.
        """
        return super().predict(self.discretizer_.transform(X))

    def check_integrity(self, X, state_names, features):
        """Verify each column's observed values match its declared states.

        Raises
        ------
        ValueError
            If any column of ``X`` contains a set of unique values that
            differs from ``state_names[features[i]]``. The message
            identifies the offending feature and both value sets
            (previously this was a debug ``print`` plus a bare error).
        """
        for i in range(X.shape[1]):
            observed = np.unique(X[:, i])
            expected = np.array(state_names[features[i]])
            if not np.array_equal(observed, expected):
                raise ValueError(
                    "Discretization error in column "
                    f"{i} ({features[i]}): observed states {observed} "
                    f"!= declared states {expected}"
                )
Reference in New Issue
Block a user