Add KDBNew estimator

This commit is contained in:
2023-02-04 17:39:32 +01:00
parent 9019b878f0
commit de45a94c9b
2 changed files with 70 additions and 32 deletions

View File

@@ -16,4 +16,5 @@ __all__ = [
"TAN", "TAN",
"KDB", "KDB",
"AODE", "AODE",
"KDBNew",
] ]

View File

@@ -12,6 +12,7 @@ import networkx as nx
from pgmpy.estimators import TreeSearch, BayesianEstimator from pgmpy.estimators import TreeSearch, BayesianEstimator
from pgmpy.models import BayesianNetwork from pgmpy.models import BayesianNetwork
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from fimdlp.mdlp import MultiDiscretizer
from ._version import __version__ from ._version import __version__
@@ -75,7 +76,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
return self.states_ return self.states_
def fit(self, X, y, **kwargs): def fit(self, X, y, **kwargs):
"""A reference implementation of a fitting function for a classifier. """Fit classifier
Parameters Parameters
---------- ----------
@@ -130,6 +131,9 @@ class BayesBase(BaseEstimator, ClassifierMixin):
# Return the classifier # Return the classifier
return self return self
def _build(self):
pass
def _train(self, kwargs): def _train(self, kwargs):
self.model_ = BayesianNetwork( self.model_ = BayesianNetwork(
self.dag_.edges(), show_progress=self.show_progress self.dag_.edges(), show_progress=self.show_progress
@@ -260,37 +264,38 @@ class TAN(BayesBase):
return X, y return X, y
def _build(self): def _build(self):
# est = TreeSearch(self.dataset_, est = TreeSearch(
# root_node=self.feature_names_in_[self.head_]) self.dataset_, root_node=self.feature_names_in_[self.head_]
# self.dag_ = est.estimate( )
# estimator_type="tan", self.dag_ = est.estimate(
# class_node=self.class_name_, estimator_type="tan",
# show_progress=self.show_progress, class_node=self.class_name_,
# ) show_progress=self.show_progress,
)
# Code taken from pgmpy # Code taken from pgmpy
n_jobs = -1 # n_jobs = -1
weights = TreeSearch._get_conditional_weights( # weights = TreeSearch._get_conditional_weights(
self.dataset_, # self.dataset_,
self.class_name_, # self.class_name_,
"mutual_info", # "mutual_info",
n_jobs, # n_jobs,
self.show_progress, # self.show_progress,
) # )
# Step 4.2: Construct chow-liu DAG on {data.columns - class_node} # # Step 4.2: Construct chow-liu DAG on {data.columns - class_node}
class_node_idx = np.where(self.dataset_.columns == self.class_name_)[ # class_node_idx = np.where(self.dataset_.columns == self.class_name_)[
0 # 0
][0] # ][0]
weights = np.delete(weights, class_node_idx, axis=0) # weights = np.delete(weights, class_node_idx, axis=0)
weights = np.delete(weights, class_node_idx, axis=1) # weights = np.delete(weights, class_node_idx, axis=1)
reduced_columns = np.delete(self.dataset_.columns, class_node_idx) # reduced_columns = np.delete(self.dataset_.columns, class_node_idx)
D = TreeSearch._create_tree_and_dag( # D = TreeSearch._create_tree_and_dag(
weights, reduced_columns, self.feature_names_in_[self.head_] # weights, reduced_columns, self.feature_names_in_[self.head_]
) # )
# Step 4.3: Add edges from class_node to all other nodes. # # Step 4.3: Add edges from class_node to all other nodes.
D.add_edges_from( # D.add_edges_from(
[(self.class_name_, node) for node in reduced_columns] # [(self.class_name_, node) for node in reduced_columns]
) # )
self.dag_ = D # self.dag_ = D
class KDB(BayesBase): class KDB(BayesBase):
@@ -345,7 +350,6 @@ class KDB(BayesBase):
Compute the conditional probabilility infered by the structure of BN by Compute the conditional probabilility infered by the structure of BN by
using counts from DB, and output BN. using counts from DB, and output BN.
""" """
# 1. get the mutual information between each feature and the class # 1. get the mutual information between each feature and the class
mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True) mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
# 2. symmetric matrix where each element represents I(X, Y| class_node) # 2. symmetric matrix where each element represents I(X, Y| class_node)
@@ -449,3 +453,36 @@ class AODE(BayesBase, BaseEnsemble):
for index, model in enumerate(self.models_): for index, model in enumerate(self.models_):
result[:, index] = model.predict(dataset).values.ravel() result[:, index] = model.predict(dataset).values.ravel()
return mode(result, axis=1, keepdims=False).mode.ravel() return mode(result, axis=1, keepdims=False).mode.ravel()
class KDBNew(KDB):
    """KDB classifier that discretizes continuous features before fitting.

    Wraps :class:`KDB` with a supervised MDLP ``MultiDiscretizer``
    (fimdlp): ``fit`` discretizes X using y, records the discrete states
    of every feature, then delegates to ``KDB.fit``; ``predict`` applies
    the already-fitted discretizer to X before delegating.
    """

    def fit(self, X, y, **kwargs):
        """Discretize X and fit the underlying KDB model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Continuous training data.
        y : array-like of shape (n_samples,)
            Class labels used to supervise the discretization.
        **kwargs : dict
            Must include ``features`` (sequence of feature names); a
            ``state_names`` entry is added/overwritten with the discrete
            states observed for each feature.

        Returns
        -------
        self : KDBNew
            The fitted estimator (returned by ``KDB.fit``).
        """
        # n_jobs=1 keeps the per-column discretization order deterministic
        # so the states computed below line up with `features`.
        self.discretizer_ = MultiDiscretizer(n_jobs=1)
        Xd = self.discretizer_.fit_transform(X, y)
        features = kwargs["features"]
        # Declare the full set of discrete states per feature up front so
        # the downstream network knows every value a column can take.
        kwargs["state_names"] = {
            features[i]: np.unique(Xd[:, i]).tolist()
            for i in range(Xd.shape[1])
        }
        return super().fit(Xd, y, **kwargs)

    def predict(self, X, **kwargs):
        """Discretize X with the fitted discretizer, then predict via KDB.

        Fix: forward ``**kwargs`` to the parent ``predict`` — the original
        accepted them but silently dropped them.
        """
        return super().predict(self.discretizer_.transform(X), **kwargs)

    def check_integrity(self, X, state_names, features):
        """Verify each column of X uses exactly the recorded states.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Discretized data to validate.
        state_names : dict
            Mapping of feature name to its expected list of states.
        features : sequence of str
            Feature name for each column of X.

        Raises
        ------
        ValueError
            If any column's observed values differ from the states stored
            for its feature; diagnostics are printed before raising.
        """
        for i in range(X.shape[1]):
            observed = np.unique(X[:, i])
            expected = np.array(state_names[features[i]])
            if not np.array_equal(observed, expected):
                print(
                    "i",
                    i,
                    "features[i]",
                    features[i],
                    "np.unique(X[:, i])",
                    observed,
                    "np.array(state_names[features[i]])",
                    expected,
                )
                raise ValueError("Discretization error")