Complete KDBNew

This commit is contained in:
2023-02-05 00:30:25 +01:00
parent de45a94c9b
commit 2a6547c71d

View File

@@ -12,7 +12,7 @@ import networkx as nx
from pgmpy.estimators import TreeSearch, BayesianEstimator from pgmpy.estimators import TreeSearch, BayesianEstimator
from pgmpy.models import BayesianNetwork from pgmpy.models import BayesianNetwork
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from fimdlp.mdlp import MultiDiscretizer from fimdlp.mdlp import FImdlp
from ._version import __version__ from ._version import __version__
@@ -194,7 +194,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
""" """
# Check is fit had been called # Check is fit had been called
check_is_fitted(self, ["X_", "y_", "fitted_"]) check_is_fitted(self, ["X_", "y_", "fitted_"])
# Input validation # Input validation
X = check_array(X) X = check_array(X)
dataset = pd.DataFrame( dataset = pd.DataFrame(
@@ -457,18 +456,28 @@ class AODE(BayesBase, BaseEnsemble):
class KDBNew(KDB): class KDBNew(KDB):
def fit(self, X, y, **kwargs): def fit(self, X, y, **kwargs):
self.discretizer_ = MultiDiscretizer(n_jobs=1) self.discretizer_ = FImdlp(n_jobs=1)
Xd = self.discretizer_.fit_transform(X, y) Xd = self.discretizer_.fit_transform(X, y)
features = kwargs["features"]
self.compute_kwargs(Xd, y, kwargs)
# Build the model
super().fit(Xd, y, **kwargs)
self.idx_features_ = dict(list(zip(features, range(len(features)))))
self.proposal(Xd)
return self
def predict(self, X):
    """Predict after passing X through the fitted discretizer.

    X is first transformed with ``self.discretizer_`` (set in ``fit``) and
    the transformed data is then handed to the parent class ``predict``,
    whose result is returned unchanged.
    """
    discretized = self.discretizer_.transform(X)
    return super().predict(discretized)
def compute_kwargs(self, Xd, y, kwargs):
features = kwargs["features"] features = kwargs["features"]
states = { states = {
features[i]: np.unique(Xd[:, i]).tolist() features[i]: np.unique(Xd[:, i]).tolist()
for i in range(Xd.shape[1]) for i in range(Xd.shape[1])
} }
states[kwargs["class_name"]] = np.unique(y).tolist()
kwargs["state_names"] = states kwargs["state_names"] = states
return super().fit(Xd, y, **kwargs) self.kwargs_ = kwargs
def predict(self, X, **kwargs):
return super().predict(self.discretizer_.transform(X))
def check_integrity(self, X, state_names, features): def check_integrity(self, X, state_names, features):
for i in range(X.shape[1]): for i in range(X.shape[1]):
@@ -486,3 +495,23 @@ class KDBNew(KDB):
np.array(state_names[features[i]]), np.array(state_names[features[i]]),
) )
raise ValueError("Discretization error") raise ValueError("Discretization error")
def proposal(self, Xd):
    """Re-discretize features using their parents in the DAG, then refit.

    Walks ``self.feature_names_in_`` in order. For each feature whose
    parent list in ``self.dag_`` has more than one entry, the class name
    is removed from that list, the remaining parents are mapped to column
    indices through ``self.idx_features_``, and the feature's column is
    replaced by the result of ``self.discretizer_.join_fit``.

    All replacements are written to a copy of ``Xd``; every ``join_fit``
    call receives the original ``Xd`` as its data. If at least one column
    was replaced, the state names are recomputed with ``compute_kwargs``
    and the parent class ``fit`` is run again on the updated matrix.

    Returns None.
    """
    updated = Xd.copy()
    changed = False
    for column, name in enumerate(self.feature_names_in_):
        parents = self.dag_.get_parents(name)
        if len(parents) <= 1:
            # Nothing but (at most) a single parent: keep the column as is.
            continue
        # Drop the class from the parents; presumably join_fit accounts for
        # the class on its own -- confirm against the discretizer's API.
        parents.remove(self.class_name_)
        parent_columns = [self.idx_features_[parent] for parent in parents]
        updated[:, column] = self.discretizer_.join_fit(
            target=column, features=parent_columns, data=Xd
        )
        changed = True
    if not changed:
        return
    # The new discretization may change the set of values per feature, so
    # the state names are rebuilt before fitting the model again.
    self.compute_kwargs(updated, self.y_, self.kwargs_)
    super().fit(updated, self.y_, **self.kwargs_)