Continue with New estimators

2023-02-07 18:02:35 +01:00
parent 63a2feef3a
commit 42ac57eb79


@@ -139,9 +139,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
# Return the classifier
return self
def _build(self):
pass
def _train(self, kwargs):
self.model_ = BayesianNetwork(
self.dag_.edges(), show_progress=self.show_progress
@@ -463,116 +460,54 @@ class AODE(BayesBase, BaseEnsemble):
class TANNew(TAN):
pass
def fit(self, X, y, **kwargs):
self.estimator = Proposal(self)
return self.estimator.fit(X, y, **kwargs)
def predict(self, X):
self.plot()
return self.estimator.predict(X)
class KDBNew(KDB):
def __init__(self, k, theta=0.3, show_progress=False, random_state=None):
super().__init__(
k, theta, show_progress=show_progress, random_state=random_state
)
self.estimator = Proposal(self)
def fit(self, X, y, **kwargs):
Xd, kwargs = self.estimator.discretize_train(X, y, kwargs)
super().fit(Xd, y, **kwargs)
upgraded, Xd, kwargs = self.estimator.local_discretization(kwargs)
if upgraded:
super().fit(Xd, y, **kwargs)
self.estimator = Proposal(self)
return self.estimator.fit(X, y, **kwargs)
def predict(self, X):
self.plot()
return self.estimator.predict(X)
# def fit(self, X, y, **kwargs):
# self.discretizer_ = FImdlp(n_jobs=1)
# Xd = self.discretizer_.fit_transform(X, y)
# features = (
# kwargs["features"]
# if "features" in kwargs
# else self.default_feature_names(Xd.shape[1])
# )
# self.compute_kwargs(Xd, y, kwargs)
# # Build the model
# super().fit(Xd, y, **kwargs)
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
# self.proposal(Xd)
# return self
# def predict(self, X):
# return super().predict(self.discretizer_.transform(X))
# def compute_kwargs(self, Xd, y, kwargs):
# features = kwargs["features"]
# states = {
# features[i]: np.unique(Xd[:, i]).tolist()
# for i in range(Xd.shape[1])
# }
# class_name = (
# kwargs["class_name"]
# if "class_name" in kwargs
# else self.default_class_name()
# )
# states[class_name] = np.unique(y).tolist()
# kwargs["state_names"] = states
# self.kwargs_ = kwargs
# def check_integrity(self, X, state_names, features):
# for i in range(X.shape[1]):
# if not np.array_equal(
# np.unique(X[:, i]), np.array(state_names[features[i]])
# ):
# print(
# "i",
# i,
# "features[i]",
# features[i],
# "np.unique(X[:, i])",
# np.unique(X[:, i]),
# "np.array(state_names[features[i]])",
# np.array(state_names[features[i]]),
# )
# raise ValueError("Discretization error")
# def proposal(self, Xd):
# """Discretize each feature with its fathers and the class"""
# res = Xd.copy()
# upgraded = False
# for idx, feature in enumerate(self.feature_names_in_):
# fathers = self.dag_.get_parents(feature)
# if len(fathers) > 1:
# # First remove the class name as it will be added later
# fathers.remove(self.class_name_)
# # Get the fathers indices
# features = [self.idx_features_[f] for f in fathers]
# # Update the discretization of the feature
# res[:, idx] = self.discretizer_.join_fit(
# target=idx, features=features, data=Xd
# )
# upgraded = True
# if upgraded:
# self.compute_kwargs(res, self.y_, self.kwargs_)
# super().fit(res, self.y_, **self.kwargs_)
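
The commented-out block above is the earlier in-class attempt at discretization; the new TANNew and KDBNew estimators instead build a Proposal around themselves and delegate fit/predict to it, calling super(self.class_type, self.estimator) so the wrapped call reaches the parent class rather than re-entering the overridden methods. A minimal, self-contained sketch of that delegation pattern, where BaseClassifier, Wrapper and ToyNew are invented stand-ins for TAN/KDB, Proposal and TANNew/KDBNew, and the median split is only a placeholder for FImdlp:

    import numpy as np

    class BaseClassifier:
        # Stand-in for TAN/KDB: expects already-discretized data.
        def fit(self, Xd, y, **kwargs):
            self.classes_, counts = np.unique(y, return_counts=True)
            self.majority_ = self.classes_[np.argmax(counts)]
            return self

        def predict(self, Xd):
            # Always answers with the majority class seen during fit.
            return np.full(Xd.shape[0], self.majority_)

    class Wrapper:
        # Stand-in for Proposal: discretizes, then calls the *parent* class's
        # fit/predict through super() so the overridden methods are not re-entered.
        def __init__(self, estimator):
            self.estimator = estimator
            self.class_type = estimator.__class__

        def fit(self, X, y, **kwargs):
            self.cut_ = np.median(X, axis=0)  # crude one-threshold "discretization"
            Xd = (X > self.cut_).astype(int)
            super(self.class_type, self.estimator).fit(Xd, y, **kwargs)
            return self.estimator

        def predict(self, X):
            Xd = (X > self.cut_).astype(int)
            return super(self.class_type, self.estimator).predict(Xd)

    class ToyNew(BaseClassifier):
        # Mirrors TANNew/KDBNew: fit/predict only build the wrapper and delegate.
        def fit(self, X, y, **kwargs):
            self.estimator = Wrapper(self)
            return self.estimator.fit(X, y, **kwargs)

        def predict(self, X):
            return self.estimator.predict(X)

    X = np.array([[0.1, 2.0], [0.4, 1.0], [0.9, 3.0], [0.7, 0.5]])
    y = np.array([0, 0, 1, 1])
    print(ToyNew().fit(X, y).predict(X))  # -> [0 0 0 0], the majority class
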
class Proposal:
def __init__(self, estimator):
self.estimator = estimator
self.class_type = estimator.__class__
def discretize_train(self, X, y, kwargs):
self.discretizer_ = FImdlp(n_jobs=1)
self.Xd = self.discretizer_.fit_transform(X, y)
kwargs = self.compute_kwargs(y, kwargs)
return self.Xd, kwargs
def local_discretization(self, kwargs):
features = kwargs["features"]
self.idx_features_ = dict(list(zip(features, range(len(features)))))
return self._local_discretization(kwargs)
def fit(self, X, y, **kwargs):
# Discretize train data
self.discretizer = FImdlp(n_jobs=1)
self.Xd = self.discretizer.fit_transform(X, y)
kwargs = self.update_kwargs(y, kwargs)
# Build the model
super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
self.check_integrity("f", self.Xd)
# # Local discretization based on the model
# features = kwargs["features"]
# # assign indices to feature names
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
# upgraded, self.Xd = self._local_discretization()
# if upgraded:
# kwargs = self.update_kwargs(y, kwargs)
# super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
def predict(self, X):
return self.estimator.predict(self.discretizer_.transform(X))
self.check_integrity("p", self.discretizer.transform(X))
return super(self.class_type, self.estimator).predict(
self.discretizer.transform(X)
)
def compute_kwargs(self, y, kwargs):
def update_kwargs(self, y, kwargs):
features = (
kwargs["features"]
if "features" in kwargs
@@ -589,26 +524,50 @@ class Proposal:
)
states[class_name] = np.unique(y).tolist()
kwargs["state_names"] = states
self.state_names_ = states
self.features_ = features
kwargs["features"] = features
kwargs["class_name"] = class_name
return kwargs
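
For reference, the state_names mapping that update_kwargs stores in kwargs is nothing more than the sorted distinct values of each discretized column plus the class values. A small stand-alone illustration with made-up feature and class names:

    import numpy as np

    def build_state_names(Xd, y, features, class_name):
        # np.unique returns the sorted distinct values of each discretized column.
        states = {
            features[i]: np.unique(Xd[:, i]).tolist() for i in range(Xd.shape[1])
        }
        states[class_name] = np.unique(y).tolist()
        return states

    Xd = np.array([[0, 2], [1, 2], [1, 3]])
    y = np.array(["a", "b", "a"])
    print(build_state_names(Xd, y, ["f0", "f1"], "class"))
    # {'f0': [0, 1], 'f1': [2, 3], 'class': ['a', 'b']}
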
def _local_discretization(self, kwargs):
def _local_discretization(self):
"""Discretize each feature with its fathers and the class"""
res = self.Xd.copy()
upgraded = False
print("-" * 80)
for idx, feature in enumerate(self.estimator.feature_names_in_):
fathers = self.estimator.dag_.get_parents(feature)
if len(fathers) > 1:
print(
"Discretizing " + feature + " with " + str(fathers),
end=" ",
)
# First remove the class name as it will be added later
fathers.remove(self.estimator.class_name_)
# Get the fathers indices
features = [self.idx_features_[f] for f in fathers]
# Update the discretization of the feature
res[:, idx] = self.discretizer_.join_fit(
res[:, idx] = self.discretizer.join_fit(
target=idx, features=features, data=self.Xd
)
print(self.discretizer.y_join[:5])
upgraded = True
if upgraded:
kwargs = self.compute_kwargs(res, self.estimator.y_, kwargs)
return upgraded, res, kwargs
return upgraded, res
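
_local_discretization leans on the discretizer's join_fit to re-cut each feature whose node has parents beyond the class, using those parents together with the class. Stripped of FImdlp, the idea is re-binning a feature separately within each configuration of its already discrete parents; the toy sketch below uses a plain quantile cut rather than the MDLP criterion, so it only mirrors the shape of the computation, not the real algorithm:

    import numpy as np

    def rebin_with_parents(x_cont, parent_cols, n_bins=2):
        # Re-discretize one continuous feature separately inside each combination
        # of its discrete parent columns; a toy stand-in for join_fit.
        keys = [tuple(row) for row in parent_cols]
        out = np.zeros(len(x_cont), dtype=int)
        for key in set(keys):
            idx = np.array([k == key for k in keys])
            # Cut points computed only from the rows that share this parent state.
            edges = np.quantile(x_cont[idx], np.linspace(0, 1, n_bins + 1)[1:-1])
            out[idx] = np.digitize(x_cont[idx], edges)
        return out

    rng = np.random.default_rng(0)
    x = rng.normal(size=12)                              # continuous feature
    parents = np.column_stack([rng.integers(0, 2, 12)])  # one binary parent
    print(rebin_with_parents(x, parents))                # 12 bin labels in {0, 1}
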
def check_integrity(self, source, X):
print(f"Checking integrity of {source} data")
for i in range(X.shape[1]):
if not set(np.unique(X[:, i]).tolist()).issubset(
set(self.state_names_[self.features_[i]])
):
print(
"i",
i,
"features[i]",
self.features_[i],
"np.unique(X[:, i])",
np.unique(X[:, i]),
"np.array(state_names[features[i]])",
np.array(self.state_names_[self.features_[i]]),
)
raise ValueError("Discretization error")
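
check_integrity guards against a column containing values that were never recorded in state_names. The same subset test in isolation, with invented names and a usage example:

    import numpy as np

    def check_integrity(X, state_names, features):
        # Every value observed in column i must be a known state of features[i].
        for i in range(X.shape[1]):
            observed = set(np.unique(X[:, i]).tolist())
            allowed = set(state_names[features[i]])
            if not observed.issubset(allowed):
                raise ValueError(
                    f"Discretization error in {features[i]}: "
                    f"{sorted(observed - allowed)} not in {sorted(allowed)}"
                )

    X = np.array([[0, 1], [1, 2]])
    check_integrity(X, {"f0": [0, 1], "f1": [1, 2]}, ["f0", "f1"])  # passes silently
    check_integrity(X, {"f0": [0], "f1": [1, 2]}, ["f0", "f1"])     # raises ValueError
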