mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-15 23:55:57 +00:00
Continue with New estimators
This commit is contained in:
@@ -139,9 +139,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
||||
# Return the classifier
|
||||
return self
|
||||
|
||||
def _build(self):
|
||||
pass
|
||||
|
||||
def _train(self, kwargs):
|
||||
self.model_ = BayesianNetwork(
|
||||
self.dag_.edges(), show_progress=self.show_progress
|
||||
@@ -463,116 +460,54 @@ class AODE(BayesBase, BaseEnsemble):
|
||||
|
||||
|
||||
class TANNew(TAN):
|
||||
pass
|
||||
def fit(self, X, y, **kwargs):
|
||||
self.estimator = Proposal(self)
|
||||
return self.estimator.fit(X, y, **kwargs)
|
||||
|
||||
def predict(self, X):
|
||||
self.plot()
|
||||
return self.estimator.predict(X)
|
||||
|
||||
|
||||
class KDBNew(KDB):
|
||||
def __init__(self, k, theta=0.3, show_progress=False, random_state=None):
|
||||
super().__init__(
|
||||
k, theta, show_progress=show_progress, random_state=random_state
|
||||
)
|
||||
self.estimator = Proposal(self)
|
||||
|
||||
def fit(self, X, y, **kwargs):
|
||||
Xd, kwargs = self.estimator.discretize_train(X, y, kwargs)
|
||||
super().fit(Xd, y, **kwargs)
|
||||
upgraded, Xd, kwargs = self.estimator.local_discretization(kwargs)
|
||||
if upgraded:
|
||||
super().fit(Xd, y, **kwargs)
|
||||
self.estimator = Proposal(self)
|
||||
return self.estimator.fit(X, y, **kwargs)
|
||||
|
||||
def predict(self, X):
|
||||
self.plot()
|
||||
return self.estimator.predict(X)
|
||||
|
||||
# def fit(self, X, y, **kwargs):
|
||||
# self.discretizer_ = FImdlp(n_jobs=1)
|
||||
# Xd = self.discretizer_.fit_transform(X, y)
|
||||
# features = (
|
||||
# kwargs["features"]
|
||||
# if "features" in kwargs
|
||||
# else self.default_feature_names(Xd.shape[1])
|
||||
# )
|
||||
# self.compute_kwargs(Xd, y, kwargs)
|
||||
# # Build the model
|
||||
# super().fit(Xd, y, **kwargs)
|
||||
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
||||
# self.proposal(Xd)
|
||||
# return self
|
||||
|
||||
# def predict(self, X):
|
||||
# return super().predict(self.discretizer_.transform(X))
|
||||
|
||||
# def compute_kwargs(self, Xd, y, kwargs):
|
||||
# features = kwargs["features"]
|
||||
# states = {
|
||||
# features[i]: np.unique(Xd[:, i]).tolist()
|
||||
# for i in range(Xd.shape[1])
|
||||
# }
|
||||
# class_name = (
|
||||
# kwargs["class_name"]
|
||||
# if "class_name" in kwargs
|
||||
# else self.default_class_name()
|
||||
# )
|
||||
# states[class_name] = np.unique(y).tolist()
|
||||
# kwargs["state_names"] = states
|
||||
# self.kwargs_ = kwargs
|
||||
|
||||
# def check_integrity(self, X, state_names, features):
|
||||
# for i in range(X.shape[1]):
|
||||
# if not np.array_equal(
|
||||
# np.unique(X[:, i]), np.array(state_names[features[i]])
|
||||
# ):
|
||||
# print(
|
||||
# "i",
|
||||
# i,
|
||||
# "features[i]",
|
||||
# features[i],
|
||||
# "np.unique(X[:, i])",
|
||||
# np.unique(X[:, i]),
|
||||
# "np.array(state_names[features[i]])",
|
||||
# np.array(state_names[features[i]]),
|
||||
# )
|
||||
# raise ValueError("Discretization error")
|
||||
|
||||
# def proposal(self, Xd):
|
||||
# """Discretize each feature with its fathers and the class"""
|
||||
# res = Xd.copy()
|
||||
# upgraded = False
|
||||
# for idx, feature in enumerate(self.feature_names_in_):
|
||||
# fathers = self.dag_.get_parents(feature)
|
||||
# if len(fathers) > 1:
|
||||
# # First remove the class name as it will be added later
|
||||
# fathers.remove(self.class_name_)
|
||||
# # Get the fathers indices
|
||||
# features = [self.idx_features_[f] for f in fathers]
|
||||
# # Update the discretization of the feature
|
||||
# res[:, idx] = self.discretizer_.join_fit(
|
||||
# target=idx, features=features, data=Xd
|
||||
# )
|
||||
# upgraded = True
|
||||
# if upgraded:
|
||||
# self.compute_kwargs(res, self.y_, self.kwargs_)
|
||||
# super().fit(res, self.y_, **self.kwargs_)
|
||||
|
||||
|
||||
class Proposal:
|
||||
def __init__(self, estimator):
|
||||
self.estimator = estimator
|
||||
self.class_type = estimator.__class__
|
||||
|
||||
def discretize_train(self, X, y, kwargs):
|
||||
self.discretizer_ = FImdlp(n_jobs=1)
|
||||
self.Xd = self.discretizer_.fit_transform(X, y)
|
||||
kwargs = self.compute_kwargs(y, kwargs)
|
||||
return self.Xd, kwargs
|
||||
|
||||
def local_discretization(self, kwargs):
|
||||
features = kwargs["features"]
|
||||
self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
||||
return self._local_discretization(kwargs)
|
||||
def fit(self, X, y, **kwargs):
|
||||
# Discretize train data
|
||||
self.discretizer = FImdlp(n_jobs=1)
|
||||
self.Xd = self.discretizer.fit_transform(X, y)
|
||||
kwargs = self.update_kwargs(y, kwargs)
|
||||
# Build the model
|
||||
super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
|
||||
self.check_integrity("f", self.Xd)
|
||||
# # Local discretization based on the model
|
||||
# features = kwargs["features"]
|
||||
# # assign indices to feature names
|
||||
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
||||
# upgraded, self.Xd = self._local_discretization()
|
||||
# if upgraded:
|
||||
# kwargs = self.update_kwargs(y, kwargs)
|
||||
# super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
|
||||
|
||||
def predict(self, X):
|
||||
return self.estimator.predict(self.discretizer_.transform(X))
|
||||
self.check_integrity("p", self.discretizer.transform(X))
|
||||
return super(self.class_type, self.estimator).predict(
|
||||
self.discretizer.transform(X)
|
||||
)
|
||||
|
||||
def compute_kwargs(self, y, kwargs):
|
||||
def update_kwargs(self, y, kwargs):
|
||||
features = (
|
||||
kwargs["features"]
|
||||
if "features" in kwargs
|
||||
@@ -589,26 +524,50 @@ class Proposal:
|
||||
)
|
||||
states[class_name] = np.unique(y).tolist()
|
||||
kwargs["state_names"] = states
|
||||
self.state_names_ = states
|
||||
self.features_ = features
|
||||
kwargs["features"] = features
|
||||
kwargs["class_name"] = class_name
|
||||
return kwargs
|
||||
|
||||
def _local_discretization(self, kwargs):
|
||||
def _local_discretization(self):
|
||||
"""Discretize each feature with its fathers and the class"""
|
||||
res = self.Xd.copy()
|
||||
upgraded = False
|
||||
print("-" * 80)
|
||||
for idx, feature in enumerate(self.estimator.feature_names_in_):
|
||||
fathers = self.estimator.dag_.get_parents(feature)
|
||||
if len(fathers) > 1:
|
||||
print(
|
||||
"Discretizing " + feature + " with " + str(fathers),
|
||||
end=" ",
|
||||
)
|
||||
# First remove the class name as it will be added later
|
||||
fathers.remove(self.estimator.class_name_)
|
||||
# Get the fathers indices
|
||||
features = [self.idx_features_[f] for f in fathers]
|
||||
# Update the discretization of the feature
|
||||
res[:, idx] = self.discretizer_.join_fit(
|
||||
res[:, idx] = self.discretizer.join_fit(
|
||||
target=idx, features=features, data=self.Xd
|
||||
)
|
||||
print(self.discretizer.y_join[:5])
|
||||
upgraded = True
|
||||
if upgraded:
|
||||
kwargs = self.compute_kwargs(res, self.estimator.y_, kwargs)
|
||||
return upgraded, res, kwargs
|
||||
return upgraded, res
|
||||
|
||||
def check_integrity(self, source, X):
|
||||
print(f"Checking integrity of {source} data")
|
||||
for i in range(X.shape[1]):
|
||||
if not set(np.unique(X[:, i]).tolist()).issubset(
|
||||
set(self.state_names_[self.features_[i]])
|
||||
):
|
||||
print(
|
||||
"i",
|
||||
i,
|
||||
"features[i]",
|
||||
self.features_[i],
|
||||
"np.unique(X[:, i])",
|
||||
np.unique(X[:, i]),
|
||||
"np.array(state_names[features[i]])",
|
||||
np.array(self.state_names_[self.features_[i]]),
|
||||
)
|
||||
raise ValueError("Discretization error")
|
||||
|
Reference in New Issue
Block a user