mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-17 00:26:10 +00:00
Continue with New estimators
This commit is contained in:
@@ -139,9 +139,6 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
|||||||
# Return the classifier
|
# Return the classifier
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def _build(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _train(self, kwargs):
|
def _train(self, kwargs):
|
||||||
self.model_ = BayesianNetwork(
|
self.model_ = BayesianNetwork(
|
||||||
self.dag_.edges(), show_progress=self.show_progress
|
self.dag_.edges(), show_progress=self.show_progress
|
||||||
@@ -463,116 +460,54 @@ class AODE(BayesBase, BaseEnsemble):
|
|||||||
|
|
||||||
|
|
||||||
class TANNew(TAN):
|
class TANNew(TAN):
|
||||||
pass
|
def fit(self, X, y, **kwargs):
|
||||||
|
self.estimator = Proposal(self)
|
||||||
|
return self.estimator.fit(X, y, **kwargs)
|
||||||
|
|
||||||
|
def predict(self, X):
|
||||||
|
self.plot()
|
||||||
|
return self.estimator.predict(X)
|
||||||
|
|
||||||
|
|
||||||
class KDBNew(KDB):
|
class KDBNew(KDB):
|
||||||
def __init__(self, k, theta=0.3, show_progress=False, random_state=None):
|
|
||||||
super().__init__(
|
|
||||||
k, theta, show_progress=show_progress, random_state=random_state
|
|
||||||
)
|
|
||||||
self.estimator = Proposal(self)
|
|
||||||
|
|
||||||
def fit(self, X, y, **kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
Xd, kwargs = self.estimator.discretize_train(X, y, kwargs)
|
self.estimator = Proposal(self)
|
||||||
super().fit(Xd, y, **kwargs)
|
return self.estimator.fit(X, y, **kwargs)
|
||||||
upgraded, Xd, kwargs = self.estimator.local_discretization(kwargs)
|
|
||||||
if upgraded:
|
|
||||||
super().fit(Xd, y, **kwargs)
|
|
||||||
|
|
||||||
def predict(self, X):
|
def predict(self, X):
|
||||||
|
self.plot()
|
||||||
return self.estimator.predict(X)
|
return self.estimator.predict(X)
|
||||||
|
|
||||||
# def fit(self, X, y, **kwargs):
|
|
||||||
# self.discretizer_ = FImdlp(n_jobs=1)
|
|
||||||
# Xd = self.discretizer_.fit_transform(X, y)
|
|
||||||
# features = (
|
|
||||||
# kwargs["features"]
|
|
||||||
# if "features" in kwargs
|
|
||||||
# else self.default_feature_names(Xd.shape[1])
|
|
||||||
# )
|
|
||||||
# self.compute_kwargs(Xd, y, kwargs)
|
|
||||||
# # Build the model
|
|
||||||
# super().fit(Xd, y, **kwargs)
|
|
||||||
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
|
||||||
# self.proposal(Xd)
|
|
||||||
# return self
|
|
||||||
|
|
||||||
# def predict(self, X):
|
|
||||||
# return super().predict(self.discretizer_.transform(X))
|
|
||||||
|
|
||||||
# def compute_kwargs(self, Xd, y, kwargs):
|
|
||||||
# features = kwargs["features"]
|
|
||||||
# states = {
|
|
||||||
# features[i]: np.unique(Xd[:, i]).tolist()
|
|
||||||
# for i in range(Xd.shape[1])
|
|
||||||
# }
|
|
||||||
# class_name = (
|
|
||||||
# kwargs["class_name"]
|
|
||||||
# if "class_name" in kwargs
|
|
||||||
# else self.default_class_name()
|
|
||||||
# )
|
|
||||||
# states[class_name] = np.unique(y).tolist()
|
|
||||||
# kwargs["state_names"] = states
|
|
||||||
# self.kwargs_ = kwargs
|
|
||||||
|
|
||||||
# def check_integrity(self, X, state_names, features):
|
|
||||||
# for i in range(X.shape[1]):
|
|
||||||
# if not np.array_equal(
|
|
||||||
# np.unique(X[:, i]), np.array(state_names[features[i]])
|
|
||||||
# ):
|
|
||||||
# print(
|
|
||||||
# "i",
|
|
||||||
# i,
|
|
||||||
# "features[i]",
|
|
||||||
# features[i],
|
|
||||||
# "np.unique(X[:, i])",
|
|
||||||
# np.unique(X[:, i]),
|
|
||||||
# "np.array(state_names[features[i]])",
|
|
||||||
# np.array(state_names[features[i]]),
|
|
||||||
# )
|
|
||||||
# raise ValueError("Discretization error")
|
|
||||||
|
|
||||||
# def proposal(self, Xd):
|
|
||||||
# """Discretize each feature with its fathers and the class"""
|
|
||||||
# res = Xd.copy()
|
|
||||||
# upgraded = False
|
|
||||||
# for idx, feature in enumerate(self.feature_names_in_):
|
|
||||||
# fathers = self.dag_.get_parents(feature)
|
|
||||||
# if len(fathers) > 1:
|
|
||||||
# # First remove the class name as it will be added later
|
|
||||||
# fathers.remove(self.class_name_)
|
|
||||||
# # Get the fathers indices
|
|
||||||
# features = [self.idx_features_[f] for f in fathers]
|
|
||||||
# # Update the discretization of the feature
|
|
||||||
# res[:, idx] = self.discretizer_.join_fit(
|
|
||||||
# target=idx, features=features, data=Xd
|
|
||||||
# )
|
|
||||||
# upgraded = True
|
|
||||||
# if upgraded:
|
|
||||||
# self.compute_kwargs(res, self.y_, self.kwargs_)
|
|
||||||
# super().fit(res, self.y_, **self.kwargs_)
|
|
||||||
|
|
||||||
|
|
||||||
class Proposal:
|
class Proposal:
|
||||||
def __init__(self, estimator):
|
def __init__(self, estimator):
|
||||||
self.estimator = estimator
|
self.estimator = estimator
|
||||||
|
self.class_type = estimator.__class__
|
||||||
|
|
||||||
def discretize_train(self, X, y, kwargs):
|
def fit(self, X, y, **kwargs):
|
||||||
self.discretizer_ = FImdlp(n_jobs=1)
|
# Discretize train data
|
||||||
self.Xd = self.discretizer_.fit_transform(X, y)
|
self.discretizer = FImdlp(n_jobs=1)
|
||||||
kwargs = self.compute_kwargs(y, kwargs)
|
self.Xd = self.discretizer.fit_transform(X, y)
|
||||||
return self.Xd, kwargs
|
kwargs = self.update_kwargs(y, kwargs)
|
||||||
|
# Build the model
|
||||||
def local_discretization(self, kwargs):
|
super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
|
||||||
features = kwargs["features"]
|
self.check_integrity("f", self.Xd)
|
||||||
self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
# # Local discretization based on the model
|
||||||
return self._local_discretization(kwargs)
|
# features = kwargs["features"]
|
||||||
|
# # assign indices to feature names
|
||||||
|
# self.idx_features_ = dict(list(zip(features, range(len(features)))))
|
||||||
|
# upgraded, self.Xd = self._local_discretization()
|
||||||
|
# if upgraded:
|
||||||
|
# kwargs = self.update_kwargs(y, kwargs)
|
||||||
|
# super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
|
||||||
|
|
||||||
def predict(self, X):
|
def predict(self, X):
|
||||||
return self.estimator.predict(self.discretizer_.transform(X))
|
self.check_integrity("p", self.discretizer.transform(X))
|
||||||
|
return super(self.class_type, self.estimator).predict(
|
||||||
|
self.discretizer.transform(X)
|
||||||
|
)
|
||||||
|
|
||||||
def compute_kwargs(self, y, kwargs):
|
def update_kwargs(self, y, kwargs):
|
||||||
features = (
|
features = (
|
||||||
kwargs["features"]
|
kwargs["features"]
|
||||||
if "features" in kwargs
|
if "features" in kwargs
|
||||||
@@ -589,26 +524,50 @@ class Proposal:
|
|||||||
)
|
)
|
||||||
states[class_name] = np.unique(y).tolist()
|
states[class_name] = np.unique(y).tolist()
|
||||||
kwargs["state_names"] = states
|
kwargs["state_names"] = states
|
||||||
|
self.state_names_ = states
|
||||||
|
self.features_ = features
|
||||||
kwargs["features"] = features
|
kwargs["features"] = features
|
||||||
kwargs["class_name"] = class_name
|
kwargs["class_name"] = class_name
|
||||||
return kwargs
|
return kwargs
|
||||||
|
|
||||||
def _local_discretization(self, kwargs):
|
def _local_discretization(self):
|
||||||
"""Discretize each feature with its fathers and the class"""
|
"""Discretize each feature with its fathers and the class"""
|
||||||
res = self.Xd.copy()
|
res = self.Xd.copy()
|
||||||
upgraded = False
|
upgraded = False
|
||||||
|
print("-" * 80)
|
||||||
for idx, feature in enumerate(self.estimator.feature_names_in_):
|
for idx, feature in enumerate(self.estimator.feature_names_in_):
|
||||||
fathers = self.estimator.dag_.get_parents(feature)
|
fathers = self.estimator.dag_.get_parents(feature)
|
||||||
if len(fathers) > 1:
|
if len(fathers) > 1:
|
||||||
|
print(
|
||||||
|
"Discretizing " + feature + " with " + str(fathers),
|
||||||
|
end=" ",
|
||||||
|
)
|
||||||
# First remove the class name as it will be added later
|
# First remove the class name as it will be added later
|
||||||
fathers.remove(self.estimator.class_name_)
|
fathers.remove(self.estimator.class_name_)
|
||||||
# Get the fathers indices
|
# Get the fathers indices
|
||||||
features = [self.idx_features_[f] for f in fathers]
|
features = [self.idx_features_[f] for f in fathers]
|
||||||
# Update the discretization of the feature
|
# Update the discretization of the feature
|
||||||
res[:, idx] = self.discretizer_.join_fit(
|
res[:, idx] = self.discretizer.join_fit(
|
||||||
target=idx, features=features, data=self.Xd
|
target=idx, features=features, data=self.Xd
|
||||||
)
|
)
|
||||||
|
print(self.discretizer.y_join[:5])
|
||||||
upgraded = True
|
upgraded = True
|
||||||
if upgraded:
|
return upgraded, res
|
||||||
kwargs = self.compute_kwargs(res, self.estimator.y_, kwargs)
|
|
||||||
return upgraded, res, kwargs
|
def check_integrity(self, source, X):
|
||||||
|
print(f"Checking integrity of {source} data")
|
||||||
|
for i in range(X.shape[1]):
|
||||||
|
if not set(np.unique(X[:, i]).tolist()).issubset(
|
||||||
|
set(self.state_names_[self.features_[i]])
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
"i",
|
||||||
|
i,
|
||||||
|
"features[i]",
|
||||||
|
self.features_[i],
|
||||||
|
"np.unique(X[:, i])",
|
||||||
|
np.unique(X[:, i]),
|
||||||
|
"np.array(state_names[features[i]])",
|
||||||
|
np.array(self.state_names_[self.features_[i]]),
|
||||||
|
)
|
||||||
|
raise ValueError("Discretization error")
|
||||||
|
Reference in New Issue
Block a user