mirror of https://github.com/Doctorado-ML/bayesclass.git
Add weights to KDB classifier
@@ -52,6 +52,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             self.X_, columns=self.feature_names_in_, dtype=np.int32
         )
         self.dataset_[self.class_name_] = self.y_
+        if self.sample_weight_ is not None:
+            self.dataset_["_weight"] = self.sample_weight_
 
     def _check_params_fit(self, X, y, expected_args, kwargs):
         """Check the common parameters passed to fit"""
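
The weights travel with the training data in a dedicated "_weight" column, which is the column name pgmpy's weighted estimators read. A minimal sketch of the frame that fit now builds, with hypothetical toy values standing in for self.X_, self.y_ and self.sample_weight_:

import numpy as np
import pandas as pd

# Hypothetical stand-ins for self.X_, self.y_ and self.sample_weight_
X = np.array([[0, 1], [1, 0], [1, 1]], dtype=np.int32)
y = np.array([0, 1, 1])
sample_weight = [0.5, 1.0, 2.0]

dataset = pd.DataFrame(X, columns=["feature_0", "feature_1"], dtype=np.int32)
dataset["class"] = y
if sample_weight is not None:
    # "_weight" is the column pgmpy reads when fitting with weighted=True
    dataset["_weight"] = sample_weight
print(dataset)
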
@@ -62,6 +64,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         self.classes_ = unique_labels(y)
         self.n_classes_ = self.classes_.shape[0]
         # Default values
+        self.weighted_ = False
+        self.sample_weight_ = None
         self.class_name_ = self.default_class_name()
         self.features_ = default_feature_names(X.shape[1])
         for key, value in kwargs.items():
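
Both defaults keep the unweighted path intact for existing callers; they are only overwritten when weighted and sample_weight are passed to fit as keyword arguments, which the KDB hunk below whitelists.
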
@@ -80,6 +84,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             raise ValueError(
                 "Number of features does not match the number of columns in X"
             )
+
         self.n_features_in_ = X.shape[1]
         return X, y
 
@@ -151,13 +156,14 @@ class BayesBase(BaseEstimator, ClassifierMixin):
 
     def _train(self, kwargs):
         self.model_ = BayesianNetwork(
-            self.dag_.edges(), show_progress=self.show_progress
+            self.dag_.edges()  # , show_progress=self.show_progress
         )
         states = dict(state_names=kwargs.pop("state_names", []))
         self.model_.fit(
             self.dataset_,
             estimator=BayesianEstimator,
             prior_type="K2",
+            weighted=self.weighted_,
             **states,
         )
 
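
The weighted flag is resolved inside pgmpy's parameter estimation, where each row is counted according to its _weight value. A self-contained sketch of the same call on a toy two-node network, assuming a pgmpy release whose BayesianEstimator accepts the weighted keyword (the variable names A and B are made up for the example):

import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianNetwork

# Toy data; pgmpy requires the _weight column when weighted=True
data = pd.DataFrame(
    {"A": [0, 0, 1, 1], "B": [0, 1, 1, 1], "_weight": [1.0, 1.0, 2.0, 0.5]}
)
model = BayesianNetwork([("A", "B")])
model.fit(
    data,
    estimator=BayesianEstimator,
    prior_type="K2",
    weighted=True,  # count each row by its _weight instead of 1
)
print(model.get_cpds("B"))
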
@@ -321,7 +327,13 @@ class KDB(BayesBase):
         )
 
     def _check_params(self, X, y, kwargs):
-        expected_args = ["class_name", "features", "state_names"]
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
     def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
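
With sample_weight and weighted whitelisted, a weighted KDB can be driven entirely through fit. A hedged usage sketch with random toy data (the k constructor argument, the maximum number of feature parents, is assumed from the rest of the library):

import numpy as np
from bayesclass.clfs import KDB

rng = np.random.default_rng(0)
X = rng.integers(0, 3, size=(150, 4))  # KDB expects discretized features
y = rng.integers(0, 3, size=150)
weights = np.ones(150)
weights[:50] = 0  # give the first 50 samples no influence on the CPTs

clf = KDB(k=2)
clf.fit(X, y, sample_weight=weights, weighted=True)
print(clf.score(X, y))
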
(deleted file)
@@ -1,19 +0,0 @@
-from bayesclass.clfs import AODENew, TANNew, KDBNew, AODE
-from benchmark.datasets import Datasets
-import os
-
-os.chdir("../discretizbench")
-dt = Datasets()
-clfan = AODENew()
-clftn = TANNew()
-clfkn = KDBNew()
-# clfa = AODE()
-X, y = dt.load("iris")
-# clfa.fit(X, y)
-clfan.fit(X, y)
-clftn.fit(X, y)
-clfkn.fit(X, y)
-
-
-self.discretizer_.target_
-self.estimator.indexed_features_
@@ -64,6 +64,13 @@ def test_KDB_classifier(data_disc, clf):
     assert sum(y == y_pred) == 146
 
 
+def test_KDB_classifier_weighted(data_disc, clf):
+    sample_weight = [1] * data_disc[0].shape[0]
+    sample_weight[:50] = [0] * 50
+    clf.fit(*data_disc, sample_weight=sample_weight, weighted=True)
+    assert clf.score(*data_disc) == 0.64
+
+
 @image_comparison(
     baseline_images=["line_dashes_KDB"], remove_text=True, extensions=["png"]
 )
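
Assuming data_disc is the discretized iris data used by the unweighted test above (146 of 150 correct), zeroing the first 50 weights hides an entire class from the estimator, which is why the expected score drops to 0.64.
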
@@ -68,7 +68,6 @@ def test_KDBNew_local_discretization(clf, data):
     clf.fit(*data)
     for feature in range(4):
         computed = clf.estimator_.discretizer_.target_[feature]
-        print("computed:", computed)
         if type(computed) == list:
             for j, k in zip(expected[feature], computed):
                 assert j == k