mirror of https://github.com/Doctorado-ML/bayesclass.git
Add weights to KDB classifier
@@ -52,6 +52,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             self.X_, columns=self.feature_names_in_, dtype=np.int32
         )
         self.dataset_[self.class_name_] = self.y_
+        if self.sample_weight_ is not None:
+            self.dataset_["_weight"] = self.sample_weight_
 
     def _check_params_fit(self, X, y, expected_args, kwargs):
         """Check the common parameters passed to fit"""
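
The weights travel with the training data in a dedicated "_weight" column, which is the column name pgmpy's weighted estimators read. A minimal sketch of the frame that fit now builds, with hypothetical toy values standing in for self.X_, self.y_ and self.sample_weight_:

import numpy as np
import pandas as pd

# Hypothetical stand-ins for self.X_, self.y_ and self.sample_weight_
X = np.array([[0, 1], [1, 0], [1, 1]], dtype=np.int32)
y = np.array([0, 1, 1])
sample_weight = [0.5, 1.0, 2.0]

dataset = pd.DataFrame(X, columns=["feature_0", "feature_1"], dtype=np.int32)
dataset["class"] = y
if sample_weight is not None:
    # "_weight" is the column pgmpy reads when fitting with weighted=True
    dataset["_weight"] = sample_weight
print(dataset)
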
@@ -62,6 +64,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         self.classes_ = unique_labels(y)
         self.n_classes_ = self.classes_.shape[0]
         # Default values
+        self.weighted_ = False
+        self.sample_weight_ = None
         self.class_name_ = self.default_class_name()
         self.features_ = default_feature_names(X.shape[1])
         for key, value in kwargs.items():
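
Both defaults keep the unweighted path intact for existing callers; they are only overwritten when weighted and sample_weight are passed to fit as keyword arguments, which the KDB hunk below whitelists.
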
@@ -80,6 +84,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             raise ValueError(
                 "Number of features does not match the number of columns in X"
             )
+
         self.n_features_in_ = X.shape[1]
         return X, y
 
@@ -151,13 +156,14 @@ class BayesBase(BaseEstimator, ClassifierMixin):
 
     def _train(self, kwargs):
         self.model_ = BayesianNetwork(
-            self.dag_.edges(), show_progress=self.show_progress
+            self.dag_.edges()  # , show_progress=self.show_progress
         )
         states = dict(state_names=kwargs.pop("state_names", []))
         self.model_.fit(
             self.dataset_,
             estimator=BayesianEstimator,
             prior_type="K2",
+            weighted=self.weighted_,
             **states,
         )
 
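
The weighted flag is resolved inside pgmpy's parameter estimation, where each row is counted according to its _weight value. A self-contained sketch of the same call on a toy two-node network, assuming a pgmpy release whose BayesianEstimator accepts the weighted keyword (the variable names A and B are made up for the example):

import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianNetwork

# Toy data; pgmpy requires the _weight column when weighted=True
data = pd.DataFrame(
    {"A": [0, 0, 1, 1], "B": [0, 1, 1, 1], "_weight": [1.0, 1.0, 2.0, 0.5]}
)
model = BayesianNetwork([("A", "B")])
model.fit(
    data,
    estimator=BayesianEstimator,
    prior_type="K2",
    weighted=True,  # count each row by its _weight instead of 1
)
print(model.get_cpds("B"))
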
@@ -321,7 +327,13 @@ class KDB(BayesBase):
         )
 
     def _check_params(self, X, y, kwargs):
-        expected_args = ["class_name", "features", "state_names"]
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
     def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
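
With sample_weight and weighted whitelisted, a weighted KDB can be driven entirely through fit. A hedged usage sketch with random toy data (the k constructor argument, the maximum number of feature parents, is assumed from the rest of the library):

import numpy as np
from bayesclass.clfs import KDB

rng = np.random.default_rng(0)
X = rng.integers(0, 3, size=(150, 4))  # KDB expects discretized features
y = rng.integers(0, 3, size=150)
weights = np.ones(150)
weights[:50] = 0  # give the first 50 samples no influence on the CPTs

clf = KDB(k=2)
clf.fit(X, y, sample_weight=weights, weighted=True)
print(clf.score(X, y))
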
(deleted file)
@@ -1,19 +0,0 @@
-from bayesclass.clfs import AODENew, TANNew, KDBNew, AODE
-from benchmark.datasets import Datasets
-import os
-
-os.chdir("../discretizbench")
-dt = Datasets()
-clfan = AODENew()
-clftn = TANNew()
-clfkn = KDBNew()
-# clfa = AODE()
-X, y = dt.load("iris")
-# clfa.fit(X, y)
-clfan.fit(X, y)
-clftn.fit(X, y)
-clfkn.fit(X, y)
-
-
-self.discretizer_.target_
-self.estimator.indexed_features_
@@ -64,6 +64,13 @@ def test_KDB_classifier(data_disc, clf):
     assert sum(y == y_pred) == 146
 
 
+def test_KDB_classifier_weighted(data_disc, clf):
+    sample_weight = [1] * data_disc[0].shape[0]
+    sample_weight[:50] = [0] * 50
+    clf.fit(*data_disc, sample_weight=sample_weight, weighted=True)
+    assert clf.score(*data_disc) == 0.64
+
+
 @image_comparison(
     baseline_images=["line_dashes_KDB"], remove_text=True, extensions=["png"]
 )
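
Assuming data_disc is the discretized iris data used by the unweighted test above (146 of 150 correct), zeroing the first 50 weights hides an entire class from the estimator, which is why the expected score drops to 0.64.
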
@@ -68,7 +68,6 @@ def test_KDBNew_local_discretization(clf, data):
     clf.fit(*data)
     for feature in range(4):
         computed = clf.estimator_.discretizer_.target_[feature]
-        print("computed:", computed)
         if type(computed) == list:
             for j, k in zip(expected[feature], computed):
                 assert j == k