From c906d6a3612907d82f9ea97abdc0a03367b05429 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?=
Date: Thu, 15 Jun 2023 14:13:15 +0200
Subject: [PATCH] Add weights to KDB classifier

---
 bayesclass/clfs.py              | 16 ++++++++++++++--
 bayesclass/test.py              | 19 -------------------
 bayesclass/tests/test_KDB.py    |  7 +++++++
 bayesclass/tests/test_KDBNew.py |  1 -
 4 files changed, 21 insertions(+), 22 deletions(-)
 delete mode 100644 bayesclass/test.py

diff --git a/bayesclass/clfs.py b/bayesclass/clfs.py
index 5868027..db40900 100644
--- a/bayesclass/clfs.py
+++ b/bayesclass/clfs.py
@@ -52,6 +52,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             self.X_, columns=self.feature_names_in_, dtype=np.int32
         )
         self.dataset_[self.class_name_] = self.y_
+        if self.sample_weight_ is not None:
+            self.dataset_["_weight"] = self.sample_weight_
 
     def _check_params_fit(self, X, y, expected_args, kwargs):
         """Check the common parameters passed to fit"""
@@ -62,6 +64,8 @@ class BayesBase(BaseEstimator, ClassifierMixin):
         self.classes_ = unique_labels(y)
         self.n_classes_ = self.classes_.shape[0]
         # Default values
+        self.weighted_ = False
+        self.sample_weight_ = None
         self.class_name_ = self.default_class_name()
         self.features_ = default_feature_names(X.shape[1])
         for key, value in kwargs.items():
@@ -80,6 +84,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
             raise ValueError(
                 "Number of features does not match the number of columns in X"
             )
+        self.n_features_in_ = X.shape[1]
 
         return X, y
 
@@ -151,13 +156,14 @@ class BayesBase(BaseEstimator, ClassifierMixin):
 
     def _train(self, kwargs):
         self.model_ = BayesianNetwork(
-            self.dag_.edges(), show_progress=self.show_progress
+            self.dag_.edges()  # , show_progress=self.show_progress
         )
         states = dict(state_names=kwargs.pop("state_names", []))
         self.model_.fit(
             self.dataset_,
             estimator=BayesianEstimator,
             prior_type="K2",
+            weighted=self.weighted_,
             **states,
         )
 
@@ -321,7 +327,13 @@ class KDB(BayesBase):
         )
 
     def _check_params(self, X, y, kwargs):
-        expected_args = ["class_name", "features", "state_names"]
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
         return self._check_params_fit(X, y, expected_args, kwargs)
 
     def _add_m_edges(self, dag, idx, S_nodes, conditional_weights):
diff --git a/bayesclass/test.py b/bayesclass/test.py
deleted file mode 100644
index fd983d6..0000000
--- a/bayesclass/test.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from bayesclass.clfs import AODENew, TANNew, KDBNew, AODE
-from benchmark.datasets import Datasets
-import os
-
-os.chdir("../discretizbench")
-dt = Datasets()
-clfan = AODENew()
-clftn = TANNew()
-clfkn = KDBNew()
-# clfa = AODE()
-X, y = dt.load("iris")
-# clfa.fit(X, y)
-clfan.fit(X, y)
-clftn.fit(X, y)
-clfkn.fit(X, y)
-
-
-self.discretizer_.target_
-self.estimator.indexed_features_
diff --git a/bayesclass/tests/test_KDB.py b/bayesclass/tests/test_KDB.py
index aa35751..d365d7d 100644
--- a/bayesclass/tests/test_KDB.py
+++ b/bayesclass/tests/test_KDB.py
@@ -64,6 +64,13 @@ def test_KDB_classifier(data_disc, clf):
     assert sum(y == y_pred) == 146
 
 
+def test_KDB_classifier_weighted(data_disc, clf):
+    sample_weight = [1] * data_disc[0].shape[0]
+    sample_weight[:50] = [0] * 50
+    clf.fit(*data_disc, sample_weight=sample_weight, weighted=True)
+    assert clf.score(*data_disc) == 0.64
+
+
 @image_comparison(
     baseline_images=["line_dashes_KDB"], remove_text=True, extensions=["png"]
 )
diff --git a/bayesclass/tests/test_KDBNew.py b/bayesclass/tests/test_KDBNew.py
index 36de4da..6c0ce73 100644
--- a/bayesclass/tests/test_KDBNew.py
+++ b/bayesclass/tests/test_KDBNew.py
@@ -68,7 +68,6 @@ def test_KDBNew_local_discretization(clf, data):
     clf.fit(*data)
     for feature in range(4):
         computed = clf.estimator_.discretizer_.target_[feature]
-        print("computed:", computed)
         if type(computed) == list:
             for j, k in zip(expected[feature], computed):
                 assert j == k