From 0768d68a36d02fe4b99bd12092ac01b5d0247644 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 8 Apr 2023 12:22:03 +0200
Subject: [PATCH] add join_fit target info

---
 k.py                            | 12 ------------
 src/fimdlp/mdlp.py              | 13 ++++++++++---
 src/fimdlp/tests/FImdlp_test.py | 12 +++++++++++-
 3 files changed, 21 insertions(+), 16 deletions(-)
 delete mode 100644 k.py

diff --git a/k.py b/k.py
deleted file mode 100644
index 47e0856..0000000
--- a/k.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from sklearn.datasets import load_wine
-from fimdlp.mdlp import FImdlp
-
-X, y = load_wine(return_X_y=True)
-trans = FImdlp()
-Xt = trans.join_transform(X, y, 12)
-print("X shape = ", X.shape)
-print("Xt.shape=", Xt.shape)
-print("Xt ", Xt[:10])
-print("trans.X_ shape = ", trans.X_.shape)
-print("trans.y_ ", trans.y_[:10])
-print("y_join ", trans.y_join_[:10])
diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 5a8ea8c..36ce3a0 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -6,8 +6,6 @@ from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from joblib import Parallel, delayed
 from ._version import __version__
 
-# from ._version import __version__
-
 
 class FImdlp(TransformerMixin, BaseEstimator):
     def __init__(self, n_jobs=-1, min_length=3, max_depth=1e6, max_cuts=0):
@@ -24,6 +22,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
         The number of jobs to run in parallel. :meth:`fit` and
         :meth:`transform`, are parallelized over the features. ``-1`` means
         using all cores available.
+    min_length: int, default=3
+        The minimum length an interval must have to be considered for discretization.
+    max_depth: int, default=1e6
+        The maximum depth of the discretization process.
+    max_cuts: float, default=0
+        The maximum number of cut points to be computed for each feature.
 
     Attributes
     ----------
@@ -109,6 +113,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
             delayed(self._fit_discretizer)(feature)
             for feature in range(self.n_features_in_)
         )
+        # target used to fit each feature; -1 stands for y (see join_fit)
+        self.target_ = [-1] * self.n_features_in_
         return self
 
     def _fit_discretizer(self, feature):
@@ -244,11 +250,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
                 f"Target {target} not in range [0, {self.n_features_in_})"
             )
         if target in features:
-            raise ValueError("Target cannot in features to join")
+            raise ValueError("Target cannot be in features to join")
         y_join = [
             f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
             for item_y, items_x in zip(self.y_, data[:, features])
         ]
+        self.target_[target] = features + [-1]
         self.y_join_ = y_join
         self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
         self.cut_points_[target] = self.discretizer_[target].get_cut_points()
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index 111f960..0215509 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -196,7 +196,7 @@ class FImdlpTest(unittest.TestCase):
             clf.join_fit([0, 2], 2, x)
         self.assertEqual(
             str(exception.exception),
-            "Target cannot in features to join",
+            "Target cannot be in features to join",
         )
 
     def test_factorize(self):
@@ -209,6 +209,16 @@ class FImdlpTest(unittest.TestCase):
         computed = clf.factorize(y)
         self.assertListEqual([0, 1, 1, 2, 3], computed)
 
+    def test_join_fit_info(self):
+        clf = FImdlp()
+        X, y = load_iris(return_X_y=True)
+        clf.fit(X, y)
+        clf.join_fit([0, 2], 1, X)
+        clf.join_fit([0, 3], 2, X)
+        clf.join_fit([1, 2], 3, X)
+        expected = [-1, [0, 2, -1], [0, 3, -1], [1, 2, -1]]
+        self.assertListEqual(expected, clf.target_)
+
     @staticmethod
     def test_sklearn_transformer():
         for check, test in check_estimator(FImdlp(), generate_only=True):