From ca7d158ac8fa99fe3a06542a71a18d5ac4049b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Thu, 26 Jan 2023 10:47:27 +0100
Subject: [PATCH 01/14] feat: :alembic: Add join_transform method and cpp
 factorize

---
 k.py                            | 12 +++++++
 setup.py                        |  5 ++-
 src/fimdlp/Factorize.cpp        | 18 ++++++++++
 src/fimdlp/Factorize.h          | 10 ++++++
 src/fimdlp/cfimdlp.pyx          |  5 +++
 src/fimdlp/mdlp.py              | 63 ++++++++++++++++++++++++---------
 src/fimdlp/tests/FImdlp_test.py | 54 +++++++++++++++++++++++++++-
 7 files changed, 148 insertions(+), 19 deletions(-)
 create mode 100644 k.py
 create mode 100644 src/fimdlp/Factorize.cpp
 create mode 100644 src/fimdlp/Factorize.h
diff --git a/k.py b/k.py
new file mode 100644
index 0000000..47e0856
--- /dev/null
+++ b/k.py
@@ -0,0 +1,12 @@
+from sklearn.datasets import load_wine
+from fimdlp.mdlp import FImdlp
+
+X, y = load_wine(return_X_y=True)
+trans = FImdlp()
+Xt = trans.join_transform(X, y, 12)
+print("X shape = ", X.shape)
+print("Xt.shape=", Xt.shape)
+print("Xt ", Xt[:10])
+print("trans.X_ shape = ", trans.X_.shape)
+print("trans.y_ ", trans.y_[:10])
+print("y_join ", trans.y_join_[:10])
diff --git a/setup.py b/setup.py
index db8a696..0ba294e 100644
--- a/setup.py
+++ b/setup.py
@@ -14,10 +14,13 @@ setup(
                 "src/fimdlp/cfimdlp.pyx",
                 "src/cppmdlp/CPPFImdlp.cpp",
                 "src/cppmdlp/Metrics.cpp",
+                "src/fimdlp/Factorize.cpp",
             ],
             language="c++",
             include_dirs=["fimdlp"],
-            extra_compile_args=["-std=c++2a"],
+            extra_compile_args=[
+                "-std=c++11",
+            ],
         ),
     ]
 )
diff --git a/src/fimdlp/Factorize.cpp b/src/fimdlp/Factorize.cpp
new file mode 100644
index 0000000..f814d6f
--- /dev/null
+++ b/src/fimdlp/Factorize.cpp
@@ -0,0 +1,18 @@
+#include "Factorize.h"
+
+namespace utils {
+    vector<int> cppFactorize(const vector<string>& labels_t)
+    {
+        vector<int> yy;
+        yy.reserve(labels_t.size());
+        map<string, int> labelMap;
+        int i = 0;
+        for (string label : labels_t) {
+            if (labelMap.find(label) == labelMap.end()) {
+                labelMap[label] = i++;
+            }
+            yy.push_back(labelMap[label]);
+        }
+        return yy;
+    }
+}
\ No newline at end of file
diff --git a/src/fimdlp/Factorize.h b/src/fimdlp/Factorize.h
new file mode 100644
index 0000000..28f4c74
--- /dev/null
+++ b/src/fimdlp/Factorize.h
@@ -0,0 +1,10 @@
+#ifndef FACTORIZE_H
+#define FACTORIZE_H
+#include <vector>
+#include <map>
+#include <string>
+namespace utils {
+    using namespace std;
+    vector<int> cppFactorize(const vector<string>&);
+}
+#endif
\ No newline at end of file
diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx
index 9b548dd..c09d3e1 100644
--- a/src/fimdlp/cfimdlp.pyx
+++ b/src/fimdlp/cfimdlp.pyx
@@ -24,3 +24,8 @@ cdef class CFImdlp:
         return self.thisptr.getCutPoints()
     def get_version(self):
         return self.thisptr.version()
+
+cdef extern from "Factorize.h" namespace "utils":
+    vector[int] cppFactorize(vector[string] &input_vector)
+def factorize(input_vector):
+    return cppFactorize(input_vector)
\ No newline at end of file
diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index e4e4437..0010dfb 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -1,5 +1,5 @@
 import numpy as np
-from .cppfimdlp import CFImdlp
+from .cppfimdlp import CFImdlp, factorize
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
@@ -33,21 +33,17 @@ class FImdlp(TransformerMixin, BaseEstimator):
         The list of discretizers, one for each feature.
     cut_points_ : list
         The list of cut points for each feature.
-    X_ : array
-        the samples used to fit, shape (n_samples, n_features)
-    y_ : array
-        the labels used to fit, shape (n_samples,)
+    X_ : array, shape (n_samples, n_features)
+        the samples used to fit
+    y_ : array, shape(n_samples,)
+        the labels used to fit
     features_ : list
         the list of features to be discretized
     """
 
-    def _check_params_fit(self, X, y, expected_args, kwargs):
-        """Check the common parameters passed to fit"""
+    def _check_args(self, X, y, expected_args, kwargs):
         # Check that X and y have correct shape
         X, y = check_X_y(X, y)
-        # Store the classes seen during fit
-        self.classes_ = unique_labels(y)
-        self.n_classes_ = self.classes_.shape[0]
         # Default values
         self.features_ = [i for i in range(X.shape[1])]
         for key, value in kwargs.items():
@@ -68,15 +64,20 @@ class FImdlp(TransformerMixin, BaseEstimator):
             raise ValueError("Feature index out of range")
         return X, y
 
+    def _update_params(self, X, y):
+        # Store the classes seen during fit
+        self.classes_ = unique_labels(y)
+        self.n_classes_ = self.classes_.shape[0]
+        self.n_features_ = X.shape[1]
+
     def fit(self, X, y, **kwargs):
         """A reference implementation of a fitting function for a transformer.
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
+        X : array, shape (n_samples, n_features)
             The training input samples.
-        y : None
-            There is no need of a target in a transformer, yet the pipeline API
-            requires this parameter.
+        y : array, shape (n_samples,)
+            the labels used to fit
         features : list, default=[i for i in range(n_features)]
             The list of features to be discretized.
         Returns
@@ -84,10 +85,10 @@ class FImdlp(TransformerMixin, BaseEstimator):
         self : object
             Returns self.
         """
-        X, y = self._check_params_fit(
+        X, y = self._check_args(
             X, y, expected_args=["features"], kwargs=kwargs
         )
-        self.n_features_ = X.shape[1]
+        self._update_params(X, y)
         self.X_ = X
         self.y_ = y
         self.discretizer_ = [None] * self.n_features_
@@ -119,7 +120,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
         """Discretize X values.
         Parameters
         ----------
-        X : {array-like}, shape (n_samples, n_features)
+        X : array, shape (n_samples, n_features)
             The input samples.
         Returns
         -------
@@ -146,6 +147,34 @@ class FImdlp(TransformerMixin, BaseEstimator):
         )
         return result
 
+    def join_transform(self, X, y, feature, **kwargs):
+        """Join the selected feature with the labels and discretize the values
+        join - fit - transform
+
+        Parameters
+        ----------
+        X : array, shape (n_samples, n_features)
+            The input samples.
+        y : array
+            the labels used to fit
+        feature : int
+            index of the feature to join with the labels
+        """
+        X, y = self._check_args(
+            X, y, expected_args=["features"], kwargs=kwargs
+        )
+        if feature < 0 or feature >= X.shape[1]:
+            raise ValueError(
+                f"Feature {feature} not in range [0, {X.shape[1]})"
+            )
+        self.y_join_ = [
+            f"{str(item_y)}{str(item_x)}".encode()
+            for item_y, item_x in zip(y, X[:, feature])
+        ]
+        yy = factorize(self.y_join_)
+        XX = np.delete(X, feature, axis=1)
+        return self.fit(XX, yy).transform(XX)
+
     def get_cut_points(self):
         """Get the cut points for each feature.
         Returns
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index 99c5864..ec7096d 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -1,7 +1,8 @@
 import unittest
 import sklearn
-from sklearn.datasets import load_iris
 import numpy as np
+from sklearn.datasets import load_iris
+from ..cppfimdlp import factorize
 from ..mdlp import FImdlp
 from .. import version
 from .._version import __version__
@@ -159,3 +160,54 @@ class FImdlpTest(unittest.TestCase):
         with self.assertRaises(sklearn.exceptions.NotFittedError):
             clf = FImdlp(algorithm=1)
             clf.transform([[1, 2], [3, 4]])
+
+    def test_factorize(self):
+        source = [
+            b"f0",
+            b"f1",
+            b"f2",
+            b"f3",
+            b"f4",
+            b"f5",
+            b"f6",
+            b"f1",
+            b"f1",
+            b"f7",
+            b"f8",
+        ]
+        expected = [0, 1, 2, 3, 4, 5, 6, 1, 1, 7, 8]
+        computed = factorize(source)
+        self.assertListEqual(expected, computed)
+
+    def test_join_transform(self):
+        y = ["f0", "f0", "f2", "f3", "f4"]
+        x = [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [1, 2, 3, 4, 5],
+            [2, 3, 4, 5, 6],
+            [3, 4, 5, 6, 7],
+        ]
+        expected = [
+            [0, 0, 0, 0],
+            [0, 0, 0, 0],
+            [1, 1, 1, 1],
+            [2, 2, 2, 2],
+            [2, 2, 2, 2],
+        ]
+        clf = FImdlp()
+        computed = clf.join_transform(x, y, 0)
+        for computed, expected in zip(computed, expected):
+            self.assertListEqual(expected, computed.tolist())
+
+    def test_join_transform_error(self):
+        y = ["f0", "f0", "f2", "f3", "f4"]
+        x = [
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [1, 2, 3, 4, 5],
+            [2, 3, 4, 5, 6],
+            [3, 4, 5, 6, 7],
+        ]
+        with self.assertRaises(ValueError):
+            FImdlp().join_transform(x, y, 5)

From 16b31ec29333417038ba544e64047f2837bec5fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Thu, 26 Jan 2023 11:17:10 +0100
Subject: [PATCH 02/14] test: :white_check_mark: Complete join_transform test

---
 src/fimdlp/tests/FImdlp_test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index ec7096d..315c8b8 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -199,6 +199,8 @@ class FImdlpTest(unittest.TestCase):
         computed = clf.join_transform(x, y, 0)
         for computed, expected in zip(computed, expected):
             self.assertListEqual(expected, computed.tolist())
+        expected_y = [b"f00", b"f00", b"f21", b"f32", b"f43"]
+        self.assertListEqual(expected_y, clf.y_join_)
 
     def test_join_transform_error(self):
         y = ["f0", "f0", "f2", "f3", "f4"]

From 29fc88cecc0bad78aefa427affda895adb68ff7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Thu, 26 Jan 2023 23:20:51 +0100
Subject: [PATCH 03/14] test: :zap: Add scikit learn compatibility
 check_estimator test

---
 src/fimdlp/cfimdlp.pyx          |  6 +++++-
 src/fimdlp/mdlp.py              | 23 +++++++++++++----------
 src/fimdlp/tests/FImdlp_test.py | 17 +++++++++++------
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx
index c09d3e1..18e1d81 100644
--- a/src/fimdlp/cfimdlp.pyx
+++ b/src/fimdlp/cfimdlp.pyx
@@ -13,7 +13,9 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
         
 cdef class CFImdlp:
     cdef CPPFImdlp *thisptr
-    def __cinit__(self, algorithm):
+    cdef int algorithm
+    def __cinit__(self, algorithm:int ):
+        self.algorithm = algorithm
         self.thisptr = new CPPFImdlp(algorithm)
     def __dealloc__(self):
         del self.thisptr
@@ -24,6 +26,8 @@ cdef class CFImdlp:
         return self.thisptr.getCutPoints()
     def get_version(self):
         return self.thisptr.version()
+    def __reduce__(self):
+        return (CFImdlp, (self.algorithm,))
 
 cdef extern from "Factorize.h" namespace "utils":
     vector[int] cppFactorize(vector[string] &input_vector)
diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 0010dfb..a9ec2b8 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -27,7 +27,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
 
     Attributes
     ----------
-    n_features_ : int
+    n_features_in_ : int
         The number of features of the data passed to :meth:`fit`.
     discretizer_ : list
         The list of discretizers, one for each feature.
@@ -41,6 +41,9 @@ class FImdlp(TransformerMixin, BaseEstimator):
         the list of features to be discretized
     """
 
+    def _more_tags(self):
+        return {"preserves_dtype": [np.int32], "requires_y": True}
+
     def _check_args(self, X, y, expected_args, kwargs):
         # Check that X and y have correct shape
         X, y = check_X_y(X, y)
@@ -68,7 +71,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
         # Store the classes seen during fit
         self.classes_ = unique_labels(y)
         self.n_classes_ = self.classes_.shape[0]
-        self.n_features_ = X.shape[1]
+        self.n_features_in_ = X.shape[1]
 
     def fit(self, X, y, **kwargs):
         """A reference implementation of a fitting function for a transformer.
@@ -91,11 +94,11 @@ class FImdlp(TransformerMixin, BaseEstimator):
         self._update_params(X, y)
         self.X_ = X
         self.y_ = y
-        self.discretizer_ = [None] * self.n_features_
-        self.cut_points_ = [None] * self.n_features_
+        self.discretizer_ = [None] * self.n_features_in_
+        self.cut_points_ = [None] * self.n_features_in_
         Parallel(n_jobs=self.n_jobs, prefer="threads")(
             delayed(self._fit_discretizer)(feature)
-            for feature in range(self.n_features_)
+            for feature in range(self.n_features_in_)
         )
         return self
 
@@ -128,22 +131,22 @@ class FImdlp(TransformerMixin, BaseEstimator):
             The array containing the discretized values of ``X``.
         """
         # Check is fit had been called
-        check_is_fitted(self, "n_features_")
+        check_is_fitted(self, "n_features_in_")
         # Input validation
         X = check_array(X)
         # Check that the input is of the same shape as the one passed
         # during fit.
-        if X.shape[1] != self.n_features_:
+        if X.shape[1] != self.n_features_in_:
             raise ValueError(
                 "Shape of input is different from what was seen in `fit`"
             )
-        if len(self.features_) == self.n_features_:
+        if len(self.features_) == self.n_features_in_:
             result = np.zeros_like(X, dtype=np.int32) - 1
         else:
             result = np.zeros_like(X) - 1
         Parallel(n_jobs=self.n_jobs, prefer="threads")(
             delayed(self._discretize_feature)(feature, X[:, feature], result)
-            for feature in range(self.n_features_)
+            for feature in range(self.n_features_in_)
         )
         return result
 
@@ -183,6 +186,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
             The list of cut points for each feature.
         """
         result = []
-        for feature in range(self.n_features_):
+        for feature in range(self.n_features_in_):
             result.append(self.cut_points_[feature])
         return result
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index 315c8b8..2de67ab 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -2,6 +2,7 @@ import unittest
 import sklearn
 import numpy as np
 from sklearn.datasets import load_iris
+from sklearn.utils.estimator_checks import check_estimator
 from ..cppfimdlp import factorize
 from ..mdlp import FImdlp
 from .. import version
@@ -23,13 +24,13 @@ class FImdlpTest(unittest.TestCase):
     def test_fit_definitive(self):
         clf = FImdlp(algorithm=0)
         clf.fit([[1, 2], [3, 4]], [1, 2])
-        self.assertEqual(clf.n_features_, 2)
+        self.assertEqual(clf.n_features_in_, 2)
         self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
         self.assertListEqual(clf.y_.tolist(), [1, 2])
         self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
         X, y = load_iris(return_X_y=True)
         clf.fit(X, y)
-        self.assertEqual(clf.n_features_, 4)
+        self.assertEqual(clf.n_features_in_, 4)
         self.assertTrue(np.array_equal(X, clf.X_))
         self.assertTrue(np.array_equal(y, clf.y_))
         expected = [
@@ -46,13 +47,13 @@ class FImdlpTest(unittest.TestCase):
     def test_fit_alternative(self):
         clf = FImdlp(algorithm=1)
         clf.fit([[1, 2], [3, 4]], [1, 2])
-        self.assertEqual(clf.n_features_, 2)
+        self.assertEqual(clf.n_features_in_, 2)
         self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
         self.assertListEqual(clf.y_.tolist(), [1, 2])
         self.assertListEqual([[2], [3]], clf.get_cut_points())
         X, y = load_iris(return_X_y=True)
         clf.fit(X, y)
-        self.assertEqual(clf.n_features_, 4)
+        self.assertEqual(clf.n_features_in_, 4)
         self.assertTrue(np.array_equal(X, clf.X_))
         self.assertTrue(np.array_equal(y, clf.y_))
 
@@ -107,7 +108,7 @@ class FImdlpTest(unittest.TestCase):
         )
         X, y = load_iris(return_X_y=True)
         clf.fit(X, y)
-        self.assertEqual(clf.n_features_, 4)
+        self.assertEqual(clf.n_features_in_, 4)
         self.assertTrue(np.array_equal(X, clf.X_))
         self.assertTrue(np.array_equal(y, clf.y_))
         X_transformed = clf.transform(X)
@@ -139,7 +140,7 @@ class FImdlpTest(unittest.TestCase):
         )
         X, y = load_iris(return_X_y=True)
         clf.fit(X, y)
-        self.assertEqual(clf.n_features_, 4)
+        self.assertEqual(clf.n_features_in_, 4)
         self.assertTrue(np.array_equal(X, clf.X_))
         self.assertTrue(np.array_equal(y, clf.y_))
         self.assertListEqual(
@@ -213,3 +214,7 @@ class FImdlpTest(unittest.TestCase):
         ]
         with self.assertRaises(ValueError):
             FImdlp().join_transform(x, y, 5)
+
+    def test_sklearn_transformer(self):
+        for check, test in check_estimator(FImdlp(), generate_only=True):
+            test(check)

From 050b9236316c4fc49873e29f82ce5df4780c53b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 28 Jan 2023 10:35:07 +0100
Subject: [PATCH 04/14] feat: :zap: Add factorize method to transformer

---
 src/fimdlp/mdlp.py              | 17 ++++++++++++++++-
 src/fimdlp/tests/FImdlp_test.py | 12 +++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index a9ec2b8..415705b 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -150,6 +150,21 @@ class FImdlp(TransformerMixin, BaseEstimator):
         )
         return result
 
+    def factorize(self, yy):
+        """Factorize the input labels
+
+        Parameters
+        ----------
+        yy : array, shape (n_samples,)
+            Labels to be factorized, MUST be bytes, i.e. b"0", b"1", ...
+
+        Returns
+        -------
+        array, shape (n_samples,)
+            Factorized labels
+        """
+        return factorize(yy)
+
     def join_transform(self, X, y, feature, **kwargs):
         """Join the selected feature with the labels and discretize the values
         join - fit - transform
@@ -174,7 +189,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
             f"{str(item_y)}{str(item_x)}".encode()
             for item_y, item_x in zip(y, X[:, feature])
         ]
-        yy = factorize(self.y_join_)
+        yy = self.factorize(self.y_join_)
         XX = np.delete(X, feature, axis=1)
         return self.fit(XX, yy).transform(XX)
 
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index 2de67ab..5e522f3 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -162,7 +162,7 @@ class FImdlpTest(unittest.TestCase):
             clf = FImdlp(algorithm=1)
             clf.transform([[1, 2], [3, 4]])
 
-    def test_factorize(self):
+    def test_cppfactorize(self):
         source = [
             b"f0",
             b"f1",
@@ -215,6 +215,16 @@ class FImdlpTest(unittest.TestCase):
         with self.assertRaises(ValueError):
             FImdlp().join_transform(x, y, 5)
 
+    def test_factorize(self):
+        y = np.array([b"f0", b"f0", b"f2", b"f3", b"f4"])
+        clf = FImdlp()
+        computed = clf.factorize(y)
+        self.assertListEqual([0, 0, 1, 2, 3], computed)
+        y = [b"f4", b"f0", b"f0", b"f2", b"f3"]
+        clf = FImdlp()
+        computed = clf.factorize(y)
+        self.assertListEqual([0, 1, 1, 2, 3], computed)
+
     def test_sklearn_transformer(self):
         for check, test in check_estimator(FImdlp(), generate_only=True):
             test(check)

From 7913f5151ee0a4843b7c04974aa730ea4c8521ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 28 Jan 2023 19:14:32 +0100
Subject: [PATCH 05/14] Add version command to Makefile

---
 Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Makefile b/Makefile
index 53b8218..ffc0216 100644
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,11 @@ install:  ## Build extension
 audit: ## Audit pip
 	pip-audit
 
+version:
+	@echo "Current Python version .: $(shell python --version)"
+	@echo "Current FImdlp version .: $(shell python -c "from fimdlp import _version; print(_version.__version__)")"
+	@echo "Installed FImdlp version: $(shell pip show fimdlp | grep Version | cut -d' ' -f2)"
+
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \

From 1186e4ad5361a7a63c1a120f412c7e8b0364871e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 28 Jan 2023 19:15:26 +0100
Subject: [PATCH 06/14] chore: :bookmark: Upgrade version number to 0.9.3

---
 src/fimdlp/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fimdlp/_version.py b/src/fimdlp/_version.py
index a2fecb4..c598173 100644
--- a/src/fimdlp/_version.py
+++ b/src/fimdlp/_version.py
@@ -1 +1 @@
-__version__ = "0.9.2"
+__version__ = "0.9.3"

From cf09d92cccea4404acb0da442958d6b18bae549e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 4 Feb 2023 17:45:36 +0100
Subject: [PATCH 07/14] add MultiDiscretizer

---
 src/fimdlp/mdlp.py | 104 +++++++++++++++++++++++++++++++++------------
 1 file changed, 76 insertions(+), 28 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 415705b..438f2db 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -165,34 +165,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
         """
         return factorize(yy)
 
-    def join_transform(self, X, y, feature, **kwargs):
-        """Join the selected feature with the labels and discretize the values
-        join - fit - transform
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features)
-            The input samples.
-        y : array
-            the labels used to fit
-        feature : int
-            index of the feature to join with the labels
-        """
-        X, y = self._check_args(
-            X, y, expected_args=["features"], kwargs=kwargs
-        )
-        if feature < 0 or feature >= X.shape[1]:
-            raise ValueError(
-                f"Feature {feature} not in range [0, {X.shape[1]})"
-            )
-        self.y_join_ = [
-            f"{str(item_y)}{str(item_x)}".encode()
-            for item_y, item_x in zip(y, X[:, feature])
-        ]
-        yy = self.factorize(self.y_join_)
-        XX = np.delete(X, feature, axis=1)
-        return self.fit(XX, yy).transform(XX)
-
     def get_cut_points(self):
         """Get the cut points for each feature.
         Returns
@@ -204,3 +176,79 @@ class FImdlp(TransformerMixin, BaseEstimator):
         for feature in range(self.n_features_in_):
             result.append(self.cut_points_[feature])
         return result
+
+
+class MultiDiscretizer:
+    def __init__(self, algorithm=0, n_jobs=-1):
+        self.algorithm = algorithm
+        self.n_jobs = n_jobs
+
+    def fit_transform(self, X, y, **kwargs):
+        X, y = check_X_y(X, y)
+        self.X_ = X
+        self.y_ = y
+        self.n_features_in_ = X.shape[1]
+        self.discretizer_ = FImdlp(
+            algorithm=self.algorithm, n_jobs=self.n_jobs
+        )
+        self.discretizers_ = [None] * self.n_features_in_
+        self.discretized_ = [None] * self.n_features_in_
+        self.yy_ = [None] * self.n_features_in_
+        self.X_d_ = self.discretizer_.fit_transform(X, y, **kwargs)
+        return self.X_d_
+
+    def transform(self, X):
+        X = check_array(X)
+        if not hasattr(self, "discretizer_"):
+            raise ValueError("Must call fit_transform first")
+        return self.discretizer_.transform(X)
+
+    def join_transform(self, features, target):
+        """Join the selected features with the labels and discretize the values
+        of the target variable
+        join - fit - transform
+
+        Parameters
+        ----------
+        features : [list]
+            index of the features to join with the labels
+        target : [int]
+            index of the target variable to discretize
+        """
+        # Check is fit had been called
+        check_is_fitted(self, "n_features_in_")
+        if len(features) < 1 or len(features) > self.n_features_in_:
+            raise ValueError(
+                "Number of features must be in range [1, "
+                f"{self.n_features_in_}]"
+            )
+        for feature in features:
+            if feature < 0 or feature >= self.n_features_in_:
+                raise ValueError(
+                    f"Feature {feature} not in range [0, "
+                    f"{self.n_features_in_})"
+                )
+        if target < 0 or target >= self.n_features_in_:
+            raise ValueError(
+                f"Target {target} not in range [0, {self.n_features_in_})"
+            )
+        y_join = [
+            f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
+            for item_y, items_x in zip(self.y_, self.X_d_[:, features])
+        ]
+        self.yy_[target] = self.discretizer_.factorize(y_join)
+        self.discretizers_[target] = FImdlp(
+            algorithm=self.algorithm, n_jobs=self.n_jobs
+        )
+        self.discretized_[target] = self.discretizers_[target].fit_transform(
+            self.X_[:, target].reshape(-1, 1), self.yy_[target]
+        )
+        return self.discretized_[target]
+
+
+# from sklearn.datasets import load_wine
+# X, y = load_wine(return_X_y=True)
+# from fimdlp.mdlp import MultiDiscretizer
+# clf = MultiDiscretizer()
+# clf.fit(X, y)
+# clf.join_transform([1, 3, 5], 7)

From f20496203e36571eb5597dbd891e7569b5057935 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sat, 4 Feb 2023 19:23:15 +0100
Subject: [PATCH 08/14] refactor MultiDiscretizer to use one FImdlp per column

---
 src/fimdlp/mdlp.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 438f2db..9b55cf0 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -183,25 +183,38 @@ class MultiDiscretizer:
         self.algorithm = algorithm
         self.n_jobs = n_jobs
 
-    def fit_transform(self, X, y, **kwargs):
+    def initial_fit_transform(self, X, y):
         X, y = check_X_y(X, y)
         self.X_ = X
         self.y_ = y
         self.n_features_in_ = X.shape[1]
-        self.discretizer_ = FImdlp(
-            algorithm=self.algorithm, n_jobs=self.n_jobs
-        )
         self.discretizers_ = [None] * self.n_features_in_
         self.discretized_ = [None] * self.n_features_in_
-        self.yy_ = [None] * self.n_features_in_
-        self.X_d_ = self.discretizer_.fit_transform(X, y, **kwargs)
+        # self.yy_ = [None] * self.n_features_in_
+        self.X_d_ = np.zeros_like(X, dtype=np.int32) - 1
+        for feature in range(self.n_features_in_):
+            self.discretizers_[feature] = FImdlp(
+                algorithm=self.algorithm, n_jobs=self.n_jobs
+            )
+            self.discretized_[feature] = self.discretizers_[
+                feature
+            ].fit_transform(X[:, feature].reshape(-1, 1), y)
+            # self.yy_[feature] = self.discretizers_[feature].factorize(y)
+            self.X_d_[:, feature] = self.discretized_[feature].ravel()
         return self.X_d_
 
     def transform(self, X):
         X = check_array(X)
-        if not hasattr(self, "discretizer_"):
+        if not hasattr(self, "discretizers_"):
             raise ValueError("Must call fit_transform first")
-        return self.discretizer_.transform(X)
+        result = np.zeros_like(X, dtype=np.int32) - 1
+        for feature in range(self.n_features_in_):
+            result[:, feature] = (
+                self.discretizers_[feature]
+                .transform(X[:, feature].reshape(-1, 1))
+                .ravel()
+            )
+        return result
 
     def join_transform(self, features, target):
         """Join the selected features with the labels and discretize the values

From 9899781640911f8acdacb8e5de4b16aeadc578b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Sun, 5 Feb 2023 00:30:03 +0100
Subject: [PATCH 09/14] Complete join_fit and remove MultiDiscretizer

---
 src/fimdlp/mdlp.py | 71 ++++++++--------------------------------------
 1 file changed, 12 insertions(+), 59 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 9b55cf0..b119688 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -177,47 +177,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
             result.append(self.cut_points_[feature])
         return result
 
-
-class MultiDiscretizer:
-    def __init__(self, algorithm=0, n_jobs=-1):
-        self.algorithm = algorithm
-        self.n_jobs = n_jobs
-
-    def initial_fit_transform(self, X, y):
-        X, y = check_X_y(X, y)
-        self.X_ = X
-        self.y_ = y
-        self.n_features_in_ = X.shape[1]
-        self.discretizers_ = [None] * self.n_features_in_
-        self.discretized_ = [None] * self.n_features_in_
-        # self.yy_ = [None] * self.n_features_in_
-        self.X_d_ = np.zeros_like(X, dtype=np.int32) - 1
-        for feature in range(self.n_features_in_):
-            self.discretizers_[feature] = FImdlp(
-                algorithm=self.algorithm, n_jobs=self.n_jobs
-            )
-            self.discretized_[feature] = self.discretizers_[
-                feature
-            ].fit_transform(X[:, feature].reshape(-1, 1), y)
-            # self.yy_[feature] = self.discretizers_[feature].factorize(y)
-            self.X_d_[:, feature] = self.discretized_[feature].ravel()
-        return self.X_d_
-
-    def transform(self, X):
-        X = check_array(X)
-        if not hasattr(self, "discretizers_"):
-            raise ValueError("Must call fit_transform first")
-        result = np.zeros_like(X, dtype=np.int32) - 1
-        for feature in range(self.n_features_in_):
-            result[:, feature] = (
-                self.discretizers_[feature]
-                .transform(X[:, feature].reshape(-1, 1))
-                .ravel()
-            )
-        return result
-
-    def join_transform(self, features, target):
-        """Join the selected features with the labels and discretize the values
+    def join_fit(self, features, target, data):
+        """Join the selected features with the labels and fit the discretizer
         of the target variable
         join - fit - transform
 
@@ -227,8 +188,12 @@ class MultiDiscretizer:
             index of the features to join with the labels
         target : [int]
             index of the target variable to discretize
+
+        Returns
+        -------
+        result: np.array
+            The target variable newly discretized
         """
-        # Check is fit had been called
         check_is_fitted(self, "n_features_in_")
         if len(features) < 1 or len(features) > self.n_features_in_:
             raise ValueError(
@@ -247,21 +212,9 @@ class MultiDiscretizer:
             )
         y_join = [
             f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
-            for item_y, items_x in zip(self.y_, self.X_d_[:, features])
+            for item_y, items_x in zip(self.y_, data[:, features])
         ]
-        self.yy_[target] = self.discretizer_.factorize(y_join)
-        self.discretizers_[target] = FImdlp(
-            algorithm=self.algorithm, n_jobs=self.n_jobs
-        )
-        self.discretized_[target] = self.discretizers_[target].fit_transform(
-            self.X_[:, target].reshape(-1, 1), self.yy_[target]
-        )
-        return self.discretized_[target]
-
-
-# from sklearn.datasets import load_wine
-# X, y = load_wine(return_X_y=True)
-# from fimdlp.mdlp import MultiDiscretizer
-# clf = MultiDiscretizer()
-# clf.fit(X, y)
-# clf.join_transform([1, 3, 5], 7)
+        self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
+        self.cut_points_[target] = self.discretizer_[target].get_cut_points()
+        # return the discretized target variable with the new cut points
+        return np.searchsorted(self.cut_points_[target], self.X_[:, target])

From 2d495293bb9ba462bbdbbbcad1fa8b04b4cdccac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Mon, 13 Feb 2023 16:15:50 +0100
Subject: [PATCH 10/14] Add range_features method

---
 src/fimdlp/mdlp.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index b119688..ac0dd05 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -119,6 +119,15 @@ class FImdlp(TransformerMixin, BaseEstimator):
         else:
             result[:, feature] = X
 
+    def range_features(self):
+        res = []
+        for i in range(self.n_features_in_):
+            if i in self.features_:
+                res.append(list(range(len(self.cut_points_[i]))))
+            else:
+                res.append([])
+        return res
+
     def transform(self, X):
         """Discretize X values.
         Parameters
@@ -214,6 +223,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
             f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
             for item_y, items_x in zip(self.y_, data[:, features])
         ]
+        self.y_join = y_join
         self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
         self.cut_points_[target] = self.discretizer_[target].get_cut_points()
         # return the discretized target variable with the new cut points

From 31d79a77fa82fb1bc0869cbf3eeec5814a5d8b5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Mon, 13 Feb 2023 17:34:50 +0100
Subject: [PATCH 11/14] Add get_states_feature method

---
 src/fimdlp/mdlp.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index ac0dd05..808e75a 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -119,15 +119,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
         else:
             result[:, feature] = X
 
-    def range_features(self):
-        res = []
-        for i in range(self.n_features_in_):
-            if i in self.features_:
-                res.append(list(range(len(self.cut_points_[i]))))
-            else:
-                res.append([])
-        return res
-
     def transform(self, X):
         """Discretize X values.
         Parameters
@@ -186,6 +177,23 @@ class FImdlp(TransformerMixin, BaseEstimator):
             result.append(self.cut_points_[feature])
         return result
 
+    def get_states_feature(self, feature):
+        """Return the states a feature can take
+
+        Parameters
+        ----------
+        feature : int
+            index of the feature whose states are requested
+
+        Returns
+        -------
+        list or None
+            integers 0..len(cut_points); None if feature is not in features_
+        """
+        if feature in self.features_:
+            return list(range(len(self.cut_points_[feature]) + 1))
+        return None
+
     def join_fit(self, features, target, data):
         """Join the selected features with the labels and fit the discretizer
         of the target variable

From e0b7cae9a0902b1af661b434963038e32752f969 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Mon, 20 Feb 2023 18:26:51 +0100
Subject: [PATCH 12/14] Remove algorithm hyperparameter from discretizer

---
 src/fimdlp/__init__.py          |   4 -
 src/fimdlp/cfimdlp.pyx          |  10 +-
 src/fimdlp/mdlp.py              |  23 ++--
 src/fimdlp/tests/FImdlp_test.py | 218 ++++++++++++++++----------------
 4 files changed, 126 insertions(+), 129 deletions(-)

diff --git a/src/fimdlp/__init__.py b/src/fimdlp/__init__.py
index 3a99d3b..0abf8ef 100644
--- a/src/fimdlp/__init__.py
+++ b/src/fimdlp/__init__.py
@@ -1,8 +1,4 @@
 from ._version import __version__
 
 
-def version():
-    return __version__
-
-
 all = ["FImdlp", "__version__"]
diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx
index 18e1d81..8892e8b 100644
--- a/src/fimdlp/cfimdlp.pyx
+++ b/src/fimdlp/cfimdlp.pyx
@@ -6,17 +6,15 @@ from libcpp.string cimport string
 cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
     ctypedef float precision_t
     cdef cppclass CPPFImdlp:
-        CPPFImdlp(int) except + 
+        CPPFImdlp() except + 
         CPPFImdlp& fit(vector[precision_t]&, vector[int]&)
         vector[precision_t] getCutPoints()
         string version()
         
 cdef class CFImdlp:
     cdef CPPFImdlp *thisptr
-    cdef int algorithm
-    def __cinit__(self, algorithm:int ):
-        self.algorithm = algorithm
-        self.thisptr = new CPPFImdlp(algorithm)
+    def __cinit__(self):
+        self.thisptr = new CPPFImdlp()
     def __dealloc__(self):
         del self.thisptr
     def fit(self, X, y):
@@ -27,7 +25,7 @@ cdef class CFImdlp:
     def get_version(self):
         return self.thisptr.version()
     def __reduce__(self):
-        return (CFImdlp, (self.algorithm,))
+        return (CFImdlp, ())
 
 cdef extern from "Factorize.h" namespace "utils":
     vector[int] cppFactorize(vector[string] &input_vector)
diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 808e75a..0378ebf 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -4,22 +4,19 @@ from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from joblib import Parallel, delayed
+from ._version import __version__
+
+# from ._version import __version__
 
 
 class FImdlp(TransformerMixin, BaseEstimator):
-    def __init__(self, algorithm=0, n_jobs=-1):
-        self.algorithm = algorithm
+    def __init__(self, n_jobs=-1):
         self.n_jobs = n_jobs
 
     """Fayyad - Irani MDLP discretization algorithm based implementation.
 
     Parameters
     ----------
-    algorithm : int, default=0
-        The type of algorithm to use computing the cut points.
-        0 - Definitive implementation
-        1 - Alternative proposal
-        2 - Classic proposal
     n_jobs : int, default=-1
         The number of jobs to run in parallel. :meth:`fit` and
         :meth:`transform`, are parallelized over the features. ``-1`` means
@@ -73,6 +70,10 @@ class FImdlp(TransformerMixin, BaseEstimator):
         self.n_classes_ = self.classes_.shape[0]
         self.n_features_in_ = X.shape[1]
 
+    @staticmethod
+    def get_version():
+        return f"{__version__}({CFImdlp().get_version().decode()})"
+
     def fit(self, X, y, **kwargs):
         """A reference implementation of a fitting function for a transformer.
         Parameters
@@ -104,7 +105,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
 
     def _fit_discretizer(self, feature):
         if feature in self.features_:
-            self.discretizer_[feature] = CFImdlp(algorithm=self.algorithm)
+            self.discretizer_[feature] = CFImdlp()
             self.discretizer_[feature].fit(self.X_[:, feature], self.y_)
             self.cut_points_[feature] = self.discretizer_[
                 feature
@@ -205,6 +206,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
             index of the features to join with the labels
         target : [int]
             index of the target variable to discretize
+        data: [array] shape (n_samples, n_features)
+            dataset that contains the features to join
 
         Returns
         -------
@@ -227,11 +230,13 @@ class FImdlp(TransformerMixin, BaseEstimator):
             raise ValueError(
                 f"Target {target} not in range [0, {self.n_features_in_})"
             )
+        if target in features:
+            raise ValueError("Target cannot in features to join")
         y_join = [
             f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
             for item_y, items_x in zip(self.y_, data[:, features])
         ]
-        self.y_join = y_join
+        self.y_join_ = y_join
         self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
         self.cut_points_[target] = self.discretizer_[target].get_cut_points()
         # return the discretized target variable with the new cut points
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index 5e522f3..fcfd888 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -3,67 +3,44 @@ import sklearn
 import numpy as np
 from sklearn.datasets import load_iris
 from sklearn.utils.estimator_checks import check_estimator
-from ..cppfimdlp import factorize
+from ..cppfimdlp import CFImdlp, factorize
 from ..mdlp import FImdlp
-from .. import version
-from .._version import __version__
+from .. import __version__
+
+# from .._version import __version__
 
 
 class FImdlpTest(unittest.TestCase):
     def test_version(self):
-        self.assertEqual(version(), __version__)
+        clf = FImdlp()
+        self.assertEqual(
+            clf.get_version(),
+            f"{__version__}({CFImdlp().get_version().decode()})",
+        )
 
     def test_init(self):
         clf = FImdlp()
         self.assertEqual(-1, clf.n_jobs)
-        self.assertEqual(0, clf.algorithm)
-        clf = FImdlp(algorithm=1, n_jobs=7)
-        self.assertEqual(1, clf.algorithm)
+        clf = FImdlp(n_jobs=7)
         self.assertEqual(7, clf.n_jobs)
 
     def test_fit_definitive(self):
-        clf = FImdlp(algorithm=0)
-        clf.fit([[1, 2], [3, 4]], [1, 2])
-        self.assertEqual(clf.n_features_in_, 2)
-        self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
-        self.assertListEqual(clf.y_.tolist(), [1, 2])
-        self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
+        clf = FImdlp()
         X, y = load_iris(return_X_y=True)
         clf.fit(X, y)
         self.assertEqual(clf.n_features_in_, 4)
         self.assertTrue(np.array_equal(X, clf.X_))
         self.assertTrue(np.array_equal(y, clf.y_))
-        expected = [
-            [5.449999809265137, 6.25],
-            [2.8499999046325684, 3.0, 3.049999952316284, 3.3499999046325684],
-            [2.450000047683716, 4.75, 5.050000190734863],
-            [0.800000011920929, 1.4500000476837158, 1.75],
-        ]
-        self.assertListEqual(expected, clf.get_cut_points())
-        self.assertListEqual([0, 1, 2, 3], clf.features_)
-        clf.fit(X, y, features=[0, 2, 3])
-        self.assertListEqual([0, 2, 3], clf.features_)
-
-    def test_fit_alternative(self):
-        clf = FImdlp(algorithm=1)
-        clf.fit([[1, 2], [3, 4]], [1, 2])
-        self.assertEqual(clf.n_features_in_, 2)
-        self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
-        self.assertListEqual(clf.y_.tolist(), [1, 2])
-        self.assertListEqual([[2], [3]], clf.get_cut_points())
-        X, y = load_iris(return_X_y=True)
-        clf.fit(X, y)
-        self.assertEqual(clf.n_features_in_, 4)
-        self.assertTrue(np.array_equal(X, clf.X_))
-        self.assertTrue(np.array_equal(y, clf.y_))
-
         expected = [
             [5.449999809265137, 5.75],
-            [2.8499999046325684, 3.3499999046325684],
-            [2.450000047683716, 4.75],
-            [0.800000011920929, 1.75],
+            [2.75, 2.8499999046325684, 2.95, 3.05, 3.3499999046325684],
+            [2.45, 4.75, 5.050000190734863],
+            [0.8, 1.75],
         ]
-        self.assertListEqual(expected, clf.get_cut_points())
+        computed = clf.get_cut_points()
+        for item_computed, item_expected in zip(computed, expected):
+            for x_, y_ in zip(item_computed, item_expected):
+                self.assertAlmostEqual(x_, y_)
         self.assertListEqual([0, 1, 2, 3], clf.features_)
         clf.fit(X, y, features=[0, 2, 3])
         self.assertListEqual([0, 2, 3], clf.features_)
@@ -84,8 +61,12 @@ class FImdlpTest(unittest.TestCase):
             clf.fit([[1, 2], [3, 4]], [1, 2], features=[0, 2])
 
     def test_fit_features(self):
-        clf = FImdlp()
+        clf = FImdlp(n_jobs=-1)
+        # Two samples don't have enough information to split
         clf.fit([[1, -2], [3, 4]], [1, 2], features=[0])
+        self.assertListEqual(clf.get_cut_points(), [[], []])
+        clf.fit([[1, -2], [3, 4], [5, 6]], [1, 2, 2], features=[0])
+        self.assertListEqual(clf.get_cut_points(), [[2], []])
         res = clf.transform([[1, -2], [3, 4]])
         self.assertListEqual(res.tolist(), [[0, -2], [1, 4]])
         X, y = load_iris(return_X_y=True)
@@ -100,9 +81,9 @@ class FImdlpTest(unittest.TestCase):
         )
         self.assertEqual(X_computed.dtype, np.float64)
 
-    def test_transform_definitive(self):
-        clf = FImdlp(algorithm=0)
-        clf.fit([[1, 2], [3, 4]], [1, 2])
+    def test_transform(self):
+        clf = FImdlp()
+        clf.fit([[1, 2], [3, 4], [5, 6]], [1, 2, 2])
         self.assertEqual(
             clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]]
         )
@@ -118,48 +99,18 @@ class FImdlpTest(unittest.TestCase):
         self.assertEqual(X_transformed.dtype, np.int32)
         expected = [
             [1, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 0, 1, 1],
-            [0, 0, 1, 1],
-            [1, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
-        ]
-        self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
-        with self.assertRaises(ValueError):
-            clf.transform([[1, 2, 3], [4, 5, 6]])
-        with self.assertRaises(sklearn.exceptions.NotFittedError):
-            clf = FImdlp(algorithm=0)
-            clf.transform([[1, 2], [3, 4]])
-
-    def test_transform_alternative(self):
-        clf = FImdlp(algorithm=1)
-        clf.fit([[1, 2], [3, 4]], [1, 2])
-        self.assertEqual(
-            clf.transform([[1, 2], [3, 4]]).tolist(), [[0, 0], [1, 1]]
-        )
-        X, y = load_iris(return_X_y=True)
-        clf.fit(X, y)
-        self.assertEqual(clf.n_features_in_, 4)
-        self.assertTrue(np.array_equal(X, clf.X_))
-        self.assertTrue(np.array_equal(y, clf.y_))
-        self.assertListEqual(
-            clf.transform(X).tolist(), clf.fit(X, y).transform(X).tolist()
-        )
-        expected = [
-            [1, 0, 1, 1],
-            [2, 1, 1, 1],
+            [2, 3, 1, 1],
             [2, 0, 1, 1],
             [0, 0, 1, 1],
             [1, 0, 1, 1],
-            [1, 1, 1, 1],
-            [1, 1, 1, 1],
+            [1, 3, 1, 1],
+            [1, 2, 1, 1],
         ]
         self.assertTrue(np.array_equal(clf.transform(X[90:97]), expected))
         with self.assertRaises(ValueError):
             clf.transform([[1, 2, 3], [4, 5, 6]])
         with self.assertRaises(sklearn.exceptions.NotFittedError):
-            clf = FImdlp(algorithm=1)
+            clf = FImdlp()
             clf.transform([[1, 2], [3, 4]])
 
     def test_cppfactorize(self):
@@ -180,40 +131,69 @@ class FImdlpTest(unittest.TestCase):
         computed = factorize(source)
         self.assertListEqual(expected, computed)
 
-    def test_join_transform(self):
-        y = ["f0", "f0", "f2", "f3", "f4"]
-        x = [
-            [0, 1, 2, 3, 4],
-            [0, 1, 2, 3, 4],
-            [1, 2, 3, 4, 5],
-            [2, 3, 4, 5, 6],
-            [3, 4, 5, 6, 7],
-        ]
-        expected = [
-            [0, 0, 0, 0],
-            [0, 0, 0, 0],
-            [1, 1, 1, 1],
-            [2, 2, 2, 2],
-            [2, 2, 2, 2],
-        ]
+    def test_join_fit(self):
+        y = np.array([b"f0", b"f0", b"f2", b"f3", b"f4"])
+        x = np.array(
+            [
+                [0, 1, 2, 3, 4],
+                [0, 1, 2, 3, 4],
+                [1, 2, 3, 4, 5],
+                [2, 3, 4, 5, 6],
+                [3, 4, 5, 6, 7],
+            ]
+        )
+        expected = [0, 0, 1, 2, 2]
         clf = FImdlp()
-        computed = clf.join_transform(x, y, 0)
-        for computed, expected in zip(computed, expected):
-            self.assertListEqual(expected, computed.tolist())
-        expected_y = [b"f00", b"f00", b"f21", b"f32", b"f43"]
+        clf.fit(x, factorize(y))
+        computed = clf.join_fit([0, 2], 1, x)
+        self.assertListEqual(computed.tolist(), expected)
+        expected_y = [b"002", b"002", b"113", b"224", b"335"]
         self.assertListEqual(expected_y, clf.y_join_)
 
-    def test_join_transform_error(self):
-        y = ["f0", "f0", "f2", "f3", "f4"]
-        x = [
-            [0, 1, 2, 3, 4],
-            [0, 1, 2, 3, 4],
-            [1, 2, 3, 4, 5],
-            [2, 3, 4, 5, 6],
-            [3, 4, 5, 6, 7],
-        ]
-        with self.assertRaises(ValueError):
-            FImdlp().join_transform(x, y, 5)
+    def test_join_fit_error(self):
+        y = np.array([b"f0", b"f0", b"f2", b"f3", b"f4"])
+        x = np.array(
+            [
+                [0, 1, 2, 3, 4],
+                [0, 1, 2, 3, 4],
+                [1, 2, 3, 4, 5],
+                [2, 3, 4, 5, 6],
+                [3, 4, 5, 6, 7],
+            ]
+        )
+        clf = FImdlp()
+        clf.fit(x, factorize(y))
+        with self.assertRaises(ValueError) as exception:
+            clf.join_fit([], 1, x)
+        self.assertEqual(
+            str(exception.exception),
+            "Number of features must be in range [1, 5]",
+        )
+        with self.assertRaises(ValueError) as exception:
+            FImdlp().join_fit([0, 4], 1, x)
+        self.assertTrue(
+            str(exception.exception).startswith(
+                "This FImdlp instance is not fitted yet."
+            )
+        )
+        with self.assertRaises(ValueError) as exception:
+            clf.join_fit([0, 5], 1, x)
+        self.assertEqual(
+            str(exception.exception),
+            "Feature 5 not in range [0, 5)",
+        )
+        with self.assertRaises(ValueError) as exception:
+            clf.join_fit([0, 2], 5, x)
+        self.assertEqual(
+            str(exception.exception),
+            "Target 5 not in range [0, 5)",
+        )
+        with self.assertRaises(ValueError) as exception:
+            clf.join_fit([0, 2], 2, x)
+        self.assertEqual(
+            str(exception.exception),
+            "Target cannot in features to join",
+        )
 
     def test_factorize(self):
         y = np.array([b"f0", b"f0", b"f2", b"f3", b"f4"])
@@ -228,3 +208,21 @@ class FImdlpTest(unittest.TestCase):
     def test_sklearn_transformer(self):
         for check, test in check_estimator(FImdlp(), generate_only=True):
             test(check)
+
+    def test_states_feature(self):
+        clf = FImdlp()
+        X, y = load_iris(return_X_y=True)
+        clf.fit(X, y)
+        expected = []
+        for i in [3, 6, 4, 3]:
+            expected.append(list(range(i)))
+        for feature in range(X.shape[1]):
+            self.assertListEqual(
+                expected[feature], clf.get_states_feature(feature)
+            )
+
+    def test_states_no_feature(self):
+        clf = FImdlp()
+        X, y = load_iris(return_X_y=True)
+        clf.fit(X, y)
+        self.assertIsNone(clf.get_states_feature(4))

From 718c9d0e63872b4797c5d3f1ebbfd9dc449ef089 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Mon, 20 Feb 2023 20:12:36 +0100
Subject: [PATCH 13/14] Make factorize and test_sklearn_transformer static

---
 src/fimdlp/mdlp.py              | 3 ++-
 src/fimdlp/tests/FImdlp_test.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py
index 0378ebf..2a2114a 100644
--- a/src/fimdlp/mdlp.py
+++ b/src/fimdlp/mdlp.py
@@ -151,7 +151,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
         )
         return result
 
-    def factorize(self, yy):
+    @staticmethod
+    def factorize(yy):
         """Factorize the input labels
 
         Parameters
diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py
index fcfd888..068a4e8 100644
--- a/src/fimdlp/tests/FImdlp_test.py
+++ b/src/fimdlp/tests/FImdlp_test.py
@@ -205,7 +205,8 @@ class FImdlpTest(unittest.TestCase):
         computed = clf.factorize(y)
         self.assertListEqual([0, 1, 1, 2, 3], computed)
 
-    def test_sklearn_transformer(self):
+    @staticmethod
+    def test_sklearn_transformer():
         for check, test in check_estimator(FImdlp(), generate_only=True):
             test(check)
 

From 40871f128d61fc897ce117d9adf00008a840a43c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= <rmontanana@gmail.com>
Date: Wed, 22 Feb 2023 10:15:33 +0100
Subject: [PATCH 14/14] Add 1.1.0 version of mdlp

---
 src/cppmdlp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cppmdlp b/src/cppmdlp
index 1b89f59..e97aea2 160000
--- a/src/cppmdlp
+++ b/src/cppmdlp
@@ -1 +1 @@
-Subproject commit 1b89f5927c3add921b19fe29094d354780f98b5f
+Subproject commit e97aea2a4de7e4e4a24e87744d8987b899b1a239