diff --git a/src/fimdlp/cfimdlp.pyx b/src/fimdlp/cfimdlp.pyx index c09d3e1..18e1d81 100644 --- a/src/fimdlp/cfimdlp.pyx +++ b/src/fimdlp/cfimdlp.pyx @@ -13,7 +13,9 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp": cdef class CFImdlp: cdef CPPFImdlp *thisptr - def __cinit__(self, algorithm): + cdef int algorithm + def __cinit__(self, algorithm: int): + self.algorithm = algorithm self.thisptr = new CPPFImdlp(algorithm) def __dealloc__(self): del self.thisptr @@ -24,6 +26,8 @@ cdef class CFImdlp: return self.thisptr.getCutPoints() def get_version(self): return self.thisptr.version() + def __reduce__(self): + return (CFImdlp, (self.algorithm,)) cdef extern from "Factorize.h" namespace "utils": vector[int] cppFactorize(vector[string] &input_vector) diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py index 0010dfb..a9ec2b8 100644 --- a/src/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -27,7 +27,7 @@ class FImdlp(TransformerMixin, BaseEstimator): Attributes ---------- - n_features_ : int + n_features_in_ : int The number of features of the data passed to :meth:`fit`. discretizer_ : list The list of discretizers, one for each feature. @@ -41,6 +41,9 @@ class FImdlp(TransformerMixin, BaseEstimator): the list of features to be discretized """ + def _more_tags(self): + return {"preserves_dtype": [np.int32], "requires_y": True} + def _check_args(self, X, y, expected_args, kwargs): # Check that X and y have correct shape X, y = check_X_y(X, y) @@ -68,7 +71,7 @@ class FImdlp(TransformerMixin, BaseEstimator): # Store the classes seen during fit self.classes_ = unique_labels(y) self.n_classes_ = self.classes_.shape[0] - self.n_features_ = X.shape[1] + self.n_features_in_ = X.shape[1] def fit(self, X, y, **kwargs): """A reference implementation of a fitting function for a transformer. 
@@ -91,11 +94,11 @@ class FImdlp(TransformerMixin, BaseEstimator): self._update_params(X, y) self.X_ = X self.y_ = y - self.discretizer_ = [None] * self.n_features_ - self.cut_points_ = [None] * self.n_features_ + self.discretizer_ = [None] * self.n_features_in_ + self.cut_points_ = [None] * self.n_features_in_ Parallel(n_jobs=self.n_jobs, prefer="threads")( delayed(self._fit_discretizer)(feature) - for feature in range(self.n_features_) + for feature in range(self.n_features_in_) ) return self @@ -128,22 +131,22 @@ class FImdlp(TransformerMixin, BaseEstimator): The array containing the discretized values of ``X``. """ # Check is fit had been called - check_is_fitted(self, "n_features_") + check_is_fitted(self, "n_features_in_") # Input validation X = check_array(X) # Check that the input is of the same shape as the one passed # during fit. - if X.shape[1] != self.n_features_: + if X.shape[1] != self.n_features_in_: raise ValueError( "Shape of input is different from what was seen in `fit`" ) - if len(self.features_) == self.n_features_: + if len(self.features_) == self.n_features_in_: result = np.zeros_like(X, dtype=np.int32) - 1 else: result = np.zeros_like(X) - 1 Parallel(n_jobs=self.n_jobs, prefer="threads")( delayed(self._discretize_feature)(feature, X[:, feature], result) - for feature in range(self.n_features_) + for feature in range(self.n_features_in_) ) return result @@ -183,6 +186,6 @@ class FImdlp(TransformerMixin, BaseEstimator): The list of cut points for each feature. 
""" result = [] - for feature in range(self.n_features_): + for feature in range(self.n_features_in_): result.append(self.cut_points_[feature]) return result diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py index 315c8b8..2de67ab 100644 --- a/src/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -2,6 +2,7 @@ import unittest import sklearn import numpy as np from sklearn.datasets import load_iris +from sklearn.utils.estimator_checks import check_estimator from ..cppfimdlp import factorize from ..mdlp import FImdlp from .. import version @@ -23,13 +24,13 @@ class FImdlpTest(unittest.TestCase): def test_fit_definitive(self): clf = FImdlp(algorithm=0) clf.fit([[1, 2], [3, 4]], [1, 2]) - self.assertEqual(clf.n_features_, 2) + self.assertEqual(clf.n_features_in_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.y_.tolist(), [1, 2]) self.assertListEqual([[2.0], [3.0]], clf.get_cut_points()) X, y = load_iris(return_X_y=True) clf.fit(X, y) - self.assertEqual(clf.n_features_, 4) + self.assertEqual(clf.n_features_in_, 4) self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) expected = [ @@ -46,13 +47,13 @@ class FImdlpTest(unittest.TestCase): def test_fit_alternative(self): clf = FImdlp(algorithm=1) clf.fit([[1, 2], [3, 4]], [1, 2]) - self.assertEqual(clf.n_features_, 2) + self.assertEqual(clf.n_features_in_, 2) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.y_.tolist(), [1, 2]) self.assertListEqual([[2], [3]], clf.get_cut_points()) X, y = load_iris(return_X_y=True) clf.fit(X, y) - self.assertEqual(clf.n_features_, 4) + self.assertEqual(clf.n_features_in_, 4) self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) @@ -107,7 +108,7 @@ class FImdlpTest(unittest.TestCase): ) X, y = load_iris(return_X_y=True) clf.fit(X, y) - self.assertEqual(clf.n_features_, 4) + self.assertEqual(clf.n_features_in_, 4) 
self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) X_transformed = clf.transform(X) @@ -139,7 +140,7 @@ class FImdlpTest(unittest.TestCase): ) X, y = load_iris(return_X_y=True) clf.fit(X, y) - self.assertEqual(clf.n_features_, 4) + self.assertEqual(clf.n_features_in_, 4) self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(y, clf.y_)) self.assertListEqual( @@ -213,3 +214,7 @@ class FImdlpTest(unittest.TestCase): ] with self.assertRaises(ValueError): FImdlp().join_transform(x, y, 5) + + def test_sklearn_transformer(self): + for check, test in check_estimator(FImdlp(), generate_only=True): + test(check)