test: Add scikit-learn compatibility check_estimator test

This commit is contained in:
2023-01-26 23:20:51 +01:00
parent 16b31ec293
commit 29fc88cecc
3 changed files with 29 additions and 17 deletions

View File

@@ -13,7 +13,9 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
cdef class CFImdlp: cdef class CFImdlp:
cdef CPPFImdlp *thisptr cdef CPPFImdlp *thisptr
def __cinit__(self, algorithm): cdef int algorithm
def __cinit__(self, algorithm:int ):
self.algorithm = algorithm
self.thisptr = new CPPFImdlp(algorithm) self.thisptr = new CPPFImdlp(algorithm)
def __dealloc__(self): def __dealloc__(self):
del self.thisptr del self.thisptr
@@ -24,6 +26,8 @@ cdef class CFImdlp:
return self.thisptr.getCutPoints() return self.thisptr.getCutPoints()
def get_version(self): def get_version(self):
return self.thisptr.version() return self.thisptr.version()
def __reduce__(self):
    """Describe how to rebuild this object when it is pickled or copied.

    Returns a ``(callable, args)`` pair: the ``CFImdlp`` class and a
    one-element tuple holding the stored ``algorithm`` value. Only
    ``algorithm`` is carried over; the rebuilt object is created by
    calling ``CFImdlp(algorithm)``.
    """
    constructor_args = (self.algorithm,)
    return CFImdlp, constructor_args
cdef extern from "Factorize.h" namespace "utils": cdef extern from "Factorize.h" namespace "utils":
vector[int] cppFactorize(vector[string] &input_vector) vector[int] cppFactorize(vector[string] &input_vector)

View File

@@ -27,7 +27,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
Attributes Attributes
---------- ----------
n_features_ : int n_features_in_ : int
The number of features of the data passed to :meth:`fit`. The number of features of the data passed to :meth:`fit`.
discretizer_ : list discretizer_ : list
The list of discretizers, one for each feature. The list of discretizers, one for each feature.
@@ -41,6 +41,9 @@ class FImdlp(TransformerMixin, BaseEstimator):
the list of features to be discretized the list of features to be discretized
""" """
def _more_tags(self):
return {"preserves_dtype": [np.int32], "requires_y": True}
def _check_args(self, X, y, expected_args, kwargs): def _check_args(self, X, y, expected_args, kwargs):
# Check that X and y have correct shape # Check that X and y have correct shape
X, y = check_X_y(X, y) X, y = check_X_y(X, y)
@@ -68,7 +71,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
# Store the classes seen during fit # Store the classes seen during fit
self.classes_ = unique_labels(y) self.classes_ = unique_labels(y)
self.n_classes_ = self.classes_.shape[0] self.n_classes_ = self.classes_.shape[0]
self.n_features_ = X.shape[1] self.n_features_in_ = X.shape[1]
def fit(self, X, y, **kwargs): def fit(self, X, y, **kwargs):
"""A reference implementation of a fitting function for a transformer. """A reference implementation of a fitting function for a transformer.
@@ -91,11 +94,11 @@ class FImdlp(TransformerMixin, BaseEstimator):
self._update_params(X, y) self._update_params(X, y)
self.X_ = X self.X_ = X
self.y_ = y self.y_ = y
self.discretizer_ = [None] * self.n_features_ self.discretizer_ = [None] * self.n_features_in_
self.cut_points_ = [None] * self.n_features_ self.cut_points_ = [None] * self.n_features_in_
Parallel(n_jobs=self.n_jobs, prefer="threads")( Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(self._fit_discretizer)(feature) delayed(self._fit_discretizer)(feature)
for feature in range(self.n_features_) for feature in range(self.n_features_in_)
) )
return self return self
@@ -128,22 +131,22 @@ class FImdlp(TransformerMixin, BaseEstimator):
The array containing the discretized values of ``X``. The array containing the discretized values of ``X``.
""" """
# Check if fit has been called # Check if fit has been called
check_is_fitted(self, "n_features_") check_is_fitted(self, "n_features_in_")
# Input validation # Input validation
X = check_array(X) X = check_array(X)
# Check that the input is of the same shape as the one passed # Check that the input is of the same shape as the one passed
# during fit. # during fit.
if X.shape[1] != self.n_features_: if X.shape[1] != self.n_features_in_:
raise ValueError( raise ValueError(
"Shape of input is different from what was seen in `fit`" "Shape of input is different from what was seen in `fit`"
) )
if len(self.features_) == self.n_features_: if len(self.features_) == self.n_features_in_:
result = np.zeros_like(X, dtype=np.int32) - 1 result = np.zeros_like(X, dtype=np.int32) - 1
else: else:
result = np.zeros_like(X) - 1 result = np.zeros_like(X) - 1
Parallel(n_jobs=self.n_jobs, prefer="threads")( Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(self._discretize_feature)(feature, X[:, feature], result) delayed(self._discretize_feature)(feature, X[:, feature], result)
for feature in range(self.n_features_) for feature in range(self.n_features_in_)
) )
return result return result
@@ -183,6 +186,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
The list of cut points for each feature. The list of cut points for each feature.
""" """
result = [] result = []
for feature in range(self.n_features_): for feature in range(self.n_features_in_):
result.append(self.cut_points_[feature]) result.append(self.cut_points_[feature])
return result return result

View File

@@ -2,6 +2,7 @@ import unittest
import sklearn import sklearn
import numpy as np import numpy as np
from sklearn.datasets import load_iris from sklearn.datasets import load_iris
from sklearn.utils.estimator_checks import check_estimator
from ..cppfimdlp import factorize from ..cppfimdlp import factorize
from ..mdlp import FImdlp from ..mdlp import FImdlp
from .. import version from .. import version
@@ -23,13 +24,13 @@ class FImdlpTest(unittest.TestCase):
def test_fit_definitive(self): def test_fit_definitive(self):
clf = FImdlp(algorithm=0) clf = FImdlp(algorithm=0)
clf.fit([[1, 2], [3, 4]], [1, 2]) clf.fit([[1, 2], [3, 4]], [1, 2])
self.assertEqual(clf.n_features_, 2) self.assertEqual(clf.n_features_in_, 2)
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
self.assertListEqual(clf.y_.tolist(), [1, 2]) self.assertListEqual(clf.y_.tolist(), [1, 2])
self.assertListEqual([[2.0], [3.0]], clf.get_cut_points()) self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
X, y = load_iris(return_X_y=True) X, y = load_iris(return_X_y=True)
clf.fit(X, y) clf.fit(X, y)
self.assertEqual(clf.n_features_, 4) self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_)) self.assertTrue(np.array_equal(y, clf.y_))
expected = [ expected = [
@@ -46,13 +47,13 @@ class FImdlpTest(unittest.TestCase):
def test_fit_alternative(self): def test_fit_alternative(self):
clf = FImdlp(algorithm=1) clf = FImdlp(algorithm=1)
clf.fit([[1, 2], [3, 4]], [1, 2]) clf.fit([[1, 2], [3, 4]], [1, 2])
self.assertEqual(clf.n_features_, 2) self.assertEqual(clf.n_features_in_, 2)
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]]) self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
self.assertListEqual(clf.y_.tolist(), [1, 2]) self.assertListEqual(clf.y_.tolist(), [1, 2])
self.assertListEqual([[2], [3]], clf.get_cut_points()) self.assertListEqual([[2], [3]], clf.get_cut_points())
X, y = load_iris(return_X_y=True) X, y = load_iris(return_X_y=True)
clf.fit(X, y) clf.fit(X, y)
self.assertEqual(clf.n_features_, 4) self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_)) self.assertTrue(np.array_equal(y, clf.y_))
@@ -107,7 +108,7 @@ class FImdlpTest(unittest.TestCase):
) )
X, y = load_iris(return_X_y=True) X, y = load_iris(return_X_y=True)
clf.fit(X, y) clf.fit(X, y)
self.assertEqual(clf.n_features_, 4) self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_)) self.assertTrue(np.array_equal(y, clf.y_))
X_transformed = clf.transform(X) X_transformed = clf.transform(X)
@@ -139,7 +140,7 @@ class FImdlpTest(unittest.TestCase):
) )
X, y = load_iris(return_X_y=True) X, y = load_iris(return_X_y=True)
clf.fit(X, y) clf.fit(X, y)
self.assertEqual(clf.n_features_, 4) self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_)) self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_)) self.assertTrue(np.array_equal(y, clf.y_))
self.assertListEqual( self.assertListEqual(
@@ -213,3 +214,7 @@ class FImdlpTest(unittest.TestCase):
] ]
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
FImdlp().join_transform(x, y, 5) FImdlp().join_transform(x, y, 5)
def test_sklearn_transformer(self):
    """Run every check that ``check_estimator`` generates for ``FImdlp()``."""
    generated_checks = check_estimator(FImdlp(), generate_only=True)
    # Each generated item is a pair; the second element is called with the
    # first element as its only argument.
    for estimator, check in generated_checks:
        check(estimator)