test: Add scikit-learn compatibility check_estimator test

This commit is contained in:
2023-01-26 23:20:51 +01:00
parent 16b31ec293
commit 29fc88cecc
3 changed files with 29 additions and 17 deletions

View File

@@ -13,7 +13,9 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
cdef class CFImdlp:
cdef CPPFImdlp *thisptr
def __cinit__(self, algorithm):
cdef int algorithm
def __cinit__(self, algorithm:int ):
self.algorithm = algorithm
self.thisptr = new CPPFImdlp(algorithm)
def __dealloc__(self):
del self.thisptr
@@ -24,6 +26,8 @@ cdef class CFImdlp:
return self.thisptr.getCutPoints()
def get_version(self):
return self.thisptr.version()
def __reduce__(self):
return (CFImdlp, (self.algorithm,))
cdef extern from "Factorize.h" namespace "utils":
vector[int] cppFactorize(vector[string] &input_vector)

View File

@@ -27,7 +27,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
Attributes
----------
n_features_ : int
n_features_in_ : int
The number of features of the data passed to :meth:`fit`.
discretizer_ : list
The list of discretizers, one for each feature.
@@ -41,6 +41,9 @@ class FImdlp(TransformerMixin, BaseEstimator):
the list of features to be discretized
"""
def _more_tags(self):
return {"preserves_dtype": [np.int32], "requires_y": True}
def _check_args(self, X, y, expected_args, kwargs):
# Check that X and y have correct shape
X, y = check_X_y(X, y)
@@ -68,7 +71,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
# Store the classes seen during fit
self.classes_ = unique_labels(y)
self.n_classes_ = self.classes_.shape[0]
self.n_features_ = X.shape[1]
self.n_features_in_ = X.shape[1]
def fit(self, X, y, **kwargs):
"""A reference implementation of a fitting function for a transformer.
@@ -91,11 +94,11 @@ class FImdlp(TransformerMixin, BaseEstimator):
self._update_params(X, y)
self.X_ = X
self.y_ = y
self.discretizer_ = [None] * self.n_features_
self.cut_points_ = [None] * self.n_features_
self.discretizer_ = [None] * self.n_features_in_
self.cut_points_ = [None] * self.n_features_in_
Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(self._fit_discretizer)(feature)
for feature in range(self.n_features_)
for feature in range(self.n_features_in_)
)
return self
@@ -128,22 +131,22 @@ class FImdlp(TransformerMixin, BaseEstimator):
The array containing the discretized values of ``X``.
"""
# Check is fit had been called
check_is_fitted(self, "n_features_")
check_is_fitted(self, "n_features_in_")
# Input validation
X = check_array(X)
# Check that the input is of the same shape as the one passed
# during fit.
if X.shape[1] != self.n_features_:
if X.shape[1] != self.n_features_in_:
raise ValueError(
"Shape of input is different from what was seen in `fit`"
)
if len(self.features_) == self.n_features_:
if len(self.features_) == self.n_features_in_:
result = np.zeros_like(X, dtype=np.int32) - 1
else:
result = np.zeros_like(X) - 1
Parallel(n_jobs=self.n_jobs, prefer="threads")(
delayed(self._discretize_feature)(feature, X[:, feature], result)
for feature in range(self.n_features_)
for feature in range(self.n_features_in_)
)
return result
@@ -183,6 +186,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
The list of cut points for each feature.
"""
result = []
for feature in range(self.n_features_):
for feature in range(self.n_features_in_):
result.append(self.cut_points_[feature])
return result

View File

@@ -2,6 +2,7 @@ import unittest
import sklearn
import numpy as np
from sklearn.datasets import load_iris
from sklearn.utils.estimator_checks import check_estimator
from ..cppfimdlp import factorize
from ..mdlp import FImdlp
from .. import version
@@ -23,13 +24,13 @@ class FImdlpTest(unittest.TestCase):
def test_fit_definitive(self):
clf = FImdlp(algorithm=0)
clf.fit([[1, 2], [3, 4]], [1, 2])
self.assertEqual(clf.n_features_, 2)
self.assertEqual(clf.n_features_in_, 2)
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
self.assertListEqual(clf.y_.tolist(), [1, 2])
self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
X, y = load_iris(return_X_y=True)
clf.fit(X, y)
self.assertEqual(clf.n_features_, 4)
self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_))
expected = [
@@ -46,13 +47,13 @@ class FImdlpTest(unittest.TestCase):
def test_fit_alternative(self):
clf = FImdlp(algorithm=1)
clf.fit([[1, 2], [3, 4]], [1, 2])
self.assertEqual(clf.n_features_, 2)
self.assertEqual(clf.n_features_in_, 2)
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
self.assertListEqual(clf.y_.tolist(), [1, 2])
self.assertListEqual([[2], [3]], clf.get_cut_points())
X, y = load_iris(return_X_y=True)
clf.fit(X, y)
self.assertEqual(clf.n_features_, 4)
self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_))
@@ -107,7 +108,7 @@ class FImdlpTest(unittest.TestCase):
)
X, y = load_iris(return_X_y=True)
clf.fit(X, y)
self.assertEqual(clf.n_features_, 4)
self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_))
X_transformed = clf.transform(X)
@@ -139,7 +140,7 @@ class FImdlpTest(unittest.TestCase):
)
X, y = load_iris(return_X_y=True)
clf.fit(X, y)
self.assertEqual(clf.n_features_, 4)
self.assertEqual(clf.n_features_in_, 4)
self.assertTrue(np.array_equal(X, clf.X_))
self.assertTrue(np.array_equal(y, clf.y_))
self.assertListEqual(
@@ -213,3 +214,7 @@ class FImdlpTest(unittest.TestCase):
]
with self.assertRaises(ValueError):
FImdlp().join_transform(x, y, 5)
def test_sklearn_transformer(self):
for check, test in check_estimator(FImdlp(), generate_only=True):
test(check)