mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
test: ⚡ Add scikit learn compatibility check_estimator test
This commit is contained in:
@@ -13,7 +13,9 @@ cdef extern from "../cppmdlp/CPPFImdlp.h" namespace "mdlp":
|
||||
|
||||
cdef class CFImdlp:
|
||||
cdef CPPFImdlp *thisptr
|
||||
def __cinit__(self, algorithm):
|
||||
cdef int algorithm
|
||||
def __cinit__(self, algorithm:int ):
|
||||
self.algorithm = algorithm
|
||||
self.thisptr = new CPPFImdlp(algorithm)
|
||||
def __dealloc__(self):
|
||||
del self.thisptr
|
||||
@@ -24,6 +26,8 @@ cdef class CFImdlp:
|
||||
return self.thisptr.getCutPoints()
|
||||
def get_version(self):
|
||||
return self.thisptr.version()
|
||||
def __reduce__(self):
|
||||
return (CFImdlp, (self.algorithm,))
|
||||
|
||||
cdef extern from "Factorize.h" namespace "utils":
|
||||
vector[int] cppFactorize(vector[string] &input_vector)
|
||||
|
@@ -27,7 +27,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
|
||||
Attributes
|
||||
----------
|
||||
n_features_ : int
|
||||
n_features_in_ : int
|
||||
The number of features of the data passed to :meth:`fit`.
|
||||
discretizer_ : list
|
||||
The list of discretizers, one for each feature.
|
||||
@@ -41,6 +41,9 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
the list of features to be discretized
|
||||
"""
|
||||
|
||||
def _more_tags(self):
|
||||
return {"preserves_dtype": [np.int32], "requires_y": True}
|
||||
|
||||
def _check_args(self, X, y, expected_args, kwargs):
|
||||
# Check that X and y have correct shape
|
||||
X, y = check_X_y(X, y)
|
||||
@@ -68,7 +71,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
# Store the classes seen during fit
|
||||
self.classes_ = unique_labels(y)
|
||||
self.n_classes_ = self.classes_.shape[0]
|
||||
self.n_features_ = X.shape[1]
|
||||
self.n_features_in_ = X.shape[1]
|
||||
|
||||
def fit(self, X, y, **kwargs):
|
||||
"""Fit one discretizer per feature of X and store its cut points.
|
||||
@@ -91,11 +94,11 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
self._update_params(X, y)
|
||||
self.X_ = X
|
||||
self.y_ = y
|
||||
self.discretizer_ = [None] * self.n_features_
|
||||
self.cut_points_ = [None] * self.n_features_
|
||||
self.discretizer_ = [None] * self.n_features_in_
|
||||
self.cut_points_ = [None] * self.n_features_in_
|
||||
Parallel(n_jobs=self.n_jobs, prefer="threads")(
|
||||
delayed(self._fit_discretizer)(feature)
|
||||
for feature in range(self.n_features_)
|
||||
for feature in range(self.n_features_in_)
|
||||
)
|
||||
return self
|
||||
|
||||
@@ -128,22 +131,22 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
The array containing the discretized values of ``X``.
|
||||
"""
|
||||
# Check that fit has been called
|
||||
check_is_fitted(self, "n_features_")
|
||||
check_is_fitted(self, "n_features_in_")
|
||||
# Input validation
|
||||
X = check_array(X)
|
||||
# Check that the input is of the same shape as the one passed
|
||||
# during fit.
|
||||
if X.shape[1] != self.n_features_:
|
||||
if X.shape[1] != self.n_features_in_:
|
||||
raise ValueError(
|
||||
"Shape of input is different from what was seen in `fit`"
|
||||
)
|
||||
if len(self.features_) == self.n_features_:
|
||||
if len(self.features_) == self.n_features_in_:
|
||||
result = np.zeros_like(X, dtype=np.int32) - 1
|
||||
else:
|
||||
result = np.zeros_like(X) - 1
|
||||
Parallel(n_jobs=self.n_jobs, prefer="threads")(
|
||||
delayed(self._discretize_feature)(feature, X[:, feature], result)
|
||||
for feature in range(self.n_features_)
|
||||
for feature in range(self.n_features_in_)
|
||||
)
|
||||
return result
|
||||
|
||||
@@ -183,6 +186,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
||||
The list of cut points for each feature.
|
||||
"""
|
||||
result = []
|
||||
for feature in range(self.n_features_):
|
||||
for feature in range(self.n_features_in_):
|
||||
result.append(self.cut_points_[feature])
|
||||
return result
|
||||
|
@@ -2,6 +2,7 @@ import unittest
|
||||
import sklearn
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.utils.estimator_checks import check_estimator
|
||||
from ..cppfimdlp import factorize
|
||||
from ..mdlp import FImdlp
|
||||
from .. import version
|
||||
@@ -23,13 +24,13 @@ class FImdlpTest(unittest.TestCase):
|
||||
def test_fit_definitive(self):
|
||||
clf = FImdlp(algorithm=0)
|
||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||
self.assertEqual(clf.n_features_, 2)
|
||||
self.assertEqual(clf.n_features_in_, 2)
|
||||
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
||||
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
||||
self.assertListEqual([[2.0], [3.0]], clf.get_cut_points())
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
self.assertEqual(clf.n_features_, 4)
|
||||
self.assertEqual(clf.n_features_in_, 4)
|
||||
self.assertTrue(np.array_equal(X, clf.X_))
|
||||
self.assertTrue(np.array_equal(y, clf.y_))
|
||||
expected = [
|
||||
@@ -46,13 +47,13 @@ class FImdlpTest(unittest.TestCase):
|
||||
def test_fit_alternative(self):
|
||||
clf = FImdlp(algorithm=1)
|
||||
clf.fit([[1, 2], [3, 4]], [1, 2])
|
||||
self.assertEqual(clf.n_features_, 2)
|
||||
self.assertEqual(clf.n_features_in_, 2)
|
||||
self.assertListEqual(clf.X_.tolist(), [[1, 2], [3, 4]])
|
||||
self.assertListEqual(clf.y_.tolist(), [1, 2])
|
||||
self.assertListEqual([[2], [3]], clf.get_cut_points())
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
self.assertEqual(clf.n_features_, 4)
|
||||
self.assertEqual(clf.n_features_in_, 4)
|
||||
self.assertTrue(np.array_equal(X, clf.X_))
|
||||
self.assertTrue(np.array_equal(y, clf.y_))
|
||||
|
||||
@@ -107,7 +108,7 @@ class FImdlpTest(unittest.TestCase):
|
||||
)
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
self.assertEqual(clf.n_features_, 4)
|
||||
self.assertEqual(clf.n_features_in_, 4)
|
||||
self.assertTrue(np.array_equal(X, clf.X_))
|
||||
self.assertTrue(np.array_equal(y, clf.y_))
|
||||
X_transformed = clf.transform(X)
|
||||
@@ -139,7 +140,7 @@ class FImdlpTest(unittest.TestCase):
|
||||
)
|
||||
X, y = load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
self.assertEqual(clf.n_features_, 4)
|
||||
self.assertEqual(clf.n_features_in_, 4)
|
||||
self.assertTrue(np.array_equal(X, clf.X_))
|
||||
self.assertTrue(np.array_equal(y, clf.y_))
|
||||
self.assertListEqual(
|
||||
@@ -213,3 +214,7 @@ class FImdlpTest(unittest.TestCase):
|
||||
]
|
||||
with self.assertRaises(ValueError):
|
||||
FImdlp().join_transform(x, y, 5)
|
||||
|
||||
def test_sklearn_transformer(self):
|
||||
for check, test in check_estimator(FImdlp(), generate_only=True):
|
||||
test(check)
|
||||
|
Reference in New Issue
Block a user