feat: Add factorize method to transformer

This commit is contained in:
2023-01-28 10:35:07 +01:00
parent 29fc88cecc
commit 050b923631
2 changed files with 27 additions and 2 deletions

View File

@@ -150,6 +150,21 @@ class FImdlp(TransformerMixin, BaseEstimator):
)
return result
def factorize(self, yy):
    """Encode byte-string labels as factorized values.

    Thin wrapper that forwards ``yy`` unchanged to the module-level
    ``factorize`` function and returns whatever that function produces.

    Parameters
    ----------
    yy : array, shape (n_samples,)
        Labels to be factorized. Every element MUST be bytes,
        i.e. b"0", b"1", ...

    Returns
    -------
    array, shape (n_samples,)
        Factorized labels.
    """
    # Inside a method body the bare name resolves to the module-level
    # function, not to this method, so there is no recursion here.
    factorized = factorize(yy)
    return factorized
def join_transform(self, X, y, feature, **kwargs):
"""Join the selected feature with the labels and discretize the values
join - fit - transform
@@ -174,7 +189,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
f"{str(item_y)}{str(item_x)}".encode()
for item_y, item_x in zip(y, X[:, feature])
]
yy = factorize(self.y_join_)
yy = self.factorize(self.y_join_)
XX = np.delete(X, feature, axis=1)
return self.fit(XX, yy).transform(XX)

View File

@@ -162,7 +162,7 @@ class FImdlpTest(unittest.TestCase):
clf = FImdlp(algorithm=1)
clf.transform([[1, 2], [3, 4]])
def test_factorize(self):
def test_cppfactorize(self):
source = [
b"f0",
b"f1",
@@ -215,6 +215,16 @@ class FImdlpTest(unittest.TestCase):
with self.assertRaises(ValueError):
FImdlp().join_transform(x, y, 5)
def test_factorize(self):
    """Check FImdlp.factorize on an ndarray and on a plain list of bytes."""
    # Each entry pairs the input labels with the codes expected back.
    cases = (
        (np.array([b"f0", b"f0", b"f2", b"f3", b"f4"]), [0, 0, 1, 2, 3]),
        ([b"f4", b"f0", b"f0", b"f2", b"f3"], [0, 1, 1, 2, 3]),
    )
    for labels, expected in cases:
        # A fresh estimator is built for every case.
        result = FImdlp().factorize(labels)
        self.assertListEqual(expected, result)
def test_sklearn_transformer(self):
for check, test in check_estimator(FImdlp(), generate_only=True):
test(check)