From 050b9236316c4fc49873e29f82ce5df4780c53b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Sat, 28 Jan 2023 10:35:07 +0100 Subject: [PATCH] feat: :zap: Add factorize method to transformer --- src/fimdlp/mdlp.py | 17 ++++++++++++++++- src/fimdlp/tests/FImdlp_test.py | 12 +++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/fimdlp/mdlp.py b/src/fimdlp/mdlp.py index a9ec2b8..415705b 100644 --- a/src/fimdlp/mdlp.py +++ b/src/fimdlp/mdlp.py @@ -150,6 +150,21 @@ class FImdlp(TransformerMixin, BaseEstimator): ) return result + def factorize(self, yy): + """Factorize the input labels + + Parameters + ---------- + yy : array, shape (n_samples,) + Labels to be factorized, MUST be bytes, i.e. b"0", b"1", ... + + Returns + ------- + array, shape (n_samples,) + Factorized labels + """ + return factorize(yy) + def join_transform(self, X, y, feature, **kwargs): """Join the selected feature with the labels and discretize the values join - fit - transform @@ -174,7 +189,7 @@ class FImdlp(TransformerMixin, BaseEstimator): f"{str(item_y)}{str(item_x)}".encode() for item_y, item_x in zip(y, X[:, feature]) ] - yy = factorize(self.y_join_) + yy = self.factorize(self.y_join_) XX = np.delete(X, feature, axis=1) return self.fit(XX, yy).transform(XX) diff --git a/src/fimdlp/tests/FImdlp_test.py b/src/fimdlp/tests/FImdlp_test.py index 2de67ab..5e522f3 100644 --- a/src/fimdlp/tests/FImdlp_test.py +++ b/src/fimdlp/tests/FImdlp_test.py @@ -162,7 +162,7 @@ class FImdlpTest(unittest.TestCase): clf = FImdlp(algorithm=1) clf.transform([[1, 2], [3, 4]]) - def test_factorize(self): + def test_cppfactorize(self): source = [ b"f0", b"f1", @@ -215,6 +215,16 @@ class FImdlpTest(unittest.TestCase): with self.assertRaises(ValueError): FImdlp().join_transform(x, y, 5) + def test_factorize(self): + y = np.array([b"f0", b"f0", b"f2", b"f3", b"f4"]) + clf = FImdlp() + computed = clf.factorize(y) + self.assertListEqual([0, 0, 1, 2, 3], computed) + y = [b"f4", b"f0", b"f0", b"f2", b"f3"] + clf = FImdlp() + computed = clf.factorize(y) + self.assertListEqual([0, 1, 1, 2, 3], computed) + def test_sklearn_transformer(self): for check, test in check_estimator(FImdlp(), generate_only=True): test(check)