mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 00:15:52 +00:00
add join_fit target info
This commit is contained in:
12
k.py
12
k.py
@@ -1,12 +0,0 @@
|
|||||||
from sklearn.datasets import load_wine
|
|
||||||
from fimdlp.mdlp import FImdlp
|
|
||||||
|
|
||||||
X, y = load_wine(return_X_y=True)
|
|
||||||
trans = FImdlp()
|
|
||||||
Xt = trans.join_transform(X, y, 12)
|
|
||||||
print("X shape = ", X.shape)
|
|
||||||
print("Xt.shape=", Xt.shape)
|
|
||||||
print("Xt ", Xt[:10])
|
|
||||||
print("trans.X_ shape = ", trans.X_.shape)
|
|
||||||
print("trans.y_ ", trans.y_[:10])
|
|
||||||
print("y_join ", trans.y_join_[:10])
|
|
@@ -6,8 +6,6 @@ from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
|||||||
from joblib import Parallel, delayed
|
from joblib import Parallel, delayed
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
# from ._version import __version__
|
|
||||||
|
|
||||||
|
|
||||||
class FImdlp(TransformerMixin, BaseEstimator):
|
class FImdlp(TransformerMixin, BaseEstimator):
|
||||||
def __init__(self, n_jobs=-1, min_length=3, max_depth=1e6, max_cuts=0):
|
def __init__(self, n_jobs=-1, min_length=3, max_depth=1e6, max_cuts=0):
|
||||||
@@ -24,6 +22,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
The number of jobs to run in parallel. :meth:`fit` and
|
The number of jobs to run in parallel. :meth:`fit` and
|
||||||
:meth:`transform`, are parallelized over the features. ``-1`` means
|
:meth:`transform`, are parallelized over the features. ``-1`` means
|
||||||
using all cores available.
|
using all cores available.
|
||||||
|
min_length: int, default=3
|
||||||
|
The minimum length of an interval to be considered to be discretized.
|
||||||
|
max_depth: int, default=1e6
|
||||||
|
The maximum depth of the discretization process.
|
||||||
|
max_cuts: float, default=0
|
||||||
|
The maximum number of cut points to be computed for each feature.
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
----------
|
----------
|
||||||
@@ -109,6 +113,8 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
delayed(self._fit_discretizer)(feature)
|
delayed(self._fit_discretizer)(feature)
|
||||||
for feature in range(self.n_features_in_)
|
for feature in range(self.n_features_in_)
|
||||||
)
|
)
|
||||||
|
# target of every feature. Start with -1 => y (see join_fit)
|
||||||
|
self.target_ = [-1] * self.n_features_in_
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def _fit_discretizer(self, feature):
|
def _fit_discretizer(self, feature):
|
||||||
@@ -244,11 +250,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
|
|||||||
f"Target {target} not in range [0, {self.n_features_in_})"
|
f"Target {target} not in range [0, {self.n_features_in_})"
|
||||||
)
|
)
|
||||||
if target in features:
|
if target in features:
|
||||||
raise ValueError("Target cannot in features to join")
|
raise ValueError("Target cannot be in features to join")
|
||||||
y_join = [
|
y_join = [
|
||||||
f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
|
f"{str(item_y)}{''.join([str(x) for x in items_x])}".encode()
|
||||||
for item_y, items_x in zip(self.y_, data[:, features])
|
for item_y, items_x in zip(self.y_, data[:, features])
|
||||||
]
|
]
|
||||||
|
self.target_[target] = features + [-1]
|
||||||
self.y_join_ = y_join
|
self.y_join_ = y_join
|
||||||
self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
|
self.discretizer_[target].fit(self.X_[:, target], factorize(y_join))
|
||||||
self.cut_points_[target] = self.discretizer_[target].get_cut_points()
|
self.cut_points_[target] = self.discretizer_[target].get_cut_points()
|
||||||
|
@@ -196,7 +196,7 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
clf.join_fit([0, 2], 2, x)
|
clf.join_fit([0, 2], 2, x)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
str(exception.exception),
|
str(exception.exception),
|
||||||
"Target cannot in features to join",
|
"Target cannot be in features to join",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_factorize(self):
|
def test_factorize(self):
|
||||||
@@ -209,6 +209,16 @@ class FImdlpTest(unittest.TestCase):
|
|||||||
computed = clf.factorize(y)
|
computed = clf.factorize(y)
|
||||||
self.assertListEqual([0, 1, 1, 2, 3], computed)
|
self.assertListEqual([0, 1, 1, 2, 3], computed)
|
||||||
|
|
||||||
|
def test_join_fit_info(self):
|
||||||
|
clf = FImdlp()
|
||||||
|
X, y = load_iris(return_X_y=True)
|
||||||
|
clf.fit(X, y)
|
||||||
|
clf.join_fit([0, 2], 1, X)
|
||||||
|
clf.join_fit([0, 3], 2, X)
|
||||||
|
clf.join_fit([1, 2], 3, X)
|
||||||
|
expected = [-1, [0, 2, -1], [0, 3, -1], [1, 2, -1]]
|
||||||
|
self.assertListEqual(expected, clf.target_)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def test_sklearn_transformer():
|
def test_sklearn_transformer():
|
||||||
for check, test in check_estimator(FImdlp(), generate_only=True):
|
for check, test in check_estimator(FImdlp(), generate_only=True):
|
||||||
|
Reference in New Issue
Block a user