Update to scikit-learn 1.2

This commit is contained in:
Ricardo Montañana Gómez 2023-01-14 21:38:11 +01:00
parent 7300bd66db
commit cabf926eb1
Signed by untrusted user who does not match committer: rmontanana
GPG Key ID: 46064262FD9A7ADE
3 changed files with 194 additions and 194 deletions

View File

@@ -40,9 +40,9 @@
"clf = {}\n", "clf = {}\n",
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n", "clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n", "clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
"clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n", "clf[\"odte\"] = Odte(n_jobs=-1, estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n", "clf[\"adaboost\"] = AdaBoostClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)" "clf[\"bagging\"] = BaggingClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators)"
] ]
}, },
{ {
@@ -82,8 +82,8 @@
"clf = {}\n", "clf = {}\n",
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n", "clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
"clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n", "clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n", "clf[\"adaboost\"] = AdaBoostClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)" "clf[\"bagging\"] = BaggingClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators)"
] ]
}, },
{ {

View File

@@ -31,7 +31,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
self, self,
# n_jobs = -1 to use all available cores # n_jobs = -1 to use all available cores
n_jobs: int = -1, n_jobs: int = -1,
base_estimator: BaseEstimator = None, estimator: BaseEstimator = None,
random_state: int = 0, random_state: int = 0,
max_features: Optional[Union[str, int, float]] = None, max_features: Optional[Union[str, int, float]] = None,
max_samples: Optional[Union[int, float]] = None, max_samples: Optional[Union[int, float]] = None,
@@ -39,10 +39,10 @@ class Odte(BaseEnsemble, ClassifierMixin):
be_hyperparams: str = "{}", be_hyperparams: str = "{}",
): ):
super().__init__( super().__init__(
base_estimator=base_estimator, estimator=estimator,
n_estimators=n_estimators, n_estimators=n_estimators,
) )
self.base_estimator = base_estimator self.estimator = estimator
self.n_jobs = n_jobs self.n_jobs = n_jobs
self.n_estimators = n_estimators self.n_estimators = n_estimators
self.random_state = random_state self.random_state = random_state
@@ -55,7 +55,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
return __version__ return __version__
def _validate_estimator(self) -> None: def _validate_estimator(self) -> None:
"""Check the estimator and set the base_estimator_ attribute.""" """Check the estimator and set the estimator_ attribute."""
super()._validate_estimator( super()._validate_estimator(
default=Stree(random_state=self.random_state) default=Stree(random_state=self.random_state)
) )
@@ -79,7 +79,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
# Initialize computed parameters # Initialize computed parameters
# Build the estimator # Build the estimator
self.max_features_ = self._initialize_max_features() self.max_features_ = self._initialize_max_features()
# build base_estimator_ # build estimator_
self._validate_estimator() self._validate_estimator()
self.classes_, y = np.unique(y, return_inverse=True) self.classes_, y = np.unique(y, return_inverse=True)
self.n_classes_: int = self.classes_.shape[0] self.n_classes_: int = self.classes_.shape[0]
@@ -108,7 +108,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]: ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
n_samples = X.shape[0] n_samples = X.shape[0]
boot_samples = self._get_bootstrap_n_samples(n_samples) boot_samples = self._get_bootstrap_n_samples(n_samples)
estimator = clone(self.base_estimator_) estimator = clone(self.estimator_)
return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore
delayed(Odte._parallel_build_tree)( delayed(Odte._parallel_build_tree)(
estimator, estimator,
@@ -127,7 +127,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
@staticmethod @staticmethod
def _parallel_build_tree( def _parallel_build_tree(
base_estimator_: BaseEstimator, estimator_: BaseEstimator,
X: np.ndarray, X: np.ndarray,
y: np.ndarray, y: np.ndarray,
weights: np.ndarray, weights: np.ndarray,
@@ -136,7 +136,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
max_features: int, max_features: int,
hyperparams: str, hyperparams: str,
) -> Tuple[BaseEstimator, Tuple[int, ...]]: ) -> Tuple[BaseEstimator, Tuple[int, ...]]:
clf = clone(base_estimator_) clf = clone(estimator_)
hyperparams_ = json.loads(hyperparams) hyperparams_ = json.loads(hyperparams)
hyperparams_.update(dict(random_state=random_seed)) hyperparams_.update(dict(random_state=random_seed))
clf.set_params(**hyperparams_) clf.set_params(**hyperparams_)

View File

@@ -76,15 +76,15 @@ class Odte_test(unittest.TestCase):
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2] X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
expected = [0, 1, 1, 2] expected = [0, 1, 1, 2]
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=10, n_estimators=10,
n_jobs=-1, n_jobs=-1,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__kernel="rbf", estimator__kernel="rbf",
base_estimator__random_state=self._random_state, estimator__random_state=self._random_state,
) )
) )
computed = tclf.fit(X, y).predict(X) computed = tclf.fit(X, y).predict(X)
@@ -96,14 +96,14 @@ class Odte_test(unittest.TestCase):
X, y = load_dataset(self._random_state) X, y = load_dataset(self._random_state)
expected = y expected = y
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
max_features=1.0, max_features=1.0,
max_samples=0.1, max_samples=0.1,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__kernel="linear", estimator__kernel="linear",
) )
) )
computed = tclf.fit(X, y).predict(X) computed = tclf.fit(X, y).predict(X)
@@ -146,16 +146,16 @@ class Odte_test(unittest.TestCase):
"cfs", "cfs",
]: ]:
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
n_jobs=1, n_jobs=1,
) )
tclf.set_params( tclf.set_params(
**dict( **dict(
base_estimator__max_features=max_features, estimator__max_features=max_features,
base_estimator__splitter=splitter, estimator__splitter=splitter,
base_estimator__random_state=self._random_state, estimator__random_state=self._random_state,
) )
) )
expected = results.pop(0) expected = results.pop(0)
@@ -182,7 +182,7 @@ class Odte_test(unittest.TestCase):
def test_nodes_leaves_not_fitted(self): def test_nodes_leaves_not_fitted(self):
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
) )
@@ -191,13 +191,13 @@ class Odte_test(unittest.TestCase):
def test_nodes_leaves_depth(self): def test_nodes_leaves_depth(self):
tclf = Odte( tclf = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=5, n_estimators=5,
n_jobs=1, n_jobs=1,
) )
tclf_p = Odte( tclf_p = Odte(
base_estimator=Stree(), estimator=Stree(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=5, n_estimators=5,
n_jobs=-1, n_jobs=-1,
@@ -215,7 +215,7 @@ class Odte_test(unittest.TestCase):
def test_nodes_leaves_SVC(self): def test_nodes_leaves_SVC(self):
tclf = Odte( tclf = Odte(
base_estimator=SVC(), estimator=SVC(),
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
) )
@@ -227,7 +227,7 @@ class Odte_test(unittest.TestCase):
self.assertAlmostEqual(0.0, leaves) self.assertAlmostEqual(0.0, leaves)
self.assertAlmostEqual(0.0, nodes) self.assertAlmostEqual(0.0, nodes)
def test_base_estimator_hyperparams(self): def test_estimator_hyperparams(self):
data = [ data = [
(Stree(), {"max_features": 7, "max_depth": 2}), (Stree(), {"max_features": 7, "max_depth": 2}),
(SVC(), {"kernel": "linear", "cache_size": 100}), (SVC(), {"kernel": "linear", "cache_size": 100}),
@@ -235,7 +235,7 @@ class Odte_test(unittest.TestCase):
for clf, hyperparams in data: for clf, hyperparams in data:
hyperparams_ = json.dumps(hyperparams) hyperparams_ = json.dumps(hyperparams)
tclf = Odte( tclf = Odte(
base_estimator=clf, estimator=clf,
random_state=self._random_state, random_state=self._random_state,
n_estimators=3, n_estimators=3,
be_hyperparams=hyperparams_, be_hyperparams=hyperparams_,