From cabf926eb14560ee7dde6ab55dd54c4e3107978b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@users.noreply.github.com>
Date: Sat, 14 Jan 2023 21:38:11 +0100
Subject: [PATCH] Update to scikit-learn 1.2

---
 notebooks/wine_iris.ipynb | 342 +++++++++++++++++++-------------------
 odte/Odte.py              |  16 +-
 odte/tests/Odte_tests.py  |  30 ++--
 3 files changed, 194 insertions(+), 194 deletions(-)

diff --git a/notebooks/wine_iris.ipynb b/notebooks/wine_iris.ipynb
index f868ee7..c5d8883 100644
--- a/notebooks/wine_iris.ipynb
+++ b/notebooks/wine_iris.ipynb
@@ -1,174 +1,174 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import datetime, time\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split, cross_validate\n",
-    "from sklearn import tree\n",
-    "from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
-    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
-    "from stree import Stree\n",
-    "from odte import Odte\n",
-    "\n",
-    "random_state = 1"
-   ]
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import datetime, time\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split, cross_validate\n",
+        "from sklearn import tree\n",
+        "from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
+        "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
+        "from stree import Stree\n",
+        "from odte import Odte\n",
+        "\n",
+        "random_state = 1"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from sklearn.datasets import load_wine\n",
+        "X, y = load_wine(return_X_y=True)\n",
+        "Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_estimators = 20\n",
+        "clf = {}\n",
+        "clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
+        "clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
+        "clf[\"odte\"] = Odte(n_jobs=-1, estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
+        "clf[\"adaboost\"] = AdaBoostClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+        "clf[\"bagging\"] = BaggingClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "tags": []
+      },
+      "outputs": [],
+      "source": [
+        "print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
+        "for clf_type, item in clf.items():\n",
+        "    print(f\"Training {clf_type}...\")\n",
+        "    now = time.time()\n",
+        "    item.fit(Xtrain, ytrain)\n",
+        "    print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from sklearn.datasets import load_iris\n",
+        "X, y = load_iris(return_X_y=True)\n",
+        "Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "n_estimators = 10\n",
+        "clf = {}\n",
+        "clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
+        "clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
+        "clf[\"adaboost\"] = AdaBoostClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+        "clf[\"bagging\"] = BaggingClassifier(estimator=clf[\"stree\"], n_estimators=n_estimators)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "tags": []
+      },
+      "outputs": [],
+      "source": [
+        "print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
+        "for clf_type, item in clf.items():\n",
+        "    print(f\"Training {clf_type}...\")\n",
+        "    now = time.time()\n",
+        "    item.fit(Xtrain, ytrain)\n",
+        "    print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "tags": []
+      },
+      "outputs": [],
+      "source": [
+        "cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
+        "print(cross)\n",
+        "print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "tags": []
+      },
+      "outputs": [],
+      "source": [
+        "cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
+        "print(cross)\n",
+        "print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "tags": []
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.utils.estimator_checks import check_estimator\n",
+        "# Make checks one by one\n",
+        "c = 0\n",
+        "checks = check_estimator(Odte(), generate_only=True)\n",
+        "for check in checks:\n",
+        "    c += 1\n",
+        "    print(c, check[1])\n",
+        "    check[1](check[0])"
+      ]
+    }
+  ],
+  "metadata": {
+    "interpreter": {
+      "hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939"
+    },
+    "kernelspec": {
+      "display_name": "Python 3.9.2 64-bit ('general': venv)",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.7"
+    },
+    "orig_nbformat": 2
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.datasets import load_wine\n",
-    "X, y = load_wine(return_X_y=True)\n",
-    "Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_estimators = 20\n",
-    "clf = {}\n",
-    "clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
-    "clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
-    "clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
-    "clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
-    "clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
-    "for clf_type, item in clf.items():\n",
-    "    print(f\"Training {clf_type}...\")\n",
-    "    now = time.time()\n",
-    "    item.fit(Xtrain, ytrain)\n",
-    "    print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.datasets import load_iris\n",
-    "X, y = load_iris(return_X_y=True)\n",
-    "Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_estimators = 10\n",
-    "clf = {}\n",
-    "clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
-    "clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
-    "clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
-    "clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
-    "for clf_type, item in clf.items():\n",
-    "    print(f\"Training {clf_type}...\")\n",
-    "    now = time.time()\n",
-    "    item.fit(Xtrain, ytrain)\n",
-    "    print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
-    "print(cross)\n",
-    "print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
-    "print(cross)\n",
-    "print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "from sklearn.utils.estimator_checks import check_estimator\n",
-    "# Make checks one by one\n",
-    "c = 0\n",
-    "checks = check_estimator(Odte(), generate_only=True)\n",
-    "for check in checks:\n",
-    "    c += 1\n",
-    "    print(c, check[1])\n",
-    "    check[1](check[0])"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.2 64-bit ('general': venv)",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  },
-  "orig_nbformat": 2
- },
- "nbformat": 4,
- "nbformat_minor": 2
+  "nbformat": 4,
+  "nbformat_minor": 2
 }
diff --git a/odte/Odte.py b/odte/Odte.py
index 825b6c3..c94afbb 100644
--- a/odte/Odte.py
+++ b/odte/Odte.py
@@ -31,7 +31,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
         self,
         # n_jobs = -1 to use all available cores
         n_jobs: int = -1,
-        base_estimator: BaseEstimator = None,
+        estimator: BaseEstimator = None,
         random_state: int = 0,
         max_features: Optional[Union[str, int, float]] = None,
         max_samples: Optional[Union[int, float]] = None,
@@ -39,10 +39,10 @@ class Odte(BaseEnsemble, ClassifierMixin):
         be_hyperparams: str = "{}",
     ):
         super().__init__(
-            base_estimator=base_estimator,
+            estimator=estimator,
             n_estimators=n_estimators,
         )
-        self.base_estimator = base_estimator
+        self.estimator = estimator
         self.n_jobs = n_jobs
         self.n_estimators = n_estimators
         self.random_state = random_state
@@ -55,7 +55,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
         return __version__
 
     def _validate_estimator(self) -> None:
-        """Check the estimator and set the base_estimator_ attribute."""
+        """Check the estimator and set the estimator_ attribute."""
         super()._validate_estimator(
             default=Stree(random_state=self.random_state)
         )
@@ -79,7 +79,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
         # Initialize computed parameters
         #  Build the estimator
         self.max_features_ = self._initialize_max_features()
-        # build base_estimator_
+        # build estimator_
         self._validate_estimator()
         self.classes_, y = np.unique(y, return_inverse=True)
         self.n_classes_: int = self.classes_.shape[0]
@@ -108,7 +108,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
     ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
         n_samples = X.shape[0]
         boot_samples = self._get_bootstrap_n_samples(n_samples)
-        estimator = clone(self.base_estimator_)
+        estimator = clone(self.estimator_)
         return Parallel(n_jobs=self.n_jobs, prefer="threads")(  # type: ignore
             delayed(Odte._parallel_build_tree)(
                 estimator,
@@ -127,7 +127,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
 
     @staticmethod
     def _parallel_build_tree(
-        base_estimator_: BaseEstimator,
+        estimator_: BaseEstimator,
         X: np.ndarray,
         y: np.ndarray,
         weights: np.ndarray,
@@ -136,7 +136,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
         max_features: int,
         hyperparams: str,
     ) -> Tuple[BaseEstimator, Tuple[int, ...]]:
-        clf = clone(base_estimator_)
+        clf = clone(estimator_)
         hyperparams_ = json.loads(hyperparams)
         hyperparams_.update(dict(random_state=random_seed))
         clf.set_params(**hyperparams_)
diff --git a/odte/tests/Odte_tests.py b/odte/tests/Odte_tests.py
index 3974b45..498e9f2 100644
--- a/odte/tests/Odte_tests.py
+++ b/odte/tests/Odte_tests.py
@@ -76,15 +76,15 @@ class Odte_test(unittest.TestCase):
         X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
         expected = [0, 1, 1, 2]
         tclf = Odte(
-            base_estimator=Stree(),
+            estimator=Stree(),
             random_state=self._random_state,
             n_estimators=10,
             n_jobs=-1,
         )
         tclf.set_params(
             **dict(
-                base_estimator__kernel="rbf",
-                base_estimator__random_state=self._random_state,
+                estimator__kernel="rbf",
+                estimator__random_state=self._random_state,
             )
         )
         computed = tclf.fit(X, y).predict(X)
@@ -96,14 +96,14 @@ class Odte_test(unittest.TestCase):
         X, y = load_dataset(self._random_state)
         expected = y
         tclf = Odte(
-            base_estimator=Stree(),
+            estimator=Stree(),
             random_state=self._random_state,
             max_features=1.0,
             max_samples=0.1,
         )
         tclf.set_params(
             **dict(
-                base_estimator__kernel="linear",
+                estimator__kernel="linear",
             )
         )
         computed = tclf.fit(X, y).predict(X)
@@ -146,16 +146,16 @@ class Odte_test(unittest.TestCase):
                 "cfs",
             ]:
                 tclf = Odte(
-                    base_estimator=Stree(),
+                    estimator=Stree(),
                     random_state=self._random_state,
                     n_estimators=3,
                     n_jobs=1,
                 )
                 tclf.set_params(
                     **dict(
-                        base_estimator__max_features=max_features,
-                        base_estimator__splitter=splitter,
-                        base_estimator__random_state=self._random_state,
+                        estimator__max_features=max_features,
+                        estimator__splitter=splitter,
+                        estimator__random_state=self._random_state,
                     )
                 )
                 expected = results.pop(0)
@@ -182,7 +182,7 @@ class Odte_test(unittest.TestCase):
 
     def test_nodes_leaves_not_fitted(self):
         tclf = Odte(
-            base_estimator=Stree(),
+            estimator=Stree(),
             random_state=self._random_state,
             n_estimators=3,
         )
@@ -191,13 +191,13 @@ class Odte_test(unittest.TestCase):
 
     def test_nodes_leaves_depth(self):
         tclf = Odte(
-            base_estimator=Stree(),
+            estimator=Stree(),
             random_state=self._random_state,
             n_estimators=5,
             n_jobs=1,
         )
         tclf_p = Odte(
-            base_estimator=Stree(),
+            estimator=Stree(),
             random_state=self._random_state,
             n_estimators=5,
             n_jobs=-1,
@@ -215,7 +215,7 @@ class Odte_test(unittest.TestCase):
 
     def test_nodes_leaves_SVC(self):
         tclf = Odte(
-            base_estimator=SVC(),
+            estimator=SVC(),
             random_state=self._random_state,
             n_estimators=3,
         )
@@ -227,7 +227,7 @@ class Odte_test(unittest.TestCase):
         self.assertAlmostEqual(0.0, leaves)
         self.assertAlmostEqual(0.0, nodes)
 
-    def test_base_estimator_hyperparams(self):
+    def test_estimator_hyperparams(self):
         data = [
             (Stree(), {"max_features": 7, "max_depth": 2}),
             (SVC(), {"kernel": "linear", "cache_size": 100}),
@@ -235,7 +235,7 @@ class Odte_test(unittest.TestCase):
         for clf, hyperparams in data:
             hyperparams_ = json.dumps(hyperparams)
             tclf = Odte(
-                base_estimator=clf,
+                estimator=clf,
                 random_state=self._random_state,
                 n_estimators=3,
                 be_hyperparams=hyperparams_,