From 7a49d672df35b730e8e034620e111e7e25932f82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 10 Nov 2020 14:12:00 +0100 Subject: [PATCH] use all available cores in fit adapt pre-commit to python 3.8 --- .pre-commit-config.yaml | 9 +++++++-- notebooks/wine_iris.ipynb | 18 +++++++++--------- odte/Odte.py | 10 +++++----- odte/tests/Odte_tests.py | 20 +++++++++++++++----- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c81862b..97b54d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: black language_version: python3.8 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 + rev: 3.8.3 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy @@ -17,6 +17,11 @@ repos: hooks: - id: unittest name: unittest - entry: python -m unittest discover + entry: python -m coverage run -m unittest discover + language: system + pass_filenames: false + - id: coverage + name: coverage + entry: python -m coverage report -m --fail-under=100 language: system pass_filenames: false \ No newline at end of file diff --git a/notebooks/wine_iris.ipynb b/notebooks/wine_iris.ipynb index 7736895..53157eb 100644 --- a/notebooks/wine_iris.ipynb +++ b/notebooks/wine_iris.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -40,14 +40,14 @@ "clf = {}\n", "clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n", "clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n", - "clf[\"odte\"] = Odte(base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n", + "clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n", "clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n", "clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": { "tags": [] }, @@ -55,7 +55,7 @@ { "output_type": "stream", "name": "stdout", - "text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.19 seconds\nTraining odte...\nScore: 100.000 in 3.43 seconds\nTraining adaboost...\nScore: 94.444 in 0.76 seconds\nTraining bagging...\nScore: 100.000 in 3.27 seconds\n" + "text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.18 seconds\nTraining odte...\nScore: 100.000 in 1.33 seconds\nTraining adaboost...\nScore: 94.444 in 0.62 seconds\nTraining bagging...\nScore: 100.000 in 2.88 seconds\n" } ], "source": [ @@ -87,7 +87,7 @@ "n_estimators = 10\n", "clf = {}\n", "clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n", - "clf[\"odte\"] = Odte(random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n", + "clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n", "clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n", "clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)" ] @@ -102,7 +102,7 @@ { "output_type": "stream", "name": "stdout", - "text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.15 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 96.667 in 0.13 seconds\n" + "text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.12 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 100.000 in 0.13 seconds\n" } ], "source": [ @@ -124,7 +124,7 @@ { "output_type": "stream", "name": "stdout", - "text": "{'fit_time': array([0.23599219, 0.22772503, 0.21689606, 0.20017815, 0.22257805]), 'score_time': array([0.01378369, 0.01322389, 0.0125649 , 0.01751685, 0.01062703]), 'test_score': array([1. , 1. , 1. , 0.93333333, 1. ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975 ])}\n98.667 +- 0.027\n" + "text": "{'fit_time': array([0.22121811, 0.21985221, 0.19185114, 0.19187999, 0.20067477]), 'score_time': array([0.01268458, 0.01461887, 0.01160598, 0.01308703, 0.01070738]), 'test_score': array([1. , 1. , 1. , 0.93333333, 1. ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975 ])}\n98.667 +- 0.027\n" } ], "source": [ @@ -143,7 +143,7 @@ { "output_type": "stream", "name": "stdout", - "text": "{'fit_time': array([0.02912688, 0.05858397, 0.06724691, 0.02860498, 0.03802919]), 'score_time': array([0.0024271 , 0.0022819 , 0.00219584, 0.00195408, 0.00342584]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n" + "text": "{'fit_time': array([0.02130818, 0.02036786, 0.02829814, 0.02326989, 0.03807497]), 'score_time': array([0.00140715, 0.00173712, 0.00199389, 0.00132608, 0.00199199]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n" } ], "source": [ diff --git a/odte/Odte.py b/odte/Odte.py index 65c2183..0610d63 100644 --- a/odte/Odte.py +++ b/odte/Odte.py @@ -25,6 +25,7 @@ from stree import Stree class Odte(BaseEnsemble, ClassifierMixin): # type: ignore def __init__( self, + # n_jobs = -1 to use all available cores n_jobs: int = 1, base_estimator: BaseEstimator = None, random_state: int = 0, @@ -38,7 +39,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore else base_estimator ) super().__init__( - base_estimator=base_estimator, n_estimators=n_estimators, + base_estimator=base_estimator, + n_estimators=n_estimators, ) self.n_jobs = n_jobs self.n_estimators = n_estimators @@ -48,7 +50,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore def _initialize_random(self) -> np.random.mtrand.RandomState: if self.random_state is None: - self.random_state = random.randint(0, sys.maxint) + self.random_state = random.randint(0, sys.maxsize) return np.random.mtrand._rand return np.random.RandomState(self.random_state) @@ -84,9 +86,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore self.estimators_: List[BaseEstimator] = [] self.subspaces_: List[Tuple[int, ...]] = [] result = self._train(X, y, sample_weight) - self.estimators_, self.subspaces_ = tuple( # type: ignore - zip(*result) - ) + self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore return self @staticmethod diff --git a/odte/tests/Odte_tests.py b/odte/tests/Odte_tests.py index 628eb16..b02e8a1 100644 --- a/odte/tests/Odte_tests.py +++ b/odte/tests/Odte_tests.py @@ -57,7 +57,7 @@ class Odte_test(unittest.TestCase): computed = box.randint(0, 1000, 3) self.assertListEqual(expected, computed.tolist()) # test None - tclf = Odte() + tclf = Odte(random_state=None) box = tclf._initialize_random() computed = box.randint(101, 1000, 3) for value in computed.tolist(): @@ -83,7 +83,7 @@ class Odte_test(unittest.TestCase): warnings.filterwarnings("ignore", category=ConvergenceWarning) warnings.filterwarnings("ignore", category=RuntimeWarning) X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2] - expected = [0, 1, 1, 1] + expected = [0, 1, 1, 2] tclf = Odte( random_state=self._random_state, n_estimators=10, n_jobs=-1 ) @@ -102,9 +102,15 @@ class Odte_test(unittest.TestCase): X, y = load_dataset(self._random_state) expected = y tclf = Odte( - random_state=self._random_state, max_features=1.0, max_samples=0.1, + random_state=self._random_state, + max_features=1.0, + max_samples=0.1, + ) + tclf.set_params( + **dict( + base_estimator__kernel="linear", + ) ) - tclf.set_params(**dict(base_estimator__kernel="linear",)) computed = tclf.fit(X, y).predict(X) self.assertListEqual(expected[:27].tolist(), computed[:27].tolist()) @@ -134,7 +140,11 @@ class Odte_test(unittest.TestCase): max_features=max_features, n_estimators=10, ) - tclf.set_params(**dict(base_estimator__splitter=splitter,)) + tclf.set_params( + **dict( + base_estimator__splitter=splitter, + ) + ) expected = results.pop(0) computed = tclf.fit(X, y).score(X, y) self.assertAlmostEqual(expected, computed)