mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 16:22:00 +00:00
use all available cores in fit
adapt pre-commit to python 3.8
This commit is contained in:
parent
8f7cbc9091
commit
7a49d672df
@ -5,7 +5,7 @@ repos:
|
|||||||
- id: black
|
- id: black
|
||||||
language_version: python3.8
|
language_version: python3.8
|
||||||
- repo: https://gitlab.com/pycqa/flake8
|
- repo: https://gitlab.com/pycqa/flake8
|
||||||
rev: 3.7.9
|
rev: 3.8.3
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
@ -17,6 +17,11 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: unittest
|
- id: unittest
|
||||||
name: unittest
|
name: unittest
|
||||||
entry: python -m unittest discover
|
entry: python -m coverage run -m unittest discover
|
||||||
|
language: system
|
||||||
|
pass_filenames: false
|
||||||
|
- id: coverage
|
||||||
|
name: coverage
|
||||||
|
entry: python -m coverage report -m --fail-under=100
|
||||||
language: system
|
language: system
|
||||||
pass_filenames: false
|
pass_filenames: false
|
@ -21,7 +21,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -32,7 +32,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -40,14 +40,14 @@
|
|||||||
"clf = {}\n",
|
"clf = {}\n",
|
||||||
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
|
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
|
||||||
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
|
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
|
||||||
"clf[\"odte\"] = Odte(base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
|
"clf[\"odte\"] = Odte(n_jobs=-1, base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
|
||||||
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||||
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 14,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
@ -55,7 +55,7 @@
|
|||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.19 seconds\nTraining odte...\nScore: 100.000 in 3.43 seconds\nTraining adaboost...\nScore: 94.444 in 0.76 seconds\nTraining bagging...\nScore: 100.000 in 3.27 seconds\n"
|
"text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.18 seconds\nTraining odte...\nScore: 100.000 in 1.33 seconds\nTraining adaboost...\nScore: 94.444 in 0.62 seconds\nTraining bagging...\nScore: 100.000 in 2.88 seconds\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@ -87,7 +87,7 @@
|
|||||||
"n_estimators = 10\n",
|
"n_estimators = 10\n",
|
||||||
"clf = {}\n",
|
"clf = {}\n",
|
||||||
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
|
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
|
||||||
"clf[\"odte\"] = Odte(random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
|
"clf[\"odte\"] = Odte(n_jobs=-1, random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
|
||||||
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||||
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||||
]
|
]
|
||||||
@ -102,7 +102,7 @@
|
|||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.15 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 96.667 in 0.13 seconds\n"
|
"text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.12 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 100.000 in 0.13 seconds\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@ -124,7 +124,7 @@
|
|||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": "{'fit_time': array([0.23599219, 0.22772503, 0.21689606, 0.20017815, 0.22257805]), 'score_time': array([0.01378369, 0.01322389, 0.0125649 , 0.01751685, 0.01062703]), 'test_score': array([1. , 1. , 1. , 0.93333333, 1. ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975 ])}\n98.667 +- 0.027\n"
|
"text": "{'fit_time': array([0.22121811, 0.21985221, 0.19185114, 0.19187999, 0.20067477]), 'score_time': array([0.01268458, 0.01461887, 0.01160598, 0.01308703, 0.01070738]), 'test_score': array([1. , 1. , 1. , 0.93333333, 1. ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975 ])}\n98.667 +- 0.027\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
@ -143,7 +143,7 @@
|
|||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"text": "{'fit_time': array([0.02912688, 0.05858397, 0.06724691, 0.02860498, 0.03802919]), 'score_time': array([0.0024271 , 0.0022819 , 0.00219584, 0.00195408, 0.00342584]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n"
|
"text": "{'fit_time': array([0.02130818, 0.02036786, 0.02829814, 0.02326989, 0.03807497]), 'score_time': array([0.00140715, 0.00173712, 0.00199389, 0.00132608, 0.00199199]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
10
odte/Odte.py
10
odte/Odte.py
@ -25,6 +25,7 @@ from stree import Stree
|
|||||||
class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
# n_jobs = -1 to use all available cores
|
||||||
n_jobs: int = 1,
|
n_jobs: int = 1,
|
||||||
base_estimator: BaseEstimator = None,
|
base_estimator: BaseEstimator = None,
|
||||||
random_state: int = 0,
|
random_state: int = 0,
|
||||||
@ -38,7 +39,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
else base_estimator
|
else base_estimator
|
||||||
)
|
)
|
||||||
super().__init__(
|
super().__init__(
|
||||||
base_estimator=base_estimator, n_estimators=n_estimators,
|
base_estimator=base_estimator,
|
||||||
|
n_estimators=n_estimators,
|
||||||
)
|
)
|
||||||
self.n_jobs = n_jobs
|
self.n_jobs = n_jobs
|
||||||
self.n_estimators = n_estimators
|
self.n_estimators = n_estimators
|
||||||
@ -48,7 +50,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
|
|
||||||
def _initialize_random(self) -> np.random.mtrand.RandomState:
|
def _initialize_random(self) -> np.random.mtrand.RandomState:
|
||||||
if self.random_state is None:
|
if self.random_state is None:
|
||||||
self.random_state = random.randint(0, sys.maxint)
|
self.random_state = random.randint(0, sys.maxsize)
|
||||||
return np.random.mtrand._rand
|
return np.random.mtrand._rand
|
||||||
return np.random.RandomState(self.random_state)
|
return np.random.RandomState(self.random_state)
|
||||||
|
|
||||||
@ -84,9 +86,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
self.estimators_: List[BaseEstimator] = []
|
self.estimators_: List[BaseEstimator] = []
|
||||||
self.subspaces_: List[Tuple[int, ...]] = []
|
self.subspaces_: List[Tuple[int, ...]] = []
|
||||||
result = self._train(X, y, sample_weight)
|
result = self._train(X, y, sample_weight)
|
||||||
self.estimators_, self.subspaces_ = tuple( # type: ignore
|
self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore
|
||||||
zip(*result)
|
|
||||||
)
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -57,7 +57,7 @@ class Odte_test(unittest.TestCase):
|
|||||||
computed = box.randint(0, 1000, 3)
|
computed = box.randint(0, 1000, 3)
|
||||||
self.assertListEqual(expected, computed.tolist())
|
self.assertListEqual(expected, computed.tolist())
|
||||||
# test None
|
# test None
|
||||||
tclf = Odte()
|
tclf = Odte(random_state=None)
|
||||||
box = tclf._initialize_random()
|
box = tclf._initialize_random()
|
||||||
computed = box.randint(101, 1000, 3)
|
computed = box.randint(101, 1000, 3)
|
||||||
for value in computed.tolist():
|
for value in computed.tolist():
|
||||||
@ -83,7 +83,7 @@ class Odte_test(unittest.TestCase):
|
|||||||
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
||||||
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
||||||
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
|
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
|
||||||
expected = [0, 1, 1, 1]
|
expected = [0, 1, 1, 2]
|
||||||
tclf = Odte(
|
tclf = Odte(
|
||||||
random_state=self._random_state, n_estimators=10, n_jobs=-1
|
random_state=self._random_state, n_estimators=10, n_jobs=-1
|
||||||
)
|
)
|
||||||
@ -102,9 +102,15 @@ class Odte_test(unittest.TestCase):
|
|||||||
X, y = load_dataset(self._random_state)
|
X, y = load_dataset(self._random_state)
|
||||||
expected = y
|
expected = y
|
||||||
tclf = Odte(
|
tclf = Odte(
|
||||||
random_state=self._random_state, max_features=1.0, max_samples=0.1,
|
random_state=self._random_state,
|
||||||
|
max_features=1.0,
|
||||||
|
max_samples=0.1,
|
||||||
|
)
|
||||||
|
tclf.set_params(
|
||||||
|
**dict(
|
||||||
|
base_estimator__kernel="linear",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
tclf.set_params(**dict(base_estimator__kernel="linear",))
|
|
||||||
computed = tclf.fit(X, y).predict(X)
|
computed = tclf.fit(X, y).predict(X)
|
||||||
self.assertListEqual(expected[:27].tolist(), computed[:27].tolist())
|
self.assertListEqual(expected[:27].tolist(), computed[:27].tolist())
|
||||||
|
|
||||||
@ -134,7 +140,11 @@ class Odte_test(unittest.TestCase):
|
|||||||
max_features=max_features,
|
max_features=max_features,
|
||||||
n_estimators=10,
|
n_estimators=10,
|
||||||
)
|
)
|
||||||
tclf.set_params(**dict(base_estimator__splitter=splitter,))
|
tclf.set_params(
|
||||||
|
**dict(
|
||||||
|
base_estimator__splitter=splitter,
|
||||||
|
)
|
||||||
|
)
|
||||||
expected = results.pop(0)
|
expected = results.pop(0)
|
||||||
computed = tclf.fit(X, y).score(X, y)
|
computed = tclf.fit(X, y).score(X, y)
|
||||||
self.assertAlmostEqual(expected, computed)
|
self.assertAlmostEqual(expected, computed)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user