mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 00:02:30 +00:00
Add mypy to pre-commit
Fix some missing type-stub issues reported by mypy
This commit is contained in:
parent
3a06c9d1cc
commit
e55c3335c2
@ -10,11 +10,12 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
exclude: ".virtual_documents"
|
exclude: ".virtual_documents"
|
||||||
# - repo: https://github.com/pre-commit/mirrors-mypy
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
# rev: "v0.790" # Use the sha / tag you want to point at
|
rev: "v0.790" # Use the sha / tag you want to point at
|
||||||
# hooks:
|
hooks:
|
||||||
# - id: mypy
|
- id: mypy
|
||||||
# # args: [--strict, --ignore-missing-imports]
|
#args: [--strict, --ignore-missing-imports]
|
||||||
|
exclude: odte/tests
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v3.4.0
|
rev: v3.4.0
|
||||||
hooks:
|
hooks:
|
||||||
|
6
Makefile
6
Makefile
@ -11,9 +11,9 @@ deps: ## Install dependencies
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
lint: ## Lint and static-check
|
lint: ## Lint and static-check
|
||||||
black stree
|
black odte
|
||||||
flake8 stree
|
flake8 odte
|
||||||
mypy stree
|
mypy odte --exclude tests
|
||||||
|
|
||||||
push: ## Push code with tags
|
push: ## Push code with tags
|
||||||
git push && git push --tags
|
git push && git push --tags
|
||||||
|
47
odte/Odte.py
47
odte/Odte.py
@ -2,27 +2,28 @@
|
|||||||
__author__ = "Ricardo Montañana Gómez"
|
__author__ = "Ricardo Montañana Gómez"
|
||||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||||
__license__ = "MIT"
|
__license__ = "MIT"
|
||||||
__version__ = "0.1"
|
|
||||||
Build a forest of oblique trees based on STree
|
Build a forest of oblique trees based on STree
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
from math import factorial
|
from math import factorial
|
||||||
from typing import Union, Optional, Tuple, List
|
from typing import Union, Optional, Tuple, List, Set
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.utils.multiclass import check_classification_targets
|
from sklearn.utils.multiclass import ( # type: ignore
|
||||||
from sklearn.base import clone, BaseEstimator, ClassifierMixin
|
check_classification_targets,
|
||||||
from sklearn.ensemble import BaseEnsemble
|
)
|
||||||
from sklearn.utils.validation import (
|
from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore
|
||||||
|
from sklearn.ensemble import BaseEnsemble # type: ignore
|
||||||
|
from sklearn.utils.validation import ( # type: ignore
|
||||||
check_is_fitted,
|
check_is_fitted,
|
||||||
_check_sample_weight,
|
_check_sample_weight,
|
||||||
)
|
)
|
||||||
from joblib import Parallel, delayed
|
from joblib import Parallel, delayed # type: ignore
|
||||||
from stree import Stree
|
from stree import Stree # type: ignore
|
||||||
|
|
||||||
|
|
||||||
class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
class Odte(BaseEnsemble, ClassifierMixin):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
# n_jobs = -1 to use all available cores
|
# n_jobs = -1 to use all available cores
|
||||||
@ -57,7 +58,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
)
|
)
|
||||||
|
|
||||||
def fit(
|
def fit(
|
||||||
self, X: np.array, y: np.array, sample_weight: np.array = None
|
self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
|
||||||
) -> Odte:
|
) -> Odte:
|
||||||
# Check parameters are Ok.
|
# Check parameters are Ok.
|
||||||
if self.n_estimators < 3:
|
if self.n_estimators < 3:
|
||||||
@ -67,8 +68,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
)
|
)
|
||||||
check_classification_targets(y)
|
check_classification_targets(y)
|
||||||
X, y = self._validate_data(X, y)
|
X, y = self._validate_data(X, y)
|
||||||
# if weights is None return np.ones
|
# if sample_weight is None return np.ones
|
||||||
sample_weight = _check_sample_weight(
|
sample_weights = _check_sample_weight(
|
||||||
sample_weight, X, dtype=np.float64
|
sample_weight, X, dtype=np.float64
|
||||||
)
|
)
|
||||||
check_classification_targets(y)
|
check_classification_targets(y)
|
||||||
@ -81,7 +82,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
self.n_classes_: int = self.classes_.shape[0]
|
self.n_classes_: int = self.classes_.shape[0]
|
||||||
self.estimators_: List[BaseEstimator] = []
|
self.estimators_: List[BaseEstimator] = []
|
||||||
self.subspaces_: List[Tuple[int, ...]] = []
|
self.subspaces_: List[Tuple[int, ...]] = []
|
||||||
result = self._train(X, y, sample_weight)
|
result = self._train(X, y, sample_weights)
|
||||||
self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore
|
self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore
|
||||||
self._compute_metrics()
|
self._compute_metrics()
|
||||||
return self
|
return self
|
||||||
@ -101,9 +102,9 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _parallel_build_tree(
|
def _parallel_build_tree(
|
||||||
base_estimator_: Stree,
|
base_estimator_: Stree,
|
||||||
X: np.array,
|
X: np.ndarray,
|
||||||
y: np.array,
|
y: np.ndarray,
|
||||||
weights: np.array,
|
weights: np.ndarray,
|
||||||
random_box: np.random.mtrand.RandomState,
|
random_box: np.random.mtrand.RandomState,
|
||||||
random_seed: int,
|
random_seed: int,
|
||||||
boot_samples: int,
|
boot_samples: int,
|
||||||
@ -125,7 +126,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
return (clf, features)
|
return (clf, features)
|
||||||
|
|
||||||
def _train(
|
def _train(
|
||||||
self, X: np.array, y: np.array, weights: np.array
|
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
|
||||||
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
|
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
|
||||||
random_box = self._initialize_random()
|
random_box = self._initialize_random()
|
||||||
n_samples = X.shape[0]
|
n_samples = X.shape[0]
|
||||||
@ -200,7 +201,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _generate_spaces(features: int, max_features: int) -> list:
|
def _generate_spaces(features: int, max_features: int) -> list:
|
||||||
comb = set()
|
comb: Set[Tuple[int, ...]] = set()
|
||||||
# Generate at most 5 combinations
|
# Generate at most 5 combinations
|
||||||
if max_features == features:
|
if max_features == features:
|
||||||
set_length = 1
|
set_length = 1
|
||||||
@ -208,7 +209,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
number = factorial(features) / (
|
number = factorial(features) / (
|
||||||
factorial(max_features) * factorial(features - max_features)
|
factorial(max_features) * factorial(features - max_features)
|
||||||
)
|
)
|
||||||
set_length = min(5, number)
|
set_length = min(5, int(number))
|
||||||
while len(comb) < set_length:
|
while len(comb) < set_length:
|
||||||
comb.add(
|
comb.add(
|
||||||
tuple(sorted(random.sample(range(features), max_features)))
|
tuple(sorted(random.sample(range(features), max_features)))
|
||||||
@ -217,7 +218,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_random_subspace(
|
def _get_random_subspace(
|
||||||
dataset: np.array, labels: np.array, max_features: int
|
dataset: np.ndarray, labels: np.ndarray, max_features: int
|
||||||
) -> Tuple[int, ...]:
|
) -> Tuple[int, ...]:
|
||||||
features_sets = Odte._generate_spaces(dataset.shape[1], max_features)
|
features_sets = Odte._generate_spaces(dataset.shape[1], max_features)
|
||||||
if len(features_sets) > 1:
|
if len(features_sets) > 1:
|
||||||
@ -226,11 +227,11 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
else:
|
else:
|
||||||
return features_sets[0]
|
return features_sets[0]
|
||||||
|
|
||||||
def predict(self, X: np.array) -> np.array:
|
def predict(self, X: np.ndarray) -> np.ndarray:
|
||||||
proba = self.predict_proba(X)
|
proba = self.predict_proba(X)
|
||||||
return self.classes_[np.argmax(proba, axis=1)]
|
return self.classes_[np.argmax(proba, axis=1)]
|
||||||
|
|
||||||
def predict_proba(self, X: np.array) -> np.array:
|
def predict_proba(self, X: np.ndarray) -> np.ndarray:
|
||||||
check_is_fitted(self, "estimators_")
|
check_is_fitted(self, "estimators_")
|
||||||
# Input validation
|
# Input validation
|
||||||
X = self._validate_data(X, reset=False)
|
X = self._validate_data(X, reset=False)
|
||||||
@ -242,6 +243,6 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
|
|||||||
result[i, predictions[i]] += 1
|
result[i, predictions[i]] += 1
|
||||||
return result / self.n_estimators
|
return result / self.n_estimators
|
||||||
|
|
||||||
def nodes_leaves(self) -> list(float, float):
|
def nodes_leaves(self) -> Tuple[float, float]:
|
||||||
check_is_fitted(self, "estimators_")
|
check_is_fitted(self, "estimators_")
|
||||||
return self.nodes_, self.leaves_
|
return self.nodes_, self.leaves_
|
||||||
|
Loading…
x
Reference in New Issue
Block a user