Add mypy to pre-commit

Fix some issues caused by missing type stubs
This commit is contained in:
Ricardo Montañana Gómez 2021-11-22 14:51:27 +01:00
parent 3a06c9d1cc
commit e55c3335c2
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
3 changed files with 33 additions and 31 deletions

View File

@ -10,11 +10,12 @@ repos:
hooks: hooks:
- id: flake8 - id: flake8
exclude: ".virtual_documents" exclude: ".virtual_documents"
# - repo: https://github.com/pre-commit/mirrors-mypy - repo: https://github.com/pre-commit/mirrors-mypy
# rev: "v0.790" # Use the sha / tag you want to point at rev: "v0.790" # Use the sha / tag you want to point at
# hooks: hooks:
# - id: mypy - id: mypy
# # args: [--strict, --ignore-missing-imports] #args: [--strict, --ignore-missing-imports]
exclude: odte/tests
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0 rev: v3.4.0
hooks: hooks:

View File

@ -11,9 +11,9 @@ deps: ## Install dependencies
pip install -r requirements.txt pip install -r requirements.txt
lint: ## Lint and static-check lint: ## Lint and static-check
black stree black odte
flake8 stree flake8 odte
mypy stree mypy odte --exclude tests
push: ## Push code with tags push: ## Push code with tags
git push && git push --tags git push && git push --tags

View File

@ -2,27 +2,28 @@
__author__ = "Ricardo Montañana Gómez" __author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez" __copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT" __license__ = "MIT"
__version__ = "0.1"
Build a forest of oblique trees based on STree Build a forest of oblique trees based on STree
""" """
from __future__ import annotations from __future__ import annotations
import random import random
import sys import sys
from math import factorial from math import factorial
from typing import Union, Optional, Tuple, List from typing import Union, Optional, Tuple, List, Set
import numpy as np import numpy as np
from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.multiclass import ( # type: ignore
from sklearn.base import clone, BaseEstimator, ClassifierMixin check_classification_targets,
from sklearn.ensemble import BaseEnsemble )
from sklearn.utils.validation import ( from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore
from sklearn.ensemble import BaseEnsemble # type: ignore
from sklearn.utils.validation import ( # type: ignore
check_is_fitted, check_is_fitted,
_check_sample_weight, _check_sample_weight,
) )
from joblib import Parallel, delayed from joblib import Parallel, delayed # type: ignore
from stree import Stree from stree import Stree # type: ignore
class Odte(BaseEnsemble, ClassifierMixin): # type: ignore class Odte(BaseEnsemble, ClassifierMixin):
def __init__( def __init__(
self, self,
# n_jobs = -1 to use all available cores # n_jobs = -1 to use all available cores
@ -57,7 +58,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
) )
def fit( def fit(
self, X: np.array, y: np.array, sample_weight: np.array = None self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
) -> Odte: ) -> Odte:
# Check parameters are Ok. # Check parameters are Ok.
if self.n_estimators < 3: if self.n_estimators < 3:
@ -67,8 +68,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
) )
check_classification_targets(y) check_classification_targets(y)
X, y = self._validate_data(X, y) X, y = self._validate_data(X, y)
# if weights is None return np.ones # if sample_weight is None return np.ones
sample_weight = _check_sample_weight( sample_weights = _check_sample_weight(
sample_weight, X, dtype=np.float64 sample_weight, X, dtype=np.float64
) )
check_classification_targets(y) check_classification_targets(y)
@ -81,7 +82,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
self.n_classes_: int = self.classes_.shape[0] self.n_classes_: int = self.classes_.shape[0]
self.estimators_: List[BaseEstimator] = [] self.estimators_: List[BaseEstimator] = []
self.subspaces_: List[Tuple[int, ...]] = [] self.subspaces_: List[Tuple[int, ...]] = []
result = self._train(X, y, sample_weight) result = self._train(X, y, sample_weights)
self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore
self._compute_metrics() self._compute_metrics()
return self return self
@ -101,9 +102,9 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod @staticmethod
def _parallel_build_tree( def _parallel_build_tree(
base_estimator_: Stree, base_estimator_: Stree,
X: np.array, X: np.ndarray,
y: np.array, y: np.ndarray,
weights: np.array, weights: np.ndarray,
random_box: np.random.mtrand.RandomState, random_box: np.random.mtrand.RandomState,
random_seed: int, random_seed: int,
boot_samples: int, boot_samples: int,
@ -125,7 +126,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
return (clf, features) return (clf, features)
def _train( def _train(
self, X: np.array, y: np.array, weights: np.array self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]: ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
random_box = self._initialize_random() random_box = self._initialize_random()
n_samples = X.shape[0] n_samples = X.shape[0]
@ -200,7 +201,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod @staticmethod
def _generate_spaces(features: int, max_features: int) -> list: def _generate_spaces(features: int, max_features: int) -> list:
comb = set() comb: Set[Tuple[int, ...]] = set()
# Generate at most 5 combinations # Generate at most 5 combinations
if max_features == features: if max_features == features:
set_length = 1 set_length = 1
@ -208,7 +209,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
number = factorial(features) / ( number = factorial(features) / (
factorial(max_features) * factorial(features - max_features) factorial(max_features) * factorial(features - max_features)
) )
set_length = min(5, number) set_length = min(5, int(number))
while len(comb) < set_length: while len(comb) < set_length:
comb.add( comb.add(
tuple(sorted(random.sample(range(features), max_features))) tuple(sorted(random.sample(range(features), max_features)))
@ -217,7 +218,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod @staticmethod
def _get_random_subspace( def _get_random_subspace(
dataset: np.array, labels: np.array, max_features: int dataset: np.ndarray, labels: np.ndarray, max_features: int
) -> Tuple[int, ...]: ) -> Tuple[int, ...]:
features_sets = Odte._generate_spaces(dataset.shape[1], max_features) features_sets = Odte._generate_spaces(dataset.shape[1], max_features)
if len(features_sets) > 1: if len(features_sets) > 1:
@ -226,11 +227,11 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
else: else:
return features_sets[0] return features_sets[0]
def predict(self, X: np.array) -> np.array: def predict(self, X: np.ndarray) -> np.ndarray:
proba = self.predict_proba(X) proba = self.predict_proba(X)
return self.classes_[np.argmax(proba, axis=1)] return self.classes_[np.argmax(proba, axis=1)]
def predict_proba(self, X: np.array) -> np.array: def predict_proba(self, X: np.ndarray) -> np.ndarray:
check_is_fitted(self, "estimators_") check_is_fitted(self, "estimators_")
# Input validation # Input validation
X = self._validate_data(X, reset=False) X = self._validate_data(X, reset=False)
@ -242,6 +243,6 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
result[i, predictions[i]] += 1 result[i, predictions[i]] += 1
return result / self.n_estimators return result / self.n_estimators
def nodes_leaves(self) -> list(float, float): def nodes_leaves(self) -> Tuple[float, float]:
check_is_fitted(self, "estimators_") check_is_fitted(self, "estimators_")
return self.nodes_, self.leaves_ return self.nodes_, self.leaves_