Add mypy to pre-commit

Fix some mypy errors caused by missing type stubs
This commit is contained in:
Ricardo Montañana Gómez 2021-11-22 14:51:27 +01:00
parent 3a06c9d1cc
commit e55c3335c2
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
3 changed files with 33 additions and 31 deletions

View File

@ -10,11 +10,12 @@ repos:
hooks:
- id: flake8
exclude: ".virtual_documents"
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: "v0.790" # Use the sha / tag you want to point at
# hooks:
# - id: mypy
# # args: [--strict, --ignore-missing-imports]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v0.790" # Use the sha / tag you want to point at
hooks:
- id: mypy
#args: [--strict, --ignore-missing-imports]
exclude: odte/tests
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
hooks:

View File

@ -11,9 +11,9 @@ deps: ## Install dependencies
pip install -r requirements.txt
lint: ## Lint and static-check
black stree
flake8 stree
mypy stree
black odte
flake8 odte
mypy odte --exclude tests
push: ## Push code with tags
git push && git push --tags

View File

@ -2,27 +2,28 @@
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.1"
Build a forest of oblique trees based on STree
"""
from __future__ import annotations
import random
import sys
from math import factorial
from typing import Union, Optional, Tuple, List
from typing import Union, Optional, Tuple, List, Set
import numpy as np
from sklearn.utils.multiclass import check_classification_targets
from sklearn.base import clone, BaseEstimator, ClassifierMixin
from sklearn.ensemble import BaseEnsemble
from sklearn.utils.validation import (
from sklearn.utils.multiclass import ( # type: ignore
check_classification_targets,
)
from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore
from sklearn.ensemble import BaseEnsemble # type: ignore
from sklearn.utils.validation import ( # type: ignore
check_is_fitted,
_check_sample_weight,
)
from joblib import Parallel, delayed
from stree import Stree
from joblib import Parallel, delayed # type: ignore
from stree import Stree # type: ignore
class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
class Odte(BaseEnsemble, ClassifierMixin):
def __init__(
self,
# n_jobs = -1 to use all available cores
@ -57,7 +58,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
)
def fit(
self, X: np.array, y: np.array, sample_weight: np.array = None
self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
) -> Odte:
# Check parameters are Ok.
if self.n_estimators < 3:
@ -67,8 +68,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
)
check_classification_targets(y)
X, y = self._validate_data(X, y)
# if weights is None return np.ones
sample_weight = _check_sample_weight(
# if sample_weight is None return np.ones
sample_weights = _check_sample_weight(
sample_weight, X, dtype=np.float64
)
check_classification_targets(y)
@ -81,7 +82,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
self.n_classes_: int = self.classes_.shape[0]
self.estimators_: List[BaseEstimator] = []
self.subspaces_: List[Tuple[int, ...]] = []
result = self._train(X, y, sample_weight)
result = self._train(X, y, sample_weights)
self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore
self._compute_metrics()
return self
@ -101,9 +102,9 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod
def _parallel_build_tree(
base_estimator_: Stree,
X: np.array,
y: np.array,
weights: np.array,
X: np.ndarray,
y: np.ndarray,
weights: np.ndarray,
random_box: np.random.mtrand.RandomState,
random_seed: int,
boot_samples: int,
@ -125,7 +126,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
return (clf, features)
def _train(
self, X: np.array, y: np.array, weights: np.array
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
random_box = self._initialize_random()
n_samples = X.shape[0]
@ -200,7 +201,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod
def _generate_spaces(features: int, max_features: int) -> list:
comb = set()
comb: Set[Tuple[int, ...]] = set()
# Generate at most 5 combinations
if max_features == features:
set_length = 1
@ -208,7 +209,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
number = factorial(features) / (
factorial(max_features) * factorial(features - max_features)
)
set_length = min(5, number)
set_length = min(5, int(number))
while len(comb) < set_length:
comb.add(
tuple(sorted(random.sample(range(features), max_features)))
@ -217,7 +218,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
@staticmethod
def _get_random_subspace(
dataset: np.array, labels: np.array, max_features: int
dataset: np.ndarray, labels: np.ndarray, max_features: int
) -> Tuple[int, ...]:
features_sets = Odte._generate_spaces(dataset.shape[1], max_features)
if len(features_sets) > 1:
@ -226,11 +227,11 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
else:
return features_sets[0]
def predict(self, X: np.array) -> np.array:
def predict(self, X: np.ndarray) -> np.ndarray:
proba = self.predict_proba(X)
return self.classes_[np.argmax(proba, axis=1)]
def predict_proba(self, X: np.array) -> np.array:
def predict_proba(self, X: np.ndarray) -> np.ndarray:
check_is_fitted(self, "estimators_")
# Input validation
X = self._validate_data(X, reset=False)
@ -242,6 +243,6 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore
result[i, predictions[i]] += 1
return result / self.n_estimators
def nodes_leaves(self) -> list(float, float):
def nodes_leaves(self) -> Tuple[float, float]:
check_is_fitted(self, "estimators_")
return self.nodes_, self.leaves_