From e55c3335c27e4a46f68ad64250b6add473965641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Mon, 22 Nov 2021 14:51:27 +0100 Subject: [PATCH] Add mypy to pre-commit Fix some lack of stubs issues --- .pre-commit-config.yaml | 11 +++++----- Makefile | 6 +++--- odte/Odte.py | 47 +++++++++++++++++++++-------------------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6398008..e829189 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,11 +10,12 @@ repos: hooks: - id: flake8 exclude: ".virtual_documents" - # - repo: https://github.com/pre-commit/mirrors-mypy - # rev: "v0.790" # Use the sha / tag you want to point at - # hooks: - # - id: mypy - # # args: [--strict, --ignore-missing-imports] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v0.790" # Use the sha / tag you want to point at + hooks: + - id: mypy + #args: [--strict, --ignore-missing-imports] + exclude: odte/tests - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.4.0 hooks: diff --git a/Makefile b/Makefile index 72646ac..d30017d 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,9 @@ deps: ## Install dependencies pip install -r requirements.txt lint: ## Lint and static-check - black stree - flake8 stree - mypy stree + black odte + flake8 odte + mypy odte --exclude tests push: ## Push code with tags git push && git push --tags diff --git a/odte/Odte.py b/odte/Odte.py index f071707..890f649 100644 --- a/odte/Odte.py +++ b/odte/Odte.py @@ -2,27 +2,28 @@ __author__ = "Ricardo Montañana Gómez" __copyright__ = "Copyright 2020, Ricardo Montañana Gómez" __license__ = "MIT" -__version__ = "0.1" Build a forest of oblique trees based on STree """ from __future__ import annotations import random import sys from math import factorial -from typing import Union, Optional, Tuple, List +from typing import Union, Optional, Tuple, List, Set import numpy as np -from sklearn.utils.multiclass import check_classification_targets -from sklearn.base import clone, BaseEstimator, ClassifierMixin -from sklearn.ensemble import BaseEnsemble -from sklearn.utils.validation import ( +from sklearn.utils.multiclass import ( # type: ignore + check_classification_targets, +) +from sklearn.base import clone, BaseEstimator, ClassifierMixin # type: ignore +from sklearn.ensemble import BaseEnsemble # type: ignore +from sklearn.utils.validation import ( # type: ignore check_is_fitted, _check_sample_weight, ) -from joblib import Parallel, delayed -from stree import Stree +from joblib import Parallel, delayed # type: ignore +from stree import Stree # type: ignore -class Odte(BaseEnsemble, ClassifierMixin): # type: ignore +class Odte(BaseEnsemble, ClassifierMixin): def __init__( self, # n_jobs = -1 to use all available cores @@ -57,7 +58,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore ) def fit( - self, X: np.array, y: np.array, sample_weight: np.array = None + self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None ) -> Odte: # Check parameters are Ok. if self.n_estimators < 3: @@ -67,8 +68,8 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore ) check_classification_targets(y) X, y = self._validate_data(X, y) - # if weights is None return np.ones - sample_weight = _check_sample_weight( + # if sample_weight is None return np.ones + sample_weights = _check_sample_weight( sample_weight, X, dtype=np.float64 ) check_classification_targets(y) @@ -81,7 +82,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore self.n_classes_: int = self.classes_.shape[0] self.estimators_: List[BaseEstimator] = [] self.subspaces_: List[Tuple[int, ...]] = [] - result = self._train(X, y, sample_weight) + result = self._train(X, y, sample_weights) self.estimators_, self.subspaces_ = tuple(zip(*result)) # type: ignore self._compute_metrics() return self @@ -101,9 +102,9 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore @staticmethod def _parallel_build_tree( base_estimator_: Stree, - X: np.array, - y: np.array, - weights: np.array, + X: np.ndarray, + y: np.ndarray, + weights: np.ndarray, random_box: np.random.mtrand.RandomState, random_seed: int, boot_samples: int, @@ -125,7 +126,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore return (clf, features) def _train( - self, X: np.array, y: np.array, weights: np.array + self, X: np.ndarray, y: np.ndarray, weights: np.ndarray ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]: random_box = self._initialize_random() n_samples = X.shape[0] @@ -200,7 +201,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore @staticmethod def _generate_spaces(features: int, max_features: int) -> list: - comb = set() + comb: Set[Tuple[int, ...]] = set() # Generate at most 5 combinations if max_features == features: set_length = 1 @@ -208,7 +209,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore number = factorial(features) / ( factorial(max_features) * factorial(features - max_features) ) - set_length = min(5, number) + set_length = min(5, int(number)) while len(comb) < set_length: comb.add( tuple(sorted(random.sample(range(features), max_features))) @@ -217,7 +218,7 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore @staticmethod def _get_random_subspace( - dataset: np.array, labels: np.array, max_features: int + dataset: np.ndarray, labels: np.ndarray, max_features: int ) -> Tuple[int, ...]: features_sets = Odte._generate_spaces(dataset.shape[1], max_features) if len(features_sets) > 1: @@ -226,11 +227,11 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore else: return features_sets[0] - def predict(self, X: np.array) -> np.array: + def predict(self, X: np.ndarray) -> np.ndarray: proba = self.predict_proba(X) return self.classes_[np.argmax(proba, axis=1)] - def predict_proba(self, X: np.array) -> np.array: + def predict_proba(self, X: np.ndarray) -> np.ndarray: check_is_fitted(self, "estimators_") # Input validation X = self._validate_data(X, reset=False) @@ -242,6 +243,6 @@ class Odte(BaseEnsemble, ClassifierMixin): # type: ignore result[i, predictions[i]] += 1 return result / self.n_estimators - def nodes_leaves(self) -> list(float, float): + def nodes_leaves(self) -> Tuple[float, float]: check_is_fitted(self, "estimators_") return self.nodes_, self.leaves_