mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 16:22:00 +00:00
First commit
This commit is contained in:
parent
eae2eaf663
commit
872914dca7
14
.coveragerc
Normal file
14
.coveragerc
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[run]
|
||||||
|
branch = True
|
||||||
|
source = odte
|
||||||
|
|
||||||
|
[report]
|
||||||
|
exclude_lines =
|
||||||
|
if self.debug:
|
||||||
|
pragma: no cover
|
||||||
|
raise NotImplementedError
|
||||||
|
if __name__ == .__main__.:
|
||||||
|
ignore_errors = True
|
||||||
|
omit =
|
||||||
|
odte/tests/*
|
||||||
|
odte/__init__.py
|
10
.pre-commit-config.yaml
Normal file
10
.pre-commit-config.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/ambv/black
|
||||||
|
rev: stable
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
language_version: python3.7
|
||||||
|
- repo: https://gitlab.com/pycqa/flake8
|
||||||
|
rev: 3.7.9
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
15
.vscode/launch.json
vendored
Normal file
15
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense para saber los atributos posibles.
|
||||||
|
// Mantenga el puntero para ver las descripciones de los existentes atributos.
|
||||||
|
// Para más información, visite: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Archivo actual",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
18
.vscode/settings.json
vendored
Normal file
18
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"python.testing.unittestArgs": [
|
||||||
|
|
||||||
|
],
|
||||||
|
"python.testing.pytestEnabled": false,
|
||||||
|
"python.testing.nosetestsEnabled": false,
|
||||||
|
"python.testing.unittestEnabled": true,
|
||||||
|
"python.pythonPath": "/Users/rmontanana/.virtualenvs/general/bin/python",
|
||||||
|
"python.linting.flake8Enabled": true,
|
||||||
|
"python.linting.enabled": true,
|
||||||
|
"editor.rulers": [
|
||||||
|
80,
|
||||||
|
100
|
||||||
|
],
|
||||||
|
"python.linting.pylintEnabled": false,
|
||||||
|
"restructuredtext.confPath": "${workspaceFolder}/docs/source"
|
||||||
|
|
||||||
|
}
|
12
codecov.yml
Normal file
12
codecov.yml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
coverage:
|
||||||
|
status:
|
||||||
|
project:
|
||||||
|
default:
|
||||||
|
target: 90%
|
||||||
|
comment:
|
||||||
|
layout: "reach, diff, flags, files"
|
||||||
|
behavior: default
|
||||||
|
require_changes: false
|
||||||
|
require_base: yes
|
||||||
|
require_head: yes
|
||||||
|
branches: null
|
152
odte/Odte.py
Normal file
152
odte/Odte.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
"""
|
||||||
|
__author__ = "Ricardo Montañana Gómez"
|
||||||
|
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||||
|
__license__ = "MIT"
|
||||||
|
__version__ = "0.1"
|
||||||
|
Build a forest of oblique trees based on STree
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from sklearn.utils import check_consistent_length
|
||||||
|
from sklearn.metrics._classification import _weighted_sum, _check_targets
|
||||||
|
from sklearn.utils.multiclass import check_classification_targets
|
||||||
|
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||||
|
from sklearn.utils.validation import (
|
||||||
|
check_X_y,
|
||||||
|
check_array,
|
||||||
|
check_is_fitted,
|
||||||
|
_check_sample_weight,
|
||||||
|
)
|
||||||
|
|
||||||
|
from stree import Stree
|
||||||
|
|
||||||
|
|
||||||
|
class Odte(BaseEstimator, ClassifierMixin):
    """Ensemble of ``Stree`` classifiers trained on bootstrap samples.

    Every estimator is an ``Stree`` configured with the same hyper-parameters
    and fitted on a random sample (drawn with replacement) of the training
    set. Predictions are obtained by majority vote among the estimators.

    Parameters
    ----------
    random_state : int, optional
        Seed used to draw the bootstrap samples; it is also forwarded to
        every ``Stree``. ``None`` uses NumPy's global random state.
    C, max_iter, max_depth, min_samples_split, split_criteria, tol, gamma, \
degree, kernel
        Forwarded unchanged to every ``Stree`` (see ``estimator_params``).
    n_estimators : int, default=100
        Number of trees to build; ``fit`` rejects values below 10.
    bootstrap : bool, default=True
        Stored only.
        NOTE(review): the training loop always samples with replacement and
        never reads this flag -- confirm the intended behaviour.
    max_features : default="auto"
        Stored only.
        NOTE(review): not used by any method of this class yet.
    max_samples : int, float or None, default=None
        Size of each bootstrap sample: ``None`` means as many samples as the
        training set, an int is an absolute count in ``[1, n_samples]`` and a
        float is a fraction in the open interval ``(0, 1)``.

    Attributes
    ----------
    classes_ : np.ndarray
        Sorted unique labels seen in ``fit``.
    n_classes_ : int
        Number of entries in ``classes_``.
    estimators_ : list
        The fitted ``Stree`` instances.
    """

    def __init__(
        self,
        random_state: int = None,
        C: int = 1,
        n_estimators: int = 100,
        max_iter: int = 1000,
        max_depth: int = None,
        min_samples_split: int = 0,
        bootstrap: bool = True,
        split_criteria: str = "min_distance",
        tol: float = 1e-4,
        gamma="scale",
        degree: int = 3,
        kernel: str = "linear",
        max_features="auto",
        max_samples=None,
    ):
        # scikit-learn's get_params()/set_params()/clone() read every
        # constructor argument back as an attribute with the same name, so
        # each one is stored verbatim and nothing is derived here.
        self.random_state = random_state
        self.C = C
        self.n_estimators = n_estimators
        self.max_iter = max_iter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.bootstrap = bootstrap
        self.split_criteria = split_criteria
        self.tol = tol
        self.gamma = gamma
        self.degree = degree
        self.kernel = kernel
        self.max_features = max_features
        self.max_samples = max_samples

    @property
    def estimator_params(self) -> dict:
        """Return the keyword arguments handed to every ``Stree``.

        Built on demand from the current attribute values so that changes
        made through ``set_params`` are honoured by the next ``fit``.
        """
        return dict(
            C=self.C,
            random_state=self.random_state,
            min_samples_split=self.min_samples_split,
            max_depth=self.max_depth,
            split_criteria=self.split_criteria,
            kernel=self.kernel,
            max_iter=self.max_iter,
            tol=self.tol,
            degree=self.degree,
            gamma=self.gamma,
        )

    def _initialize_random(self) -> np.random.mtrand.RandomState:
        """Return the random generator used to draw bootstrap indices.

        Returns
        -------
        np.random.mtrand.RandomState
            NumPy's global generator when ``random_state`` is ``None``,
            otherwise a new generator seeded with ``random_state``.
        """
        if self.random_state is None:
            return np.random.mtrand._rand
        return np.random.RandomState(self.random_state)

    def _initialize_sample_weight(
        self, sample_weight: np.ndarray, n_samples: int
    ) -> np.ndarray:
        """Return the base weights of the training samples.

        Parameters
        ----------
        sample_weight : np.ndarray or None
            Weights supplied by the caller.
        n_samples : int
            Number of training samples.

        Returns
        -------
        np.ndarray
            A vector of ``n_samples`` ones when ``sample_weight`` is ``None``,
            otherwise a copy of ``sample_weight`` (the input is not modified).
        """
        if sample_weight is None:
            return np.ones((n_samples,), dtype=np.float64)
        return sample_weight.copy()

    def fit(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
    ) -> "Odte":
        """Build the ensemble from the training set ``(X, y)``.

        Parameters
        ----------
        X : np.ndarray
            Training samples.
        y : np.ndarray
            Class labels, one per sample.
        sample_weight : np.ndarray, optional
            Per-sample weights.

        Returns
        -------
        Odte
            The fitted estimator itself.

        Raises
        ------
        ValueError
            If ``n_estimators`` is lower than 10, if ``max_samples`` is not
            valid, or if the scikit-learn input validators reject the data.
        """
        if self.n_estimators < 10:
            raise ValueError(
                "n_estimators must be greater than 9... got "
                f"(n_estimators={self.n_estimators})"
            )
        # the rest of the parameters are checked by the base estimator
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        sample_weight = _check_sample_weight(sample_weight, X)
        # Encode the labels as 0..n_classes_-1; the trees are trained on the
        # encoded labels and predict() maps the votes back through classes_.
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = self.classes_.shape[0]
        self.estimators_ = []
        self._train(X, y, sample_weight)
        return self

    def _train(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray
    ) -> "Odte":
        """Fit ``n_estimators`` trees, each one on its own bootstrap sample.

        Parameters
        ----------
        X : np.ndarray
            Validated training samples.
        y : np.ndarray
            Integer-encoded labels (1-D).
        sample_weight : np.ndarray or None
            Per-sample weights.

        Returns
        -------
        Odte
            The estimator itself, with ``estimators_`` populated.
        """
        random_box = self._initialize_random()
        n_samples = X.shape[0]
        weights = self._initialize_sample_weight(sample_weight, n_samples)
        boot_samples = self._get_bootstrap_n_samples(n_samples)
        for _ in range(self.n_estimators):
            clf = Stree().set_params(**self.estimator_params)
            # draw the bootstrap sample (with replacement)
            indices = random_box.randint(0, n_samples, boot_samples)
            # scale each base weight by the number of times it was drawn
            weights_update = np.bincount(indices, minlength=n_samples)
            current_weights = weights * weights_update
            # y and current_weights are 1-D, so they take a single index
            # NOTE(review): a sample drawn k times is both repeated k times
            # and weighted by k -- confirm this is the intended weighting.
            clf.fit(X[indices, :], y[indices], current_weights[indices])
            # only keep estimators that were fitted successfully
            self.estimators_.append(clf)
        return self

    def _get_bootstrap_n_samples(self, n_samples) -> int:
        """Translate ``max_samples`` into an absolute bootstrap size.

        Parameters
        ----------
        n_samples : int
            Number of samples in the training set.

        Returns
        -------
        int
            ``n_samples`` when ``max_samples`` is ``None``; ``max_samples``
            when it is an integer; ``round(max_samples * n_samples)`` (at
            least 1) when it is a float.

        Raises
        ------
        ValueError
            If ``max_samples`` is out of range or is neither int nor float.
        """
        max_samples = self.max_samples
        if max_samples is None:
            return n_samples
        # bool is a subclass of int but is not a meaningful sample count
        is_bool = isinstance(max_samples, bool)
        if not is_bool and isinstance(max_samples, (int, np.integer)):
            if not (1 <= max_samples <= n_samples):
                raise ValueError(
                    "max_samples should be in the range 1 to "
                    f"{n_samples} but got {max_samples}"
                )
            return int(max_samples)
        if isinstance(max_samples, (float, np.floating)):
            if not (0 < max_samples < 1):
                raise ValueError(
                    "max_samples should be in the range (0, 1) "
                    f"but got {max_samples}"
                )
            # never return an empty bootstrap for very small fractions
            return max(1, int(round(max_samples * n_samples)))
        raise ValueError(
            f"Expected values int, float but got {type(max_samples)}"
        )

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict the class of every sample in ``X`` by majority vote.

        Parameters
        ----------
        X : np.ndarray
            Samples to classify.

        Returns
        -------
        np.ndarray
            One label per sample, taken from ``classes_``. Ties are resolved
            in favour of the class that comes first in ``classes_``.
        """
        check_is_fitted(self, ["estimators_"])
        # Input validation
        X = check_array(X)
        n_samples = X.shape[0]
        votes = np.zeros((n_samples, self.n_classes_), dtype=np.int64)
        rows = np.arange(n_samples)
        for clf in self.estimators_:
            # assumes Stree.predict returns labels in the integer-encoded
            # space it was trained on -- TODO confirm against Stree
            encoded = np.asarray(clf.predict(X)).astype(np.int64).ravel()
            votes[rows, encoded] += 1
        return self.classes_[np.argmax(votes, axis=1)]

    def score(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
    ) -> float:
        """Return the (optionally weighted) accuracy on ``(X, y)``.

        Parameters
        ----------
        X : np.ndarray
            Samples to classify.
        y : np.ndarray
            True labels of ``X``.
        sample_weight : np.ndarray, optional
            Per-sample weights used to average the hits.

        Returns
        -------
        float
            Weighted fraction of samples whose prediction equals ``y``.
        """
        check_is_fitted(self, ["estimators_"])
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        y_pred = self.predict(X).reshape(y.shape)
        check_consistent_length(y, y_pred, sample_weight)
        # public NumPy equivalent of a normalized weighted sum of the hits
        return np.average(y == y_pred, weights=sample_weight)
|
3
odte/__init__.py
Normal file
3
odte/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .Odte import Odte
|
||||||
|
|
||||||
|
__all__ = ["Odte"]
|
49
odte/tests/Odte_tests.py
Normal file
49
odte/tests/Odte_tests.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from odte import Odte
|
||||||
|
from .utils import load_dataset
|
||||||
|
|
||||||
|
|
||||||
|
class Odte_test(unittest.TestCase):
    """Unit tests for the helper methods and parameter checks of Odte."""

    def __init__(self, *args, **kwargs):
        # seed shared by the tests that need reproducible data
        self._random_state = 1
        super().__init__(*args, **kwargs)

    def test_max_samples_bogus(self):
        """fit must reject out-of-range or wrongly typed max_samples."""
        for bogus in (0, 3000, 1.1, 0.0, "hi!"):
            with self.assertRaises(ValueError):
                Odte(max_samples=bogus).fit(
                    *load_dataset(self._random_state)
                )

    def test_get_bootstrap_nsamples(self):
        """max_samples is translated into an absolute number of samples."""
        total = 1500
        cases = {1: 1, 1500: 1500, 0.1: 150}
        for max_samples, expected in cases.items():
            computed = Odte(
                max_samples=max_samples
            )._get_bootstrap_n_samples(total)
            self.assertEqual(expected, computed)

    def test_initialize_sample_weight(self):
        """None yields a vector of ones; given weights come back unchanged."""
        size = 5
        random_weights = np.random.rand(size)
        cases = (
            (None, np.ones(size)),
            (random_weights, random_weights),
        )
        for supplied, expected in cases:
            computed = Odte()._initialize_sample_weight(supplied, size)
            self.assertListEqual(expected.tolist(), computed.tolist())

    def test_initialize_random(self):
        """A seed gives a reproducible generator; None still gives one."""
        seeded = Odte(random_state=self._random_state)._initialize_random()
        self.assertListEqual(
            [37, 235, 908], seeded.randint(0, 1000, 3).tolist()
        )
        # without a seed only the range of the drawn values can be checked
        unseeded = Odte()._initialize_random()
        low, high = 101, 1000
        for drawn in unseeded.randint(low, high, 3).tolist():
            self.assertGreaterEqual(drawn, low)
            self.assertLessEqual(drawn, high)
|
3
odte/tests/__init__.py
Normal file
3
odte/tests/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .Odte_tests import Odte_test
|
||||||
|
|
||||||
|
__all__ = ["Odte_test"]
|
17
odte/tests/utils.py
Normal file
17
odte/tests/utils.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from sklearn.datasets import make_classification
|
||||||
|
|
||||||
|
|
||||||
|
def load_dataset(random_state=0, n_classes=2):
    """Generate the synthetic classification dataset used by the tests.

    The dataset has 1500 samples and 3 features, all of them informative,
    with two clusters per class and no label noise.

    Parameters
    ----------
    random_state : int, default=0
        Seed forwarded to ``make_classification``.
    n_classes : int, default=2
        Number of classes to generate.

    Returns
    -------
    tuple
        The ``(X, y)`` pair produced by ``make_classification``.
    """
    settings = {
        "n_samples": 1500,
        "n_features": 3,
        "n_informative": 3,
        "n_redundant": 0,
        "n_repeated": 0,
        "n_classes": n_classes,
        "n_clusters_per_class": 2,
        "class_sep": 1.5,
        "flip_y": 0,
        "random_state": random_state,
    }
    features, labels = make_classification(**settings)
    return features, labels
|
16
pyproject.toml
Normal file
16
pyproject.toml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
[tool.black]
|
||||||
|
line-length = 79
|
||||||
|
include = '\.pyi?$'
|
||||||
|
exclude = '''
|
||||||
|
/(
|
||||||
|
\.git
|
||||||
|
| \.hg
|
||||||
|
| \.mypy_cache
|
||||||
|
| \.tox
|
||||||
|
| \.venv
|
||||||
|
| _build
|
||||||
|
| buck-out
|
||||||
|
| build
|
||||||
|
| dist
|
||||||
|
)/
|
||||||
|
'''
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
numpy
|
||||||
|
scikit-learn
|
||||||
|
pandas
|
||||||
|
ipympl
|
||||||
|
stree
|
Loading…
x
Reference in New Issue
Block a user