First commit

2025-07-11 16:22:00 +00:00 · 2020-06-12 22:45:58 +02:00 · 2020-06-12 22:45:58 +02:00 · 872914dca7
commit 872914dca7
parent eae2eaf663
12 changed files with 314 additions and 0 deletions
--- a/.coveragerc
+++ b/.coveragerc
@ -0,0 +1,14 @@
 [run]
 branch = True
 source = odte
 [report]
 exclude_lines =
    if self.debug:
    pragma: no cover
    raise NotImplementedError
    if __name__ == .__main__.:
 ignore_errors = True
 omit =
    odte/tests/*
    odte/__init__.py
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,10 @@
 repos:
 -   repo: https://github.com/ambv/black
    rev: stable
    hooks:
    - id: black
      language_version: python3.7
 -   repo: https://gitlab.com/pycqa/flake8
    rev: 3.7.9
    hooks:
    - id: flake8
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,15 @@
 {
    // Use IntelliSense para saber los atributos posibles.
    // Mantenga el puntero para ver las descripciones de los existentes atributos.
    // Para más información, visite: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Archivo actual",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal"
        }
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -0,0 +1,18 @@
 {
    "python.testing.unittestArgs": [
    ],
    "python.testing.pytestEnabled": false,
    "python.testing.nosetestsEnabled": false,
    "python.testing.unittestEnabled": true,
    "python.pythonPath": "/Users/rmontanana/.virtualenvs/general/bin/python",
    "python.linting.flake8Enabled": true,
    "python.linting.enabled": true,
    "editor.rulers": [
        80,
        100
    ],
    "python.linting.pylintEnabled": false,
    "restructuredtext.confPath": "${workspaceFolder}/docs/source"
 }
--- a/codecov.yml
+++ b/codecov.yml
@ -0,0 +1,12 @@
 coverage:
  status:
    project:
      default:
        target: 90%
 comment:
  layout: "reach, diff, flags, files"
  behavior: default
  require_changes: false  
  require_base: yes
  require_head: yes       
  branches: null
--- a/odte/Odte.py
+++ b/odte/Odte.py
@ -0,0 +1,152 @@
 """
 __author__ = "Ricardo Montañana Gómez"
 __copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
 __license__ = "MIT"
 __version__ = "0.1"
 Build a forest of oblique trees based on STree
 """
 import numpy as np
 from sklearn.utils import check_consistent_length
 from sklearn.metrics._classification import _weighted_sum, _check_targets
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils.validation import (
    check_X_y,
    check_array,
    check_is_fitted,
    _check_sample_weight,
 )
 from stree import Stree
 class Odte(BaseEstimator, ClassifierMixin):
    """Ensemble that trains ``n_estimators`` Stree classifiers.

    Every base classifier is fitted on a sample drawn with replacement from
    the training data and the ensemble predicts by majority vote.

    Constructor arguments are stored unchanged under their own names; this
    is what ``BaseEstimator.get_params`` / ``set_params`` (and therefore
    ``sklearn.base.clone``) read and write.

    NOTE(review): ``bootstrap`` and ``max_features`` are stored but nothing
    in this class reads them yet.
    """

    def __init__(
        self,
        random_state: int = None,
        C: int = 1,
        n_estimators: int = 100,
        max_iter: int = 1000,
        max_depth: int = None,
        min_samples_split: int = 0,
        bootstrap: bool = True,
        split_criteria: str = "min_distance",
        tol: float = 1e-4,
        gamma="scale",
        degree: int = 3,
        kernel: str = "linear",
        max_features="auto",
        max_samples=None,
    ):
        """Store the hyper-parameters; no validation happens here.

        ``n_estimators`` and ``max_samples`` are checked when ``fit`` is
        called.  ``C``, ``random_state``, ``min_samples_split``,
        ``max_depth``, ``split_criteria``, ``kernel``, ``max_iter``,
        ``tol``, ``degree`` and ``gamma`` are forwarded to every ``Stree``
        (see ``estimator_params``).
        """
        self.random_state = random_state
        self.C = C
        self.n_estimators = n_estimators
        self.max_iter = max_iter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.bootstrap = bootstrap
        self.split_criteria = split_criteria
        self.tol = tol
        self.gamma = gamma
        self.degree = degree
        self.kernel = kernel
        self.max_features = max_features
        self.max_samples = max_samples

    @property
    def estimator_params(self) -> dict:
        """Keyword arguments handed to every ``Stree`` base classifier.

        The dict is rebuilt from the current attribute values on each
        access, so changes made after construction (for instance through
        ``set_params``) reach the base classifiers.
        """
        return dict(
            C=self.C,
            random_state=self.random_state,
            min_samples_split=self.min_samples_split,
            max_depth=self.max_depth,
            split_criteria=self.split_criteria,
            kernel=self.kernel,
            max_iter=self.max_iter,
            tol=self.tol,
            degree=self.degree,
            gamma=self.gamma,
        )

    def _initialize_random(self) -> np.random.mtrand.RandomState:
        """Return the generator used to draw the bootstrap indices.

        With ``random_state=None`` NumPy's module-level ``RandomState`` is
        returned; otherwise a new ``RandomState`` seeded with
        ``random_state`` is created.
        """
        if self.random_state is None:
            return np.random.mtrand._rand
        return np.random.RandomState(self.random_state)

    def _initialize_sample_weight(
        self, sample_weight: np.ndarray, n_samples: int
    ) -> np.ndarray:
        """Return unit weights when none are given, else a private copy."""
        if sample_weight is None:
            return np.ones((n_samples,), dtype=np.float64)
        # Copy so later arithmetic never touches the caller's array.
        return sample_weight.copy()

    def fit(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
    ) -> "Odte":
        """Validate the input and train the ensemble.

        Sets ``classes_``, ``n_classes_`` and ``estimators_``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``n_estimators`` is lower than 10, if ``max_samples`` is not
            acceptable, or if the scikit-learn validators reject the data.
        """
        if self.n_estimators < 10:
            raise ValueError(
                "n_estimators must be greater than 9... got "
                f"(n_estimators={self.n_estimators})"
            )
        # The remaining hyper-parameters are checked by the base estimator.
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        sample_weight = _check_sample_weight(sample_weight, X)
        # Base classifiers are trained on labels encoded as 0..n_classes-1.
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = self.classes_.shape[0]
        self.estimators_ = []
        return self._train(X, y, sample_weight)

    def _train(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray
    ) -> "Odte":
        """Fit ``n_estimators`` Stree classifiers on bootstrap samples.

        Each fitted classifier is appended to ``estimators_``.  Returns
        ``self``.
        """
        random_box = self._initialize_random()
        n_samples = X.shape[0]
        weights = self._initialize_sample_weight(sample_weight, n_samples)
        boot_samples = self._get_bootstrap_n_samples(n_samples)
        params = self.estimator_params
        for _ in range(self.n_estimators):
            clf = Stree().set_params(**params)
            # Draw the bootstrap sample (with replacement).
            indices = random_box.randint(0, n_samples, boot_samples)
            # Scale every weight by the number of times its sample was drawn.
            # NOTE(review): a repeated sample is both duplicated as a row
            # and carries the scaled weight - confirm this is intended.
            weights_update = np.bincount(indices, minlength=n_samples)
            current_weights = weights * weights_update
            # y and the weights are 1-D, so they take a single index.
            clf.fit(X[indices, :], y[indices], current_weights[indices])
            self.estimators_.append(clf)
        return self

    def _get_bootstrap_n_samples(self, n_samples) -> int:
        """Translate ``max_samples`` into a number of rows to draw.

        ``None`` means ``n_samples``; an integer is taken as an absolute
        count in ``[1, n_samples]``; a float is taken as a fraction in the
        open interval ``(0, 1)`` and never yields less than one row.

        Raises
        ------
        ValueError
            If ``max_samples`` is out of range or of any other type.
        """
        max_samples = self.max_samples
        if max_samples is None:
            return n_samples
        # bool is a subclass of int but was never a valid sample count.
        is_bool = isinstance(max_samples, bool)
        if not is_bool and isinstance(max_samples, (int, np.integer)):
            if not (1 <= max_samples <= n_samples):
                raise ValueError(
                    "max_samples should be in the range 1 to "
                    f"{n_samples} but got {max_samples}"
                )
            return int(max_samples)
        if isinstance(max_samples, (float, np.floating)):
            if not (0 < max_samples < 1):
                raise ValueError(
                    "max_samples should be in the range (0, 1) "
                    f"but got {max_samples}"
                )
            # A tiny fraction must not round down to an empty sample.
            return max(1, int(round(max_samples * n_samples)))
        raise ValueError(
            f"Expected values int, float but got {type(max_samples)}"
        )

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict the class of every row of ``X`` by majority vote.

        Ties are resolved in favour of the class that comes first in
        ``classes_``.
        """
        check_is_fitted(self, ["estimators_"])
        # Input validation
        X = check_array(X)
        # One row of predictions per base classifier.
        # Assumes Stree.predict returns the integer-encoded labels it was
        # fitted with - TODO confirm against the stree package.
        votes = np.asarray([clf.predict(X) for clf in self.estimators_])
        # tally[i, k] = number of classifiers voting class k for sample i
        tally = np.stack(
            [(votes == k).sum(axis=0) for k in range(self.n_classes_)],
            axis=1,
        )
        return self.classes_[tally.argmax(axis=1)]

    def score(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray = None
    ) -> float:
        """Return the (optionally weighted) accuracy on ``X``, ``y``.

        ``sample_weight`` defaults to ``None`` so the method can be called
        as ``score(X, y)``, like ``ClassifierMixin.score``.
        """
        check_is_fitted(self, ["estimators_"])
        check_classification_targets(y)
        X, y = check_X_y(X, y)
        y_pred = self.predict(X).reshape(y.shape)
        # NOTE(review): _check_targets and _weighted_sum are private
        # scikit-learn helpers and may change between releases.
        _, y_true, y_pred = _check_targets(y, y_pred)
        check_consistent_length(y_true, y_pred, sample_weight)
        hits = y_true == y_pred
        return _weighted_sum(hits, sample_weight, normalize=True)
--- a/odte/__init__.py
+++ b/odte/__init__.py
@ -0,0 +1,3 @@
 from .Odte import Odte
 __all__ = ["Odte"]
--- a/odte/tests/Odte_tests.py
+++ b/odte/tests/Odte_tests.py
@ -0,0 +1,49 @@
 import unittest
 import numpy as np
 from odte import Odte
 from .utils import load_dataset
 class Odte_test(unittest.TestCase):
    """Unit tests for the helper methods and argument checks of ``Odte``."""

    def __init__(self, *args, **kwargs):
        # Seed shared by every test that needs reproducible data.
        self._random_state = 1
        super().__init__(*args, **kwargs)

    def test_max_samples_bogus(self):
        """``fit`` must reject out-of-range or wrongly typed max_samples."""
        dataset = load_dataset(self._random_state)
        for max_samples in (0, 3000, 1.1, 0.0, "hi!"):
            # subTest reports the offending value and keeps checking the
            # remaining ones after a failure.
            with self.subTest(max_samples=max_samples):
                with self.assertRaises(ValueError):
                    tclf = Odte(max_samples=max_samples)
                    tclf.fit(*dataset)

    def test_get_bootstrap_nsamples(self):
        """Valid max_samples values map to the expected sample counts."""
        expected_values = [(1, 1), (1500, 1500), (0.1, 150)]
        for value, expected in expected_values:
            with self.subTest(max_samples=value):
                tclf = Odte(max_samples=value)
                computed = tclf._get_bootstrap_n_samples(1500)
                self.assertEqual(expected, computed)

    def test_initialize_sample_weight(self):
        """Missing weights become ones; given weights are copied."""
        m = 5
        ones = np.ones(m)
        # Seeded so that a failure can be reproduced.
        weights = np.random.RandomState(self._random_state).rand(m)
        expected_values = [(None, ones), (weights, weights)]
        for value, expected in expected_values:
            with self.subTest(sample_weight=value):
                tclf = Odte()
                computed = tclf._initialize_sample_weight(value, m)
                self.assertListEqual(expected.tolist(), computed.tolist())
                if value is not None:
                    # The caller's array must not be handed back as is.
                    self.assertIsNot(value, computed)

    def test_initialize_random(self):
        """A seed gives a reproducible generator; None still gives one."""
        expected = [37, 235, 908]
        tclf = Odte(random_state=self._random_state)
        box = tclf._initialize_random()
        computed = box.randint(0, 1000, 3)
        self.assertListEqual(expected, computed.tolist())
        # test None
        tclf = Odte()
        box = tclf._initialize_random()
        computed = box.randint(101, 1000, 3)
        for value in computed.tolist():
            self.assertGreaterEqual(value, 101)
            # The upper bound of randint is exclusive.
            self.assertLess(value, 1000)
--- a/odte/tests/__init__.py
+++ b/odte/tests/__init__.py
@ -0,0 +1,3 @@
 from .Odte_tests import Odte_test
 __all__ = ["Odte_test"]
--- a/odte/tests/utils.py
+++ b/odte/tests/utils.py
@ -0,0 +1,17 @@
 from sklearn.datasets import make_classification
 def load_dataset(random_state=0, n_classes=2):
    """Build the synthetic classification data set used by the tests.

    Returns the ``(X, y)`` pair produced by ``make_classification`` for
    1500 samples and 3 features, all of them informative.
    """
    settings = dict(
        n_samples=1500,
        n_features=3,
        n_informative=3,
        n_redundant=0,
        n_repeated=0,
        n_classes=n_classes,
        n_clusters_per_class=2,
        class_sep=1.5,
        flip_y=0,
        random_state=random_state,
    )
    features, labels = make_classification(**settings)
    return features, labels
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,16 @@
 [tool.black]
 line-length = 79
 include = '\.pyi?$'
 exclude = '''
 /(
    \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | _build
  | buck-out
  | build
  | dist
 )/
 '''
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
 numpy
 scikit-learn
 pandas
 ipympl
 stree
		`@ -0,0 +1,3 @@`
							`from .Odte import Odte`

							`__all__ = ["Odte"]`
		`@ -0,0 +1,3 @@`
							`from .Odte_tests import Odte_test`

							`__all__ = ["Odte_test"]`