Refactor predict and score and make the code pass mypy --strict

First Approach
Add test for getting 3 feature_sets in Splitter
2025-08-17 16:36:01 +00:00 · 2020-07-01 18:37:10 +02:00 · 2020-06-28 02:46:20 +02:00 · 2020-06-28 02:45:08 +02:00 · 2020-06-27 23:34:15 +02:00 · 2020-06-27 18:29:40 +02:00
20 changed files with 1495 additions and 1509 deletions
--- a/.coveragerc
+++ b/.coveragerc
@@ -10,5 +10,4 @@ exclude_lines =
    if __name__ == .__main__.:
 ignore_errors = True
 omit =
-    stree/tests/*
    stree/__init__.py
--- a/.gitignore
+++ b/.gitignore
@@ -130,4 +130,6 @@ dmypy.json

 .idea
 .vscode
-.pre-commit-config.yaml
+.pre-commit-config.yaml
+
+**.csv
--- a/codecov.yml
+++ b/codecov.yml
@@ -3,9 +3,6 @@ overage:
    project:
      default:
        target: 90%
-    patch:
-      default:
-        target: 90%
 comment:
  layout: "reach, diff, flags, files"
  behavior: default
--- a/main.py
+++ b/main.py
@@ -1,88 +1,29 @@
 import time
 from sklearn.model_selection import train_test_split
+from sklearn.datasets import load_iris
 from stree import Stree

 random_state = 1

+X, y = load_iris(return_X_y=True)

-def load_creditcard(n_examples=0):
-    import pandas as pd
-    import numpy as np
-    import random
-
-    df = pd.read_csv("data/creditcard.csv")
-    print(
-        "Fraud: {0:.3f}% {1}".format(
-            df.Class[df.Class == 1].count() * 100 / df.shape[0],
-            df.Class[df.Class == 1].count(),
-        )
-    )
-    print(
-        "Valid: {0:.3f}% {1}".format(
-            df.Class[df.Class == 0].count() * 100 / df.shape[0],
-            df.Class[df.Class == 0].count(),
-        )
-    )
-    y = np.expand_dims(df.Class.values, axis=1)
-    X = df.drop(["Class", "Time", "Amount"], axis=1).values
-    if n_examples > 0:
-        # Take first n_examples samples
-        X = X[:n_examples, :]
-        y = y[:n_examples, :]
-    else:
-        # Take all the positive samples with a number of random negatives
-        if n_examples < 0:
-            Xt = X[(y == 1).ravel()]
-            yt = y[(y == 1).ravel()]
-            indices = random.sample(range(X.shape[0]), -1 * n_examples)
-            X = np.append(Xt, X[indices], axis=0)
-            y = np.append(yt, y[indices], axis=0)
-    print("X.shape", X.shape, " y.shape", y.shape)
-    print(
-        "Fraud: {0:.3f}% {1}".format(
-            len(y[y == 1]) * 100 / X.shape[0], len(y[y == 1])
-        )
-    )
-    print(
-        "Valid: {0:.3f}% {1}".format(
-            len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])
-        )
-    )
-    Xtrain, Xtest, ytrain, ytest = train_test_split(
-        X,
-        y,
-        train_size=0.7,
-        shuffle=True,
-        random_state=random_state,
-        stratify=y,
-    )
-    return Xtrain, Xtest, ytrain, ytest
-
-
-# data = load_creditcard(-5000) # Take all true samples + 5000 of the others
-# data = load_creditcard(5000)  # Take the first 5000 samples
-data = load_creditcard()  # Take all the samples
-
-Xtrain = data[0]
-Xtest = data[1]
-ytrain = data[2]
-ytest = data[3]
+Xtrain, Xtest, ytrain, ytest = train_test_split(
+    X, y, test_size=0.2, random_state=random_state
+)

 now = time.time()
+print("Predicting with max_features=sqrt(n_features)")
+clf = Stree(C=0.01, random_state=random_state, max_features="auto")
+clf.fit(Xtrain, ytrain)
+print(f"Took {time.time() - now:.2f} seconds to train")
+print(clf)
+print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
+print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
+print("=" * 40)
+print("Predicting with max_features=n_features")
 clf = Stree(C=0.01, random_state=random_state)
 clf.fit(Xtrain, ytrain)
 print(f"Took {time.time() - now:.2f} seconds to train")
 print(clf)
 print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
 print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
-proba = clf.predict_proba(Xtest)
-print(
-    "Checking that we have correct probabilities, these are probabilities of "
-    "sample belonging to class 1"
-)
-res0 = proba[proba[:, 0] == 0]
-res1 = proba[proba[:, 0] == 1]
-print("++++++++++res0 > .8++++++++++++")
-print(res0[res0[:, 1] > 0.8])
-print("**********res1 < .4************")
-print(res1[res1[:, 1] < 0.4])
--- a/notebooks/benchmark.ipynb
+++ b/notebooks/benchmark.ipynb
--- a/notebooks/ensemble.ipynb
+++ b/notebooks/ensemble.ipynb
@@ -4,7 +4,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Test AdaBoost with different configurations"
+    "# Test Stree with AdaBoost and Bagging with different configurations"
   ]
  },
  {
@@ -34,11 +34,8 @@
   "outputs": [],
   "source": [
    "import time\n",
-    "from sklearn.ensemble import AdaBoostClassifier\n",
-    "from sklearn.tree import DecisionTreeClassifier\n",
-    "from sklearn.svm import LinearSVC, SVC\n",
-    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
-    "from sklearn.datasets import load_iris\n",
+    "from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier\n",
+    "from sklearn.model_selection import train_test_split\n",
    "from stree import Stree"
   ]
  },
@@ -57,12 +54,14 @@
  {
   "cell_type": "code",
   "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.644% 647\nValid: 99.356% 99845\n"
    }
   ],
   "source": [
@@ -117,18 +116,20 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## STree alone on the whole dataset and linear kernel"
+    "## STree alone with 100.000 samples and linear kernel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Score Train:  0.9985499829409757\nScore Test:  0.998407854584052\nTook 39.45 seconds\n"
+     "text": "Score Train:  0.9985784146480154\nScore Test:  0.9981093273185617\nTook 73.27 seconds\n"
    }
   ],
   "source": [
@@ -144,7 +145,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Different kernels with different configuations"
+    "## Adaboost"
   ]
  },
  {
@@ -161,18 +162,20 @@
  {
   "cell_type": "code",
   "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
+     "text": "Kernel: linear\tTime: 93.78 seconds\tScore Train: 0.9983083\tScore Test: 0.9983083\nKernel: rbf\tTime: 18.32 seconds\tScore Train: 0.9935602\tScore Test: 0.9935651\nKernel: poly\tTime: 69.68 seconds\tScore Train: 0.9973132\tScore Test: 0.9972801\n"
    }
   ],
   "source": [
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
+    "    clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    score_train = clf.score(Xtrain, ytrain)\n",
    "    score_test = clf.score(Xtest, ytest)\n",
@@ -183,24 +186,37 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Test algorithm SAMME in AdaBoost to check speed/accuracy"
+    "## Bagging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_estimators = 10\n",
+    "C = 7\n",
+    "max_depth = 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
+     "text": "Kernel: linear\tTime: 387.06 seconds\tScore Train: 0.9985784\tScore Test: 0.9981093\nKernel: rbf\tTime: 144.00 seconds\tScore Train: 0.9992750\tScore Test: 0.9983415\nKernel: poly\tTime: 101.78 seconds\tScore Train: 0.9992466\tScore Test: 0.9981757\n"
    }
   ],
   "source": [
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+    "    clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    score_train = clf.score(Xtrain, ytrain)\n",
    "    score_test = clf.score(Xtest, ytest)\n",
@@ -223,7 +239,7 @@
  },
  "orig_nbformat": 2,
  "kernelspec": {
-   "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
+   "name": "python37664bitgeneralvenve3128601eb614c5da59c5055670b6040",
   "display_name": "Python 3.7.6 64-bit ('general': venv)"
  }
 },
--- a/notebooks/features.ipynb
+++ b/notebooks/features.ipynb
--- a/notebooks/test_graphs.ipynb
+++ b/notebooks/test_graphs.ipynb
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 numpy
 scikit-learn
 pandas
-matplotlib
 ipympl
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@ setuptools.setup(
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Intended Audience :: Science/Research",
    ],
-    install_requires=["scikit-learn>=0.23.0", "numpy", "matplotlib", "ipympl"],
+    install_requires=["scikit-learn>=0.23.0", "numpy", "ipympl"],
    test_suite="stree.tests",
    zip_safe=False,
 )
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -6,21 +6,24 @@ __version__ = "0.9"
 Build an oblique tree classifier based on SVM Trees
 """

+from __future__ import annotations
 import os
-
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.svm import SVC, LinearSVC
-from sklearn.utils import check_consistent_length
-from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.validation import (
-    check_X_y,
-    check_array,
+import random
+import warnings
+from typing import Optional, List, Union, Tuple
+from math import log
+from itertools import combinations
+import numpy as np  # type: ignore
+from sklearn.base import BaseEstimator, ClassifierMixin  # type: ignore
+from sklearn.svm import SVC, LinearSVC  # type: ignore
+from sklearn.utils.multiclass import (  # type: ignore
+    check_classification_targets,
+)
+from sklearn.exceptions import ConvergenceWarning  # type: ignore
+from sklearn.utils.validation import (  # type: ignore
    check_is_fitted,
    _check_sample_weight,
 )
-from sklearn.utils.sparsefuncs import count_nonzero
-from sklearn.metrics._classification import _weighted_sum, _check_targets


 class Snode:
@@ -28,37 +31,60 @@ class Snode:
    dataset assigned to it
    """

-    def __init__(self, clf: SVC, X: np.ndarray, y: np.ndarray, title: str):
-        self._clf = clf
-        self._title = title
-        self._belief = 0.0
+    def __init__(
+        self,
+        clf: Union[SVC, LinearSVC],
+        X: np.ndarray,
+        y: np.ndarray,
+        features: np.array,
+        impurity: float,
+        title: str,
+        weight: np.ndarray = None,
+    ):
+        self._clf: Union[SVC, LinearSVC] = clf
+        self._title: str = title
+        self._belief: float = 0.0
        # Only store dataset in Testing
-        self._X = X if os.environ.get("TESTING", "NS") != "NS" else None
-        self._y = y
-        self._down = None
-        self._up = None
+        self._X: Optional[np.array] = X if os.environ.get(
+            "TESTING", "NS"
+        ) != "NS" else None
+        self._y: np.array = y
+        self._down: Optional[Snode] = None
+        self._up: Optional[Snode] = None
        self._class = None
+        self._sample_weight: Optional[np.array] = (
+            weight if os.environ.get("TESTING", "NS") != "NS" else None
+        )
+        self._features: Tuple[int, ...] = features
+        self._impurity: float = impurity

    @classmethod
-    def copy(cls, node: "Snode") -> "Snode":
-        return cls(node._clf, node._X, node._y, node._title)
+    def copy(cls, node: Snode) -> Snode:
+        return cls(
+            node._clf,
+            node._X,
+            node._y,
+            node._features,
+            node._impurity,
+            node._title,
+        )

-    def set_down(self, son):
+    def set_down(self, son: Snode) -> None:
        self._down = son

-    def set_up(self, son):
+    def set_up(self, son: Snode) -> None:
        self._up = son

    def is_leaf(self) -> bool:
        return self._up is None and self._down is None

-    def get_down(self) -> "Snode":
+    def get_down(self) -> Optional[Snode]:
        return self._down

-    def get_up(self) -> "Snode":
+    def get_up(self) -> Optional[Snode]:
        return self._up

-    def make_predictor(self):
+    def make_predictor(self) -> None:
        """Compute the class of the predictor and its belief based on the
        subdataset of the node only if it is a leaf
        """
@@ -82,25 +108,26 @@ class Snode:
            count_values = np.unique(self._y, return_counts=True)
            result = (
                f"{self._title} - Leaf class={self._class} belief="
-                f"{self._belief: .6f} counts={count_values}"
+                f"{self._belief: .6f} impurity={self._impurity:.4f} "
+                f"counts={count_values}"
            )
            return result
        else:
-            return f"{self._title}"
+            return (
+                f"{self._title} feaures={self._features} impurity="
+                f"{self._impurity:.4f}"
+            )


 class Siterator:
    """Stree preorder iterator
    """

-    def __init__(self, tree: Snode):
-        self._stack = []
+    def __init__(self, tree: Optional[Snode]):
+        self._stack: List[Snode] = []
        self._push(tree)

-    def __iter__(self):
-        return self
-
-    def _push(self, node: Snode):
+    def _push(self, node: Optional[Snode]) -> None:
        if node is not None:
            self._stack.append(node)

@@ -113,7 +140,235 @@ class Siterator:
        return node


-class Stree(BaseEstimator, ClassifierMixin):
+class Splitter:
+    def __init__(
+        self,
+        clf: Union[SVC, LinearSVC] = None,
+        criterion: str = "",
+        splitter_type: str = "",
+        criteria: str = "",
+        min_samples_split: int = 0,
+        random_state: Optional[int] = None,
+    ):
+        self._clf: Union[SVC, LinearSVC] = clf
+        self._random_state: Optional[int] = random_state
+        if random_state is not None:
+            random.seed(random_state)
+        self._criterion: str = criterion
+        self._min_samples_split: int = min_samples_split
+        self._criteria: str = criteria
+        self._splitter_type: str = splitter_type
+
+        if clf is None:
+            raise ValueError(f"clf has to be a sklearn estimator, got({clf})")
+
+        if criterion not in ["gini", "entropy"]:
+            raise ValueError(
+                f"criterion must be gini or entropy got({criterion})"
+            )
+
+        if criteria not in ["min_distance", "max_samples", "max_distance"]:
+            raise ValueError(
+                "split_criteria has to be min_distance "
+                f"max_distance or max_samples got ({criteria})"
+            )
+
+        if splitter_type not in ["random", "best"]:
+            raise ValueError(
+                f"splitter must be either random or best got({splitter_type})"
+            )
+        self.criterion_function = getattr(self, f"_{self._criterion}")
+        self.decision_criteria = getattr(self, f"_{self._criteria}")
+
+    def impurity(self, y: np.array) -> np.array:
+        return self.criterion_function(y)
+
+    @staticmethod
+    def _gini(y: np.array) -> float:
+        _, count = np.unique(y, return_counts=True)
+        return float(1 - np.sum(np.square(count / np.sum(count))))
+
+    @staticmethod
+    def _entropy(y: np.array) -> float:
+        n_labels = len(y)
+        if n_labels <= 1:
+            return 0
+        counts = np.bincount(y)
+        proportions = counts / n_labels
+        n_classes = np.count_nonzero(proportions)
+        if n_classes <= 1:
+            return 0
+        entropy = 0.0
+        # Compute standard entropy.
+        for prop in proportions:
+            if prop != 0.0:
+                entropy -= prop * log(prop, n_classes)
+        return entropy
+
+    def information_gain(
+        self, labels: np.array, labels_up: np.array, labels_dn: np.array
+    ) -> float:
+        imp_prev = self.criterion_function(labels)
+        card_up = card_dn = imp_up = imp_dn = 0
+        if labels_up is not None:
+            card_up = labels_up.shape[0]
+            imp_up = self.criterion_function(labels_up)
+        if labels_dn is not None:
+            card_dn = labels_dn.shape[0] if labels_dn is not None else 0
+            imp_dn = self.criterion_function(labels_dn)
+        samples = card_up + card_dn
+        if samples == 0:
+            return 0.0
+        else:
+            result = float(
+                imp_prev
+                - (card_up / samples) * imp_up
+                - (card_dn / samples) * imp_dn
+            )
+            return result
+
+    def _select_best_set(
+        self,
+        dataset: np.array,
+        labels: np.array,
+        features_sets: List[Tuple[int, ...]],
+    ) -> Tuple[int, ...]:
+        max_gain: float = 0.0
+        selected: Union[Tuple[int, ...], None] = None
+        warnings.filterwarnings("ignore", category=ConvergenceWarning)
+        for feature_set in features_sets:
+            self._clf.fit(dataset[:, feature_set], labels)
+            node = Snode(
+                self._clf, dataset, labels, feature_set, 0.0, "subset"
+            )
+            self.partition(dataset, node)
+            y1, y2 = self.part(labels)
+            gain = self.information_gain(labels, y1, y2)
+            if gain > max_gain:
+                max_gain = gain
+                selected = feature_set
+        return selected if selected is not None else feature_set
+
+    def _get_subspaces_set(
+        self, dataset: np.array, labels: np.array, max_features: int
+    ) -> np.array:
+        features = range(dataset.shape[1])
+        features_sets = list(combinations(features, max_features))
+        if len(features_sets) > 1:
+            if self._splitter_type == "random":
+                index = random.randint(0, len(features_sets) - 1)
+                return features_sets[index]
+            else:
+                # get only 3 sets at most
+                if len(features_sets) > 3:
+                    features_sets = random.sample(features_sets, 3)
+                return self._select_best_set(dataset, labels, features_sets)
+        else:
+            return features_sets[0]
+
+    def get_subspace(
+        self, dataset: np.array, labels: np.array, max_features: int
+    ) -> Tuple[np.array, np.array]:
+        """Return the best subspace to make a split
+        """
+        indices = self._get_subspaces_set(dataset, labels, max_features)
+        return dataset[:, indices], indices
+
+    @staticmethod
+    def _min_distance(data: np.array, _: np.array) -> np.array:
+        """Assign class to min distances
+
+        return a vector of classes so partition can separate class 0 from
+        the rest of classes, i.e. class 0 goes to one split node and the
+        rest of classes go to the other
+        :param data: distances to hyper plane of every class
+        :type data: np.array (m, n_classes)
+        :param _: enable call compat with other measures
+        :type _: None
+        :return: vector with the class assigned to each sample
+        :rtype: np.array shape (m,)
+        """
+        return np.argmin(data, axis=1)
+
+    @staticmethod
+    def _max_distance(data: np.array, _: np.array) -> np.array:
+        """Assign class to max distances
+
+        return a vector of classes so partition can separate class 0 from
+        the rest of classes, i.e. class 0 goes to one split node and the
+        rest of classes go to the other
+        :param data: distances to hyper plane of every class
+        :type data: np.array (m, n_classes)
+        :param _: enable call compat with other measures
+        :type _: None
+        :return: vector with the class assigned to each sample values
+        (can be 0, 1, ...)
+        :rtype: np.array shape (m,)
+        """
+        return np.argmax(data, axis=1)
+
+    @staticmethod
+    def _max_samples(data: np.array, y: np.array) -> np.array:
+        """Return distances of the class with the most samples
+
+        :param data: distances to hyper plane of every class
+        :type data: np.array (m, n_classes)
+        :param y: vector of labels (classes)
+        :type y: np.array (m,)
+        :return: vector with distances to hyperplane (can be positive or neg.)
+        :rtype: np.array shape (m,)
+        """
+        # select the class with max number of samples
+        _, samples = np.unique(y, return_counts=True)
+        selected = np.argmax(samples)
+        return data[:, selected]
+
+    def partition(self, samples: np.array, node: Snode) -> None:
+        """Set the criteria to split arrays. Compute the indices of the samples
+        that should go to one side of the tree (down)
+
+        """
+        data = self._distances(node, samples)
+        if data.shape[0] < self._min_samples_split:
+            self._down = np.ones((data.shape[0]), dtype=bool)
+            return
+        if data.ndim > 1:
+            # split criteria for multiclass
+            data = self.decision_criteria(data, node._y)
+        self._down = data > 0
+
+    @staticmethod
+    def _distances(node: Snode, data: np.ndarray) -> np.array:
+        """Compute distances of the samples to the hyperplane of the node
+
+        :param node: node containing the svm classifier
+        :type node: Snode
+        :param data: samples to find out distance to hyperplane
+        :type data: np.ndarray
+        :return: distances of every sample to the hyperplane of the node, as
+        returned by the decision_function of the node's classifier
+        :rtype: np.array
+        """
+        return node._clf.decision_function(data[:, node._features])
+
+    def part(self, origin: np.array) -> Tuple[np.array, np.array]:
+        """Split an array in two based on indices (down) and its complement
+
+        :param origin: dataset to split
+        :type origin: np.array
+        :return: tuple with the two splits of the array (up, down), using the
+        mask stored in self._down by partition; a split without samples is
+        returned as None
+        :rtype: tuple
+        """
+        up = ~self._down
+        return (
+            origin[up] if any(up) else None,
+            origin[self._down] if any(self._down) else None,
+        )
+
+
+class Stree(BaseEstimator, ClassifierMixin):  # type: ignore
    """Estimator that is based on binary trees of svm nodes
    can deal with sample_weights in predict, used in boosting sklearn methods
    inheriting from BaseEstimator implements get_params and set_params methods
@@ -126,83 +381,34 @@ class Stree(BaseEstimator, ClassifierMixin):
        C: float = 1.0,
        kernel: str = "linear",
        max_iter: int = 1000,
-        random_state: int = None,
-        max_depth: int = None,
+        random_state: Optional[int] = None,
+        max_depth: Optional[int] = None,
        tol: float = 1e-4,
        degree: int = 3,
-        gamma="scale",
+        gamma: Union[float, str] = "scale",
+        split_criteria: str = "max_samples",
+        criterion: str = "gini",
        min_samples_split: int = 0,
+        max_features: Optional[Union[str, int, float]] = None,
+        splitter: str = "random",
    ):
        self.max_iter = max_iter
-        self.C = C
-        self.kernel = kernel
-        self.random_state = random_state
-        self.max_depth = max_depth
-        self.tol = tol
-        self.gamma = gamma
-        self.degree = degree
-        self.min_samples_split = min_samples_split
-
-    def _more_tags(self) -> dict:
-        """Required by sklearn to tell that this estimator is a binary classifier
-
-        :return: the tag required
-        :rtype: dict
-        """
-        return {"binary_only": True, "requires_y": True}
-
-    def _split_array(self, origin: np.array, down: np.array) -> list:
-        """Split an array in two based on indices passed as down and its complement
-
-        :param origin: dataset to split
-        :type origin: np.array
-        :param down: indices to use to split array
-        :type down: np.array
-        :return: list with two splits of the array
-        :rtype: list
-        """
-        up = ~down
-        return (
-            origin[up[:, 0]] if any(up) else None,
-            origin[down[:, 0]] if any(down) else None,
-        )
-
-    def _distances(self, node: Snode, data: np.ndarray) -> np.array:
-        """Compute distances of the samples to the hyperplane of the node
-
-        :param node: node containing the svm classifier
-        :type node: Snode
-        :param data: samples to find out distance to hyperplane
-        :type data: np.ndarray
-        :return: array of shape (m, 1) with the distances of every sample to
-        the hyperplane of the node
-        :rtype: np.array
-        """
-        res = node._clf.decision_function(data)
-        if res.ndim == 1:
-            return np.expand_dims(res, 1)
-        elif res.shape[1] > 1:
-            # remove multiclass info
-            res = np.delete(res, slice(1, res.shape[1]), axis=1)
-        return res
-
-    def _split_criteria(self, data: np.array) -> np.array:
-        """Set the criteria to split arrays
-
-        :param data: [description]
-        :type data: np.array
-        :return: [description]
-        :rtype: np.array
-        """
-        return (
-            data > 0
-            if data.shape[0] >= self.min_samples_split
-            else np.ones((data.shape[0], 1), dtype=bool)
-        )
+        self.C: float = C
+        self.kernel: str = kernel
+        self.random_state: Optional[int] = random_state
+        self.max_depth: Optional[int] = max_depth
+        self.tol: float = tol
+        self.gamma: Union[float, str] = gamma
+        self.degree: int = degree
+        self.min_samples_split: int = min_samples_split
+        self.split_criteria: str = split_criteria
+        self.max_features: Union[str, int, float, None] = max_features
+        self.criterion: str = criterion
+        self.splitter: str = splitter

    def fit(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.array = None
-    ) -> "Stree":
+    ) -> Stree:
        """Build the tree based on the dataset of samples and its labels

        :param X: dataset of samples to make predictions
@@ -232,52 +438,30 @@ class Stree(BaseEstimator, ClassifierMixin):
                    {self.max_depth})"
            )
        check_classification_targets(y)
-        X, y = check_X_y(X, y)
-        sample_weight = _check_sample_weight(sample_weight, X)
-        check_classification_targets(y)
+        X, y = self._validate_data(X, y)
+        sample_weight = _check_sample_weight(
+            sample_weight, X, dtype=np.float64
+        )
        # Initialize computed parameters
+        self.splitter_ = Splitter(
+            clf=self._build_clf(),
+            criterion=self.criterion,
+            splitter_type=self.splitter,
+            criteria=self.split_criteria,
+            random_state=self.random_state,
+            min_samples_split=self.min_samples_split,
+        )
+        if self.random_state is not None:
+            random.seed(self.random_state)
        self.classes_, y = np.unique(y, return_inverse=True)
+        self.n_classes_ = self.classes_.shape[0]
        self.n_iter_ = self.max_iter
        self.depth_ = 0
-        self.n_features_in_ = X.shape[1]
+        self.max_features_ = self._initialize_max_features()
        self.tree_ = self.train(X, y, sample_weight, 1, "root")
        self._build_predictor()
        return self

-    def _build_predictor(self):
-        """Process the leaves to make them predictors
-        """
-
-        def run_tree(node: Snode):
-            if node.is_leaf():
-                node.make_predictor()
-                return
-            run_tree(node.get_down())
-            run_tree(node.get_up())
-
-        run_tree(self.tree_)
-
-    def _build_clf(self):
-        """ Build the correct classifier for the node
-        """
-        return (
-            LinearSVC(
-                max_iter=self.max_iter,
-                random_state=self.random_state,
-                C=self.C,
-                tol=self.tol,
-            )
-            if self.kernel == "linear"
-            else SVC(
-                kernel=self.kernel,
-                max_iter=self.max_iter,
-                tol=self.tol,
-                C=self.C,
-                gamma=self.gamma,
-                degree=self.degree,
-            )
-        )
-
    def train(
        self,
        X: np.ndarray,
@@ -285,7 +469,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        sample_weight: np.ndarray,
        depth: int,
        title: str,
-    ) -> Snode:
+    ) -> Optional[Snode]:
        """Recursive function to split the original dataset into predictor
        nodes (leaves)

@@ -307,24 +491,95 @@ class Stree(BaseEstimator, ClassifierMixin):
            return None
        if np.unique(y).shape[0] == 1:
            # only 1 class => pure dataset
-            return Snode(None, X, y, title + ", <pure>")
+            return Snode(
+                clf=None,
+                X=X,
+                y=y,
+                features=X.shape[1],
+                impurity=0.0,
+                title=title + ", <pure>",
+                weight=sample_weight,
+            )
        # Train the model
        clf = self._build_clf()
-        clf.fit(X, y, sample_weight=sample_weight)
-        tree = Snode(clf, X, y, title)
+        Xs, features = self.splitter_.get_subspace(X, y, self.max_features_)
+        # solve WARNING: class label 0 specified in weight is not found
+        # in bagging
+        if any(sample_weight == 0):
+            indices = sample_weight == 0
+            y_next = y[~indices]
+            # touch weights if removing any class
+            if np.unique(y_next).shape[0] != self.n_classes_:
+                sample_weight += 1e-5
+        clf.fit(Xs, y, sample_weight=sample_weight)
+        impurity = self.splitter_.impurity(y)
+        node = Snode(clf, X, y, features, impurity, title, sample_weight)
        self.depth_ = max(depth, self.depth_)
-        down = self._split_criteria(self._distances(tree, X))
-        X_U, X_D = self._split_array(X, down)
-        y_u, y_d = self._split_array(y, down)
-        sw_u, sw_d = self._split_array(sample_weight, down)
+        self.splitter_.partition(X, node)
+        X_U, X_D = self.splitter_.part(X)
+        y_u, y_d = self.splitter_.part(y)
+        sw_u, sw_d = self.splitter_.part(sample_weight)
        if X_U is None or X_D is None:
            # didn't part anything
-            return Snode(clf, X, y, title + ", <cgaf>")
-        tree.set_up(self.train(X_U, y_u, sw_u, depth + 1, title + " - Up"))
-        tree.set_down(self.train(X_D, y_d, sw_d, depth + 1, title + " - Down"))
-        return tree
+            return Snode(
+                clf,
+                X,
+                y,
+                features=X.shape[1],
+                impurity=impurity,
+                title=title + ", <cgaf>",
+                weight=sample_weight,
+            )
+        node.set_up(
+            self.train(  # type: ignore
+                X_U, y_u, sw_u, depth + 1, title + " - Up"
+            )
+        )
+        node.set_down(
+            self.train(  # type: ignore
+                X_D, y_d, sw_d, depth + 1, title + " - Down"
+            )
+        )
+        return node

-    def _reorder_results(self, y: np.array, indices: np.array) -> np.array:
+    def _build_predictor(self) -> None:
+        """Walk the whole tree and make a predictor out of every leaf;
+        a ValueError is raised as soon as a missing (None) node is reached
+        """
+        # down is pushed last so it is popped (visited) before up
+        pending = [self.tree_]
+        while pending:
+            node = pending.pop()
+            if node is None:
+                raise ValueError("Can't build predictors on None")
+            if node.is_leaf():
+                node.make_predictor()
+                continue
+            pending += [node.get_up(), node.get_down()]
+
+    def _build_clf(self) -> Union[LinearSVC, SVC]:
+        """Create a new, unfitted classifier to split a node
+
+        :return: LinearSVC if kernel is "linear", SVC for any other kernel
+        """
+        if self.kernel == "linear":
+            return LinearSVC(
+                max_iter=self.max_iter,
+                random_state=self.random_state,
+                C=self.C,
+                tol=self.tol,
+            )
+        return SVC(
+            kernel=self.kernel,
+            max_iter=self.max_iter,
+            tol=self.tol,
+            C=self.C,
+            gamma=self.gamma,
+            degree=self.degree,
+        )
+
+    @staticmethod
+    def _reorder_results(y: np.array, indices: np.array) -> np.array:
        """Reorder an array based on the array of indices passed

        :param y: data untidy
@@ -334,12 +589,8 @@ class Stree(BaseEstimator, ClassifierMixin):
        :return: array y ordered
        :rtype: np.array
        """
-        if y.ndim > 1 and y.shape[1] > 1:
-            # if predict_proba return np.array of floats
-            y_ordered = np.zeros(y.shape, dtype=float)
-        else:
-            # return array of same type given in y
-            y_ordered = y.copy()
+        # return array of same type given in y
+        y_ordered = y.copy()
        indices = indices.astype(int)
        for i, index in enumerate(indices):
            y_ordered[index] = y[i]
@@ -355,25 +606,30 @@ class Stree(BaseEstimator, ClassifierMixin):
        """

        def predict_class(
-            xp: np.array, indices: np.array, node: Snode
+            xp: np.array, indices: np.array, node: Optional[Snode]
        ) -> np.array:
            if xp is None:
                return [], []
-            if node.is_leaf():
+            if node.is_leaf():  # type: ignore
                # set a class for every sample in dataset
-                prediction = np.full((xp.shape[0], 1), node._class)
+                prediction = np.full(
+                    (xp.shape[0], 1), node._class  # type: ignore
+                )
                return prediction, indices
-            down = self._split_criteria(self._distances(node, xp))
-            X_U, X_D = self._split_array(xp, down)
-            i_u, i_d = self._split_array(indices, down)
-            prx_u, prin_u = predict_class(X_U, i_u, node.get_up())
-            prx_d, prin_d = predict_class(X_D, i_d, node.get_down())
+            self.splitter_.partition(xp, node)  # type: ignore
+            x_u, x_d = self.splitter_.part(xp)
+            i_u, i_d = self.splitter_.part(indices)
+            prx_u, prin_u = predict_class(
+                x_u, i_u, node.get_up()  # type: ignore
+            )
+            prx_d, prin_d = predict_class(
+                x_d, i_d, node.get_down()  # type: ignore
+            )
            return np.append(prx_u, prx_d), np.append(prin_u, prin_d)

-        # sklearn check
-        check_is_fitted(self, ["tree_"])
+        check_is_fitted(self, "n_features_in_")
        # Input validation
-        X = check_array(X)
+        X = self._validate_data(X, reset=False)
        # setup prediction & make it happen
        indices = np.arange(X.shape[0])
        result = (
@@ -383,102 +639,9 @@ class Stree(BaseEstimator, ClassifierMixin):
        )
        return self.classes_[result]

-    def predict_proba(self, X: np.array) -> np.array:
-        """Computes an approximation of the probability of samples belonging to
-        class 0 and 1
-        :param X: dataset
-        :type X: np.array
-        :return: array array of shape (m, num_classes), probability of being
-        each class
-        :rtype: np.array
-        """
-
-        def predict_class(
-            xp: np.array, indices: np.array, dist: np.array, node: Snode
-        ) -> np.array:
-            """Run the tree to compute predictions
-
-            :param xp: subdataset of samples
-            :type xp: np.array
-            :param indices: indices of subdataset samples to rebuild original
-            order
-            :type indices: np.array
-            :param dist: distances of every sample to the hyperplane or the
-            father node
-            :type dist: np.array
-            :param node: node of the leaf with the class
-            :type node: Snode
-            :return: array of labels and distances, array of indices
-            :rtype: np.array
-            """
-            if xp is None:
-                return [], []
-            if node.is_leaf():
-                # set a class for every sample in dataset
-                prediction = np.full((xp.shape[0], 1), node._class)
-                prediction_proba = dist
-                return np.append(prediction, prediction_proba, axis=1), indices
-            distances = self._distances(node, xp)
-            down = self._split_criteria(distances)
-            X_U, X_D = self._split_array(xp, down)
-            i_u, i_d = self._split_array(indices, down)
-            di_u, di_d = self._split_array(distances, down)
-            prx_u, prin_u = predict_class(X_U, i_u, di_u, node.get_up())
-            prx_d, prin_d = predict_class(X_D, i_d, di_d, node.get_down())
-            return np.append(prx_u, prx_d), np.append(prin_u, prin_d)
-
-        # sklearn check
-        check_is_fitted(self, ["tree_"])
-        # Input validation
-        X = check_array(X)
-        # setup prediction & make it happen
-        indices = np.arange(X.shape[0])
-        empty_dist = np.empty((X.shape[0], 1), dtype=float)
-        result, indices = predict_class(X, indices, empty_dist, self.tree_)
-        result = result.reshape(X.shape[0], 2)
-        # Turn distances to hyperplane into probabilities based on fitting
-        # distances of samples to its hyperplane that classified them, to the
-        # sigmoid function
-        # Probability of being 1
-        result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
-        # Probability of being 0
-        result[:, 0] = 1 - result[:, 1]
-        return self._reorder_results(result, indices)
-
-    def score(
-        self, X: np.array, y: np.array, sample_weight: np.array = None
-    ) -> float:
-        """Compute accuracy of the prediction
-
-        :param X: dataset of samples to make predictions
-        :type X: np.array
-        :param y_true: samples labels
-        :type y_true: np.array
-        :param sample_weight: weights of the samples. Rescale C per sample.
-        Hi' weights force the classifier to put more emphasis on these points
-        :type sample_weight: np.array optional
-        :return: accuracy of the prediction
-        :rtype: float
-        """
-        # sklearn check
-        check_is_fitted(self)
-        check_classification_targets(y)
-        X, y = check_X_y(X, y)
-        y_pred = self.predict(X).reshape(y.shape)
-        # Compute accuracy for each possible representation
-        y_type, y_true, y_pred = _check_targets(y, y_pred)
-        check_consistent_length(y_true, y_pred, sample_weight)
-        if y_type.startswith("multilabel"):
-            differing_labels = count_nonzero(y_true - y_pred, axis=1)
-            score = differing_labels == 0
-        else:
-            score = y_true == y_pred
-
-        return _weighted_sum(score, sample_weight, normalize=True)
-
    def __iter__(self) -> Siterator:
-        """Create an iterator to be able to visit the nodes of the tree in preorder,
-        can make a list with all the nodes in preorder
+        """Create an iterator to be able to visit the nodes of the tree in
+        preorder, can make a list with all the nodes in preorder

        :return: an iterator, can for i in... and list(...)
        :rtype: Siterator
@@ -499,3 +662,34 @@ class Stree(BaseEstimator, ClassifierMixin):
        for i in self:
            output += str(i) + "\n"
        return output
+
+    def _initialize_max_features(self) -> int:
+        """Translate the max_features hyperparameter into a feature count
+
+        :raises ValueError: on unknown strings or floats outside (0, 1]
+        :return: number of features, n_features_in_ if max_features is None
+        """
+        if isinstance(self.max_features, str):
+            if self.max_features in ("auto", "sqrt"):
+                max_features = max(1, int(np.sqrt(self.n_features_in_)))
+            elif self.max_features == "log2":
+                max_features = max(1, int(np.log2(self.n_features_in_)))
+            else:
+                raise ValueError(
+                    "Invalid value for max_features. "
+                    "Allowed string values are 'auto', "
+                    "'sqrt' or 'log2'."
+                )
+        elif self.max_features is None:
+            max_features = self.n_features_in_
+        elif isinstance(self.max_features, int):
+            max_features = self.max_features
+        elif 0.0 < self.max_features <= 1.0:  # float: fraction of features
+            max_features = max(1, int(self.max_features * self.n_features_in_))
+        else:
+            raise ValueError(
+                "Invalid value for max_features. "
+                "Allowed float must be in range (0, 1] "
+                f"got ({self.max_features})"
+            )
+        return max_features
--- a/stree/Strees_grapher.py
+++ b/stree/Strees_grapher.py
@@ -1,205 +0,0 @@
-"""
-__author__ = "Ricardo Montañana Gómez"
-__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
-__license__ = "MIT"
-__version__ = "0.9"
-Plot 3D views of nodes in Stree
-"""
-
-import os
-
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn.decomposition import PCA
-from mpl_toolkits.mplot3d import Axes3D
-
-from .Strees import Stree, Snode, Siterator
-
-
-class Snode_graph(Snode):
-    def __init__(self, node: Stree):
-        self._plot_size = (8, 8)
-        self._xlimits = (None, None)
-        self._ylimits = (None, None)
-        self._zlimits = (None, None)
-        n = Snode.copy(node)
-        super().__init__(n._clf, n._X, n._y, n._title)
-
-    def set_plot_size(self, size: tuple):
-        self._plot_size = size
-
-    def get_plot_size(self) -> tuple:
-        return self._plot_size
-
-    def _is_pure(self) -> bool:
-        """is considered pure a leaf node with one label
-        """
-        if self.is_leaf():
-            return self._belief == 1.0
-        return False
-
-    def set_axis_limits(self, limits: tuple):
-        self._xlimits, self._ylimits, self._zlimits = limits
-
-    def get_axis_limits(self) -> tuple:
-        return self._xlimits, self._ylimits, self._zlimits
-
-    def _set_graphics_axis(self, ax: Axes3D):
-        ax.set_xlim(self._xlimits)
-        ax.set_ylim(self._ylimits)
-        ax.set_zlim(self._zlimits)
-
-    def save_hyperplane(
-        self, save_folder: str = "./", save_prefix: str = "", save_seq: int = 1
-    ):
-        _, fig = self.plot_hyperplane()
-        name = os.path.join(save_folder, f"{save_prefix}STnode{save_seq}.png")
-        fig.savefig(name, bbox_inches="tight")
-        plt.close(fig)
-
-    def _get_cmap(self):
-        cmap = "jet"
-        if self._is_pure() and self._class == 1:
-            cmap = "jet_r"
-        return cmap
-
-    def _graph_title(self):
-        n_class, card = np.unique(self._y, return_counts=True)
-        return f"{self._title} {n_class} {card}"
-
-    def plot_hyperplane(self, plot_distribution: bool = True):
-        fig = plt.figure(figsize=self._plot_size)
-        ax = fig.add_subplot(1, 1, 1, projection="3d")
-        if not self._is_pure():
-            # Can't plot hyperplane of leaves with one label because it hasn't
-            # classiffier
-            # get the splitting hyperplane
-            def hyperplane(x, y):
-                return (
-                    -self._clf.intercept_
-                    - self._clf.coef_[0][0] * x
-                    - self._clf.coef_[0][1] * y
-                ) / self._clf.coef_[0][2]
-
-            tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
-            tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
-            xx, yy = np.meshgrid(tmpx, tmpy)
-            ax.plot_surface(
-                xx,
-                yy,
-                hyperplane(xx, yy),
-                alpha=0.5,
-                antialiased=True,
-                rstride=1,
-                cstride=1,
-                cmap="seismic",
-            )
-            self._set_graphics_axis(ax)
-        if plot_distribution:
-            self.plot_distribution(ax)
-        else:
-            plt.title(self._graph_title())
-            plt.show()
-        return ax, fig
-
-    def plot_distribution(self, ax: Axes3D = None):
-        if ax is None:
-            fig = plt.figure(figsize=self._plot_size)
-            ax = fig.add_subplot(1, 1, 1, projection="3d")
-        plt.title(self._graph_title())
-        cmap = self._get_cmap()
-        ax.scatter(
-            self._X[:, 0], self._X[:, 1], self._X[:, 2], c=self._y, cmap=cmap
-        )
-        ax.set_xlabel("X0")
-        ax.set_ylabel("X1")
-        ax.set_zlabel("X2")
-        plt.show()
-
-
-class Stree_grapher(Stree):
-    """Build 3d graphs of any dataset, if it's more than 3 features PCA shall
-    make its magic
-    """
-
-    def __init__(self, params: dict):
-        self._plot_size = (8, 8)
-        self._tree_gr = None
-        # make Snode store X's
-        os.environ["TESTING"] = "1"
-        self._fitted = False
-        self._pca = None
-        super().__init__(**params)
-
-    def __del__(self):
-        try:
-            os.environ.pop("TESTING")
-        except KeyError:
-            pass
-
-    def _copy_tree(self, node: Snode) -> Snode_graph:
-        mirror = Snode_graph(node)
-        # clone node
-        mirror._class = node._class
-        mirror._belief = node._belief
-        if node.get_down() is not None:
-            mirror.set_down(self._copy_tree(node.get_down()))
-        if node.get_up() is not None:
-            mirror.set_up(self._copy_tree(node.get_up()))
-        return mirror
-
-    def fit(
-        self, X: np.array, y: np.array, sample_weight: np.array = None
-    ) -> "Stree_grapher":
-        """Fit the Stree and copy the tree in a Snode_graph tree
-
-        :param X: Dataset
-        :type X: np.array
-        :param y: Labels
-        :type y: np.array
-        :return: Stree model
-        :rtype: Stree
-        """
-        if X.shape[1] != 3:
-            self._pca = PCA(n_components=3)
-            X = self._pca.fit_transform(X)
-        super().fit(X, y, sample_weight=sample_weight)
-        self._tree_gr = self._copy_tree(self.tree_)
-        self._fitted = True
-        return self
-
-    def score(self, X: np.array, y: np.array) -> float:
-        self._check_fitted()
-        if X.shape[1] != 3:
-            X = self._pca.transform(X)
-        return super().score(X, y)
-
-    def _check_fitted(self):
-        if not self._fitted:
-            raise Exception("Have to fit the grapher first!")
-
-    def save_all(self, save_folder: str = "./", save_prefix: str = ""):
-        """Save all the node plots in png format, each with a sequence number
-
-        :param save_folder: folder where the plots are saved, defaults to './'
-        :type save_folder: str, optional
-        """
-        self._check_fitted()
-        if not os.path.isdir(save_folder):
-            os.mkdir(save_folder)
-        seq = 1
-        for node in self:
-            node.save_hyperplane(
-                save_folder=save_folder, save_prefix=save_prefix, save_seq=seq
-            )
-            seq += 1
-
-    def plot_all(self):
-        """Plots all the nodes
-        """
-        self._check_fitted()
-        for node in self:
-            node.plot_hyperplane()
-
-    def __iter__(self):
-        return Siterator(self._tree_gr)
--- a/stree/init.py
+++ b/stree/init.py
@@ -1,4 +1,3 @@
-from .Strees import Stree, Snode, Siterator
-from .Strees_grapher import Stree_grapher, Snode_graph
+from .Strees import Stree, Snode, Siterator, Splitter

-__all__ = ["Stree", "Snode", "Siterator", "Stree_grapher", "Snode_graph"]
+__all__ = ["Stree", "Snode", "Siterator", "Splitter"]
--- a/stree/tests/Snode_test.py
+++ b/stree/tests/Snode_test.py
@@ -0,0 +1,89 @@
+# type: ignore
+import os
+import unittest
+
+import numpy as np
+
+from stree import Stree, Snode
+from .utils import load_dataset
+
+
+class Snode_test(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        self._random_state = 1
+        self._clf = Stree(random_state=self._random_state)
+        self._clf.fit(*load_dataset(self._random_state))
+        super().__init__(*args, **kwargs)
+
+    @classmethod
+    def setUp(cls):
+        os.environ["TESTING"] = "1"  # presumably makes Snode keep X (confirm)
+
+    def test_attributes_in_leaves(self):
+        """Check that every leaf holds the class and the belief computed
+        from its own labels, so it can be used as a predictor
+        """
+
+        def check_leave(node: Snode):
+            if not node.is_leaf():
+                check_leave(node.get_down())
+                check_leave(node.get_up())
+                return
+            # Check Belief in leaf (the ratio only holds for up to 2 classes)
+            classes, card = np.unique(node._y, return_counts=True)
+            max_card = max(card)
+            min_card = min(card)
+            if len(classes) > 1:
+                belief = max_card / (max_card + min_card)
+            else:
+                belief = 1
+            self.assertEqual(belief, node._belief)
+            # Check Class: the most frequent label of the leaf
+            class_computed = classes[card == max_card]
+            self.assertEqual(class_computed, node._class)
+
+        check_leave(self._clf.tree_)
+
+    def test_nodes_coefs(self):
+        """Check that every node with belief < 1 has a classifier with coef_
+        """
+
+        def run_tree(node: Snode):
+            if node._belief < 1:
+                # only pure leaves (belief == 1) are excluded
+                self.assertIsNotNone(node._clf)
+                self.assertIsNotNone(node._clf.coef_)
+            if node.is_leaf():
+                return
+            run_tree(node.get_down())
+            run_tree(node.get_up())
+
+        run_tree(self._clf.tree_)
+
+    def test_make_predictor_on_leaf(self):
+        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
+        test.make_predictor()
+        self.assertEqual(1, test._class)
+        self.assertEqual(0.75, test._belief)  # 3 of the 4 labels are 1
+
+    def test_make_predictor_on_not_leaf(self):
+        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
+        test.set_up(Snode(None, [1], [1], [], 0.0, "another_test"))
+        test.make_predictor()
+        self.assertIsNone(test._class)  # an inner node is not a predictor
+        self.assertEqual(0, test._belief)
+
+    def test_make_predictor_on_leaf_bogus_data(self):
+        test = Snode(None, [1, 2, 3, 4], [], [], 0.0, "test")  # no labels
+        test.make_predictor()
+        self.assertIsNone(test._class)
+
+    def test_copy_node(self):
+        px = [1, 2, 3, 4]
+        py = [1]
+        test = Snode(Stree(), px, py, [], 0.0, "test")
+        computed = Snode.copy(test)
+        self.assertListEqual(computed._X, px)
+        self.assertListEqual(computed._y, py)
+        self.assertEqual("test", computed._title)
+        self.assertIsInstance(computed._clf, Stree)
--- a/stree/tests/Splitter_test.py
+++ b/stree/tests/Splitter_test.py
@@ -0,0 +1,230 @@
+# type: ignore
+import os
+import unittest
+import random
+
+import numpy as np
+from sklearn.svm import SVC
+from sklearn.datasets import load_wine, load_iris
+from stree import Splitter
+
+
+class Splitter_test(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        self._random_state = 1
+        super().__init__(*args, **kwargs)
+
+    @staticmethod
+    def build(
+        clf=SVC,  # classifier class, it is instantiated below
+        min_samples_split=0,
+        splitter_type="random",
+        criterion="gini",
+        criteria="min_distance",
+        random_state=None,
+    ):
+        return Splitter(
+            clf=clf(random_state=random_state, kernel="rbf"),
+            min_samples_split=min_samples_split,
+            splitter_type=splitter_type,
+            criterion=criterion,
+            criteria=criteria,
+            random_state=random_state,
+        )
+
+    @classmethod
+    def setUp(cls):
+        os.environ["TESTING"] = "1"
+
+    def test_init(self):
+        with self.assertRaises(ValueError):
+            self.build(criterion="duck")
+        with self.assertRaises(ValueError):
+            self.build(splitter_type="duck")
+        with self.assertRaises(ValueError):
+            self.build(criteria="duck")
+        with self.assertRaises(ValueError):
+            _ = Splitter(clf=None)  # a classifier is mandatory
+        for splitter_type in ["best", "random"]:
+            for criterion in ["gini", "entropy"]:
+                for criteria in [
+                    "min_distance",
+                    "max_samples",
+                    "max_distance",
+                ]:
+                    tcl = self.build(
+                        splitter_type=splitter_type,
+                        criterion=criterion,
+                        criteria=criteria,
+                    )
+                    self.assertEqual(splitter_type, tcl._splitter_type)
+                    self.assertEqual(criterion, tcl._criterion)
+                    self.assertEqual(criteria, tcl._criteria)
+
+    def test_gini(self):
+        expected_values = [
+            ([0, 1, 1, 1, 1, 1, 0, 0, 0, 1], 0.48),
+            ([0, 1, 1, 2, 2, 3, 4, 5, 3, 2, 1, 1], 0.7777777777777778),
+            ([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2], 0.520408163265306),
+            ([0, 0, 1, 1, 1, 1, 0, 0], 0.5),
+            ([0, 0, 1, 1, 2, 2, 3, 3], 0.75),
+            ([0, 0, 1, 1, 1, 1, 1, 1], 0.375),
+            ([0], 0),
+            ([1, 1, 1, 1], 0),
+        ]
+        for labels, expected in expected_values:
+            self.assertAlmostEqual(expected, Splitter._gini(labels))
+            tcl = self.build(criterion="gini")
+            self.assertAlmostEqual(expected, tcl.criterion_function(labels))
+
+    def test_entropy(self):
+        expected_values = [
+            ([0, 1, 1, 1, 1, 1, 0, 0, 0, 1], 0.9709505944546686),
+            ([0, 1, 1, 2, 2, 3, 4, 5, 3, 2, 1, 1], 0.9111886696810589),
+            ([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2], 0.8120406807940999),
+            ([0, 0, 1, 1, 1, 1, 0, 0], 1),
+            ([0, 0, 1, 1, 2, 2, 3, 3], 1),
+            ([0, 0, 1, 1, 1, 1, 1, 1], 0.8112781244591328),
+            ([1], 0),
+            ([0, 0, 0, 0], 0),
+        ]
+        for labels, expected in expected_values:
+            self.assertAlmostEqual(expected, Splitter._entropy(labels))
+            tcl = self.build(criterion="entropy")
+            self.assertAlmostEqual(expected, tcl.criterion_function(labels))
+
+    def test_information_gain(self):
+        expected_values = [
+            (
+                [0, 1, 1, 1, 1, 1],
+                [0, 0, 0, 1],
+                0.16333333333333333,
+                0.25642589168200297,
+            ),
+            (
+                [0, 1, 1, 2, 2, 3, 4, 5, 3, 2, 1, 1],
+                [5, 3, 2, 1, 1],
+                0.007381776239907684,
+                -0.03328610916207225,
+            ),
+            ([], [], 0.0, 0.0),
+            ([1], [], 0.0, 0.0),
+            ([], [1], 0.0, 0.0),
+            ([0, 0, 0, 0], [0, 0], 0.0, 0.0),
+            ([], [1, 1, 1, 2], 0.0, 0.0),
+            (None, [1, 2, 3], 0.0, 0.0),
+            ([1, 2, 3], None, 0.0, 0.0),
+        ]
+        for yu, yd, expected_gini, expected_entropy in expected_values:
+            yu = np.array(yu, dtype=np.int32) if yu is not None else None
+            yd = np.array(yd, dtype=np.int32) if yd is not None else None
+            if yu is not None and yd is not None:
+                complete = np.append(yu, yd)
+            elif yd is not None:
+                complete = yd
+            else:
+                complete = yu
+            tcl = self.build(criterion="gini")
+            computed = tcl.information_gain(complete, yu, yd)
+            self.assertAlmostEqual(expected_gini, computed)
+            tcl = self.build(criterion="entropy")
+            computed = tcl.information_gain(complete, yu, yd)
+            self.assertAlmostEqual(expected_entropy, computed)
+
+    def test_max_samples(self):
+        tcl = self.build(criteria="max_samples")
+        data = np.array(
+            [
+                [-0.1, 0.2, -0.3],
+                [0.7, 0.01, -0.1],
+                [0.7, -0.9, 0.5],
+                [0.1, 0.2, 0.3],
+            ]
+        )
+        expected = np.array([0.2, 0.01, -0.9, 0.2])  # column 1 of data
+        y = [1, 2, 1, 0]  # label 1 is the most frequent
+        computed = tcl._max_samples(data, y)
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())
+
+    def test_min_distance(self):
+        tcl = self.build()
+        data = np.array(
+            [
+                [-0.1, 0.2, -0.3],
+                [0.7, 0.01, -0.1],
+                [0.7, -0.9, 0.5],
+                [0.1, 0.2, 0.3],
+            ]
+        )
+        expected = np.array([2, 2, 1, 0])  # argmin of every row of data
+        computed = tcl._min_distance(data, None)
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())
+
+    def test_max_distance(self):
+        tcl = self.build(criteria="max_distance")
+        data = np.array(
+            [
+                [-0.1, 0.2, -0.3],
+                [0.7, 0.01, -0.1],
+                [0.7, -0.9, 0.5],
+                [0.1, 0.2, 0.3],
+            ]
+        )
+        expected = np.array([1, 0, 0, 2])  # argmax of every row of data
+        computed = tcl._max_distance(data, None)
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())
+
+    def test_best_splitter_few_sets(self):
+        X, y = load_iris(return_X_y=True)
+        X = np.delete(X, 3, 1)  # drop column 3 of X
+        tcl = self.build(splitter_type="best", random_state=self._random_state)
+        dataset, computed = tcl.get_subspace(X, y, max_features=2)
+        self.assertListEqual([0, 2], list(computed))
+        self.assertListEqual(X[:, computed].tolist(), dataset.tolist())
+
+    def test_splitter_parameter(self):
+        expected_values = [
+            [2, 3, 5, 7],  # best   entropy min_distance
+            [0, 2, 4, 5],  # best   entropy max_samples
+            [0, 2, 8, 12],  # best   entropy max_distance
+            [1, 2, 5, 12],  # best   gini    min_distance
+            [0, 3, 4, 10],  # best   gini    max_samples
+            [1, 2, 9, 12],  # best   gini    max_distance
+            [3, 9, 11, 12],  # random entropy min_distance
+            [1, 5, 6, 9],  # random entropy max_samples
+            [1, 2, 4, 8],  # random entropy max_distance
+            [2, 6, 7, 12],  # random gini    min_distance
+            [3, 9, 10, 11],  # random gini    max_samples
+            [2, 5, 8, 12],  # random gini    max_distance
+        ]
+        X, y = load_wine(return_X_y=True)
+        rn = 0
+        for splitter_type in ["best", "random"]:
+            for criterion in ["entropy", "gini"]:
+                for criteria in [
+                    "min_distance",
+                    "max_samples",
+                    "max_distance",
+                ]:
+                    tcl = self.build(
+                        splitter_type=splitter_type,
+                        criterion=criterion,
+                        criteria=criteria,
+                    )
+                    expected = expected_values.pop(0)  # same order as loops
+                    random.seed(rn)  # different seed per combination
+                    rn += 1
+                    dataset, computed = tcl.get_subspace(X, y, max_features=4)
+                    # print(
+                    #     "{},  # {:7s}{:8s}{:15s}".format(
+                    #         list(computed), splitter_type, criterion,
+                    #           criteria,
+                    #     )
+                    # )
+                    self.assertListEqual(expected, list(computed))
+                    self.assertListEqual(
+                        X[:, computed].tolist(), dataset.tolist()
+                    )
--- a/stree/tests/Stree_test.py
+++ b/stree/tests/Stree_test.py
@@ -0,0 +1,438 @@
+# type: ignore
+import os
+import unittest
+import warnings
+
+import numpy as np
+from sklearn.datasets import load_iris, load_wine
+from sklearn.exceptions import ConvergenceWarning
+
+from stree import Stree, Snode
+from .utils import load_dataset
+
+
+class Stree_test(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        self._random_state = 1
+        self._kernels = ["linear", "rbf", "poly"]
+        super().__init__(*args, **kwargs)
+
+    @classmethod
+    def setUp(cls):
+        os.environ["TESTING"] = "1"
+
+    def _check_tree(self, node: Snode):
+        """Check recursively that the two children of every internal node
+        hold, class by class, exactly the samples of the node and that their
+        sizes match the predictions of the node classifier
+
+        Arguments:
+            node {Snode} -- node to check
+        """
+        if node.is_leaf():
+            return
+        y_prediction = node._clf.predict(node._X)
+        y_down = node.get_down()._y
+        y_up = node.get_up()._y
+        # Is a correct partition in terms of cardinality?
+        # i.e. The partition algorithm didn't forget any sample
+        self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
+        # Count per label, not per position: a class that is missing from a
+        # child must count as 0 instead of shifting the indices of the rest
+        total = dict(zip(*np.unique(node._y, return_counts=True)))
+        down = dict(zip(*np.unique(y_down, return_counts=True)))
+        up = dict(zip(*np.unique(y_up, return_counts=True)))
+        for label, count in total.items():
+            self.assertEqual(
+                count,
+                down.get(label, 0) + up.get(label, 0),
+                f"class {label} count differs between node and children",
+            )
+        # Is the partition made the same as the prediction?
+        # as the node is not a leaf...
+        _, count_yp = np.unique(y_prediction, return_counts=True)
+        self.assertEqual(count_yp[0], y_up.shape[0])
+        self.assertEqual(count_yp[1], y_down.shape[0])
+        self._check_tree(node.get_down())
+        self._check_tree(node.get_up())
+
+    def test_build_tree(self):
+        """Check if the tree is built the same way as predictions of models
+        """
+        warnings.filterwarnings("ignore")
+        for kernel in self._kernels:
+            clf = Stree(kernel=kernel, random_state=self._random_state)
+            clf.fit(*load_dataset(self._random_state))
+            self._check_tree(clf.tree_)
+
+    def test_single_prediction(self):
+        X, y = load_dataset(self._random_state)
+        for kernel in self._kernels:
+            clf = Stree(kernel=kernel, random_state=self._random_state)
+            yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
+            self.assertEqual(yp[0], y[0])
+
+    def test_multiple_prediction(self):
+        # For the first 27 elements the predictions match the ground truth
+        num = 27
+        X, y = load_dataset(self._random_state)
+        for kernel in self._kernels:
+            clf = Stree(kernel=kernel, random_state=self._random_state)
+            yp = clf.fit(X, y).predict(X[:num, :])
+            self.assertListEqual(y[:num].tolist(), yp.tolist())
+
+    def test_single_vs_multiple_prediction(self):
+        """Check if predicting sample by sample gives the same result as
+        predicting all samples at once
+        """
+        X, y = load_dataset(self._random_state)
+        for kernel in self._kernels:
+            clf = Stree(kernel=kernel, random_state=self._random_state)
+            clf.fit(X, y)
+            # Compute prediction line by line
+            yp_line = np.array([], dtype=int)
+            for xp in X:
+                yp_line = np.append(
+                    yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
+                )
+            # Compute prediction at once
+            yp_once = clf.predict(X)
+            self.assertListEqual(yp_line.tolist(), yp_once.tolist())
+
+    def test_iterator_and_str(self):
+        """Check the preorder iterator and the string form of the tree
+        """
+        expected = [
+            "root feaures=(0, 1, 2) impurity=0.5000",
+            "root - Down feaures=(0, 1, 2) impurity=0.0671",
+            "root - Down - Down, <cgaf> - Leaf class=1 belief= 0.975989 "
+            "impurity=0.0469 counts=(array([0, 1]), array([ 17, 691]))",
+            "root - Down - Up feaures=(0, 1, 2) impurity=0.3967",
+            "root - Down - Up - Down, <cgaf> - Leaf class=1 belief= 0.750000 "
+            "impurity=0.3750 counts=(array([0, 1]), array([1, 3]))",
+            "root - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 "
+            "impurity=0.0000 counts=(array([0]), array([7]))",
+            "root - Up, <cgaf> - Leaf class=0 belief= 0.928297 impurity=0.1331"
+            " counts=(array([0, 1]), array([725,  56]))",
+        ]
+        computed = []
+        expected_string = ""
+        clf = Stree(kernel="linear", random_state=self._random_state)
+        clf.fit(*load_dataset(self._random_state))
+        for node in clf:
+            computed.append(str(node))
+            expected_string += str(node) + "\n"
+        self.assertListEqual(expected, computed)
+        self.assertEqual(expected_string, str(clf))
+
+    @staticmethod
+    def test_is_a_sklearn_classifier():
+        warnings.filterwarnings("ignore", category=ConvergenceWarning)
+        warnings.filterwarnings("ignore", category=RuntimeWarning)
+        from sklearn.utils.estimator_checks import check_estimator
+
+        check_estimator(Stree())
+
+    def test_exception_if_C_is_negative(self):
+        tclf = Stree(C=-1)
+        with self.assertRaises(ValueError):
+            tclf.fit(*load_dataset(self._random_state))
+
+    def test_exception_if_bogus_split_criteria(self):
+        tclf = Stree(split_criteria="duck")
+        with self.assertRaises(ValueError):
+            tclf.fit(*load_dataset(self._random_state))
+
+    def test_check_max_depth_is_positive_or_None(self):
+        tcl = Stree()
+        self.assertIsNone(tcl.max_depth)
+        tcl = Stree(max_depth=1)
+        self.assertGreaterEqual(1, tcl.max_depth)
+        with self.assertRaises(ValueError):
+            tcl = Stree(max_depth=-1)
+            tcl.fit(*load_dataset(self._random_state))
+
+    def test_check_max_depth(self):
+        depths = (3, 4)
+        for depth in depths:
+            tcl = Stree(random_state=self._random_state, max_depth=depth)
+            tcl.fit(*load_dataset(self._random_state))
+            self.assertEqual(depth, tcl.depth_)
+
+    def test_unfitted_tree_is_iterable(self):
+        tcl = Stree()
+        self.assertEqual(0, len(list(tcl)))
+
+    def test_min_samples_split(self):
+        dataset = [[1], [2], [3]], [1, 1, 0]
+        tcl_split = Stree(min_samples_split=3).fit(*dataset)
+        self.assertIsNotNone(tcl_split.tree_.get_down())
+        self.assertIsNotNone(tcl_split.tree_.get_up())
+        tcl_nosplit = Stree(min_samples_split=4).fit(*dataset)
+        self.assertIsNone(tcl_nosplit.tree_.get_down())
+        self.assertIsNone(tcl_nosplit.tree_.get_up())
+
+    def test_simple_muticlass_dataset(self):
+        for kernel in self._kernels:
+            clf = Stree(
+                kernel=kernel,
+                split_criteria="max_samples",
+                random_state=self._random_state,
+            )
+            px = [[1, 2], [5, 6], [9, 10]]
+            py = [0, 1, 2]
+            clf.fit(px, py)
+            self.assertEqual(1.0, clf.score(px, py))
+            self.assertListEqual(py, clf.predict(px).tolist())
+            self.assertListEqual(py, clf.classes_.tolist())
+
+    def test_muticlass_dataset(self):
+        datasets = {
+            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
+            "Iris": load_iris(return_X_y=True),
+        }
+        outcomes = {
+            "Synt": {
+                "max_samples linear": 0.9533333333333334,
+                "max_samples rbf": 0.836,
+                "max_samples poly": 0.9473333333333334,
+                "min_distance linear": 0.9533333333333334,
+                "min_distance rbf": 0.836,
+                "min_distance poly": 0.9473333333333334,
+                "max_distance linear": 0.9533333333333334,
+                "max_distance rbf": 0.836,
+                "max_distance poly": 0.9473333333333334,
+            },
+            "Iris": {
+                "max_samples linear": 0.98,
+                "max_samples rbf": 1.0,
+                "max_samples poly": 1.0,
+                "min_distance linear": 0.98,
+                "min_distance rbf": 1.0,
+                "min_distance poly": 1.0,
+                "max_distance linear": 0.98,
+                "max_distance rbf": 1.0,
+                "max_distance poly": 1.0,
+            },
+        }
+        for name, dataset in datasets.items():
+            px, py = dataset
+            for criteria in ["max_samples", "min_distance", "max_distance"]:
+                for kernel in self._kernels:
+                    clf = Stree(
+                        C=1e4,
+                        max_iter=1e4,
+                        kernel=kernel,
+                        random_state=self._random_state,
+                    )
+                    clf.fit(px, py)
+                    outcome = outcomes[name][f"{criteria} {kernel}"]
+                    self.assertAlmostEqual(outcome, clf.score(px, py))
+
+    def test_max_features(self):
+        n_features = 16
+        expected_values = [
+            ("auto", 4),
+            ("log2", 4),
+            ("sqrt", 4),
+            (0.5, 8),
+            (3, 3),
+            (None, 16),
+        ]
+        clf = Stree()
+        clf.n_features_in_ = n_features
+        for max_features, expected in expected_values:
+            clf.set_params(**dict(max_features=max_features))
+            computed = clf._initialize_max_features()
+            self.assertEqual(expected, computed)
+        # Check bogus max_features
+        values = ["duck", -0.1, 0.0]
+        for max_features in values:
+            clf.set_params(**dict(max_features=max_features))
+            with self.assertRaises(ValueError):
+                _ = clf._initialize_max_features()
+
+    def test_get_subspaces(self):
+        dataset = np.random.random((10, 16))
+        y = np.random.randint(0, 2, 10)
+        expected_values = [
+            ("auto", 4),
+            ("log2", 4),
+            ("sqrt", 4),
+            (0.5, 8),
+            (3, 3),
+            (None, 16),
+        ]
+        clf = Stree()
+        for max_features, expected in expected_values:
+            clf.set_params(**dict(max_features=max_features))
+            clf.fit(dataset, y)
+            computed, indices = clf.splitter_.get_subspace(
+                dataset, y, clf.max_features_
+            )
+            self.assertListEqual(
+                dataset[:, indices].tolist(), computed.tolist()
+            )
+            self.assertEqual(expected, len(indices))
+
+    def test_bogus_criterion(self):
+        clf = Stree(criterion="duck")
+        with self.assertRaises(ValueError):
+            clf.fit(*load_dataset())
+
+    def test_predict_feature_dimensions(self):
+        X = np.random.rand(10, 5)
+        y = np.random.randint(0, 2, 10)
+        clf = Stree()
+        clf.fit(X, y)
+        with self.assertRaises(ValueError):
+            clf.predict(X[:, :3])
+
+    # Tests of score
+
+    def test_score_binary(self):
+        X, y = load_dataset(self._random_state)
+        accuracies = [
+            0.9506666666666667,
+            0.9606666666666667,
+            0.9433333333333334,
+        ]
+        for kernel, accuracy_expected in zip(self._kernels, accuracies):
+            clf = Stree(random_state=self._random_state, kernel=kernel,)
+            clf.fit(X, y)
+            accuracy_score = clf.score(X, y)
+            yp = clf.predict(X)
+            accuracy_computed = np.mean(yp == y)
+            self.assertEqual(accuracy_score, accuracy_computed)
+            self.assertAlmostEqual(accuracy_expected, accuracy_score)
+
+    def test_score_max_features(self):
+        X, y = load_dataset(self._random_state)
+        clf = Stree(random_state=self._random_state, max_features=2)
+        clf.fit(X, y)
+        self.assertAlmostEqual(0.9426666666666667, clf.score(X, y))
+
+    def test_score_multi_class(self):
+        warnings.filterwarnings("ignore")
+        accuracies = [
+            0.8258427,  # Wine    linear min_distance
+            0.6741573,  # Wine    linear max_distance
+            0.8314607,  # Wine    linear max_samples
+            0.6629213,  # Wine    rbf   min_distance
+            1.0000000,  # Wine    rbf   max_distance
+            0.4044944,  # Wine    rbf   max_samples
+            0.9157303,  # Wine    poly  min_distance
+            1.0000000,  # Wine    poly  max_distance
+            0.7640449,  # Wine    poly  max_samples
+            0.9933333,  # Iris    linear min_distance
+            0.9666667,  # Iris    linear max_distance
+            0.9666667,  # Iris    linear max_samples
+            0.9800000,  # Iris    rbf   min_distance
+            0.9800000,  # Iris    rbf   max_distance
+            0.9800000,  # Iris    rbf   max_samples
+            1.0000000,  # Iris    poly  min_distance
+            1.0000000,  # Iris    poly  max_distance
+            1.0000000,  # Iris    poly  max_samples
+            0.8993333,  # Synthetic linear min_distance
+            0.6533333,  # Synthetic linear max_distance
+            0.9313333,  # Synthetic linear max_samples
+            0.8320000,  # Synthetic rbf   min_distance
+            0.6660000,  # Synthetic rbf   max_distance
+            0.8320000,  # Synthetic rbf   max_samples
+            0.6066667,  # Synthetic poly  min_distance
+            0.6840000,  # Synthetic poly  max_distance
+            0.6340000,  # Synthetic poly  max_samples
+        ]
+        datasets = [
+            ("Wine", load_wine(return_X_y=True)),
+            ("Iris", load_iris(return_X_y=True)),
+            (
+                "Synthetic",
+                load_dataset(self._random_state, n_classes=3, n_features=5),
+            ),
+        ]
+        for dataset_name, dataset in datasets:
+            X, y = dataset
+            for kernel in self._kernels:
+                for criteria in [
+                    "min_distance",
+                    "max_distance",
+                    "max_samples",
+                ]:
+                    clf = Stree(
+                        C=17,
+                        random_state=self._random_state,
+                        kernel=kernel,
+                        split_criteria=criteria,
+                        degree=5,
+                        gamma="auto",
+                    )
+                    clf.fit(X, y)
+                    accuracy_score = clf.score(X, y)
+                    yp = clf.predict(X)
+                    accuracy_computed = np.mean(yp == y)
+                    # print(
+                    #     "{:.7f},  # {:7} {:5} {}".format(
+                    #         accuracy_score, dataset_name, kernel, criteria
+                    #     )
+                    # )
+                    accuracy_expected = accuracies.pop(0)
+                    self.assertEqual(accuracy_score, accuracy_computed)
+                    self.assertAlmostEqual(accuracy_expected, accuracy_score)
+
+    def test_bogus_splitter_parameter(self):
+        clf = Stree(splitter="duck")
+        with self.assertRaises(ValueError):
+            clf.fit(*load_dataset())
+
+    def test_weights_removing_class(self):
+        # This patch avoids a stderr message from the sklearn svm lib
+        # "WARNING: class label x specified in weight is not found"
+        X = np.array(
+            [
+                [0.1, 0.1],
+                [0.1, 0.2],
+                [0.2, 0.1],
+                [5, 6],
+                [8, 9],
+                [6, 7],
+                [0.2, 0.2],
+            ]
+        )
+        y = np.array([0, 0, 0, 1, 1, 1, 0])
+        epsilon = 1e-5
+        weights = [1, 1, 1, 0, 0, 0, 1]
+        weights = np.array(weights, dtype="float64")
+        weights_epsilon = [x + epsilon for x in weights]
+        weights_no_zero = np.array([1, 1, 1, 0, 0, 2, 1])
+        original = weights_no_zero.copy()
+        clf = Stree()
+        clf.fit(X, y)
+        node = clf.train(X, y, weights, 1, "test",)
+        # if a class is lost with zero weights the patch adds epsilon
+        self.assertListEqual(weights.tolist(), weights_epsilon)
+        self.assertListEqual(node._sample_weight.tolist(), weights_epsilon)
+        # zero weights are ok when they don't erase a class
+        _ = clf.train(X, y, weights_no_zero, 1, "test")
+        self.assertListEqual(weights_no_zero.tolist(), original.tolist())
+
+    def test_build_predictor(self):
+        X, y = load_dataset(self._random_state)
+        clf = Stree(random_state=self._random_state)
+        with self.assertRaises(ValueError):
+            clf.tree_ = None
+            clf._build_predictor()
+        clf.fit(X, y)
+        node = clf.tree_.get_down().get_down()
+        expected_impurity = 0.04686951386893923
+        expected_class = 1
+        expected_belief = 0.9759887005649718
+        self.assertAlmostEqual(expected_impurity, node._impurity)
+        self.assertAlmostEqual(expected_belief, node._belief)
+        self.assertEqual(expected_class, node._class)
+        node._belief = 0.0
+        node._class = None
+        clf._build_predictor()
+        node = clf.tree_.get_down().get_down()
+        self.assertAlmostEqual(expected_belief, node._belief)
+        self.assertEqual(expected_class, node._class)
--- a/stree/tests/Strees_grapher_test.py
+++ b/stree/tests/Strees_grapher_test.py
@@ -1,211 +0,0 @@
-import os
-import imghdr
-import unittest
-
-import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
-import warnings
-from sklearn.datasets import make_classification
-
-from stree import Stree_grapher, Snode_graph, Snode
-
-
-def get_dataset(random_state=0, n_features=3):
-    X, y = make_classification(
-        n_samples=1500,
-        n_features=n_features,
-        n_informative=3,
-        n_redundant=0,
-        n_repeated=0,
-        n_classes=2,
-        n_clusters_per_class=2,
-        class_sep=1.5,
-        flip_y=0,
-        weights=[0.5, 0.5],
-        random_state=random_state,
-    )
-    return X, y
-
-
-class Stree_grapher_test(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        self._random_state = 1
-        self._clf = Stree_grapher(dict(random_state=self._random_state))
-        self._clf.fit(*get_dataset(self._random_state, n_features=4))
-        super().__init__(*args, **kwargs)
-
-    @classmethod
-    def setUp(cls):
-        os.environ["TESTING"] = "1"
-
-    def test_iterator(self):
-        """Check preorder iterator
-        """
-        expected = [
-            "root",
-            "root - Down",
-            "root - Down - Down, <cgaf> - Leaf class=1 belief= 0.976023 counts"
-            "=(array([0, 1]), array([ 17, 692]))",
-            "root - Down - Up",
-            "root - Down - Up - Down, <cgaf> - Leaf class=0 belief= 0.500000 "
-            "counts=(array([0, 1]), array([1, 1]))",
-            "root - Down - Up - Up, <cgaf> - Leaf class=0 belief= 0.888889 "
-            "counts=(array([0, 1]), array([8, 1]))",
-            "root - Up, <cgaf> - Leaf class=0 belief= 0.928205 counts=(array("
-            "[0, 1]), array([724,  56]))",
-        ]
-        computed = []
-        for node in self._clf:
-            computed.append(str(node))
-        self.assertListEqual(expected, computed)
-
-    def test_score(self):
-        X, y = get_dataset(self._random_state)
-        accuracy_score = self._clf.score(X, y)
-        yp = self._clf.predict(X)
-        accuracy_computed = np.mean(yp == y)
-        self.assertEqual(accuracy_score, accuracy_computed)
-        self.assertGreater(accuracy_score, 0.86)
-
-    def test_save_all(self):
-        folder_name = os.path.join(os.sep, "tmp", "stree")
-        if os.path.isdir(folder_name):
-            os.rmdir(folder_name)
-        file_names = [
-            os.path.join(folder_name, f"STnode{i}.png") for i in range(1, 8)
-        ]
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            self._clf.save_all(save_folder=folder_name)
-        for file_name in file_names:
-            self.assertTrue(os.path.exists(file_name))
-            self.assertEqual("png", imghdr.what(file_name))
-            os.remove(file_name)
-        os.rmdir(folder_name)
-
-    def test_plot_all(self):
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            num_figures_before = plt.gcf().number
-            self._clf.plot_all()
-            num_figures_after = plt.gcf().number
-        self.assertEqual(7, num_figures_after - num_figures_before)
-
-
-class Snode_graph_test(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        self._random_state = 1
-        self._clf = Stree_grapher(dict(random_state=self._random_state))
-        self._clf.fit(*get_dataset(self._random_state))
-        super().__init__(*args, **kwargs)
-
-    @classmethod
-    def setUp(cls):
-        os.environ["TESTING"] = "1"
-
-    def test_plot_size(self):
-        default = self._clf._tree_gr.get_plot_size()
-        expected = (17, 3)
-        self._clf._tree_gr.set_plot_size(expected)
-        self.assertEqual(expected, self._clf._tree_gr.get_plot_size())
-        self._clf._tree_gr.set_plot_size(default)
-        self.assertEqual(default, self._clf._tree_gr.get_plot_size())
-
-    def test_attributes_in_leaves_graph(self):
-        """Check if the attributes in leaves have correct values so they form a
-        predictor
-        """
-
-        def check_leave(node: Snode_graph):
-            if not node.is_leaf():
-                check_leave(node.get_down())
-                check_leave(node.get_up())
-                return
-            # Check Belief in leave
-            classes, card = np.unique(node._y, return_counts=True)
-            max_card = max(card)
-            min_card = min(card)
-            if len(classes) > 1:
-                try:
-                    belief = max_card / (max_card + min_card)
-                except ZeroDivisionError:
-                    belief = 0.0
-            else:
-                belief = 1
-            self.assertEqual(belief, node._belief)
-            # Check Class
-            class_computed = classes[card == max_card]
-            self.assertEqual(class_computed, node._class)
-
-        check_leave(self._clf._tree_gr)
-
-    def test_nodes_graph_coefs(self):
-        """Check if the nodes of the tree have the right attributes filled
-        """
-
-        def run_tree(node: Snode_graph):
-            if node._belief < 1:
-                # only exclude pure leaves
-                self.assertIsNotNone(node._clf)
-                self.assertIsNotNone(node._clf.coef_)
-            if node.is_leaf():
-                return
-            run_tree(node.get_down())
-            run_tree(node.get_up())
-
-        run_tree(self._clf._tree_gr)
-
-    def test_save_hyperplane(self):
-        folder_name = "/tmp/"
-        file_name = os.path.join(folder_name, "STnode1.png")
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            self._clf._tree_gr.save_hyperplane(folder_name)
-        self.assertTrue(os.path.exists(file_name))
-        self.assertEqual("png", imghdr.what(file_name))
-        os.remove(file_name)
-
-    def test_plot_hyperplane_with_distribution(self):
-        plt.close()
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            num_figures_before = plt.gcf().number
-            self._clf._tree_gr.plot_hyperplane(plot_distribution=True)
-            num_figures_after = plt.gcf().number
-        self.assertEqual(1, num_figures_after - num_figures_before)
-
-    def test_plot_hyperplane_without_distribution(self):
-        plt.close()
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            num_figures_before = plt.gcf().number
-            self._clf._tree_gr.plot_hyperplane(plot_distribution=False)
-            num_figures_after = plt.gcf().number
-        self.assertEqual(1, num_figures_after - num_figures_before)
-
-    def test_plot_distribution(self):
-        plt.close()
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            matplotlib.use("Agg")
-            num_figures_before = plt.gcf().number
-            self._clf._tree_gr.plot_distribution()
-            num_figures_after = plt.gcf().number
-        self.assertEqual(1, num_figures_after - num_figures_before)
-
-    def test_set_axis_limits(self):
-        node = Snode_graph(Snode(None, None, None, "test"))
-        limits = (-2, 2), (-3, 3), (-4, 4)
-        node.set_axis_limits(limits)
-        computed = node.get_axis_limits()
-        x, y, z = limits
-        xx, yy, zz = computed
-        self.assertEqual(x, xx)
-        self.assertEqual(y, yy)
-        self.assertEqual(z, zz)
--- a/stree/tests/Strees_test.py
+++ b/stree/tests/Strees_test.py
@@ -1,340 +0,0 @@
-import os
-import unittest
-
-import numpy as np
-from sklearn.datasets import make_classification
-
-from stree import Stree, Snode
-
-
-def get_dataset(random_state=0):
-    X, y = make_classification(
-        n_samples=1500,
-        n_features=3,
-        n_informative=3,
-        n_redundant=0,
-        n_repeated=0,
-        n_classes=2,
-        n_clusters_per_class=2,
-        class_sep=1.5,
-        flip_y=0,
-        weights=[0.5, 0.5],
-        random_state=random_state,
-    )
-    return X, y
-
-
-class Stree_test(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        self._random_state = 1
-        self._kernels = ["linear", "rbf", "poly"]
-        super().__init__(*args, **kwargs)
-
-    @classmethod
-    def setUp(cls):
-        os.environ["TESTING"] = "1"
-
-    def _check_tree(self, node: Snode):
-        """Check recursively that the nodes that are not leaves have the
-        correct number of labels and its sons have the right number of elements
-        in their dataset
-
-        Arguments:
-            node {Snode} -- node to check
-        """
-        if node.is_leaf():
-            return
-        y_prediction = node._clf.predict(node._X)
-        y_down = node.get_down()._y
-        y_up = node.get_up()._y
-        # Is a correct partition in terms of cadinality?
-        # i.e. The partition algorithm didn't forget any sample
-        self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
-        unique_y, count_y = np.unique(node._y, return_counts=True)
-        _, count_d = np.unique(y_down, return_counts=True)
-        _, count_u = np.unique(y_up, return_counts=True)
-        #
-        for i in unique_y:
-            try:
-                number_down = count_d[i]
-            except IndexError:
-                number_down = 0
-            try:
-                number_up = count_u[i]
-            except IndexError:
-                number_up = 0
-            self.assertEqual(count_y[i], number_down + number_up)
-        # Is the partition made the same as the prediction?
-        # as the node is not a leaf...
-        _, count_yp = np.unique(y_prediction, return_counts=True)
-        self.assertEqual(count_yp[0], y_up.shape[0])
-        self.assertEqual(count_yp[1], y_down.shape[0])
-        self._check_tree(node.get_down())
-        self._check_tree(node.get_up())
-
-    def test_build_tree(self):
-        """Check if the tree is built the same way as predictions of models
-        """
-        import warnings
-
-        warnings.filterwarnings("ignore")
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            clf.fit(*get_dataset(self._random_state))
-            self._check_tree(clf.tree_)
-
-    def _find_out(
-        self, px: np.array, x_original: np.array, y_original
-    ) -> list:
-        """Find the original values of y for a given array of samples
-
-        Arguments:
-            px {np.array} -- array of samples to search for
-            x_original {np.array} -- original dataset
-            y_original {[type]} -- original classes
-
-        Returns:
-            np.array -- classes of the given samples
-        """
-        res = []
-        for needle in px:
-            for row in range(x_original.shape[0]):
-                if all(x_original[row, :] == needle):
-                    res.append(y_original[row])
-        return res
-
-    def test_single_prediction(self):
-        probs = [0.29026400766, 0.73105613, 0.0307635]
-        X, y = get_dataset(self._random_state)
-        for kernel, prob in zip(self._kernels, probs):
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
-            self.assertEqual(yp[0], y[0])
-
-    def test_multiple_prediction(self):
-        # First 27 elements the predictions are the same as the truth
-        num = 27
-        X, y = get_dataset(self._random_state)
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            yp = clf.fit(X, y).predict(X[:num, :])
-            self.assertListEqual(y[:num].tolist(), yp.tolist())
-
-    def test_score(self):
-        X, y = get_dataset(self._random_state)
-        for kernel, accuracy_expected in zip(
-            self._kernels,
-            [0.9506666666666667, 0.9606666666666667, 0.9433333333333334],
-        ):
-            clf = Stree(random_state=self._random_state, kernel=kernel,)
-            clf.fit(X, y)
-            accuracy_score = clf.score(X, y)
-            yp = clf.predict(X)
-            accuracy_computed = np.mean(yp == y)
-            self.assertEqual(accuracy_score, accuracy_computed)
-            self.assertAlmostEqual(accuracy_expected, accuracy_score)
-
-    def test_single_predict_proba(self):
-        """Check the element 28 probability of being 1
-        """
-        decimals = 5
-        element = 28
-        probs = [0.29026400766, 0.73105613, 0.0307635]
-        X, y = get_dataset(self._random_state)
-        self.assertEqual(1, y[element])
-        for kernel, prob in zip(self._kernels, probs):
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            yp = clf.fit(X, y).predict_proba(
-                X[element, :].reshape(-1, X.shape[1])
-            )
-            self.assertAlmostEqual(
-                np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
-            )
-            self.assertAlmostEqual(
-                round(prob, decimals), round(yp[0, 1], decimals), decimals
-            )
-
-    def test_multiple_predict_proba(self):
-        # First 27 elements the predictions are the same as the truth
-        num = 27
-        X, y = get_dataset(self._random_state)
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            clf.fit(X, y)
-            yp = clf.predict_proba(X[:num, :])
-            self.assertListEqual(
-                y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
-            )
-
-    def test_single_vs_multiple_prediction(self):
-        """Check if predicting sample by sample gives the same result as
-        predicting all samples at once
-        """
-        X, y = get_dataset(self._random_state)
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            clf.fit(X, y)
-            # Compute prediction line by line
-            yp_line = np.array([], dtype=int)
-            for xp in X:
-                yp_line = np.append(
-                    yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
-                )
-            # Compute prediction at once
-            yp_once = clf.predict(X)
-            self.assertListEqual(yp_line.tolist(), yp_once.tolist())
-
-    def test_iterator_and_str(self):
-        """Check preorder iterator
-        """
-        expected = [
-            "root",
-            "root - Down",
-            "root - Down - Down, <cgaf> - Leaf class=1 belief= 0.975989 counts"
-            "=(array([0, 1]), array([ 17, 691]))",
-            "root - Down - Up",
-            "root - Down - Up - Down, <cgaf> - Leaf class=1 belief= 0.750000 "
-            "counts=(array([0, 1]), array([1, 3]))",
-            "root - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 "
-            "counts=(array([0]), array([7]))",
-            "root - Up, <cgaf> - Leaf class=0 belief= 0.928297 counts=(array("
-            "[0, 1]), array([725,  56]))",
-        ]
-        computed = []
-        expected_string = ""
-        clf = Stree(kernel="linear", random_state=self._random_state)
-        clf.fit(*get_dataset(self._random_state))
-        for node in clf:
-            computed.append(str(node))
-            expected_string += str(node) + "\n"
-        self.assertListEqual(expected, computed)
-        self.assertEqual(expected_string, str(clf))
-
-    def test_is_a_sklearn_classifier(self):
-        import warnings
-        from sklearn.exceptions import ConvergenceWarning
-
-        warnings.filterwarnings("ignore", category=ConvergenceWarning)
-        warnings.filterwarnings("ignore", category=RuntimeWarning)
-        from sklearn.utils.estimator_checks import check_estimator
-
-        check_estimator(Stree())
-
-    def test_exception_if_C_is_negative(self):
-        tclf = Stree(C=-1)
-        with self.assertRaises(ValueError):
-            tclf.fit(*get_dataset(self._random_state))
-
-    def test_check_max_depth_is_positive_or_None(self):
-        tcl = Stree()
-        self.assertIsNone(tcl.max_depth)
-        tcl = Stree(max_depth=1)
-        self.assertGreaterEqual(1, tcl.max_depth)
-        with self.assertRaises(ValueError):
-            tcl = Stree(max_depth=-1)
-            tcl.fit(*get_dataset(self._random_state))
-
-    def test_check_max_depth(self):
-        depths = (3, 4)
-        for depth in depths:
-            tcl = Stree(random_state=self._random_state, max_depth=depth)
-            tcl.fit(*get_dataset(self._random_state))
-            self.assertEqual(depth, tcl.depth_)
-
-    def test_unfitted_tree_is_iterable(self):
-        tcl = Stree()
-        self.assertEqual(0, len(list(tcl)))
-
-    def test_min_samples_split(self):
-        tcl_split = Stree(min_samples_split=3)
-        tcl_nosplit = Stree(min_samples_split=4)
-        dataset = [[1], [2], [3]], [1, 1, 0]
-        tcl_split.fit(*dataset)
-        self.assertIsNotNone(tcl_split.tree_.get_down())
-        self.assertIsNotNone(tcl_split.tree_.get_up())
-        tcl_nosplit.fit(*dataset)
-        self.assertIsNone(tcl_nosplit.tree_.get_down())
-        self.assertIsNone(tcl_nosplit.tree_.get_up())
-
-    def test_muticlass_dataset(self):
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
-            px = [[1, 2], [3, 4], [5, 6]]
-            py = [1, 2, 3]
-            clf.fit(px, py)
-            self.assertEqual(1.0, clf.score(px, py))
-            self.assertListEqual([1, 2, 3], clf.predict(px).tolist())
-
-
-class Snode_test(unittest.TestCase):
-    def __init__(self, *args, **kwargs):
-        self._random_state = 1
-        self._clf = Stree(random_state=self._random_state)
-        self._clf.fit(*get_dataset(self._random_state))
-        super().__init__(*args, **kwargs)
-
-    @classmethod
-    def setUp(cls):
-        os.environ["TESTING"] = "1"
-
-    def test_attributes_in_leaves(self):
-        """Check if the attributes in leaves have correct values so they form a
-        predictor
-        """
-
-        def check_leave(node: Snode):
-            if not node.is_leaf():
-                check_leave(node.get_down())
-                check_leave(node.get_up())
-                return
-            # Check Belief in leave
-            classes, card = np.unique(node._y, return_counts=True)
-            max_card = max(card)
-            min_card = min(card)
-            if len(classes) > 1:
-                try:
-                    belief = max_card / (max_card + min_card)
-                except ZeroDivisionError:
-                    belief = 0.0
-            else:
-                belief = 1
-            self.assertEqual(belief, node._belief)
-            # Check Class
-            class_computed = classes[card == max_card]
-            self.assertEqual(class_computed, node._class)
-
-        check_leave(self._clf.tree_)
-
-    def test_nodes_coefs(self):
-        """Check if the nodes of the tree have the right attributes filled
-        """
-
-        def run_tree(node: Snode):
-            if node._belief < 1:
-                # only exclude pure leaves
-                self.assertIsNotNone(node._clf)
-                self.assertIsNotNone(node._clf.coef_)
-            if node.is_leaf():
-                return
-            run_tree(node.get_down())
-            run_tree(node.get_up())
-
-        run_tree(self._clf.tree_)
-
-    def test_make_predictor_on_leaf(self):
-        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
-        test.make_predictor()
-        self.assertEqual(1, test._class)
-        self.assertEqual(0.75, test._belief)
-
-    def test_make_predictor_on_not_leaf(self):
-        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
-        test.set_up(Snode(None, [1], [1], "another_test"))
-        test.make_predictor()
-        self.assertIsNone(test._class)
-        self.assertEqual(0, test._belief)
-
-    def test_make_predictor_on_leaf_bogus_data(self):
-        test = Snode(None, [1, 2, 3, 4], [], "test")
-        test.make_predictor()
-        self.assertIsNone(test._class)
--- a/stree/tests/__init__.py
+++ b/stree/tests/__init__.py
@@ -1,9 +1,6 @@
-from .Strees_test import Stree_test, Snode_test
-from .Strees_grapher_test import Stree_grapher_test, Snode_graph_test
+# type: ignore
+from .Stree_test import Stree_test
+from .Snode_test import Snode_test
+from .Splitter_test import Splitter_test

-__all__ = [
-    "Stree_test",
-    "Snode_test",
-    "Stree_grapher_test",
-    "Snode_graph_test",
-]
+__all__ = ["Stree_test", "Snode_test", "Splitter_test"]
--- a/stree/tests/utils.py
+++ b/stree/tests/utils.py
@@ -0,0 +1,18 @@
+# type: ignore
+from sklearn.datasets import make_classification
+
+
+def load_dataset(random_state=0, n_classes=2, n_features=3):
+    X, y = make_classification(
+        n_samples=1500,
+        n_features=n_features,
+        n_informative=3,
+        n_redundant=0,
+        n_repeated=0,
+        n_classes=n_classes,
+        n_clusters_per_class=2,
+        class_sep=1.5,
+        flip_y=0,
+        random_state=random_state,
+    )
+    return X, y
Author	SHA1	Message	Date
Ricardo Montañana	d1e30a3372	Refactor predict and score and make mypy --strict	2020-07-01 18:37:10 +02:00
Ricardo Montañana	fa001f97a4	First Approach	2020-06-28 02:46:20 +02:00
Ricardo Montañana	be552fdd6c	Add test for getting 3 feature_sets in Splitter Add ensemble notebook	2020-06-28 02:45:08 +02:00
Ricardo Montañana	5e3a8e3ec5	Change adaboost notebook	2020-06-27 23:34:15 +02:00
Ricardo Montañana	554ec03c32	Get only 3 sets for best split Fix flaky test in Splitter_test	2020-06-27 18:29:40 +02:00
Ricardo Montañana	4b7e4a3fb0	better solution to the sklearn bagging problem Add better tests enhance .coveragerc	2020-06-26 11:22:45 +02:00
Ricardo Montañana	76723993fd	Solve Warning class label not found when bagging	2020-06-25 13:07:50 +02:00
Ricardo Montañana	ecd0b86f4d	Solve the mistake of min and max distance The split criteria functions min and max distance return classes while max_samples return distances positives and negatives to hyperplane of the class with more samples in node	2020-06-17 00:13:52 +02:00
Ricardo Montañana	3e52a4746c	Fix entroy and information_gain functions	2020-06-16 13:56:02 +02:00
Ricardo Montañana Gómez	a20e45e8e7	Merge pull request #10 from Doctorado-ML/add_subspaces #2 Add subspaces	2020-06-15 11:30:53 +02:00
Ricardo Montañana	9334951d1b	#2 Cosmetic and style updates	2020-06-15 11:09:11 +02:00
Ricardo Montañana	736ab7ef20	#2 update benchmark notebook	2020-06-15 10:33:51 +02:00
Ricardo Montañana	c94bc068bd	#2 Refactor Stree & create Splitter Add and test splitter parameter	2020-06-15 00:22:57 +02:00
Ricardo Montañana	502ee72799	#2 Add predict and score support Add a test in features notebook Show max_features in main.py	2020-06-14 14:00:21 +02:00
Ricardo Montañana	f1ee4de37b	#2 - Add gini and entropy measures rename get_dataset to load_dataset add features and impurity to __str__ of node	2020-06-14 03:08:55 +02:00
Ricardo Montañana	ae1c199e21	# 2 - add max_features parameters	2020-06-13 17:58:45 +02:00
Ricardo Montañana	1bfe273a70	Fix problem in _min_distance Remove grapher (moved to another repo)	2020-06-12 00:50:25 +02:00
Ricardo Montañana Gómez	647d21bdb5	Merge pull request #9 from Doctorado-ML/add_multiclass #6 Add multiclass	2020-06-11 16:30:16 +02:00
Ricardo Montañana	1d392d534f	#6 - Update tests and codecov conf	2020-06-11 13:45:24 +02:00
Ricardo Montañana	f360a2640c	#6 - Add multiclass support Removed (by now) predict_proba. Created a notebook in jupyter Added split_criteria parameter with min_distance and max_samples values Refactor _distances Refactor _split_criteria Refactor _reorder_results	2020-06-11 13:10:52 +02:00
Ricardo Montañana Gómez	45510b43bc	Merge pull request #5 from Doctorado-ML/add_kernels #3 Add kernels to STree	2020-06-09 13:43:31 +02:00