Compare commits

..

11 Commits

Author SHA1 Message Date
e95bd9697a Make Stree a sklearn estimator
Added check_estimator in notebook test2
Added a Stree test with check_estimator
2020-05-25 19:51:39 +02:00
5956cd0cd2 Update google colab setup in notebooks
Update save_all in grapher to make dest. folder if it doesn't exist
2020-05-24 20:13:27 +02:00
27b278860d Fix install from scratch 2020-05-24 18:47:55 +02:00
d5d723c67f update setup.py to include tests suite 2020-05-23 23:59:03 +02:00
77f10281c1 Make project python package friendly
- Add setup.py
- Move classes to module files
- Move tests folder inside module folder
2020-05-23 23:40:33 +02:00
ac1483ae1d update requirements to allow matplotlib widget 2020-05-23 00:05:58 +02:00
e51690ed95 Implement grapher and notebook to test it 2020-05-22 19:42:13 +02:00
a4595f5815 Update notebooks and readme with cosmetic changes 2020-05-20 18:11:57 +02:00
316f84cc63 Fix precision issues in tests executed in Travis 2020-05-20 15:02:31 +02:00
6e35628c85 Grapher working 2020-05-20 14:26:55 +02:00
c0ef71f139 first approx to grapher 2020-05-20 12:32:17 +02:00
21 changed files with 1065 additions and 343 deletions

View File

@@ -10,4 +10,4 @@ notifications:
on_success: never # default: change
on_failure: always # default: always
# command to run tests
-script: python -m unittest tests.Stree_test tests.Snode_test
script: python -m unittest stree.tests

View File

@@ -4,20 +4,38 @@
Oblique Tree classifier based on SVM nodes
-## Example
![Stree](https://raw.github.com/doctorado-ml/stree/master/example.png)
-### Jupyter
## Installation
-[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/test.ipynb)
```bash
pip install git+https://github.com/doctorado-ml/stree
```
## Examples
### Jupyter notebooks
##### Slow launch but better integration
* [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/test.ipynb) Test notebook
##### Fast launch but have to run first commented out cell for setup
* [![Test](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test.ipynb) Test notebook
* [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test2.ipynb) Another Test notebook
* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test_graphs.ipynb) Test Graphics notebook
### Command line
-```python
```bash
python main.py
```
## Tests
-```python
```bash
-python -m unittest -v tests.Stree_test tests.Snode_test
python -m unittest -v stree.tests
```
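For context, a minimal usage sketch of the estimator this changeset packages (not part of the diff); it assumes the package has been installed as described above and uses only the public API shown in this changeset:

```python
# Minimal sketch: fit the packaged Stree estimator on a toy binary problem.
# Assumes `pip install git+https://github.com/doctorado-ml/stree` has been run.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from stree import Stree

X, y = make_classification(n_samples=500, n_features=3, n_informative=3,
                           n_redundant=0, random_state=1)
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=1)
clf = Stree(C=1.0, random_state=1).fit(Xtrain, ytrain)
print(f"Accuracy: {clf.score(Xtest, ytest):.4f}")
```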

crcard_graphs.ipynb (new file, 199 lines)

File diff suppressed because one or more lines are too long

data/.gitignore (vendored, 3 changed lines)
View File

@@ -1,2 +1 @@
-*.csv
-*.txt
*

example.png (new binary file)

Binary file not shown.

Size: 3.1 MiB

View File

@@ -1,6 +1,6 @@
import time
from sklearn.model_selection import train_test_split
-from trees.Stree import Stree
from stree import Stree
random_state=1
@@ -50,9 +50,8 @@ print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
proba = clf.predict_proba(Xtest)
print("Checking that we have correct probabilities, these are probabilities of sample belonging to class 1")
res0 = proba[proba[:, 0] == 0]
-res1 = proba[proba[:, 0] == 0]
res1 = proba[proba[:, 0] == 1]
-print("++++++++++res0++++++++++++")
print("++++++++++res0 > .8++++++++++++")
print(res0[res0[:, 1] > .8])
-print("**********res1************")
print("**********res1 < .4************")
print(res1[res1[:, 1] < .4])
-print(clf.predict_proba(Xtest))

View File

@@ -1,3 +1,5 @@
-numpy==1.18.2
numpy
-scikit-learn==0.22.2
scikit-learn
-pandas==1.0.3
pandas
matplotlib
ipympl

setup.py (new file, 39 lines)
View File

@@ -0,0 +1,39 @@
import setuptools
__version__ = "0.9rc3"
__author__ = "Ricardo Montañana Gómez"
def readme():
with open('README.md') as f:
return f.read()
setuptools.setup(
name='STree',
version=__version__,
license='MIT License',
description='Oblique decision tree with svm nodes',
long_description=readme(),
long_description_content_type='text/markdown',
packages=setuptools.find_packages(),
url='https://github.com/doctorado-ml/stree',
author=__author__,
author_email='ricardo.montanana@alu.uclm.es',
keywords='scikit-learn oblique-classifier oblique-decision-tree decision-tree svm svc',
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.7',
'Natural Language :: English',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Intended Audience :: Science/Research'
],
install_requires=[
'scikit-learn>=0.23.0',
'numpy',
'matplotlib',
'ipympl'
],
test_suite="stree.tests",
zip_safe=False
)
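With the package layout above, the suite named in `test_suite="stree.tests"` can also be loaded through the standard unittest machinery; a small sketch (assuming the package is installed):

```python
# Sketch: load and run the packaged tests with the stock unittest loader,
# mirroring the test_suite entry declared in setup.py.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName("stree.tests")
unittest.TextTestRunner(verbosity=2).run(suite)
```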

View File

@@ -8,34 +8,117 @@ Uses LinearSVC
'''
import typing
import os
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.svm import LinearSVC
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
-from trees.Snode import Snode
-from trees.Siterator import Siterator
class Snode:
def __init__(self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str):
self._clf = clf
self._vector = None if clf is None else clf.coef_
self._interceptor = 0. if clf is None else clf.intercept_
self._title = title
self._belief = 0. # belief of the prediction in a leaf node based on samples
# Only store dataset in Testing
self._X = X if os.environ.get('TESTING', 'NS') != 'NS' else None
self._y = y
self._down = None
self._up = None
self._class = None
@classmethod
def copy(cls, node: 'Snode') -> 'Snode':
return cls(node._clf, node._X, node._y, node._title)
def set_down(self, son):
self._down = son
def set_up(self, son):
self._up = son
def is_leaf(self) -> bool:
return self._up is None and self._down is None
def get_down(self) -> 'Snode':
return self._down
def get_up(self) -> 'Snode':
return self._up
def make_predictor(self):
"""Compute the class of the predictor and its belief based on the subdataset of the node
only if it is a leaf
"""
if not self.is_leaf():
return
classes, card = np.unique(self._y, return_counts=True)
if len(classes) > 1:
max_card = max(card)
min_card = min(card)
try:
self._belief = max_card / (max_card + min_card)
except:
self._belief = 0.
self._class = classes[card == max_card][0]
else:
self._belief = 1
self._class = classes[0]
def __str__(self) -> str:
if self.is_leaf():
return f"{self._title} - Leaf class={self._class} belief={self._belief:.6f} counts={np.unique(self._y, return_counts=True)}"
else:
return f"{self._title}"
class Siterator:
"""Stree preorder iterator
"""
def __init__(self, tree: Snode):
self._stack = []
self._push(tree)
def __iter__(self):
return self
def _push(self, node: Snode):
if node is not None:
self._stack.append(node)
def __next__(self) -> Snode:
if len(self._stack) == 0:
raise StopIteration()
node = self._stack.pop()
self._push(node.get_up())
self._push(node.get_down())
return node
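A small sketch of how the preorder traversal above behaves; the nodes are built by hand (with `clf=None`) purely to exercise the iterator, not the classifier:

```python
# Sketch: hand-built two-level tree showing Siterator's preorder order
# (a node, then its "down" subtree, then its "up" subtree).
import numpy as np
from stree import Snode, Siterator

X = np.zeros((1, 3))
root = Snode(None, X, np.array([0]), 'root')
root.set_down(Snode(None, X, np.array([1]), 'root - Down'))
root.set_up(Snode(None, X, np.array([0]), 'root - Up'))
for node in Siterator(root):
    print(node)  # visits root, then root - Down, then root - Up
```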
class Stree(BaseEstimator, ClassifierMixin):
"""
"""
-__folder = 'data/'
-def __init__(self, C=1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
def __init__(self, C: float = 1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
-self._max_iter = max_iter
self.max_iter = max_iter
-self._C = C
self.C = C
-self._random_state = random_state
self.random_state = random_state
-self._tree = None
self.use_predictions = use_predictions
-self.__folder = 'data/'
-self.__use_predictions = use_predictions
-self.__trained = False
-self.__proba = False
def get_params(self, deep=True):
"""Get dict with hyperparameters and its values to accomplish sklearn rules
"""
-return {"C": self._C, "random_state": self._random_state, 'max_iter': self._max_iter}
return {
'C': self.C,
'random_state': self.random_state,
'max_iter': self.max_iter,
'use_predictions': self.use_predictions
}
def set_params(self, **parameters):
"""Set hyperparmeters as specified by sklearn, needed in Gridsearchs
@@ -44,12 +127,16 @@ class Stree(BaseEstimator, ClassifierMixin):
setattr(self, parameter, value)
return self
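Because `get_params`/`set_params` now expose every constructor argument under its own name, the estimator can be cloned and grid-searched; a hedged sketch of that use (parameter values are illustrative):

```python
# Sketch: grid search over C, relying on the get_params/set_params contract above.
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from stree import Stree

X, y = make_classification(n_samples=300, random_state=1)
grid = GridSearchCV(Stree(random_state=1), {'C': [0.1, 1.0, 10.0]}, cv=3)
grid.fit(X, y)
print(grid.best_params_)
```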
# Added binary_only tag as required by sklearn check_estimator
def _more_tags(self):
return {'binary_only': True}
def _linear_function(self, data: np.array, node: Snode) -> np.array:
coef = node._vector[0, :].reshape(-1, data.shape[1])
return data.dot(coef.T) + node._interceptor[0]
def _split_data(self, node: Snode, data: np.ndarray, indices: np.ndarray) -> list:
-if self.__use_predictions:
if self.use_predictions:
yp = node._clf.predict(data)
down = (yp == 1).reshape(-1, 1)
res = np.expand_dims(node._clf.decision_function(data), 1)
@@ -68,31 +155,38 @@ class Stree(BaseEstimator, ClassifierMixin):
return [data_up, indices_up, data_down, indices_down, res_up, res_down]
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
-X, y = check_X_y(X, y.ravel())
from sklearn.utils.multiclass import check_classification_targets
if type(y).__name__ == 'np.ndarray':
y = y.ravel()
X, y = check_X_y(X, y)
self.classes_ = np.unique(y)
self.n_iter_ = self.max_iter
check_classification_targets(y)
self.n_features_in_ = X.shape[1]
-self._tree = self.train(X, y.ravel(), title)
self.tree_ = self.train(X, y.ravel(), title)
self._build_predictor()
-self.__trained = True
return self
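fit() validates the input, grows the tree recursively through train() and then turns the leaves into predictors; a short sketch (not part of the diff) that fits a small binary dataset and prints the resulting node hierarchy:

```python
# Sketch: fit on a small binary problem and print the induced node hierarchy.
from sklearn.datasets import make_classification
from stree import Stree

X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
                           n_redundant=0, class_sep=1.5, random_state=1)
clf = Stree(C=0.01, random_state=1).fit(X, y)
for node in clf:  # preorder over the fitted tree, via __iter__/Siterator
    print(node)
```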
def _build_predictor(self):
"""Process the leaves to make them predictors
"""
def run_tree(node: Snode):
if node.is_leaf():
node.make_predictor()
return
run_tree(node.get_down())
run_tree(node.get_up())
-run_tree(self._tree)
run_tree(self.tree_)
def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
if np.unique(y).shape[0] == 1:
# only 1 class => pure dataset
return Snode(None, X, y, title + ', <pure>')
# Train the model
-clf = LinearSVC(max_iter=self._max_iter, C=self._C,
clf = LinearSVC(max_iter=self.max_iter, C=self.C,
-random_state=self._random_state)
random_state=self.random_state)
clf.fit(X, y)
tree = Snode(clf, X, y, title)
X_U, y_u, X_D, y_d, _, _ = self._split_data(tree, X, y)
@@ -103,8 +197,13 @@ class Stree(BaseEstimator, ClassifierMixin):
tree.set_down(self.train(X_D, y_d, title + ' - Down'))
return tree
-def _reorder_results(self, y: np.array, indices: np.array) -> np.array:
def _reorder_results(self, y: np.array, indices: np.array, proba=False) -> np.array:
-y_ordered = np.zeros(y.shape, dtype=int if y.ndim == 1 else float)
if proba:
# if predict_proba return np.array of floats
y_ordered = np.zeros(y.shape, dtype=float)
else:
# return array of same type given in y
y_ordered = y.copy()
indices = indices.astype(int)
for i, index in enumerate(indices):
y_ordered[index] = y[i]
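_reorder_results puts predictions, which are produced in tree-traversal order, back into the original sample order; a minimal standalone illustration of that indexing:

```python
# Sketch of the index bookkeeping done by _reorder_results:
# y[i] is the prediction for the sample originally at position indices[i].
import numpy as np

y = np.array([1, 0, 1])        # predictions in traversal order
indices = np.array([2, 0, 1])  # original positions of those samples
y_ordered = y.copy()
for i, index in enumerate(indices):
    y_ordered[index] = y[i]
print(y_ordered)               # -> [0 1 1]
```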
@@ -122,21 +221,21 @@ class Stree(BaseEstimator, ClassifierMixin):
k, l = predict_class(d, i_d, node.get_down())
m, n = predict_class(u, i_u, node.get_up())
return np.append(k, m), np.append(l, n)
# sklearn check
-check_is_fitted(self)
check_is_fitted(self, ['tree_'])
# Input validation
X = check_array(X)
# setup prediction & make it happen
indices = np.arange(X.shape[0])
-return self._reorder_results(*predict_class(X, indices, self._tree))
return self._reorder_results(*predict_class(X, indices, self.tree_)).ravel()
def predict_proba(self, X: np.array) -> np.array:
-"""Computes an approximation of the probability of samples belonging to class 1
"""Computes an approximation of the probability of samples belonging to class 0 and 1
(nothing more, nothing less)
:param X: dataset
:type X: np.array
"""
def predict_class(xp: np.array, indices: np.array, dist: np.array, node: Snode) -> np.array:
"""Run the tree to compute predictions
@@ -162,30 +261,33 @@ class Stree(BaseEstimator, ClassifierMixin):
k, l = predict_class(d, i_d, r_d, node.get_down())
m, n = predict_class(u, i_u, r_u, node.get_up())
return np.append(k, m), np.append(l, n)
# sklearn check
-check_is_fitted(self)
check_is_fitted(self, ['tree_'])
# Input validation
X = check_array(X)
# setup prediction & make it happen
indices = np.arange(X.shape[0])
-result, indices = predict_class(X, indices, [], self._tree)
empty_dist = np.empty((X.shape[0], 1), dtype=float)
result, indices = predict_class(X, indices, empty_dist, self.tree_)
result = result.reshape(X.shape[0], 2)
# Turn distances to hyperplane into probabilities based on fitting distances
# of samples to its hyperplane that classified them, to the sigmoid function
-result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
-return self._reorder_results(result, indices)
result[:, 1] = 1 / (1 + np.exp(-result[:, 1])) # Probability of being 1
result[:, 0] = 1 - result[:, 1] # Probability of being 0
return self._reorder_results(result, indices, proba=True)
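The change above makes predict_proba return a two-column array: the signed distance to the splitting hyperplane is squashed with a sigmoid to give the class-1 probability, and column 0 is its complement. A standalone sketch of that conversion:

```python
# Sketch of the distance-to-probability mapping used in predict_proba.
import numpy as np

distances = np.array([-2.0, 0.0, 3.0])   # signed distances to the hyperplane
p1 = 1 / (1 + np.exp(-distances))        # probability of class 1
proba = np.column_stack((1 - p1, p1))    # column 0 is the complement
print(proba)
```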
def score(self, X: np.array, y: np.array) -> float:
"""Return accuracy
"""
-if not self.__trained:
-self.fit(X, y)
# sklearn check
check_is_fitted(self)
yp = self.predict(X).reshape(y.shape)
right = (yp == y).astype(int)
return np.sum(right) / len(y)
def __iter__(self):
-return Siterator(self._tree)
return Siterator(self.tree_)
def __str__(self) -> str:
output = ''
@@ -218,5 +320,8 @@ class Stree(BaseEstimator, ClassifierMixin):
def save_sub_datasets(self):
"""Save the every dataset stored in the tree to check with manual classifier
"""
if not os.path.isdir(self.__folder):
os.mkdir(self.__folder)
with open(self.get_catalog_name(), 'w', encoding='utf-8') as catalog:
-self._save_datasets(self._tree, catalog, 1)
self._save_datasets(self.tree_, catalog, 1)

stree/Strees_grapher.py (new file, 184 lines)
View File

@@ -0,0 +1,184 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Plot 3D views of nodes in Stree
'''
import os
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from .Strees import Stree, Snode, Siterator
class Snode_graph(Snode):
def __init__(self, node: Stree):
self._plot_size = (8, 8)
self._xlimits = (None, None)
self._ylimits = (None, None)
self._zlimits = (None, None)
n = Snode.copy(node)
super().__init__(n._clf, n._X, n._y, n._title)
def set_plot_size(self, size: tuple):
self._plot_size = size
def _is_pure(self) -> bool:
"""is considered pure a leaf node with one label
"""
if self.is_leaf():
return self._belief == 1.
return False
def set_axis_limits(self, limits: tuple):
self._xlimits = limits[0]
self._ylimits = limits[1]
self._zlimits = limits[2]
def _set_graphics_axis(self, ax: Axes3D):
ax.set_xlim(self._xlimits)
ax.set_ylim(self._ylimits)
ax.set_zlim(self._zlimits)
def save_hyperplane(self, save_folder: str = './', save_prefix: str = '', save_seq: int = 1):
_, fig = self.plot_hyperplane()
name = f"{save_folder}{save_prefix}STnode{save_seq}.png"
fig.savefig(name, bbox_inches='tight')
plt.close(fig)
def _get_cmap(self):
cmap = 'jet'
if self._is_pure():
if self._class == 1:
cmap = 'jet_r'
return cmap
def _graph_title(self):
n_class, card = np.unique(self._y, return_counts=True)
return f"{self._title} {n_class} {card}"
def plot_hyperplane(self, plot_distribution: bool = True):
fig = plt.figure(figsize=self._plot_size)
ax = fig.add_subplot(1, 1, 1, projection='3d')
if not self._is_pure():
# Can't plot hyperplane of leaves with one label because it hasn't classiffier
# get the splitting hyperplane
def hyperplane(x, y): return (-self._interceptor - self._vector[0][0] * x
- self._vector[0][1] * y) / self._vector[0][2]
tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
xx, yy = np.meshgrid(tmpx, tmpy)
ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5, antialiased=True,
rstride=1, cstride=1, cmap='seismic')
self._set_graphics_axis(ax)
if plot_distribution:
self.plot_distribution(ax)
else:
plt.title(self._graph_title())
plt.show()
return ax, fig
def plot_distribution(self, ax: Axes3D = None):
if ax is None:
fig = plt.figure(figsize=self._plot_size)
ax = fig.add_subplot(1, 1, 1, projection='3d')
plt.title(self._graph_title())
cmap = self._get_cmap()
ax.scatter(self._X[:, 0], self._X[:, 1],
self._X[:, 2], c=self._y, cmap=cmap)
ax.set_xlabel('X0')
ax.set_ylabel('X1')
ax.set_zlabel('X2')
plt.show()
class Stree_grapher(Stree):
"""Build 3d graphs of any dataset, if it's more than 3 features PCA shall
make its magic
"""
def __init__(self, params: dict):
self._plot_size = (8, 8)
self._tree_gr = None
# make Snode store X's
os.environ['TESTING'] = '1'
self._fitted = False
self._pca = None
super().__init__(**params)
def __del__(self):
try:
os.environ.pop('TESTING')
except:
pass
plt.close('all')
def _copy_tree(self, node: Snode) -> Snode_graph:
mirror = Snode_graph(node)
# clone node
mirror._class = node._class
mirror._belief = node._belief
if node.get_down() is not None:
mirror.set_down(self._copy_tree(node.get_down()))
if node.get_up() is not None:
mirror.set_up(self._copy_tree(node.get_up()))
return mirror
def fit(self, X: np.array, y: np.array) -> Stree:
"""Fit the Stree and copy the tree in a Snode_graph tree
:param X: Dataset
:type X: np.array
:param y: Labels
:type y: np.array
:return: Stree model
:rtype: Stree
"""
if X.shape[1] != 3:
self._pca = PCA(n_components=3)
X = self._pca.fit_transform(X)
res = super().fit(X, y)
self._tree_gr = self._copy_tree(self.tree_)
self._fitted = True
return res
def score(self, X: np.array, y: np.array) -> float:
self._check_fitted()
if X.shape[1] != 3:
X = self._pca.transform(X)
return super().score(X, y)
def _check_fitted(self):
if not self._fitted:
raise Exception('Have to fit the grapher first!')
def save_all(self, save_folder: str = './', save_prefix: str = ''):
"""Save all the node plots in png format, each with a sequence number
:param save_folder: folder where the plots are saved, defaults to './'
:type save_folder: str, optional
"""
self._check_fitted()
if not os.path.isdir(save_folder):
os.mkdir(save_folder)
seq = 1
for node in self:
node.save_hyperplane(save_folder=save_folder,
save_prefix=save_prefix, save_seq=seq)
seq += 1
def plot_all(self):
"""Plots all the nodes
"""
self._check_fitted()
for node in self:
node.plot_hyperplane()
def __iter__(self):
return Siterator(self._tree_gr)
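A sketch of the intended grapher workflow, assuming a matplotlib backend is available; with more than three features the data is projected to 3-D with PCA before fitting, and save_all() writes one PNG per node (creating the destination folder if needed). The folder name is illustrative:

```python
# Sketch: fit the grapher on a 5-feature dataset (PCA-reduced to 3-D)
# and save one hyperplane plot per node.
from sklearn.datasets import make_classification
from stree import Stree_grapher

X, y = make_classification(n_samples=500, n_features=5, n_informative=3,
                           random_state=1)
gr = Stree_grapher(dict(C=1.0, random_state=1))
gr.fit(X, y)
gr.save_all(save_folder='graphs/')
```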

stree/__init__.py (new file, 2 lines)
View File

@@ -0,0 +1,2 @@
from .Strees import Stree, Snode, Siterator
from .Strees_grapher import Stree_grapher, Snode_graph

View File

@@ -5,7 +5,7 @@ import unittest
import numpy as np
from sklearn.datasets import make_classification
-from trees.Stree import Stree, Snode
from stree import Stree, Snode
class Stree_test(unittest.TestCase):
@@ -16,7 +16,7 @@ class Stree_test(unittest.TestCase):
self._clf = Stree(random_state=self._random_state,
use_predictions=False)
self._clf.fit(*self._get_Xy())
-super(Stree_test, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
@classmethod
def tearDownClass(cls):
@@ -71,7 +71,7 @@ class Stree_test(unittest.TestCase):
def test_build_tree(self):
"""Check if the tree is built the same way as predictions of models
"""
-self._check_tree(self._clf._tree)
self._check_tree(self._clf.tree_)
def _get_file_data(self, file_name: str) -> tuple:
"""Return X, y from data, y is the last column in array
@@ -144,24 +144,35 @@ class Stree_test(unittest.TestCase):
"""Check that element 28 has a prediction different that the current label """Check that element 28 has a prediction different that the current label
""" """
# Element 28 has a different prediction than the truth # Element 28 has a different prediction than the truth
decimals = 5
prob = 0.29026400766
X, y = self._get_Xy() X, y = self._get_Xy()
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1])) yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
self.assertEqual(0, yp[0:, 0]) self.assertEqual(np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals))
self.assertEqual(1, y[28]) self.assertEqual(1, y[28])
self.assertEqual(0.29026400766, round(yp[0, 1], 11))
self.assertAlmostEqual(
round(prob, decimals),
round(yp[0, 1], decimals),
decimals
)
def test_multiple_predict_proba(self):
# First 27 elements the predictions are the same as the truth
num = 27
decimals = 5
X, y = self._get_Xy()
yp = self._clf.predict_proba(X[:num, :])
-self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist())
self.assertListEqual(y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist())
expected_proba = [0.88395641, 0.36746962, 0.84158767, 0.34106833, 0.14269291, 0.85193236,
0.29876058, 0.7282164, 0.85958616, 0.89517877, 0.99745224, 0.18860349,
0.30756427, 0.8318412, 0.18981198, 0.15564624, 0.25740655, 0.22923355,
0.87365959, 0.49928689, 0.95574351, 0.28761257, 0.28906333, 0.32643692,
0.29788483, 0.01657364, 0.81149083]
-self.assertListEqual(expected_proba, np.round(yp[:, 1], decimals=8).tolist())
expected = np.round(expected_proba, decimals=decimals).tolist()
computed = np.round(yp[:, 1], decimals=decimals).tolist()
for i in range(len(expected)):
self.assertAlmostEqual(expected[i], computed[i], decimals)
def build_models(self):
"""Build and train two models, model_clf will use the sklearn classifier to
@@ -217,7 +228,95 @@ class Stree_test(unittest.TestCase):
#
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
def test_iterator(self):
"""Check preorder iterator
"""
expected = [
'root',
'root - Down',
'root - Down - Down, <cgaf> - Leaf class=1 belief=0.975989 counts=(array([0, 1]), array([ 17, 691]))',
'root - Down - Up',
'root - Down - Up - Down, <cgaf> - Leaf class=1 belief=0.750000 counts=(array([0, 1]), array([1, 3]))',
'root - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([7]))',
'root - Up, <cgaf> - Leaf class=0 belief=0.928297 counts=(array([0, 1]), array([725, 56]))',
]
computed = []
for node in self._clf:
computed.append(str(node))
self.assertListEqual(expected, computed)
def test_is_a_sklearn_classifier(self):
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings('ignore', category=ConvergenceWarning)
warnings.filterwarnings('ignore', category=RuntimeWarning)
from sklearn.utils.estimator_checks import check_estimator
check_estimator(Stree())
class Snode_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ['TESTING'] = '1'
self._random_state = 1
self._clf = Stree(random_state=self._random_state,
use_predictions=True)
self._clf.fit(*self._get_Xy())
super().__init__(*args, **kwargs)
@classmethod
def tearDownClass(cls):
try:
os.environ.pop('TESTING')
except:
pass
def _get_Xy(self):
X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2,
class_sep=1.5, flip_y=0, weights=[0.5, 0.5], random_state=self._random_state)
return X, y
def test_attributes_in_leaves(self):
"""Check if the attributes in leaves have correct values so they form a predictor
"""
def check_leave(node: Snode):
if not node.is_leaf():
check_leave(node.get_down())
check_leave(node.get_up())
return
# Check Belief in leave
classes, card = np.unique(node._y, return_counts=True)
max_card = max(card)
min_card = min(card)
if len(classes) > 1:
try:
belief = max_card / (max_card + min_card)
except:
belief = 0.
else:
belief = 1
self.assertEqual(belief, node._belief)
# Check Class
class_computed = classes[card == max_card]
self.assertEqual(class_computed, node._class)
check_leave(self._clf.tree_)
def test_nodes_coefs(self):
"""Check if the nodes of the tree have the right attributes filled
"""
def run_tree(node: Snode):
if node._belief < 1:
# only exclude pure leaves
self.assertIsNotNone(node._clf)
self.assertIsNotNone(node._clf.coef_)
self.assertIsNotNone(node._vector)
self.assertIsNotNone(node._interceptor)
if node.is_leaf():
return
run_tree(node.get_down())
run_tree(node.get_up())
run_tree(self._clf.tree_)

stree/tests/__init__.py (new file, 1 line)
View File

@@ -0,0 +1 @@
from .Strees_test import Stree_test, Snode_test

File diff suppressed because one or more lines are too long

View File

@@ -6,13 +6,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n", "#\n",
"import pandas as pd\n", "# Google Colab setup\n",
"from sklearn.svm import LinearSVC\n", "#\n",
"from sklearn.tree import DecisionTreeClassifier\n", "#!pip install git+https://github.com/doctorado-ml/stree"
"from sklearn.datasets import make_classification, load_iris, load_wine\n",
"from trees.Stree import Stree\n",
"import time"
] ]
}, },
{ {
@@ -20,6 +17,22 @@
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.datasets import make_classification, load_iris, load_wine\n",
"from sklearn.model_selection import train_test_split\n",
"from stree import Stree\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"if not os.path.isfile('data/creditcard.csv'):\n", "if not os.path.isfile('data/creditcard.csv'):\n",
@@ -29,7 +42,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -39,10 +52,6 @@
} }
], ],
"source": [ "source": [
"import time\n",
"from sklearn.model_selection import train_test_split\n",
"from trees.Stree import Stree\n",
"\n",
"random_state=1\n", "random_state=1\n",
"\n", "\n",
"def load_creditcard(n_examples=0):\n", "def load_creditcard(n_examples=0):\n",
@@ -84,20 +93,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 5,
"metadata": {}, "metadata": {
"outputs": [], "tags": [
"source": [] "outputPrepend"
]
}, },
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9487\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief=0.977346 counts=(array([0, 1]), array([ 7, 302]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.945280 counts=(array([0, 1]), array([691, 40]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.986971 counts=(array([0, 1]), array([ 4, 303]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.944369 counts=(array([0, 1]), array([696, 41]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9674\nClassifier's accuracy (test) : 0.9554\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([310]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.953232 counts=(array([0, 1]), array([693, 34]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([7]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9693\nClassifier's accuracy (test) : 0.9487\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([310]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([7]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.955494 counts=(array([0, 1]), array([687, 32]))\nroot - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9780\nClassifier's accuracy (test) : 0.9487\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([15]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([15]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967468 counts=(array([0, 1]), array([684, 23]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), 
array([1]))\n\n**************************************************\n0.7277 secs\n" "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9579\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.987013 counts=(array([0, 1]), array([ 4, 304]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945652 counts=(array([0, 1]), array([696, 40]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9579\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.990196 counts=(array([0, 1]), array([ 3, 303]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.944444 counts=(array([0, 1]), array([697, 41]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9693\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([6]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief=0.955923 counts=(array([0, 1]), array([694, 32]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9713\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([314]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([6]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.958564 counts=(array([0, 1]), array([694, 30]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9780\nClassifier's accuracy (test) : 0.9420\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n\n**************************************************\n0.4537 secs\n"
} }
], ],
"source": [ "source": [
@@ -115,7 +121,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -138,10 +144,11 @@
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([15]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([15]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967468 counts=(array([0, 1]), array([684, 23]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\n" "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n"
} }
], ],
"source": [ "source": [
"#check iterator\n",
"for i in list(clf):\n", "for i in list(clf):\n",
" print(i)" " print(i)"
] ]
@@ -154,75 +161,46 @@
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([15]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([15]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967468 counts=(array([0, 1]), array([684, 23]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\n" "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n"
} }
], ],
"source": [ "source": [
"#check iterator again\n",
"for i in clf:\n", "for i in clf:\n",
" print(i)" " print(i)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"output_type": "display_data",
"data": {
"text/plain": "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "0025f832c1734afc944021e5990c2d11"
}
},
"metadata": {}
}
],
"source": [ "source": [
"%matplotlib widget\n", "# Check if the classifier is a sklearn estimator\n",
"from mpl_toolkits.mplot3d import Axes3D\n", "from sklearn.utils.estimator_checks import check_estimator\n",
"import matplotlib.pyplot as plt\n", "check_estimator(Stree())"
"from matplotlib import cm\n",
"from matplotlib.ticker import LinearLocator, FormatStrFormatter\n",
"import numpy as np\n",
"\n",
"fig = plt.figure()\n",
"ax = fig.gca(projection='3d')\n",
"\n",
"scale = 8\n",
"# Make data.\n",
"X = np.arange(-scale, scale, 0.25)\n",
"Y = np.arange(-scale, scale, 0.25)\n",
"X, Y = np.meshgrid(X, Y)\n",
"Z = X**2 + Y**2\n",
"\n",
"# Plot the surface.\n",
"surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,\n",
" linewidth=0, antialiased=False)\n",
"\n",
"# Customize the z axis.\n",
"ax.set_zlim(0, 100)\n",
"ax.zaxis.set_major_locator(LinearLocator(10))\n",
"ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))\n",
"\n",
"# rotate the axes and update\n",
"#for angle in range(0, 360):\n",
"# ax.view_init(30, 40)\n",
"\n",
"# Add a color bar which maps values to colors.\n",
"fig.colorbar(surf, shrink=0.5, aspect=5)\n",
"\n",
"plt.show()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"output_type": "stream",
"name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12d18e0e0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12d185200>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12d1850e0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12d17eb00>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12d17ec20>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12d17ed40>, 'Stree')\n7 functools.partial(<function check_sample_weights_invariance at 0x12d17ee60>, 'Stree')\n8 functools.partial(<function check_estimators_fit_returns_self at 0x12d189200>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12d189200>, 'Stree', readonly_memmap=True)\n10 functools.partial(<function check_complex_data at 0x12d181050>, 'Stree')\n11 functools.partial(<function check_dtype_object at 0x12d17ef80>, 'Stree')\n12 functools.partial(<function check_estimators_empty_data_messages at 0x12d185320>, 'Stree')\n13 functools.partial(<function check_pipeline_consistency at 0x12d181f80>, 'Stree')\n14 functools.partial(<function check_estimators_nan_inf at 0x12d185440>, 'Stree')\n15 functools.partial(<function check_estimators_overwrite_params at 0x12d189f80>, 'Stree')\n16 functools.partial(<function check_estimator_sparse_data at 0x12d17e9e0>, 'Stree')\n17 functools.partial(<function check_estimators_pickle at 0x12d185680>, 'Stree')\n18 functools.partial(<function check_classifier_data_not_an_array at 0x12d18e320>, 'Stree')\n19 functools.partial(<function check_classifiers_one_label at 0x12d185d40>, 'Stree')\n20 functools.partial(<function check_classifiers_classes at 0x12d1897a0>, 'Stree')\n21 functools.partial(<function check_estimators_partial_fit_n_features at 0x12d1857a0>, 'Stree')\n22 functools.partial(<function check_classifiers_train at 0x12d185e60>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 0x12d185e60>, 'Stree', readonly_memmap=True)\n24 functools.partial(<function check_classifiers_regression_target at 0x12d18ed40>, 'Stree')\n25 functools.partial(<function check_supervised_y_no_nan at 0x12d17cb00>, 'Stree')\n26 functools.partial(<function check_supervised_y_2d at 0x12d189440>, 'Stree')\n27 functools.partial(<function check_estimators_unfitted at 0x12d189320>, 'Stree')\n28 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12d18e8c0>, 'Stree')\n29 functools.partial(<function check_decision_proba_consistency at 0x12d18ee60>, 'Stree')\n30 functools.partial(<function check_fit2d_predict1d at 0x12d181560>, 'Stree')\n31 functools.partial(<function check_methods_subset_invariance at 0x12d181710>, 'Stree')\n32 functools.partial(<function check_fit2d_1sample at 0x12d181830>, 'Stree')\n33 functools.partial(<function check_fit2d_1feature at 0x12d181950>, 'Stree')\n34 functools.partial(<function check_fit1d at 0x12d181a70>, 'Stree')\n35 functools.partial(<function check_get_params_invariance at 0x12d18eb00>, 'Stree')\n36 functools.partial(<function check_set_params at 0x12d18ec20>, 'Stree')\n37 functools.partial(<function check_dict_unchanged at 0x12d181170>, 'Stree')\n38 functools.partial(<function check_dont_overwrite_parameters at 0x12d181440>, 'Stree')\n39 functools.partial(<function check_fit_idempotent at 0x12d192050>, 'Stree')\n"
}
],
"source": [
"# Make checks one by one\n",
"c = 0\n",
"checks = check_estimator(Stree(), generate_only=True)\n",
"for check in checks:\n",
" c += 1\n",
" print(c, check[1])\n",
" check[1](check[0])"
]
} }
], ],
"metadata": { "metadata": {

test_graphs.ipynb (new file, 261 lines)

File diff suppressed because one or more lines are too long

View File

@@ -1,72 +0,0 @@
import os
import unittest
import numpy as np
from sklearn.datasets import make_classification
from trees.Stree import Stree, Snode
class Snode_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ['TESTING'] = '1'
self._random_state = 1
self._clf = Stree(random_state=self._random_state,
use_predictions=True)
self._clf.fit(*self._get_Xy())
super(Snode_test, self).__init__(*args, **kwargs)
@classmethod
def tearDownClass(cls):
try:
os.environ.pop('TESTING')
except:
pass
def _get_Xy(self):
X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2,
class_sep=1.5, flip_y=0, weights=[0.5, 0.5], random_state=self._random_state)
return X, y
def test_attributes_in_leaves(self):
"""Check if the attributes in leaves have correct values so they form a predictor
"""
def check_leave(node: Snode):
if not node.is_leaf():
check_leave(node.get_down())
check_leave(node.get_up())
return
# Check Belief in leave
classes, card = np.unique(node._y, return_counts=True)
max_card = max(card)
min_card = min(card)
if len(classes) > 1:
try:
belief = max_card / (max_card + min_card)
except:
belief = 0.
else:
belief = 1
self.assertEqual(belief, node._belief)
# Check Class
class_computed = classes[card == max_card]
self.assertEqual(class_computed, node._class)
check_leave(self._clf._tree)
def test_nodes_coefs(self):
"""Check if the nodes of the tree have the right attributes filled
"""
def run_tree(node: Snode):
if node._belief < 1:
# only exclude pure leaves
self.assertIsNotNone(node._clf)
self.assertIsNotNone(node._clf.coef_)
self.assertIsNotNone(node._vector)
self.assertIsNotNone(node._interceptor)
if node.is_leaf():
return
run_tree(node.get_down())
run_tree(node.get_up())
run_tree(self._clf._tree)

View File

View File

@@ -1,34 +0,0 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Inorder iterator for the binary tree of Snodes
Uses LinearSVC
'''
from trees.Snode import Snode
class Siterator:
"""Inorder iterator
"""
def __init__(self, tree: Snode):
self._stack = []
self._push(tree)
def __iter__(self):
return self
def _push(self, node: Snode):
while (node is not None):
self._stack.insert(0, node)
node = node.get_down()
def __next__(self) -> Snode:
if len(self._stack) == 0:
raise StopIteration()
node = self._stack.pop()
self._push(node.get_up())
return node

View File

@@ -1,70 +0,0 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Node of the Stree (binary tree)
'''
import os
import numpy as np
from sklearn.svm import LinearSVC
class Snode:
def __init__(self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str):
self._clf = clf
self._vector = None if clf is None else clf.coef_
self._interceptor = 0. if clf is None else clf.intercept_
self._title = title
self._belief = 0. # belief of the prediction in a leaf node based on samples
# Only store dataset in Testing
self._X = X if os.environ.get('TESTING', 'NS') != 'NS' else None
self._y = y
self._down = None
self._up = None
self._class = None
def set_down(self, son):
self._down = son
def set_up(self, son):
self._up = son
def is_leaf(self,) -> bool:
return self._up is None and self._down is None
def get_down(self) -> 'Snode':
return self._down
def get_up(self) -> 'Snode':
return self._up
def make_predictor(self):
"""Compute the class of the predictor and its belief based on the subdataset of the node
only if it is a leaf
"""
# Clean memory
#self._X = None
#self._y = None
if not self.is_leaf():
return
classes, card = np.unique(self._y, return_counts=True)
if len(classes) > 1:
max_card = max(card)
min_card = min(card)
try:
self._belief = max_card / (max_card + min_card)
except:
self._belief = 0.
self._class = classes[card == max_card][0]
else:
self._belief = 1
self._class = classes[0]
def __str__(self) -> str:
if self.is_leaf():
return f"{self._title} - Leaf class={self._class} belief={self._belief:.6f} counts={np.unique(self._y, return_counts=True)}"
else:
return f"{self._title}"

View File