Mirror of https://github.com/Doctorado-ML/STree.git, synced 2025-08-17 00:16:07 +00:00

Make project python package friendly

- Add setup.py
- Move classes to module files
- Move tests folder inside module folder
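The setup.py mentioned in the commit message is not shown in this diff. A minimal sketch of what such a packaging script could look like is given below; the package name follows the new stree/ folder, the version and author are taken from stree/__init__.py, and the dependency list is only an assumption drawn from the imports in the new modules:

# Hypothetical sketch of setup.py -- not the file added by this commit
from setuptools import setup, find_packages

setup(
    name="stree",                          # matches the new package folder
    version="0.9rc1",                      # mirrors stree/__init__.py
    author="Ricardo Montañana Gómez",
    license="MIT",
    packages=find_packages(),              # picks up stree and stree.tests
    install_requires=["scikit-learn", "numpy", "matplotlib"],  # assumed from the imports
)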
stree/Strees.py (Normal file, 311 lines)
@@ -0,0 +1,311 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Build an oblique tree classifier based on SVM Trees
Uses LinearSVC
'''

import typing
import os

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.svm import LinearSVC
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


class Snode:
    def __init__(self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str):
        self._clf = clf
        self._vector = None if clf is None else clf.coef_
        self._interceptor = 0. if clf is None else clf.intercept_
        self._title = title
        self._belief = 0.  # belief of the prediction in a leaf node based on samples
        # Only store the dataset when testing
        self._X = X if os.environ.get('TESTING', 'NS') != 'NS' else None
        self._y = y
        self._down = None
        self._up = None
        self._class = None

    @classmethod
    def copy(cls, node: 'Snode') -> 'Snode':
        return cls(node._clf, node._X, node._y, node._title)

    def set_down(self, son):
        self._down = son

    def set_up(self, son):
        self._up = son

    def is_leaf(self) -> bool:
        return self._up is None and self._down is None

    def get_down(self) -> 'Snode':
        return self._down

    def get_up(self) -> 'Snode':
        return self._up

    def make_predictor(self):
        """Compute the class of the predictor and its belief based on the
        subdataset of the node, only if it is a leaf
        """
        if not self.is_leaf():
            return
        classes, card = np.unique(self._y, return_counts=True)
        if len(classes) > 1:
            max_card = max(card)
            min_card = min(card)
            try:
                self._belief = max_card / (max_card + min_card)
            except ZeroDivisionError:
                self._belief = 0.
            self._class = classes[card == max_card][0]
        else:
            self._belief = 1
            self._class = classes[0]

    def __str__(self) -> str:
        if self.is_leaf():
            return f"{self._title} - Leaf class={self._class} belief={self._belief:.6f} counts={np.unique(self._y, return_counts=True)}"
        return f"{self._title}"


class Siterator:
    """Stree preorder iterator
    """

    def __init__(self, tree: Snode):
        self._stack = []
        self._push(tree)

    def __iter__(self):
        return self

    def _push(self, node: Snode):
        if node is not None:
            self._stack.append(node)

    def __next__(self) -> Snode:
        if len(self._stack) == 0:
            raise StopIteration()
        node = self._stack.pop()
        self._push(node.get_up())
        self._push(node.get_down())
        return node


class Stree(BaseEstimator, ClassifierMixin):
    """Oblique tree classifier that splits each node with a LinearSVC hyperplane
    """

    def __init__(self, C: float = 1.0, max_iter: int = 1000, random_state: int = 0,
                 use_predictions: bool = False):
        self._max_iter = max_iter
        self._C = C
        self._random_state = random_state
        self._tree = None
        self.__folder = 'data/'
        self.__use_predictions = use_predictions
        self.__trained = False
        self.__proba = False

    def get_params(self, deep=True):
        """Get a dict with the hyperparameters and their values, as required by the sklearn API
        """
        return {"C": self._C, "random_state": self._random_state, 'max_iter': self._max_iter}

    def set_params(self, **parameters):
        """Set hyperparameters as specified by sklearn, needed for grid searches
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def _linear_function(self, data: np.array, node: Snode) -> np.array:
        coef = node._vector[0, :].reshape(-1, data.shape[1])
        return data.dot(coef.T) + node._interceptor[0]

    def _split_data(self, node: Snode, data: np.ndarray, indices: np.ndarray) -> list:
        if self.__use_predictions:
            yp = node._clf.predict(data)
            down = (yp == 1).reshape(-1, 1)
            res = np.expand_dims(node._clf.decision_function(data), 1)
        else:
            # doesn't work with multiclass, as each sample has to compute the inner
            # product with its own coefficients
            # computes the position of every sample w.r.t. the hyperplane
            res = self._linear_function(data, node)
            down = res > 0
        up = ~down
        data_down = data[down[:, 0]] if any(down) else None
        indices_down = indices[down[:, 0]] if any(down) else None
        res_down = res[down[:, 0]] if any(down) else None
        data_up = data[up[:, 0]] if any(up) else None
        indices_up = indices[up[:, 0]] if any(up) else None
        res_up = res[up[:, 0]] if any(up) else None
        return [data_up, indices_up, data_down, indices_down, res_up, res_down]

    def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
        X, y = check_X_y(X, y.ravel())
        self.n_features_in_ = X.shape[1]
        self._tree = self.train(X, y.ravel(), title)
        self._build_predictor()
        self.__trained = True
        return self

    def _build_predictor(self):
        """Process the leaves to make them predictors
        """

        def run_tree(node: Snode):
            if node.is_leaf():
                node.make_predictor()
                return
            run_tree(node.get_down())
            run_tree(node.get_up())

        run_tree(self._tree)

    def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
        if np.unique(y).shape[0] == 1:
            # only 1 class => pure dataset
            return Snode(None, X, y, title + ', <pure>')
        # Train the model
        clf = LinearSVC(max_iter=self._max_iter, C=self._C,
                        random_state=self._random_state)
        clf.fit(X, y)
        tree = Snode(clf, X, y, title)
        X_U, y_u, X_D, y_d, _, _ = self._split_data(tree, X, y)
        if X_U is None or X_D is None:
            # the split didn't separate anything
            return Snode(clf, X, y, title + ', <cgaf>')
        tree.set_up(self.train(X_U, y_u, title + ' - Up'))
        tree.set_down(self.train(X_D, y_d, title + ' - Down'))
        return tree

    def _reorder_results(self, y: np.array, indices: np.array) -> np.array:
        y_ordered = np.zeros(y.shape, dtype=int if y.ndim == 1 else float)
        indices = indices.astype(int)
        for i, index in enumerate(indices):
            y_ordered[index] = y[i]
        return y_ordered

    def predict(self, X: np.array) -> np.array:
        def predict_class(xp: np.array, indices: np.array, node: Snode) -> np.array:
            if xp is None:
                return [], []
            if node.is_leaf():
                # set a class for every sample in dataset
                prediction = np.full((xp.shape[0], 1), node._class)
                return prediction, indices
            u, i_u, d, i_d, _, _ = self._split_data(node, xp, indices)
            k, l = predict_class(d, i_d, node.get_down())
            m, n = predict_class(u, i_u, node.get_up())
            return np.append(k, m), np.append(l, n)

        # sklearn check
        check_is_fitted(self)
        # Input validation
        X = check_array(X)
        # setup prediction & make it happen
        indices = np.arange(X.shape[0])
        return self._reorder_results(*predict_class(X, indices, self._tree))

    def predict_proba(self, X: np.array) -> np.array:
        """Computes an approximation of the probability of samples belonging to class 1
        (nothing more, nothing less)

        :param X: dataset
        :type X: np.array
        """

        def predict_class(xp: np.array, indices: np.array, dist: np.array, node: Snode) -> np.array:
            """Run the tree to compute predictions

            :param xp: subdataset of samples
            :type xp: np.array
            :param indices: indices of subdataset samples to rebuild original order
            :type indices: np.array
            :param dist: distances of every sample to the hyperplane of the parent node
            :type dist: np.array
            :param node: node of the leaf with the class
            :type node: Snode
            :return: array of labels and distances, array of indices
            :rtype: np.array
            """
            if xp is None:
                return [], []
            if node.is_leaf():
                # set a class for every sample in dataset
                prediction = np.full((xp.shape[0], 1), node._class)
                prediction_proba = dist
                return np.append(prediction, prediction_proba, axis=1), indices
            u, i_u, d, i_d, r_u, r_d = self._split_data(node, xp, indices)
            k, l = predict_class(d, i_d, r_d, node.get_down())
            m, n = predict_class(u, i_u, r_u, node.get_up())
            return np.append(k, m), np.append(l, n)

        # sklearn check
        check_is_fitted(self)
        # Input validation
        X = check_array(X)
        # setup prediction & make it happen
        indices = np.arange(X.shape[0])
        result, indices = predict_class(X, indices, [], self._tree)
        result = result.reshape(X.shape[0], 2)
        # Turn the distances to the hyperplane into probabilities by feeding the
        # distance of each sample to the hyperplane that classified it into the sigmoid
        result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
        return self._reorder_results(result, indices)

    def score(self, X: np.array, y: np.array) -> float:
        """Return accuracy
        """
        if not self.__trained:
            self.fit(X, y)
        yp = self.predict(X).reshape(y.shape)
        right = (yp == y).astype(int)
        return np.sum(right) / len(y)

    def __iter__(self):
        return Siterator(self._tree)

    def __str__(self) -> str:
        output = ''
        for i in self:
            output += str(i) + '\n'
        return output

    def _save_datasets(self, tree: Snode, catalog: typing.TextIO, number: int):
        """Save the dataset of the node in a csv file

        :param tree: node with data to save
        :type tree: Snode
        :param catalog: catalog file handler
        :type catalog: typing.TextIO
        :param number: sequential number for the generated file name
        :type number: int
        """
        data = np.append(tree._X, tree._y.reshape(-1, 1), axis=1)
        name = f"{self.__folder}dataset{number}.csv"
        np.savetxt(name, data, delimiter=",")
        catalog.write(f"{name}, - {str(tree)}")
        if tree.is_leaf():
            return
        self._save_datasets(tree.get_down(), catalog, number + 1)
        self._save_datasets(tree.get_up(), catalog, number + 2)

    def get_catalog_name(self):
        return self.__folder + "catalog.txt"

    def save_sub_datasets(self):
        """Save every dataset stored in the tree so it can be checked with a manual classifier
        """
        if not os.path.isdir(self.__folder):
            os.mkdir(self.__folder)
        with open(self.get_catalog_name(), 'w', encoding='utf-8') as catalog:
            self._save_datasets(self._tree, catalog, 1)
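A minimal usage sketch of the Stree classifier defined above; the synthetic dataset mirrors the one used in the test suite and the printed output is illustrative, not a guaranteed result:

# Sketch: train an oblique tree and inspect it
from sklearn.datasets import make_classification
from stree import Stree

X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
                           n_redundant=0, random_state=1)
clf = Stree(C=1.0, max_iter=1000, random_state=1, use_predictions=False)
clf.fit(X, y)
print(clf)                         # preorder dump of the tree, one Snode per line
print(clf.score(X, y))             # accuracy on the training set
proba = clf.predict_proba(X[:5])   # column 0: label, column 1: sigmoid of the distance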

stree/Strees_grapher.py (Normal file, 182 lines)
@@ -0,0 +1,182 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Plot 3D views of nodes in Stree
'''

import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D

from .Strees import Stree, Snode, Siterator


class Snode_graph(Snode):

    def __init__(self, node: Snode):
        self._plot_size = (8, 8)
        self._xlimits = (None, None)
        self._ylimits = (None, None)
        self._zlimits = (None, None)
        n = Snode.copy(node)
        super().__init__(n._clf, n._X, n._y, n._title)

    def set_plot_size(self, size: tuple):
        self._plot_size = size

    def _is_pure(self) -> bool:
        """A leaf node with a single label is considered pure
        """
        if self.is_leaf():
            return self._belief == 1.
        return False

    def set_axis_limits(self, limits: tuple):
        self._xlimits = limits[0]
        self._ylimits = limits[1]
        self._zlimits = limits[2]

    def _set_graphics_axis(self, ax: Axes3D):
        ax.set_xlim(self._xlimits)
        ax.set_ylim(self._ylimits)
        ax.set_zlim(self._zlimits)

    def save_hyperplane(self, save_folder: str = './', save_prefix: str = '', save_seq: int = 1):
        _, fig = self.plot_hyperplane()
        name = f"{save_folder}{save_prefix}STnode{save_seq}.png"
        fig.savefig(name, bbox_inches='tight')
        plt.close(fig)

    def _get_cmap(self):
        cmap = 'jet'
        if self._is_pure():
            if self._class == 1:
                cmap = 'jet_r'
        return cmap

    def _graph_title(self):
        n_class, card = np.unique(self._y, return_counts=True)
        return f"{self._title} {n_class} {card}"

    def plot_hyperplane(self, plot_distribution: bool = True):
        fig = plt.figure(figsize=self._plot_size)
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        if not self._is_pure():
            # A pure leaf has no classifier, so its hyperplane can't be plotted
            # get the splitting hyperplane
            def hyperplane(x, y): return (-self._interceptor - self._vector[0][0] * x
                                          - self._vector[0][1] * y) / self._vector[0][2]

            tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
            tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
            xx, yy = np.meshgrid(tmpx, tmpy)
            ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5, antialiased=True,
                            rstride=1, cstride=1, cmap='seismic')
            self._set_graphics_axis(ax)
        if plot_distribution:
            self.plot_distribution(ax)
        else:
            plt.title(self._graph_title())
            plt.show()
        return ax, fig

    def plot_distribution(self, ax: Axes3D = None):
        if ax is None:
            fig = plt.figure(figsize=self._plot_size)
            ax = fig.add_subplot(1, 1, 1, projection='3d')
        plt.title(self._graph_title())
        cmap = self._get_cmap()
        ax.scatter(self._X[:, 0], self._X[:, 1],
                   self._X[:, 2], c=self._y, cmap=cmap)
        ax.set_xlabel('X0')
        ax.set_ylabel('X1')
        ax.set_zlabel('X2')
        plt.show()


class Stree_grapher(Stree):
    """Build 3D graphs of any dataset; if it has more than 3 features, PCA
    reduces it to 3 components first
    """

    def __init__(self, params: dict):
        self._plot_size = (8, 8)
        self._tree_gr = None
        # make Snode store X's
        os.environ['TESTING'] = '1'
        self._fitted = False
        self._pca = None
        super().__init__(**params)

    def __del__(self):
        try:
            os.environ.pop('TESTING')
        except KeyError:
            pass
        plt.close('all')

    def _copy_tree(self, node: Snode) -> Snode_graph:
        mirror = Snode_graph(node)
        # clone node
        mirror._class = node._class
        mirror._belief = node._belief
        if node.get_down() is not None:
            mirror.set_down(self._copy_tree(node.get_down()))
        if node.get_up() is not None:
            mirror.set_up(self._copy_tree(node.get_up()))
        return mirror

    def fit(self, X: np.array, y: np.array) -> Stree:
        """Fit the Stree and copy the tree in a Snode_graph tree

        :param X: Dataset
        :type X: np.array
        :param y: Labels
        :type y: np.array
        :return: Stree model
        :rtype: Stree
        """
        if X.shape[1] != 3:
            self._pca = PCA(n_components=3)
            X = self._pca.fit_transform(X)
        res = super().fit(X, y)
        self._tree_gr = self._copy_tree(self._tree)
        self._fitted = True
        return res

    def score(self, X: np.array, y: np.array) -> float:
        self._check_fitted()
        if X.shape[1] != 3:
            X = self._pca.transform(X)
        return super().score(X, y)

    def _check_fitted(self):
        if not self._fitted:
            raise Exception('Have to fit the grapher first!')

    def save_all(self, save_folder: str = './', save_prefix: str = ''):
        """Save all the node plots in png format, each with a sequence number

        :param save_folder: folder where the plots are saved, defaults to './'
        :type save_folder: str, optional
        :param save_prefix: prefix prepended to every file name, defaults to ''
        :type save_prefix: str, optional
        """
        self._check_fitted()
        seq = 1
        for node in self:
            node.save_hyperplane(save_folder=save_folder,
                                 save_prefix=save_prefix, save_seq=seq)
            seq += 1

    def plot_all(self):
        """Plots all the nodes
        """
        self._check_fitted()
        for node in self:
            node.plot_hyperplane()

    def __iter__(self):
        return Siterator(self._tree_gr)
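A short sketch of how the grapher above could be driven; the dataset, the parameters and the output folder are illustrative, and the folder is assumed to exist:

# Sketch: fit the grapher and export one PNG per node
from sklearn.datasets import make_classification
from stree import Stree_grapher

X, y = make_classification(n_samples=500, n_features=4, n_informative=4,
                           n_redundant=0, random_state=1)
gr = Stree_grapher(dict(C=1.0, max_iter=1000, random_state=1))
gr.fit(X, y)       # more than 3 features, so PCA reduces X to 3 components
gr.save_all(save_folder='./plots/', save_prefix='demo_')  # writes demo_STnode1.png, ...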

stree/__init__.py (Normal file, 4 lines)
@@ -0,0 +1,4 @@
__version__ = "0.9rc1"
__author__ = "Ricardo Montañana Gómez"
from .Strees import Stree, Snode, Siterator
from .Strees_grapher import Stree_grapher, Snode_graph

stree/tests/Strees_test.py (Normal file, 313 lines)
@@ -0,0 +1,313 @@
import csv
import os
import unittest

import numpy as np
from sklearn.datasets import make_classification

from stree import Stree, Snode


class Stree_test(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        os.environ['TESTING'] = '1'
        self._random_state = 1
        self._clf = Stree(random_state=self._random_state,
                          use_predictions=False)
        self._clf.fit(*self._get_Xy())
        super().__init__(*args, **kwargs)

    @classmethod
    def tearDownClass(cls):
        try:
            os.environ.pop('TESTING')
        except KeyError:
            pass

    def _get_Xy(self):
        X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
                                   n_redundant=0, n_repeated=0, n_classes=2,
                                   n_clusters_per_class=2, class_sep=1.5, flip_y=0,
                                   weights=[0.5, 0.5], random_state=self._random_state)
        return X, y

    def _check_tree(self, node: Snode):
        """Check recursively that the nodes that are not leaves have the correct
        number of labels and that their sons have the right number of elements
        in their dataset

        Arguments:
            node {Snode} -- node to check
        """
        if node.is_leaf():
            return
        y_prediction = node._clf.predict(node._X)
        y_down = node.get_down()._y
        y_up = node.get_up()._y
        # Is the partition correct in terms of cardinality?
        # i.e. the partition algorithm didn't forget any sample
        self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
        unique_y, count_y = np.unique(node._y, return_counts=True)
        _, count_d = np.unique(y_down, return_counts=True)
        _, count_u = np.unique(y_up, return_counts=True)
        #
        for i in unique_y:
            try:
                number_down = count_d[i]
            except IndexError:
                number_down = 0
            try:
                number_up = count_u[i]
            except IndexError:
                number_up = 0
            self.assertEqual(count_y[i], number_down + number_up)
        # Is the partition the same as the prediction?
        # as the node is not a leaf...
        _, count_yp = np.unique(y_prediction, return_counts=True)
        self.assertEqual(count_yp[0], y_up.shape[0])
        self.assertEqual(count_yp[1], y_down.shape[0])
        self._check_tree(node.get_down())
        self._check_tree(node.get_up())

    def test_build_tree(self):
        """Check that the tree is built consistently with the node classifiers' predictions
        """
        self._check_tree(self._clf._tree)

    def _get_file_data(self, file_name: str) -> tuple:
        """Return X, y from data; y is the last column of the array

        Arguments:
            file_name {str} -- the file name

        Returns:
            tuple -- tuple with samples, categories
        """
        data = np.genfromtxt(file_name, delimiter=',')
        data = np.array(data)
        column_y = data.shape[1] - 1
        fy = data[:, column_y]
        fx = np.delete(data, column_y, axis=1)
        return fx, fy

    def _find_out(self, px: np.array, x_original: np.array, y_original) -> list:
        """Find the original values of y for a given array of samples

        Arguments:
            px {np.array} -- array of samples to search for
            x_original {np.array} -- original dataset
            y_original {np.array} -- original classes

        Returns:
            list -- classes of the given samples
        """
        res = []
        for needle in px:
            for row in range(x_original.shape[0]):
                if all(x_original[row, :] == needle):
                    res.append(y_original[row])
        return res

    def test_subdatasets(self):
        """Check that the sub-dataset files have the same labels as the original dataset
        """
        self._clf.save_sub_datasets()
        with open(self._clf.get_catalog_name()) as cat_file:
            catalog = csv.reader(cat_file, delimiter=',')
            for row in catalog:
                X, y = self._get_Xy()
                x_file, y_file = self._get_file_data(row[0])
                y_original = np.array(self._find_out(x_file, X, y), dtype=int)
                self.assertTrue(np.array_equal(y_file, y_original))

    def test_single_prediction(self):
        X, y = self._get_Xy()
        yp = self._clf.predict((X[0, :].reshape(-1, X.shape[1])))
        self.assertEqual(yp[0], y[0])

    def test_multiple_prediction(self):
        # For the first 27 elements the predictions match the ground truth
        num = 27
        X, y = self._get_Xy()
        yp = self._clf.predict(X[:num, :])
        self.assertListEqual(y[:num].tolist(), yp.tolist())

    def test_score(self):
        X, y = self._get_Xy()
        accuracy_score = self._clf.score(X, y)
        yp = self._clf.predict(X)
        right = (yp == y).astype(int)
        accuracy_computed = sum(right) / len(y)
        self.assertEqual(accuracy_score, accuracy_computed)
        self.assertGreater(accuracy_score, 0.8)

    def test_single_predict_proba(self):
        """Check that element 28 has a prediction different from the true label
        """
        # Element 28 has a different prediction than the truth
        decimals = 5
        X, y = self._get_Xy()
        yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
        self.assertEqual(0, yp[0:, 0])
        self.assertEqual(1, y[28])
        self.assertAlmostEqual(
            round(0.29026400766, decimals),
            round(yp[0, 1], decimals),
            decimals
        )

    def test_multiple_predict_proba(self):
        # For the first 27 elements the predictions match the ground truth
        num = 27
        decimals = 5
        X, y = self._get_Xy()
        yp = self._clf.predict_proba(X[:num, :])
        self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist())
        expected_proba = [0.88395641, 0.36746962, 0.84158767, 0.34106833, 0.14269291, 0.85193236,
                          0.29876058, 0.7282164, 0.85958616, 0.89517877, 0.99745224, 0.18860349,
                          0.30756427, 0.8318412, 0.18981198, 0.15564624, 0.25740655, 0.22923355,
                          0.87365959, 0.49928689, 0.95574351, 0.28761257, 0.28906333, 0.32643692,
                          0.29788483, 0.01657364, 0.81149083]
        expected = np.round(expected_proba, decimals=decimals).tolist()
        computed = np.round(yp[:, 1], decimals=decimals).tolist()
        for i in range(len(expected)):
            self.assertAlmostEqual(expected[i], computed[i], decimals)

    def build_models(self):
        """Build and train two models: model_clf will use the sklearn classifier to
        compute predictions and split data, model_computed will use the vector of
        coefficients to compute both the predictions and the split data
        """
        model_clf = Stree(random_state=self._random_state,
                          use_predictions=True)
        model_computed = Stree(random_state=self._random_state,
                               use_predictions=False)
        X, y = self._get_Xy()
        model_clf.fit(X, y)
        model_computed.fit(X, y)
        return model_clf, model_computed, X, y

    def test_use_model_predict(self):
        """Check that we get the same results whether we use the estimator in the nodes
        to compute the labels or we use the hyperplane and the position of the samples
        with respect to it
        """
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict(X).tolist(),
            use_math.predict(X).tolist()
        )

    def test_use_model_score(self):
        use_clf, use_math, X, y = self.build_models()
        b = use_math.score(X, y)
        self.assertEqual(
            use_clf.score(X, y),
            b
        )
        self.assertGreater(b, .95)

    def test_use_model_predict_proba(self):
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict_proba(X).tolist(),
            use_math.predict_proba(X).tolist()
        )

    def test_single_vs_multiple_prediction(self):
        """Check that predicting sample by sample gives the same result as predicting
        all samples at once
        """
        X, _ = self._get_Xy()
        # Compute prediction line by line
        yp_line = np.array([], dtype=int)
        for xp in X:
            yp_line = np.append(yp_line, self._clf.predict(xp.reshape(-1, X.shape[1])))
        # Compute prediction at once
        yp_once = self._clf.predict(X)
        #
        self.assertListEqual(yp_line.tolist(), yp_once.tolist())

    def test_iterator(self):
        """Check the preorder iterator
        """
        expected = [
            'root',
            'root - Down',
            'root - Down - Down, <cgaf> - Leaf class=1 belief=0.975989 counts=(array([0, 1]), array([ 17, 691]))',
            'root - Down - Up',
            'root - Down - Up - Down, <cgaf> - Leaf class=1 belief=0.750000 counts=(array([0, 1]), array([1, 3]))',
            'root - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([7]))',
            'root - Up, <cgaf> - Leaf class=0 belief=0.928297 counts=(array([0, 1]), array([725, 56]))',
        ]
        computed = []
        for node in self._clf:
            computed.append(str(node))
        self.assertListEqual(expected, computed)

class Snode_test(unittest.TestCase):

    def __init__(self, *args, **kwargs):
        os.environ['TESTING'] = '1'
        self._random_state = 1
        self._clf = Stree(random_state=self._random_state,
                          use_predictions=True)
        self._clf.fit(*self._get_Xy())
        super().__init__(*args, **kwargs)

    @classmethod
    def tearDownClass(cls):
        try:
            os.environ.pop('TESTING')
        except KeyError:
            pass

    def _get_Xy(self):
        X, y = make_classification(n_samples=1500, n_features=3, n_informative=3,
                                   n_redundant=0, n_repeated=0, n_classes=2,
                                   n_clusters_per_class=2, class_sep=1.5, flip_y=0,
                                   weights=[0.5, 0.5], random_state=self._random_state)
        return X, y

    def test_attributes_in_leaves(self):
        """Check that the attributes in the leaves have correct values, so they form a predictor
        """

        def check_leave(node: Snode):
            if not node.is_leaf():
                check_leave(node.get_down())
                check_leave(node.get_up())
                return
            # Check the belief in the leaf
            classes, card = np.unique(node._y, return_counts=True)
            max_card = max(card)
            min_card = min(card)
            if len(classes) > 1:
                try:
                    belief = max_card / (max_card + min_card)
                except ZeroDivisionError:
                    belief = 0.
            else:
                belief = 1
            self.assertEqual(belief, node._belief)
            # Check the class
            class_computed = classes[card == max_card]
            self.assertEqual(class_computed, node._class)

        check_leave(self._clf._tree)

    def test_nodes_coefs(self):
        """Check that the nodes of the tree have the right attributes filled
        """

        def run_tree(node: Snode):
            if node._belief < 1:
                # only exclude pure leaves
                self.assertIsNotNone(node._clf)
                self.assertIsNotNone(node._clf.coef_)
                self.assertIsNotNone(node._vector)
                self.assertIsNotNone(node._interceptor)
            if node.is_leaf():
                return
            run_tree(node.get_down())
            run_tree(node.get_up())

        run_tree(self._clf._tree)

stree/tests/__init__.py (Normal file, 1 line)
@@ -0,0 +1 @@
from .Strees_test import Stree_test, Snode_test
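With the tests now living inside the module folder, the suite can be run with the standard unittest runner; a sketch, assuming it is executed from the repository root:

# Sketch: discover and run the package tests programmatically
import unittest

suite = unittest.defaultTestLoader.discover('stree/tests', pattern='*_test.py')
unittest.TextTestRunner(verbosity=2).run(suite)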