class Stree_test(unittest.TestCase):
    """Unit tests for the Stree classifier."""

    def setUp(self):
        """Create a freshly seeded model before every test method.

        unittest instantiates the TestCase once per test method, so building
        the fixture here is equivalent to doing it in ``__init__`` but uses
        the hook unittest provides for that purpose and does not depend on
        the constructor signature of ``TestCase``.
        """
        self.random_state = 17
        self._model = Stree(random_state=self.random_state)

    def test_split_data(self):
        """Placeholder test: asserts a constant and does not call the model."""
        self.assertTrue(True)
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    # Needed for annotations only; the runtime import happens in Stree.train.
    from sklearn.svm import LinearSVC


class Snode:
    """Node of the Stree.

    Stores the hyperplane (weight vector and intercept) received at
    construction, the samples that reached the node, a descriptive title and
    the two children. A node with no children is a leaf.
    """

    def __init__(self, vector: np.ndarray, interceptor: float,
                 X: np.ndarray, y: np.ndarray, title: str):
        self._vector = vector
        self._interceptor = interceptor
        self._title = title
        self._X = X
        self._y = y
        self._down = None
        self._up = None
        # Initialised to None and never assigned anywhere else in this code.
        self._class = None

    def set_down(self, son):
        """Attach ``son`` as the 'down' child."""
        self._down = son

    def set_up(self, son):
        """Attach ``son`` as the 'up' child."""
        self._up = son

    def is_leaf(self) -> bool:
        """Return True when the node has neither an 'up' nor a 'down' child."""
        return self._up is None and self._down is None

    def get_down(self):
        """Return the 'down' child, or None if there is none."""
        return self._down

    def get_up(self):
        """Return the 'up' child, or None if there is none."""
        return self._up

    def __str__(self) -> str:
        """Return the title; leaves also report the share of the majority label.

        A leaf holding no samples reports an accuracy of 1.
        """
        if not self.is_leaf():
            return self._title
        total = self._y.shape[0]
        if total:
            _, counts = np.unique(self._y, return_counts=True)
            accuracy = counts.max() / total
        else:
            accuracy = 1
        return f"{self._title} LEAF accuracy={accuracy:.2f}"


class Stree:
    """Oblique tree classifier based on SVM trees (uses LinearSVC).

    Each internal node holds the hyperplane of a LinearSVC fitted on the
    samples that reached it; the samples are then routed to the 'up' or
    'down' child depending on their side of that hyperplane. Recursion stops
    when a node contains a single class or when the hyperplane leaves every
    sample on the same side.

    Only the first row of ``coef_`` / ``intercept_`` is used to split, so the
    splitting logic is written for two-class problems.
    """

    def __init__(self, max_iter: int = 1000, random_state: int = 0):
        self._max_iter = max_iter
        self._random_state = random_state
        self._outcomes = None
        self._tree = None

    def _split_data(self, clf: "LinearSVC", X: np.ndarray,
                    y: np.ndarray) -> tuple:
        """Split ``X`` and ``y`` by the side of the hyperplane of ``clf``.

        :param clf: fitted classifier exposing ``coef_`` and ``intercept_``
        :param X: samples, one per row
        :param y: labels aligned with the rows of ``X``
        :return: ``(X_up, y_up, X_down, y_down)``; samples whose decision
            value is > 0 go 'down', the rest go 'up'. The pair of an empty
            side is ``(None, None)``.
        """
        # Doesn't work with multiclass, as each sample would have to do the
        # inner product with its own coefficients.
        coef = clf.coef_[0, :].reshape(-1, X.shape[1])
        intercept = clf.intercept_[0]
        # Position of every sample w.r.t. the hyperplane, shape (n, 1).
        res = X.dot(coef.T) + intercept
        down = (res > 0)[:, 0]
        up = ~down
        # ndarray.any() reduces in native code; builtin any() would iterate
        # the mask element by element in Python.
        X_down, y_down = (X[down], y[down]) if down.any() else (None, None)
        X_up, y_up = (X[up], y[up]) if up.any() else (None, None)
        return X_up, y_up, X_down, y_down

    @staticmethod
    def _class_counts(y: np.ndarray) -> str:
        """Return ``'items<label>=count'`` for every label in ``y``, space separated."""
        labels, counts = np.unique(y, return_counts=True)
        return ' '.join(f'items<{label}>={count}'
                        for label, count in zip(labels, counts))

    def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> "Stree":
        """Build the tree from ``X`` and ``y`` and return ``self``.

        :param title: title given to the root node
        """
        self._tree = self.train(X, y, title)
        return self

    def train(self, X: np.ndarray, y: np.ndarray, title: str = '') -> Snode:
        """Recursively build and return the (sub)tree for ``X`` and ``y``.

        :param X: samples reaching this node, one per row
        :param y: labels aligned with the rows of ``X``
        :param title: title of the node being built; children get the
            suffix ``' - Up'`` / ``' - Down'``
        :return: the root :class:`Snode` of the (sub)tree
        """
        labels = np.unique(y)
        if labels.shape[0] == 1:
            # only 1 class => pure dataset
            return Snode(np.array([]), 0, X, y,
                         title + f', class={labels} items={y.shape[0]}')
        # Imported here so that the node structure and _split_data can be
        # used without scikit-learn until a model actually has to be fitted.
        from sklearn.svm import LinearSVC
        # Train the model
        clf = LinearSVC(max_iter=self._max_iter,
                        random_state=self._random_state)
        clf.fit(X, y)
        X_up, y_up, X_down, y_down = self._split_data(clf, X, y)
        if X_up is None or X_down is None:
            # didn't part anything: keep the mixed samples in a leaf and
            # report how many items of each label it holds
            return Snode(clf.coef_, clf.intercept_, X, y,
                         title + f', classes={labels} {self._class_counts(y)}')
        tree = Snode(clf.coef_, clf.intercept_, X, y, title)
        tree.set_up(self.train(X_up, y_up, title + ' - Up'))
        tree.set_down(self.train(X_down, y_down, title + ' - Down'))
        return tree

    def _print_tree(self, tree: Snode):
        """Print ``tree``, then its 'down' subtree, then its 'up' subtree."""
        print(tree)
        if tree.is_leaf():
            return
        self._print_tree(tree.get_down())
        self._print_tree(tree.get_up())

    def show_outcomes(self):
        """Print every node of the tree stored by :meth:`fit`."""
        pointer = self._tree
        self._print_tree(pointer)