mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 15:36:00 +00:00
Working tree with samples and first test
This commit is contained in:
12
.vscode/settings.json
vendored
Normal file
12
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"python.testing.unittestArgs": [
|
||||
"-v",
|
||||
"-s",
|
||||
"./tests",
|
||||
"-p",
|
||||
"*_test.py"
|
||||
],
|
||||
"python.testing.pytestEnabled": false,
|
||||
"python.testing.nosetestsEnabled": false,
|
||||
"python.testing.unittestEnabled": true
|
||||
}
|
10
main.py
Normal file
10
main.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from trees.Stree import Stree
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
# Single seed shared by the data generator and the classifier so that a run
# is reproducible.
random_state = 1

# Synthetic, balanced two-class dataset: 1500 samples, 3 features, all of
# them informative, no label noise.
X, y = make_classification(
    n_samples=1500,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    class_sep=1.5,
    flip_y=0,
    weights=[0.5, 0.5],
    random_state=random_state,
)

# Grow the tree on the whole dataset and print every node it produced.
model = Stree(random_state=random_state)
model.fit(X, y)
model.show_outcomes()
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
numpy==1.18.2
|
||||
scikit-learn==0.22.2
|
14
tests/Stree_test.py
Normal file
14
tests/Stree_test.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import unittest
|
||||
|
||||
from trees.Stree import Stree
|
||||
|
||||
class Stree_test(unittest.TestCase):
    """Unit tests for the Stree classifier."""

    def setUp(self):
        """Build a fresh, seeded model before every test method.

        The fixture lives here instead of in an overridden ``__init__`` so
        the test runner constructs the case normally and each test starts
        from its own model instance.
        """
        self.random_state = 17
        self._model = Stree(random_state=self.random_state)

    def test_split_data(self):
        """Placeholder check; always passes."""
        self.assertTrue(True)
|
||||
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
49
trees/Snode.py
Normal file
49
trees/Snode.py
Normal file
@@ -0,0 +1,49 @@
|
||||
'''
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT"
|
||||
__version__ = "1.0"
|
||||
Node of the Stree
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
|
||||
class Snode:
    """Node of the Stree.

    Stores the hyperplane handed to it (``vector`` and ``interceptor``), the
    samples and labels that reached the node, a descriptive title, and the
    links to its two children (``up`` and ``down``).
    """

    def __init__(self, vector: np.ndarray, interceptor: float, X: np.ndarray,
                 y: np.ndarray, title: str):
        """Store the node data; the node starts without children.

        :param vector: hyperplane coefficients associated with this node
        :param interceptor: hyperplane intercept associated with this node
        :param X: samples that reached this node
        :param y: labels of those samples
        :param title: text used when the node is printed
        """
        self._vector = vector
        self._interceptor = interceptor
        self._title = title
        self._X = X
        self._y = y
        self._down = None
        self._up = None
        self._class = None

    def set_down(self, son):
        """Attach ``son`` as the *down* child."""
        self._down = son

    def set_up(self, son):
        """Attach ``son`` as the *up* child."""
        self._up = son

    def is_leaf(self) -> bool:
        """Return True when the node has neither an up nor a down child."""
        return self._up is None and self._down is None

    def get_down(self):
        """Return the *down* child, or None if there is none."""
        return self._down

    def get_up(self):
        """Return the *up* child, or None if there is none."""
        return self._up

    def __str__(self):
        """Return the title; leaves also report their majority-class share.

        The reported ``accuracy`` is the fraction of the node's labels that
        belong to its most frequent class. A leaf holding no labels reports
        1.
        """
        if not self.is_leaf():
            return self._title
        total = self._y.shape[0]
        if total == 0:
            # No labels at all: avoid dividing by zero and report 1.
            accuracy = 1
        else:
            # One pass over the labels yields the size of every class.
            _, counts = np.unique(self._y, return_counts=True)
            accuracy = counts.max() / total
        return f"{self._title} LEAF accuracy={accuracy:.2f}"
|
||||
|
||||
|
||||
|
70
trees/Stree.py
Normal file
70
trees/Stree.py
Normal file
@@ -0,0 +1,70 @@
|
||||
'''
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT"
|
||||
__version__ = "1.0"
|
||||
Create an oblique tree classifier based on SVM Trees
|
||||
Uses LinearSVC
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
from trees.Snode import Snode
|
||||
|
||||
class Stree:
    """Oblique tree classifier whose splits are LinearSVC hyperplanes.

    ``fit`` grows the tree recursively: at every node a LinearSVC is trained
    on the samples that reached it and the samples are divided according to
    the side of the resulting hyperplane they fall on. Growth stops when a
    node holds a single class or when the hyperplane leaves every sample on
    the same side.
    """

    def __init__(self, max_iter: int = 1000, random_state: int = 0):
        """Store the hyperparameters forwarded to every LinearSVC.

        :param max_iter: ``max_iter`` passed to LinearSVC
        :param random_state: ``random_state`` passed to LinearSVC
        """
        self._max_iter = max_iter
        self._random_state = random_state
        self._outcomes = None
        self._tree = None

    def _split_data(self, clf: "LinearSVC", X: np.ndarray,
                    y: np.ndarray) -> tuple:
        """Divide the samples by the side of the hyperplane they fall on.

        :param clf: fitted classifier exposing ``coef_`` and ``intercept_``
        :param X: samples, one per row
        :param y: labels aligned with the rows of ``X``
        :return: ``(X_up, y_up, X_down, y_down)``; the pair belonging to a
            side that received no sample is ``(None, None)``
        """
        # Doesn't work with multiclass, as each sample would have to do the
        # inner product with its own coefficients: only the first row of
        # coef_ / intercept_ is used.
        coef = clf.coef_[0, :].reshape(-1, X.shape[1])
        intercept = clf.intercept_[0]
        # Position of every sample with respect to the hyperplane.
        res = X.dot(coef.T) + intercept
        down = res[:, 0] > 0
        up = ~down
        X_down, y_down = (X[down], y[down]) if down.any() else (None, None)
        X_up, y_up = (X[up], y[up]) if up.any() else (None, None)
        return X_up, y_up, X_down, y_down

    def fit(self, X: np.ndarray, y: np.ndarray,
            title: str = 'root') -> "Stree":
        """Grow the tree on ``X`` / ``y`` and return this instance.

        :param X: training samples, one per row
        :param y: training labels
        :param title: title given to the root node
        :return: ``self``
        """
        self._tree = self.train(X, y, title)
        return self

    def train(self, X: np.ndarray, y: np.ndarray, title: str = '') -> "Snode":
        """Recursively build the subtree for the given samples.

        :param X: samples that reached this point of the tree
        :param y: labels of those samples
        :param title: title of the node being built; children append
            ``' - Up'`` / ``' - Down'`` to it
        :return: root node of the subtree
        """
        classes = np.unique(y)
        if classes.shape[0] == 1:
            # Only one class => pure dataset, nothing left to separate.
            return Snode(np.array([]), 0, X, y, title + f', <pure> class={classes} items={y.shape[0]}')
        # Train the model
        clf = LinearSVC(max_iter=self._max_iter, random_state=self._random_state)
        clf.fit(X, y)
        X_T, y_t, X_O, y_o = self._split_data(clf, X, y)
        if X_T is None or X_O is None:
            # Every sample fell on the same side: the hyperplane didn't part
            # anything, so this node becomes a leaf.
            return Snode(clf.coef_, clf.intercept_, X, y, title + f', <couldn\'t go any further> classes={classes} items<0>={y[y==0].shape[0]} items<1>={y[y==1].shape[0]}')
        tree = Snode(clf.coef_, clf.intercept_, X, y, title)
        tree.set_up(self.train(X_T, y_t, title + ' - Up'))
        tree.set_down(self.train(X_O, y_o, title + ' - Down'))
        return tree

    def _print_tree(self, tree: "Snode"):
        """Print ``tree``, then its down subtree, then its up subtree."""
        print(tree)
        if tree.is_leaf():
            return
        self._print_tree(tree.get_down())
        self._print_tree(tree.get_up())

    def show_outcomes(self):
        """Print every node of the tree built by ``fit``."""
        pointer = self._tree
        self._print_tree(pointer)
|
||||
|
||||
|
0
trees/__init__.py
Normal file
0
trees/__init__.py
Normal file
Reference in New Issue
Block a user