mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-16 07:56:06 +00:00
compute predictor and store model in node
This commit is contained in:
@@ -3,47 +3,63 @@ __author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT"
|
||||
__version__ = "1.0"
|
||||
Node of the Stree
|
||||
Node of the Stree (binary tree)
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
|
||||
class Snode:
|
||||
def __init__(self, vector: np.ndarray, interceptor: float, X: np.ndarray, y: np.ndarray, title: str):
|
||||
self._vector = vector
|
||||
self._interceptor = interceptor
|
||||
def __init__(self, model: LinearSVC, X: np.ndarray, y: np.ndarray, title: str):
|
||||
self._model = model
|
||||
self._vector = None if model is None else model.coef_
|
||||
self._interceptor = 0 if model is None else model.intercept_
|
||||
self._title = title
|
||||
self._belief = 0 # belief of the prediction in a leaf node based on samples
|
||||
self._X = X
|
||||
self._y = y
|
||||
self._down = None
|
||||
self._up = None
|
||||
self._class = None # really needed?
|
||||
|
||||
self._class = None # really needed?
|
||||
|
||||
def set_down(self, son):
|
||||
self._down = son
|
||||
|
||||
|
||||
def set_up(self, son):
|
||||
self._up = son
|
||||
|
||||
def is_leaf(self,) -> bool:
|
||||
return self._up is None and self._down is None
|
||||
|
||||
|
||||
def get_down(self) -> 'Snode':
|
||||
return self._down
|
||||
|
||||
def get_up(self) -> 'Snode':
|
||||
return self._up
|
||||
|
||||
def make_predictor(self):
|
||||
"""Compute the class of the predictor and its belief based on the subdataset of the node
|
||||
only if it is a leaf
|
||||
"""
|
||||
if not self.is_leaf():
|
||||
return
|
||||
classes, card = np.unique(self._y, return_counts=True)
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
try:
|
||||
self._belief = max_card / min_card
|
||||
except:
|
||||
self._belief = 0
|
||||
self._class = classes[card == max_card]
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.is_leaf():
|
||||
num = 0
|
||||
for i in np.unique(self._y):
|
||||
num = max(num, self._y[self._y == i].shape[0])
|
||||
den = self._y.shape[0]
|
||||
accuracy = num / den if den != 0 else 1
|
||||
return f"{self._title} LEAF accuracy={accuracy:.2f}"
|
||||
accuracy = num / den if den != 0 else 1
|
||||
return f"{self._title} LEAF accuracy={accuracy:.2f}\n"
|
||||
else:
|
||||
return self._title
|
||||
|
||||
|
||||
|
||||
return f"{self._title}\n"
|
||||
|
@@ -13,10 +13,12 @@ from sklearn.svm import LinearSVC
|
||||
|
||||
from trees.Snode import Snode
|
||||
|
||||
|
||||
class Stree:
|
||||
"""
|
||||
"""
|
||||
def __init__(self, max_iter: int=1000, random_state: int=0, use_predictions: bool=False):
|
||||
|
||||
def __init__(self, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
|
||||
self._max_iter = max_iter
|
||||
self._random_state = random_state
|
||||
self._outcomes = None
|
||||
@@ -44,34 +46,48 @@ class Stree:
|
||||
|
||||
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
|
||||
self._tree = self.train(X, y, title)
|
||||
self._predictor()
|
||||
return self
|
||||
|
||||
def _predictor(self):
|
||||
"""Process the leaves to make them predictors
|
||||
"""
|
||||
def run_tree(node: Snode):
|
||||
if node.is_leaf():
|
||||
node.make_predictor()
|
||||
return
|
||||
run_tree(node.get_down())
|
||||
run_tree(node.get_up())
|
||||
run_tree(self._tree)
|
||||
|
||||
def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
|
||||
if np.unique(y).shape[0] == 1:
|
||||
# only 1 class => pure dataset
|
||||
return Snode(np.array([]), 0, X, y, title + f', class={np.unique(y)}, items={y.shape[0]}, rest=0, <pure> ')
|
||||
return Snode(None, X, y, title + f', class={np.unique(y)}, items={y.shape[0]}, rest=0, <pure> ')
|
||||
# Train the model
|
||||
clf = LinearSVC(max_iter=self._max_iter, random_state=self._random_state)
|
||||
clf = LinearSVC(max_iter=self._max_iter,
|
||||
random_state=self._random_state)
|
||||
clf.fit(X, y)
|
||||
tree = Snode(clf.coef_, clf.intercept_, X, y, title)
|
||||
tree = Snode(clf, X, y, title)
|
||||
X_U, y_u, X_D, y_d = self._split_data(clf, X, y)
|
||||
if X_U is None or X_D is None:
|
||||
# didn't part anything
|
||||
return Snode(clf.coef_, clf.intercept_, X, y, title + f', classes={np.unique(y)}, items<0>={y[y==0].shape[0]}, items<1>={y[y==1].shape[0]}, <couldn\'t go any further>')
|
||||
tree.set_up(self.train(X_U, y_u, title + ' - Up' + str(np.unique(y_u, return_counts=True))))
|
||||
tree.set_down(self.train(X_D, y_d, title + ' - Down' + str(np.unique(y_d, return_counts=True))))
|
||||
return Snode(clf, X, y, title + f', classes={np.unique(y)}, items<0>={y[y==0].shape[0]}, items<1>={y[y==1].shape[0]}, <couldn\'t go any further>')
|
||||
tree.set_up(self.train(X_U, y_u, title + ' - Up' +
|
||||
str(np.unique(y_u, return_counts=True))))
|
||||
tree.set_down(self.train(X_D, y_d, title + ' - Down' +
|
||||
str(np.unique(y_d, return_counts=True))))
|
||||
return tree
|
||||
|
||||
def _print_tree(self, tree: Snode):
|
||||
print(tree)
|
||||
if tree.is_leaf():
|
||||
return
|
||||
self._print_tree(tree.get_down())
|
||||
self._print_tree(tree.get_up())
|
||||
|
||||
def show_outcomes(self):
|
||||
pointer = self._tree
|
||||
self._print_tree(pointer)
|
||||
def __str__(self):
|
||||
def print_tree(tree: Snode) -> str:
|
||||
output = str(tree)
|
||||
if tree.is_leaf():
|
||||
return output
|
||||
output += print_tree(tree.get_down())
|
||||
output += print_tree(tree.get_up())
|
||||
return output
|
||||
return print_tree(self._tree)
|
||||
|
||||
def _save_datasets(self, tree: Snode, catalog: typing.TextIO, number: int):
|
||||
"""Save the dataset of the node in a csv file
|
||||
@@ -80,10 +96,10 @@ class Stree:
|
||||
tree {Snode} -- node with data to save
|
||||
number {int} -- a number to make different file names
|
||||
"""
|
||||
data = np.append(tree._X, tree._y.reshape(-1,1), axis=1)
|
||||
data = np.append(tree._X, tree._y.reshape(-1, 1), axis=1)
|
||||
name = f"{self.__folder}dataset{number}.csv"
|
||||
np.savetxt(name, data, delimiter=",")
|
||||
catalog.write(f"{name}, - {str(tree)}\n")
|
||||
catalog.write(f"{name}, - {str(tree)}")
|
||||
if tree.is_leaf():
|
||||
return
|
||||
self._save_datasets(tree.get_down(), catalog, number + 1)
|
||||
@@ -95,8 +111,5 @@ class Stree:
|
||||
def save_sub_datasets(self):
|
||||
"""Save the every dataset stored in the tree to check with manual classifier
|
||||
"""
|
||||
pointer = self._tree
|
||||
with open(self.get_catalog_name(), 'w', encoding = 'utf-8') as catalog:
|
||||
self._save_datasets(pointer, catalog, 1)
|
||||
|
||||
|
||||
with open(self.get_catalog_name(), 'w', encoding='utf-8') as catalog:
|
||||
self._save_datasets(self._tree, catalog, 1)
|
||||
|
Reference in New Issue
Block a user