mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-16 07:56:06 +00:00
Add C param in constructor and creditcard dataset
This commit is contained in:
@@ -14,9 +14,9 @@ class Snode:
|
||||
def __init__(self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str):
|
||||
self._clf = clf
|
||||
self._vector = None if clf is None else clf.coef_
|
||||
self._interceptor = 0 if clf is None else clf.intercept_
|
||||
self._interceptor = 0. if clf is None else clf.intercept_
|
||||
self._title = title
|
||||
self._belief = 0 # belief of the prediction in a leaf node based on samples
|
||||
self._belief = 0. # belief of the prediction in a leaf node based on samples
|
||||
self._X = X
|
||||
self._y = y
|
||||
self._down = None
|
||||
@@ -45,21 +45,20 @@ class Snode:
|
||||
if not self.is_leaf():
|
||||
return
|
||||
classes, card = np.unique(self._y, return_counts=True)
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
try:
|
||||
self._belief = max_card / min_card
|
||||
except:
|
||||
self._belief = 0
|
||||
self._class = classes[card == max_card]
|
||||
if len(classes) > 1:
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
try:
|
||||
self._belief = max_card / (max_card + min_card)
|
||||
except:
|
||||
self._belief = 0.
|
||||
self._class = classes[card == max_card][0]
|
||||
else:
|
||||
self._belief = 1
|
||||
self._class = classes[0]
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.is_leaf():
|
||||
num = 0
|
||||
for i in np.unique(self._y):
|
||||
num = max(num, self._y[self._y == i].shape[0])
|
||||
den = self._y.shape[0]
|
||||
accuracy = num / den if den != 0 else 1
|
||||
return f"{self._title} LEAF accuracy={accuracy:.2f}, belief={self._belief:.2f} class={self._class}\n"
|
||||
return f"Leaf class={self._class} belief={self._belief:.6f} counts={np.unique(self._y, return_counts=True)}\n"
|
||||
else:
|
||||
return f"{self._title}\n"
|
||||
|
@@ -18,8 +18,9 @@ class Stree:
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
|
||||
def __init__(self, C=1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
|
||||
self._max_iter = max_iter
|
||||
self._C = C
|
||||
self._random_state = random_state
|
||||
self._outcomes = None
|
||||
self._tree = None
|
||||
@@ -46,7 +47,7 @@ class Stree:
|
||||
return [X_up, y_up, X_down, y_down]
|
||||
|
||||
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
|
||||
self._tree = self.train(X, y, title)
|
||||
self._tree = self.train(X, y.ravel(), title)
|
||||
self._build_predictor()
|
||||
self.__trained = True
|
||||
return self
|
||||
@@ -65,20 +66,18 @@ class Stree:
|
||||
def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
|
||||
if np.unique(y).shape[0] == 1:
|
||||
# only 1 class => pure dataset
|
||||
return Snode(None, X, y, title + f', class={np.unique(y)}, items={y.shape[0]}, rest=0, <pure> ')
|
||||
return Snode(None, X, y, title + ', <pure> ')
|
||||
# Train the model
|
||||
clf = LinearSVC(max_iter=self._max_iter,
|
||||
clf = LinearSVC(max_iter=self._max_iter, C=self._C,
|
||||
random_state=self._random_state)
|
||||
clf.fit(X, y)
|
||||
tree = Snode(clf, X, y, title)
|
||||
X_U, y_u, X_D, y_d = self._split_data(clf, X, y)
|
||||
if X_U is None or X_D is None:
|
||||
# didn't part anything
|
||||
return Snode(clf, X, y, title + f', classes={np.unique(y)}, items<0>={y[y==0].shape[0]}, items<1>={y[y==1].shape[0]}, <couldn\'t go any further>')
|
||||
tree.set_up(self.train(X_U, y_u, title + ' - Up' +
|
||||
str(np.unique(y_u, return_counts=True))))
|
||||
tree.set_down(self.train(X_D, y_d, title + ' - Down' +
|
||||
str(np.unique(y_d, return_counts=True))))
|
||||
return Snode(clf, X, y, title + ', <couldn\'t go any further>')
|
||||
tree.set_up(self.train(X_U, y_u, title + ' - Up'))
|
||||
tree.set_down(self.train(X_D, y_d, title + ' - Down'))
|
||||
return tree
|
||||
|
||||
def predict(self, X: np.array) -> np.array:
|
||||
@@ -95,23 +94,36 @@ class Stree:
|
||||
return y
|
||||
|
||||
def score(self, X: np.array, y: np.array, print_out=True) -> float:
|
||||
self.fit(X, y)
|
||||
yp = self.predict(X)
|
||||
if not self.__trained:
|
||||
self.fit(X, y)
|
||||
yp = self.predict(X).reshape(y.shape)
|
||||
right = (yp == y).astype(int)
|
||||
accuracy = sum(right) / len(y)
|
||||
accuracy = np.sum(right) / len(y)
|
||||
if print_out:
|
||||
print(f"Accuracy: {accuracy:.6f}")
|
||||
return accuracy
|
||||
|
||||
def __str__(self):
|
||||
def print_tree(tree: Snode) -> str:
|
||||
def __print_tree(self, tree: Snode, only_leaves=False) -> str:
|
||||
if not only_leaves:
|
||||
output = str(tree)
|
||||
if tree.is_leaf():
|
||||
return output
|
||||
output += print_tree(tree.get_down())
|
||||
output += print_tree(tree.get_up())
|
||||
else:
|
||||
output = ''
|
||||
if tree.is_leaf():
|
||||
if only_leaves:
|
||||
output = str(tree)
|
||||
return output
|
||||
return print_tree(self._tree)
|
||||
output += self.__print_tree(tree.get_down(), only_leaves)
|
||||
output += self.__print_tree(tree.get_up(), only_leaves)
|
||||
return output
|
||||
|
||||
def show_tree(self, only_leaves=False):
|
||||
if only_leaves:
|
||||
print(self.__print_tree(self._tree, only_leaves=True))
|
||||
else:
|
||||
print(self)
|
||||
|
||||
def __str__(self):
|
||||
return self.__print_tree(self._tree)
|
||||
|
||||
def _save_datasets(self, tree: Snode, catalog: typing.TextIO, number: int):
|
||||
"""Save the dataset of the node in a csv file
|
||||
|
Reference in New Issue
Block a user