Add C param in constructor and creditcard dataset

This commit is contained in:
2020-05-14 10:48:39 +02:00
parent 8f71eeb316
commit e3ae3a3a6c
5 changed files with 465 additions and 73 deletions

View File

@@ -18,8 +18,9 @@ class Stree:
"""
"""
def __init__(self, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
def __init__(self, C=1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
self._max_iter = max_iter
self._C = C
self._random_state = random_state
self._outcomes = None
self._tree = None
@@ -46,7 +47,7 @@ class Stree:
return [X_up, y_up, X_down, y_down]
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
self._tree = self.train(X, y, title)
self._tree = self.train(X, y.ravel(), title)
self._build_predictor()
self.__trained = True
return self
@@ -65,20 +66,18 @@ class Stree:
def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
if np.unique(y).shape[0] == 1:
# only 1 class => pure dataset
return Snode(None, X, y, title + f', class={np.unique(y)}, items={y.shape[0]}, rest=0, <pure> ')
return Snode(None, X, y, title + ', <pure> ')
# Train the model
clf = LinearSVC(max_iter=self._max_iter,
clf = LinearSVC(max_iter=self._max_iter, C=self._C,
random_state=self._random_state)
clf.fit(X, y)
tree = Snode(clf, X, y, title)
X_U, y_u, X_D, y_d = self._split_data(clf, X, y)
if X_U is None or X_D is None:
# didn't part anything
return Snode(clf, X, y, title + f', classes={np.unique(y)}, items<0>={y[y==0].shape[0]}, items<1>={y[y==1].shape[0]}, <couldn\'t go any further>')
tree.set_up(self.train(X_U, y_u, title + ' - Up' +
str(np.unique(y_u, return_counts=True))))
tree.set_down(self.train(X_D, y_d, title + ' - Down' +
str(np.unique(y_d, return_counts=True))))
return Snode(clf, X, y, title + ', <couldn\'t go any further>')
tree.set_up(self.train(X_U, y_u, title + ' - Up'))
tree.set_down(self.train(X_D, y_d, title + ' - Down'))
return tree
def predict(self, X: np.array) -> np.array:
@@ -95,23 +94,36 @@ class Stree:
return y
def score(self, X: np.array, y: np.array, print_out=True) -> float:
self.fit(X, y)
yp = self.predict(X)
if not self.__trained:
self.fit(X, y)
yp = self.predict(X).reshape(y.shape)
right = (yp == y).astype(int)
accuracy = sum(right) / len(y)
accuracy = np.sum(right) / len(y)
if print_out:
print(f"Accuracy: {accuracy:.6f}")
return accuracy
def __str__(self):
def print_tree(tree: Snode) -> str:
def __print_tree(self, tree: Snode, only_leaves=False) -> str:
if not only_leaves:
output = str(tree)
if tree.is_leaf():
return output
output += print_tree(tree.get_down())
output += print_tree(tree.get_up())
else:
output = ''
if tree.is_leaf():
if only_leaves:
output = str(tree)
return output
return print_tree(self._tree)
output += self.__print_tree(tree.get_down(), only_leaves)
output += self.__print_tree(tree.get_up(), only_leaves)
return output
def show_tree(self, only_leaves=False):
if only_leaves:
print(self.__print_tree(self._tree, only_leaves=True))
else:
print(self)
def __str__(self):
return self.__print_tree(self._tree)
def _save_datasets(self, tree: Snode, catalog: typing.TextIO, number: int):
"""Save the dataset of the node in a csv file