#3 First try, change LinearSVC to SVC

Make a builder that selects the classifier (LinearSVC or SVC) for each node.
Start changing the tests.
This commit is contained in:
2020-06-07 20:26:59 +02:00
parent b824229121
commit 05b462716e
10 changed files with 112 additions and 70 deletions

View File

@@ -4,14 +4,13 @@ __copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
__version__ = "0.9"
Build an oblique tree classifier based on SVM Trees
Uses LinearSVC
"""
import os
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.svm import LinearSVC
from sklearn.svm import SVC, LinearSVC
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import (
check_X_y,
@@ -26,12 +25,8 @@ class Snode:
dataset assigned to it
"""
def __init__(
self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str
):
def __init__(self, clf: SVC, X: np.ndarray, y: np.ndarray, title: str):
self._clf = clf
self._vector = None if clf is None else clf.coef_
self._interceptor = 0.0 if clf is None else clf.intercept_
self._title = title
self._belief = 0.0
# Only store dataset in Testing
@@ -126,6 +121,7 @@ class Stree(BaseEstimator, ClassifierMixin):
def __init__(
self,
C: float = 1.0,
kernel: str = "linear",
max_iter: int = 1000,
random_state: int = None,
max_depth: int = None,
@@ -135,6 +131,7 @@ class Stree(BaseEstimator, ClassifierMixin):
):
self.max_iter = max_iter
self.C = C
self.kernel = kernel
self.random_state = random_state
self.use_predictions = use_predictions
self.max_depth = max_depth
@@ -161,8 +158,8 @@ class Stree(BaseEstimator, ClassifierMixin):
:return: array of distances of each sample to the hyperplane
:rtype: np.array
"""
coef = node._vector[0, :].reshape(-1, data.shape[1])
return data.dot(coef.T) + node._interceptor[0]
coef = node._clf.coef_[0, :].reshape(-1, data.shape[1])
return data.dot(coef.T) + node._clf.intercept_[0]
def _split_array(self, origin: np.array, down: np.array) -> list:
"""Split an array in two based on indices passed as down and its complement
@@ -266,6 +263,26 @@ class Stree(BaseEstimator, ClassifierMixin):
run_tree(self.tree_)
def _build_clf(self):
    """Create the unfitted classifier used to split a node.

    A LinearSVC is built when ``self.kernel`` is "linear"; any other kernel
    value is passed on to SVC. Both classifiers receive ``max_iter``,
    ``tol`` and ``C`` from this estimator; ``random_state`` is forwarded to
    LinearSVC only.

    :return: a new classifier configured from the estimator's parameters
    """
    if self.kernel != "linear":
        # Non-linear kernels are delegated to the generic SVC
        return SVC(
            kernel=self.kernel,
            max_iter=self.max_iter,
            tol=self.tol,
            C=self.C,
        )
    return LinearSVC(
        max_iter=self.max_iter,
        random_state=self.random_state,
        C=self.C,
        tol=self.tol,
    )
def train(
self,
X: np.ndarray,
@@ -296,9 +313,7 @@ class Stree(BaseEstimator, ClassifierMixin):
# only 1 class => pure dataset
return Snode(None, X, y, title + ", <pure>")
# Train the model
clf = LinearSVC(
max_iter=self.max_iter, random_state=self.random_state, C=self.C
) # , sample_weight=sample_weight)
clf = self._build_clf()
clf.fit(X, y, sample_weight=sample_weight)
tree = Snode(clf, X, y, title)
self.depth_ = max(depth, self.depth_)

View File

@@ -73,10 +73,10 @@ class Snode_graph(Snode):
# get the splitting hyperplane
def hyperplane(x, y):
return (
-self._interceptor
- self._vector[0][0] * x
- self._vector[0][1] * y
) / self._vector[0][2]
-self._clf.intercept_
- self._clf.coef_[0][0] * x
- self._clf.coef_[0][1] * y
) / self._clf.coef_[0][2]
tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())

View File

@@ -76,7 +76,9 @@ class Stree_grapher_test(unittest.TestCase):
def test_save_all(self):
folder_name = "/tmp/"
file_names = [f"{folder_name}STnode{i}.png" for i in range(1, 8)]
file_names = [
os.path.join(folder_name, f"STnode{i}.png") for i in range(1, 8)
]
with warnings.catch_warnings():
warnings.simplefilter("ignore")
matplotlib.use("Agg")
@@ -160,8 +162,6 @@ class Snode_graph_test(unittest.TestCase):
# only exclude pure leaves
self.assertIsNotNone(node._clf)
self.assertIsNotNone(node._clf.coef_)
self.assertIsNotNone(node._vector)
self.assertIsNotNone(node._interceptor)
if node.is_leaf():
return
run_tree(node.get_down())
@@ -171,7 +171,7 @@ class Snode_graph_test(unittest.TestCase):
def test_save_hyperplane(self):
folder_name = "/tmp/"
file_name = f"{folder_name}STnode1.png"
file_name = os.path.join(folder_name, "STnode1.png")
with warnings.catch_warnings():
warnings.simplefilter("ignore")
matplotlib.use("Agg")

View File

@@ -84,22 +84,6 @@ class Stree_test(unittest.TestCase):
"""
self._check_tree(self._clf.tree_)
def _get_file_data(self, file_name: str) -> tuple:
"""Return X, y from data, y is the last column in array
Arguments:
file_name {str} -- the file name
Returns:
tuple -- tuple with samples, categories
"""
data = np.genfromtxt(file_name, delimiter=",")
data = np.array(data)
column_y = data.shape[1] - 1
fy = data[:, column_y]
fx = np.delete(data, column_y, axis=1)
return fx, fy
def _find_out(
self, px: np.array, x_original: np.array, y_original
) -> list:
@@ -134,11 +118,18 @@ class Stree_test(unittest.TestCase):
def test_score(self):
X, y = get_dataset(self._random_state)
accuracy_score = self._clf.score(X, y)
yp = self._clf.predict(X)
accuracy_computed = np.mean(yp == y)
self.assertEqual(accuracy_score, accuracy_computed)
self.assertGreater(accuracy_score, 0.9)
for kernel in ["linear"]:
clf = Stree(
random_state=self._random_state,
kernel=kernel,
use_predictions=True,
)
clf.fit(X, y)
accuracy_score = clf.score(X, y)
yp = clf.predict(X)
accuracy_computed = np.mean(yp == y)
self.assertEqual(accuracy_score, accuracy_computed)
self.assertGreater(accuracy_score, 0.9)
def test_single_predict_proba(self):
"""Check that element 28 has a prediction different that the current
@@ -306,10 +297,11 @@ class Stree_test(unittest.TestCase):
tcl.fit(*get_dataset(self._random_state))
def test_check_max_depth(self):
depth = 3
tcl = Stree(random_state=self._random_state, max_depth=depth)
tcl.fit(*get_dataset(self._random_state))
self.assertEqual(depth, tcl.depth_)
depths = (3, 4)
for depth in depths:
tcl = Stree(random_state=self._random_state, max_depth=depth)
tcl.fit(*get_dataset(self._random_state))
self.assertEqual(depth, tcl.depth_)
def test_unfitted_tree_is_iterable(self):
tcl = Stree()
@@ -383,8 +375,6 @@ class Snode_test(unittest.TestCase):
# only exclude pure leaves
self.assertIsNotNone(node._clf)
self.assertIsNotNone(node._clf.coef_)
self.assertIsNotNone(node._vector)
self.assertIsNotNone(node._interceptor)
if node.is_leaf():
return
run_tree(node.get_down())