mirror of https://github.com/Doctorado-ML/STree.git
#3 First try, change LinearSVC to SVC
make a builder, start changing tests
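In short: Stree gains a kernel parameter, a new _build_clf() helper picks LinearSVC for the linear kernel and SVC(kernel=...) otherwise, Snode stops caching _vector/_interceptor and reads coef_/intercept_ straight from the classifier, and the tests start looping over kernels. A minimal usage sketch of the new parameter, mirroring the updated test_score (the import path and the dataset helper are assumptions, not part of the diff):

# Sketch only: exercises the kernel parameter introduced in this commit.
# "from stree import Stree" and make_classification are assumptions; any
# (X, y) classification dataset works the same way.
import numpy as np
from sklearn.datasets import make_classification

from stree import Stree  # assumed package layout

X, y = make_classification(n_samples=500, random_state=1)

# kernel="linear" makes _build_clf() return a LinearSVC; any other value
# would make it return SVC(kernel=...).
clf = Stree(kernel="linear", C=1.0, random_state=1, use_predictions=True)
clf.fit(X, y)

yp = clf.predict(X)
assert clf.score(X, y) == np.mean(yp == y)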
@@ -4,14 +4,13 @@ __copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
 __license__ = "MIT"
 __version__ = "0.9"
 Build an oblique tree classifier based on SVM Trees
-Uses LinearSVC
 """

 import os

 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.svm import LinearSVC
+from sklearn.svm import SVC, LinearSVC
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import (
     check_X_y,
@@ -26,12 +25,8 @@ class Snode:
     dataset assigned to it
     """

-    def __init__(
-        self, clf: LinearSVC, X: np.ndarray, y: np.ndarray, title: str
-    ):
+    def __init__(self, clf: SVC, X: np.ndarray, y: np.ndarray, title: str):
         self._clf = clf
-        self._vector = None if clf is None else clf.coef_
-        self._interceptor = 0.0 if clf is None else clf.intercept_
         self._title = title
         self._belief = 0.0
         # Only store dataset in Testing
@@ -126,6 +121,7 @@ class Stree(BaseEstimator, ClassifierMixin):
     def __init__(
         self,
         C: float = 1.0,
+        kernel: str = "linear",
         max_iter: int = 1000,
         random_state: int = None,
         max_depth: int = None,
@@ -135,6 +131,7 @@ class Stree(BaseEstimator, ClassifierMixin):
     ):
         self.max_iter = max_iter
         self.C = C
+        self.kernel = kernel
         self.random_state = random_state
         self.use_predictions = use_predictions
         self.max_depth = max_depth
@@ -161,8 +158,8 @@ class Stree(BaseEstimator, ClassifierMixin):
         :return: array of distances of each sample to the hyperplane
         :rtype: np.array
         """
-        coef = node._vector[0, :].reshape(-1, data.shape[1])
-        return data.dot(coef.T) + node._interceptor[0]
+        coef = node._clf.coef_[0, :].reshape(-1, data.shape[1])
+        return data.dot(coef.T) + node._clf.intercept_[0]

     def _split_array(self, origin: np.array, down: np.array) -> list:
         """Split an array in two based on indices passed as down and its complement
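The distance helper above just evaluates the linear decision function by hand: for a fitted linear classifier, X.dot(coef_.T) + intercept_ is the same quantity scikit-learn exposes as decision_function. A standalone check of that identity (not part of the diff):

# Standalone sketch: the manual dot-product distance used by _distances
# matches sklearn's decision_function for a linear classifier.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(random_state=0)
clf = LinearSVC(max_iter=10000).fit(X, y)

coef = clf.coef_[0, :].reshape(-1, X.shape[1])
distances = X.dot(coef.T) + clf.intercept_[0]

assert np.allclose(distances.ravel(), clf.decision_function(X))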
@@ -266,6 +263,26 @@ class Stree(BaseEstimator, ClassifierMixin):

         run_tree(self.tree_)

+    def _build_clf(self):
+        """ Select the correct classifier for the node
+        """
+
+        return (
+            LinearSVC(
+                max_iter=self.max_iter,
+                random_state=self.random_state,
+                C=self.C,
+                tol=self.tol,
+            )
+            if self.kernel == "linear"
+            else SVC(
+                kernel=self.kernel,
+                max_iter=self.max_iter,
+                tol=self.tol,
+                C=self.C,
+            )
+        )
+
     def train(
         self,
         X: np.ndarray,
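One caveat with the builder worth noting: both branches return an estimator with fit/predict, but coef_ and intercept_, which _distances and the plotting code still read, are only defined for LinearSVC and SVC(kernel="linear"). A standalone sketch of that asymmetry (not part of the diff):

# Standalone sketch: coef_ exists only for linear kernels, so the
# coef_-based distance cannot work for an rbf/poly SVC as-is.
from sklearn.datasets import make_classification
from sklearn.svm import SVC, LinearSVC

X, y = make_classification(random_state=0)

linear = LinearSVC(C=1.0, max_iter=1000, tol=1e-4).fit(X, y)
rbf = SVC(kernel="rbf", C=1.0, max_iter=1000, tol=1e-4).fit(X, y)

print(linear.coef_.shape)      # (1, n_features): the separating hyperplane
print(hasattr(rbf, "coef_"))   # False: coef_ is undefined for non-linear kernels

# For non-linear kernels the signed distance has to come from
# decision_function instead.
print(rbf.decision_function(X[:3]))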
@@ -296,9 +313,7 @@ class Stree(BaseEstimator, ClassifierMixin):
             # only 1 class => pure dataset
             return Snode(None, X, y, title + ", <pure>")
         # Train the model
-        clf = LinearSVC(
-            max_iter=self.max_iter, random_state=self.random_state, C=self.C
-        )  # , sample_weight=sample_weight)
+        clf = self._build_clf()
         clf.fit(X, y, sample_weight=sample_weight)
         tree = Snode(clf, X, y, title)
         self.depth_ = max(depth, self.depth_)
@@ -73,10 +73,10 @@ class Snode_graph(Snode):
         # get the splitting hyperplane
         def hyperplane(x, y):
             return (
-                -self._interceptor
-                - self._vector[0][0] * x
-                - self._vector[0][1] * y
-            ) / self._vector[0][2]
+                -self._clf.intercept_
+                - self._clf.coef_[0][0] * x
+                - self._clf.coef_[0][1] * y
+            ) / self._clf.coef_[0][2]

         tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
         tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
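For reference, the hyperplane helper above is the 3-D plane equation solved for the third coordinate: with coefficients (w0, w1, w2) = clf.coef_[0] and intercept b, the plane w0*x + w1*y + w2*z + b = 0 gives z = -(b + w0*x + w1*y) / w2, which is exactly what the rewritten expression computes from the classifier's attributes.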
@@ -76,7 +76,9 @@ class Stree_grapher_test(unittest.TestCase):

     def test_save_all(self):
         folder_name = "/tmp/"
-        file_names = [f"{folder_name}STnode{i}.png" for i in range(1, 8)]
+        file_names = [
+            os.path.join(folder_name, f"STnode{i}.png") for i in range(1, 8)
+        ]
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             matplotlib.use("Agg")
@@ -160,8 +162,6 @@ class Snode_graph_test(unittest.TestCase):
                 # only exclude pure leaves
                 self.assertIsNotNone(node._clf)
                 self.assertIsNotNone(node._clf.coef_)
-                self.assertIsNotNone(node._vector)
-                self.assertIsNotNone(node._interceptor)
             if node.is_leaf():
                 return
             run_tree(node.get_down())
@@ -171,7 +171,7 @@ class Snode_graph_test(unittest.TestCase):

     def test_save_hyperplane(self):
         folder_name = "/tmp/"
-        file_name = f"{folder_name}STnode1.png"
+        file_name = os.path.join(folder_name, "STnode1.png")
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             matplotlib.use("Agg")
@@ -84,22 +84,6 @@ class Stree_test(unittest.TestCase):
         """
         self._check_tree(self._clf.tree_)

-    def _get_file_data(self, file_name: str) -> tuple:
-        """Return X, y from data, y is the last column in array
-
-        Arguments:
-            file_name {str} -- the file name
-
-        Returns:
-            tuple -- tuple with samples, categories
-        """
-        data = np.genfromtxt(file_name, delimiter=",")
-        data = np.array(data)
-        column_y = data.shape[1] - 1
-        fy = data[:, column_y]
-        fx = np.delete(data, column_y, axis=1)
-        return fx, fy
-
     def _find_out(
         self, px: np.array, x_original: np.array, y_original
     ) -> list:
@@ -134,11 +118,18 @@ class Stree_test(unittest.TestCase):

     def test_score(self):
         X, y = get_dataset(self._random_state)
-        accuracy_score = self._clf.score(X, y)
-        yp = self._clf.predict(X)
-        accuracy_computed = np.mean(yp == y)
-        self.assertEqual(accuracy_score, accuracy_computed)
-        self.assertGreater(accuracy_score, 0.9)
+        for kernel in ["linear"]:
+            clf = Stree(
+                random_state=self._random_state,
+                kernel=kernel,
+                use_predictions=True,
+            )
+            clf.fit(X, y)
+            accuracy_score = clf.score(X, y)
+            yp = clf.predict(X)
+            accuracy_computed = np.mean(yp == y)
+            self.assertEqual(accuracy_score, accuracy_computed)
+            self.assertGreater(accuracy_score, 0.9)

     def test_single_predict_proba(self):
         """Check that element 28 has a prediction different that the current
@@ -306,10 +297,11 @@ class Stree_test(unittest.TestCase):
         tcl.fit(*get_dataset(self._random_state))

     def test_check_max_depth(self):
-        depth = 3
-        tcl = Stree(random_state=self._random_state, max_depth=depth)
-        tcl.fit(*get_dataset(self._random_state))
-        self.assertEqual(depth, tcl.depth_)
+        depths = (3, 4)
+        for depth in depths:
+            tcl = Stree(random_state=self._random_state, max_depth=depth)
+            tcl.fit(*get_dataset(self._random_state))
+            self.assertEqual(depth, tcl.depth_)

     def test_unfitted_tree_is_iterable(self):
         tcl = Stree()
@@ -383,8 +375,6 @@ class Snode_test(unittest.TestCase):
                 # only exclude pure leaves
                 self.assertIsNotNone(node._clf)
                 self.assertIsNotNone(node._clf.coef_)
-                self.assertIsNotNone(node._vector)
-                self.assertIsNotNone(node._interceptor)
             if node.is_leaf():
                 return
             run_tree(node.get_down())