# Mirror of https://github.com/Doctorado-ML/STree.git
# Synced 2025-08-15 23:46:02 +00:00
# 669 lines, 24 KiB, Python
import os
|
|
import unittest
|
|
import warnings
|
|
|
|
import numpy as np
|
|
from sklearn.datasets import load_iris, load_wine
|
|
from sklearn.exceptions import ConvergenceWarning
|
|
from sklearn.svm import LinearSVC
|
|
|
|
from stree import Stree
|
|
from stree.Splitter import Snode
|
|
from .utils import load_dataset
|
|
from .._version import __version__
|
|
|
|
|
|
class Stree_test(unittest.TestCase):
    """Unit tests for the ``Stree`` classifier and its ``Snode`` nodes."""

    def __init__(self, *args, **kwargs):
        """Store the seed and the kernel names shared by the tests.

        The attributes are assigned before delegating to
        ``unittest.TestCase.__init__`` with the received arguments.
        """
        self._random_state = 1
        self._kernels = ["liblinear", "linear", "rbf", "poly", "sigmoid"]
        super().__init__(*args, **kwargs)

    # NOTE(review): ``setUp`` is the per-test hook of unittest and is usually
    # an instance method; it is kept as a classmethod to preserve the
    # existing interface.
    @classmethod
    def setUp(cls):
        """Set the ``TESTING`` environment variable to ``"1"``."""
        os.environ["TESTING"] = "1"

    def test_valid_kernels(self):
        """Fitting with each kernel in ``self._kernels`` builds a tree."""
        X, y = load_dataset()
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, multiclass_strategy="ovr")
            clf.fit(X, y)
            self.assertIsNotNone(clf.tree_)

    def test_bogus_kernel(self):
        """Fitting with an unknown kernel name raises ``ValueError``."""
        X, y = load_dataset()
        clf = Stree(kernel="other")
        with self.assertRaises(ValueError):
            clf.fit(X, y)

    def _check_tree(self, node: Snode):
        """Check recursively that the nodes that are not leaves have the
        correct number of labels and its sons have the right number of
        elements in their dataset

        Parameters
        ----------
        node : Snode
            node to check
        """
        if node.is_leaf():
            return
        y_prediction = node._clf.predict(node._X)
        y_down = node.get_down()._y
        y_up = node.get_up()._y
        # Is a correct partition in terms of cardinality?
        # i.e. The partition algorithm didn't forget any sample
        self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
        unique_y, count_y = np.unique(node._y, return_counts=True)
        labels_d, count_d = np.unique(y_down, return_counts=True)
        labels_u, count_u = np.unique(y_up, return_counts=True)
        dict_d = dict(zip(labels_d, count_d))
        dict_u = dict(zip(labels_u, count_u))
        # Every label of the node must be fully distributed between both
        # sons. Labels are paired with their counts positionally, so the
        # check does not depend on the labels being 0..k-1.
        for label, total in zip(unique_y, count_y):
            self.assertEqual(
                total, dict_d.get(label, 0) + dict_u.get(label, 0)
            )
        # Is the partition made the same as the prediction?
        # as the node is not a leaf...
        # NOTE(review): indexing count_yp[1] assumes the node classifier
        # predicted two distinct values - confirm for every kernel.
        _, count_yp = np.unique(y_prediction, return_counts=True)
        self.assertEqual(count_yp[1], y_up.shape[0])
        self.assertEqual(count_yp[0], y_down.shape[0])
        self._check_tree(node.get_down())
        self._check_tree(node.get_up())

    def test_build_tree(self):
        """Check if the tree is built the same way as predictions of models"""
        warnings.filterwarnings("ignore")
        for kernel in self._kernels:
            # NOTE(review): the kernel is hard-coded to "sigmoid"; the loop
            # variable only selects the multiclass strategy. Possibly
            # ``kernel=kernel`` was intended - confirm before changing, as
            # it may alter which trees pass ``_check_tree``.
            clf = Stree(
                kernel="sigmoid",
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                random_state=self._random_state,
            )
            clf.fit(*load_dataset(self._random_state))
            self._check_tree(clf.tree_)

    def test_single_prediction(self):
        """The first sample is predicted as its true label by each kernel."""
        X, y = load_dataset(self._random_state)
        for kernel in self._kernels:
            clf = Stree(
                kernel=kernel,
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                random_state=self._random_state,
            )
            yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
            self.assertEqual(yp[0], y[0])

    def test_multiple_prediction(self):
        """The first 27 samples are predicted as their true labels.

        The sigmoid kernel is not included in this check.
        """
        # First 27 elements the predictions are the same as the truth
        num = 27
        X, y = load_dataset(self._random_state)
        for kernel in ["liblinear", "linear", "rbf", "poly"]:
            clf = Stree(
                kernel=kernel,
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                random_state=self._random_state,
            )
            yp = clf.fit(X, y).predict(X[:num, :])
            self.assertListEqual(y[:num].tolist(), yp.tolist())

    def test_single_vs_multiple_prediction(self):
        """Check if predicting sample by sample gives the same result as
        predicting all samples at once
        """
        X, y = load_dataset(self._random_state)
        for kernel in self._kernels:
            clf = Stree(
                kernel=kernel,
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                random_state=self._random_state,
            )
            clf.fit(X, y)
            # Compute prediction line by line; the per-sample results are
            # joined once instead of growing an array inside the loop
            yp_line = np.concatenate(
                [clf.predict(xp.reshape(-1, X.shape[1])) for xp in X]
            )
            # Compute prediction at once
            yp_once = clf.predict(X)
            self.assertListEqual(yp_line.tolist(), yp_once.tolist())

    def test_iterator_and_str(self):
        """Check preorder iterator"""
        # These strings are compared verbatim with str(node), so every
        # character (including inner spacing and the "feaures" spelling)
        # matters.
        expected = [
            "root feaures=(0, 1, 2) impurity=1.0000 counts=(array([0, 1]), "
            "array([750, 750]))",
            "root - Down(2), <cgaf> - Leaf class=0 belief= 0.928297 impurity="
            "0.3722 counts=(array([0, 1]), array([725, 56]))",
            "root - Up(2) feaures=(0, 1, 2) impurity=0.2178 counts=(array([0, "
            "1]), array([ 25, 694]))",
            "root - Up(2) - Down(3) feaures=(0, 1, 2) impurity=0.8454 counts="
            "(array([0, 1]), array([8, 3]))",
            "root - Up(2) - Down(3) - Down(4), <pure> - Leaf class=0 belief= "
            "1.000000 impurity=0.0000 counts=(array([0]), array([7]))",
            "root - Up(2) - Down(3) - Up(4), <cgaf> - Leaf class=1 belief= "
            "0.750000 impurity=0.8113 counts=(array([0, 1]), array([1, 3]))",
            "root - Up(2) - Up(3), <cgaf> - Leaf class=1 belief= 0.975989 "
            "impurity=0.1634 counts=(array([0, 1]), array([ 17, 691]))",
        ]
        clf = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
        )
        clf.fit(*load_dataset(self._random_state))
        computed = [str(node) for node in clf]
        # str(clf) must be every node string followed by a newline
        expected_string = "".join(f"{line}\n" for line in computed)
        self.assertListEqual(expected, computed)
        self.assertEqual(expected_string, str(clf))

    @staticmethod
    def test_is_a_sklearn_classifier():
        """Run scikit-learn's ``check_estimator`` on a default ``Stree``.

        Convergence and runtime warnings are silenced beforehand.
        """
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        from sklearn.utils.estimator_checks import check_estimator

        check_estimator(Stree())

    def test_exception_if_C_is_negative(self):
        """Fitting with ``C=-1`` raises ``ValueError``."""
        tclf = Stree(C=-1)
        with self.assertRaises(ValueError):
            tclf.fit(*load_dataset(self._random_state))

    def test_exception_if_bogus_split_criteria(self):
        """Fitting with an unknown ``split_criteria`` raises ``ValueError``."""
        tclf = Stree(split_criteria="duck")
        with self.assertRaises(ValueError):
            tclf.fit(*load_dataset(self._random_state))

    def test_check_max_depth_is_positive_or_None(self):
        """``max_depth`` defaults to None and a negative value is rejected."""
        tcl = Stree()
        self.assertIsNone(tcl.max_depth)
        tcl = Stree(max_depth=1)
        self.assertGreaterEqual(1, tcl.max_depth)
        with self.assertRaises(ValueError):
            tcl = Stree(max_depth=-1)
            tcl.fit(*load_dataset(self._random_state))

    def test_check_max_depth(self):
        """The fitted ``depth_`` equals ``max_depth`` for depths 3 and 4."""
        depths = (3, 4)
        for depth in depths:
            tcl = Stree(
                kernel="liblinear",
                multiclass_strategy="ovr",
                random_state=self._random_state,
                max_depth=depth,
            )
            tcl.fit(*load_dataset(self._random_state))
            self.assertEqual(depth, tcl.depth_)

    def test_unfitted_tree_is_iterable(self):
        """Iterating an unfitted ``Stree`` yields no nodes."""
        tcl = Stree()
        self.assertEqual(0, len(list(tcl)))

    def test_min_samples_split(self):
        """A node splits only if it has at least ``min_samples_split``.

        With three samples the root has sons when the threshold is 3 and has
        none when the threshold is 4.
        """
        dataset = [[1], [2], [3]], [1, 1, 0]
        tcl_split = Stree(min_samples_split=3).fit(*dataset)
        self.assertIsNotNone(tcl_split.tree_.get_down())
        self.assertIsNotNone(tcl_split.tree_.get_up())
        tcl_nosplit = Stree(min_samples_split=4).fit(*dataset)
        self.assertIsNone(tcl_nosplit.tree_.get_down())
        self.assertIsNone(tcl_nosplit.tree_.get_up())

    def test_simple_muticlass_dataset(self):
        """A three sample, three class dataset is fitted perfectly."""
        # The fixture does not depend on the kernel, so build it once
        px = [[1, 2], [5, 6], [9, 10]]
        py = [0, 1, 2]
        for kernel in self._kernels:
            clf = Stree(
                kernel=kernel,
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                random_state=self._random_state,
            )
            clf.fit(px, py)
            self.assertEqual(1.0, clf.score(px, py))
            self.assertListEqual(py, clf.predict(px).tolist())
            self.assertListEqual(py, clf.classes_.tolist())

    def test_muticlass_dataset(self):
        """Compare training accuracy on two multiclass datasets.

        Each kernel is scored on a synthetic three class dataset and on the
        wine dataset and compared, to 5 decimal places, with stored values.
        """
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        datasets = {
            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
            "Wine": load_wine(return_X_y=True),
        }
        outcomes = {
            "Synt": {
                "max_samples liblinear": 0.9493333333333334,
                "max_samples linear": 0.9426666666666667,
                "max_samples rbf": 0.9606666666666667,
                "max_samples poly": 0.9373333333333334,
                "max_samples sigmoid": 0.824,
                "impurity liblinear": 0.9493333333333334,
                "impurity linear": 0.9426666666666667,
                "impurity rbf": 0.9606666666666667,
                "impurity poly": 0.9373333333333334,
                "impurity sigmoid": 0.824,
            },
            "Wine": {
                "max_samples liblinear": 0.9550561797752809,
                "max_samples linear": 1.0,
                "max_samples rbf": 0.6685393258426966,
                "max_samples poly": 0.6853932584269663,
                "max_samples sigmoid": 0.6404494382022472,
                "impurity liblinear": 0.9550561797752809,
                "impurity linear": 1.0,
                "impurity rbf": 0.6685393258426966,
                "impurity poly": 0.6853932584269663,
                "impurity sigmoid": 0.6404494382022472,
            },
        }

        for name, dataset in datasets.items():
            px, py = dataset
            for criteria in ["max_samples", "impurity"]:
                for kernel in self._kernels:
                    # NOTE(review): ``criteria`` only selects the expected
                    # value; it is not passed to Stree, so both criteria
                    # exercise the same model. Confirm whether
                    # ``split_criteria=criteria`` was intended.
                    clf = Stree(
                        max_iter=1e4,
                        multiclass_strategy="ovr"
                        if kernel == "liblinear"
                        else "ovo",
                        kernel=kernel,
                        random_state=self._random_state,
                    )
                    clf.fit(px, py)
                    outcome = outcomes[name][f"{criteria} {kernel}"]
                    self.assertAlmostEqual(
                        outcome,
                        clf.score(px, py),
                        5,
                        f"{name} - {criteria} - {kernel}",
                    )

    def test_max_features(self):
        """``_initialize_max_features`` resolves each ``max_features`` value.

        With 16 features the accepted settings map to the expected feature
        counts, and invalid settings raise ``ValueError``.
        """
        n_features = 16
        expected_values = [
            ("auto", 4),
            ("log2", 4),
            ("sqrt", 4),
            (0.5, 8),
            (3, 3),
            (None, 16),
        ]
        clf = Stree()
        # Set the attribute by hand so no fit is needed
        clf.n_features_ = n_features
        for max_features, expected in expected_values:
            clf.set_params(max_features=max_features)
            computed = clf._initialize_max_features()
            self.assertEqual(expected, computed)
        # Check bogus max_features
        values = ["duck", -0.1, 0.0]
        for max_features in values:
            clf.set_params(max_features=max_features)
            with self.assertRaises(ValueError):
                _ = clf._initialize_max_features()

    def test_wrong_max_features(self):
        """``max_features`` above the number of features raises an error."""
        X, y = load_dataset(n_features=15)
        clf = Stree(max_features=16)
        with self.assertRaises(ValueError):
            clf.fit(X, y)

    def test_get_subspaces(self):
        """``get_subspace`` returns the selected columns and their indices.

        For each ``max_features`` setting the returned matrix must equal the
        dataset restricted to the returned indices, and the number of indices
        must match the expected feature count.
        """
        dataset = np.random.random((10, 16))
        y = np.random.randint(0, 2, 10)
        expected_values = [
            ("auto", 4),
            ("log2", 4),
            ("sqrt", 4),
            (0.5, 8),
            (3, 3),
            (None, 16),
        ]
        clf = Stree()
        for max_features, expected in expected_values:
            clf.set_params(max_features=max_features)
            clf.fit(dataset, y)
            computed, indices = clf.splitter_.get_subspace(
                dataset, y, clf.max_features_
            )
            self.assertListEqual(
                dataset[:, indices].tolist(), computed.tolist()
            )
            self.assertEqual(expected, len(indices))

    def test_bogus_criterion(self):
        """Fitting with an unknown ``criterion`` raises ``ValueError``."""
        clf = Stree(criterion="duck")
        with self.assertRaises(ValueError):
            clf.fit(*load_dataset())

    def test_predict_feature_dimensions(self):
        """Predicting with fewer columns than fitted raises ``ValueError``."""
        X = np.random.rand(10, 5)
        y = np.random.randint(0, 2, 10)
        clf = Stree()
        clf.fit(X, y)
        with self.assertRaises(ValueError):
            clf.predict(X[:, :3])

    # Tests of score
    def test_score_binary(self):
        """``score`` equals the mean of correct predictions for each kernel.

        The score is also compared with a stored accuracy per kernel.
        """
        X, y = load_dataset(self._random_state)
        # One expected accuracy per entry of self._kernels, in order
        accuracies = [
            0.9506666666666667,
            0.9493333333333334,
            0.9606666666666667,
            0.9433333333333334,
            0.9153333333333333,
        ]
        for kernel, accuracy_expected in zip(self._kernels, accuracies):
            clf = Stree(
                random_state=self._random_state,
                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                kernel=kernel,
            )
            clf.fit(X, y)
            accuracy_score = clf.score(X, y)
            yp = clf.predict(X)
            accuracy_computed = np.mean(yp == y)
            self.assertEqual(accuracy_score, accuracy_computed)
            self.assertAlmostEqual(accuracy_expected, accuracy_score)

    def test_score_max_features(self):
        """Check the stored accuracy of a liblinear tree, ``max_features=2``."""
        X, y = load_dataset(self._random_state)
        clf = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
            max_features=2,
        )
        clf.fit(X, y)
        self.assertAlmostEqual(0.9453333333333334, clf.score(X, y))

    def test_bogus_splitter_parameter(self):
        """Fitting with an unknown ``splitter`` raises ``ValueError``."""
        clf = Stree(splitter="duck")
        with self.assertRaises(ValueError):
            clf.fit(*load_dataset())

    def test_multiclass_classifier_integrity(self):
        """Checks if the multiclass operation is done right"""
        X, y = load_iris(return_X_y=True)
        clf = Stree(
            kernel="liblinear", multiclass_strategy="ovr", random_state=0
        )
        clf.fit(X, y)
        score = clf.score(X, y)
        # Check accuracy of the whole model
        self.assertAlmostEqual(0.98, score, 5)
        svm = LinearSVC(random_state=0)
        svm.fit(X, y)
        self.assertAlmostEqual(0.9666666666666667, svm.score(X, y), 5)
        data = svm.decision_function(X)
        expected = [
            0.4444444444444444,
            0.35777777777777775,
            0.4569777777777778,
        ]
        # Binarize each decision column: 1 where positive, 0 otherwise
        ty = (data > 0).astype(int)
        for i in range(3):
            self.assertAlmostEqual(
                expected[i],
                clf.splitter_._gini(ty[:, i]),
            )
        # 1st Branch, split on the sign of the third decision column
        # up should have 53 samples of classes [1, 2] [3, 50]
        # down should have 97 samples of classes [0, 1] [50, 47]
        up = data[:, 2] > 0
        resup = np.unique(y[up], return_counts=True)
        resdn = np.unique(y[~up], return_counts=True)
        self.assertListEqual([1, 2], resup[0].tolist())
        self.assertListEqual([3, 50], resup[1].tolist())
        self.assertListEqual([0, 1], resdn[0].tolist())
        self.assertListEqual([50, 47], resdn[1].tolist())
        # 2nd Branch
        # up should have 53 samples of classes [1, 2] [3, 50]
        # down should have 47 samples of class 1
        node_up = clf.tree_.get_down().get_up()
        node_dn = clf.tree_.get_down().get_down()
        resup = np.unique(node_up._y, return_counts=True)
        resdn = np.unique(node_dn._y, return_counts=True)
        self.assertListEqual([1, 2], resup[0].tolist())
        self.assertListEqual([3, 50], resup[1].tolist())
        self.assertListEqual([1], resdn[0].tolist())
        self.assertListEqual([47], resdn[1].tolist())

    def test_score_multiclass_rbf(self):
        """Check stored rbf accuracies, with and without ``normalize``."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=500,
        )
        clf = Stree(kernel="rbf", random_state=self._random_state)
        clf2 = Stree(
            kernel="rbf", random_state=self._random_state, normalize=True
        )
        self.assertEqual(0.966, clf.fit(X, y).score(X, y))
        self.assertEqual(0.964, clf2.fit(X, y).score(X, y))
        X, y = load_wine(return_X_y=True)
        self.assertEqual(0.6685393258426966, clf.fit(X, y).score(X, y))
        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))

    def test_score_multiclass_poly(self):
        """Check stored poly accuracies, with and without ``normalize``."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=500,
        )
        clf = Stree(
            kernel="poly", random_state=self._random_state, C=10, degree=5
        )
        clf2 = Stree(
            kernel="poly",
            random_state=self._random_state,
            normalize=True,
        )
        self.assertEqual(0.946, clf.fit(X, y).score(X, y))
        self.assertEqual(0.972, clf2.fit(X, y).score(X, y))
        X, y = load_wine(return_X_y=True)
        self.assertEqual(0.7808988764044944, clf.fit(X, y).score(X, y))
        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))

    def test_score_multiclass_liblinear(self):
        """Check stored liblinear accuracies, with/without ``normalize``."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=500,
        )
        clf = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
            C=10,
        )
        clf2 = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
            normalize=True,
        )
        self.assertEqual(0.968, clf.fit(X, y).score(X, y))
        self.assertEqual(0.97, clf2.fit(X, y).score(X, y))
        X, y = load_wine(return_X_y=True)
        self.assertEqual(1.0, clf.fit(X, y).score(X, y))
        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))

    def test_score_multiclass_sigmoid(self):
        """Check stored sigmoid accuracies, with and without ``normalize``."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=500,
        )
        clf = Stree(kernel="sigmoid", random_state=self._random_state, C=10)
        clf2 = Stree(
            kernel="sigmoid",
            random_state=self._random_state,
            normalize=True,
            C=10,
        )
        self.assertEqual(0.796, clf.fit(X, y).score(X, y))
        self.assertEqual(0.952, clf2.fit(X, y).score(X, y))
        X, y = load_wine(return_X_y=True)
        self.assertEqual(0.6910112359550562, clf.fit(X, y).score(X, y))
        self.assertEqual(0.9662921348314607, clf2.fit(X, y).score(X, y))

    def test_score_multiclass_linear(self):
        """Check stored accuracies on 1500 samples and on the wine dataset.

        Despite the test name, both classifiers use the "liblinear" kernel.
        """
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=1500,
        )
        clf = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
        )
        self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y))
        # Check with context based standardization
        clf2 = Stree(
            kernel="liblinear",
            multiclass_strategy="ovr",
            random_state=self._random_state,
            normalize=True,
        )
        self.assertEqual(0.9526666666666667, clf2.fit(X, y).score(X, y))
        X, y = load_wine(return_X_y=True)
        self.assertEqual(0.9831460674157303, clf.fit(X, y).score(X, y))
        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))

    def test_zero_all_sample_weights(self):
        """Fitting with every sample weight at zero raises ``ValueError``."""
        X, y = load_dataset(self._random_state)
        with self.assertRaises(ValueError):
            Stree().fit(X, y, np.zeros(len(y)))

    def test_mask_samples_weighted_zero(self):
        """Samples weighted zero do not take part in the fitted model.

        The class 2 samples get weight 0; the weighted model predicts them
        as class 1, and its score is 1 only when the weights are passed to
        ``score`` as well.
        """
        X = np.array(
            [
                [1, 1],
                [1, 1],
                [1, 1],
                [2, 2],
                [2, 2],
                [2, 2],
                [3, 3],
                [3, 3],
                [3, 3],
            ]
        )
        y = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5])
        # Predictions expected from the model fitted with weights ``w``
        yw = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5])
        w = [1, 1, 1, 0, 0, 0, 1, 1, 1]
        model1 = Stree().fit(X, y)
        model2 = Stree().fit(X, y, w)
        predict1 = model1.predict(X)
        predict2 = model2.predict(X)
        self.assertListEqual(y.tolist(), predict1.tolist())
        self.assertListEqual(yw.tolist(), predict2.tolist())
        self.assertEqual(model1.score(X, y), 1)
        self.assertAlmostEqual(model2.score(X, y), 0.66666667)
        self.assertEqual(model2.score(X, y, w), 1)

    def test_depth(self):
        """Check the stored ``depth_`` for a synthetic and the wine dataset."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=1500,
        )
        clf = Stree(random_state=self._random_state)
        clf.fit(X, y)
        self.assertEqual(6, clf.depth_)
        X, y = load_wine(return_X_y=True)
        clf = Stree(random_state=self._random_state)
        clf.fit(X, y)
        self.assertEqual(4, clf.depth_)

    def test_nodes_leaves(self):
        """Check the stored node and leaf counts of two fitted trees."""
        X, y = load_dataset(
            random_state=self._random_state,
            n_classes=3,
            n_features=5,
            n_samples=1500,
        )
        clf = Stree(random_state=self._random_state)
        clf.fit(X, y)
        nodes, leaves = clf.nodes_leaves()
        self.assertEqual(31, nodes)
        self.assertEqual(16, leaves)
        X, y = load_wine(return_X_y=True)
        clf = Stree(random_state=self._random_state)
        clf.fit(X, y)
        nodes, leaves = clf.nodes_leaves()
        self.assertEqual(11, nodes)
        self.assertEqual(6, leaves)

    def test_nodes_leaves_artificial(self):
        """``nodes_leaves`` counts a hand-built tree of six nodes.

        The tree is n1 -up-> n2, n2 -up-> n3, n2 -down-> n4, n3 -up-> n5 and
        n4 -down-> n6; 6 nodes and 2 leaves are expected.
        """
        n1, n2, n3, n4, n5, n6 = (
            Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, f"test{i}")
            for i in range(1, 7)
        )
        n1.set_up(n2)
        n2.set_up(n3)
        n2.set_down(n4)
        n3.set_up(n5)
        n4.set_down(n6)
        clf = Stree(random_state=self._random_state)
        clf.tree_ = n1
        nodes, leaves = clf.nodes_leaves()
        self.assertEqual(6, nodes)
        self.assertEqual(2, leaves)

    def test_bogus_multiclass_strategy(self):
        """An unknown ``multiclass_strategy`` raises ``ValueError`` on fit."""
        clf = Stree(multiclass_strategy="other")
        X, y = load_wine(return_X_y=True)
        with self.assertRaises(ValueError):
            clf.fit(X, y)

    def test_multiclass_strategy(self):
        """Check stored wine accuracies for the "ovo" and "ovr" strategies."""
        X, y = load_wine(return_X_y=True)
        clf_o = Stree(multiclass_strategy="ovo")
        clf_r = Stree(multiclass_strategy="ovr")
        score_o = clf_o.fit(X, y).score(X, y)
        score_r = clf_r.fit(X, y).score(X, y)
        self.assertEqual(1.0, score_o)
        self.assertEqual(0.9269662921348315, score_r)

    def test_incompatible_hyperparameters(self):
        """Incompatible hyperparameter pairs raise ``ValueError`` on fit.

        Covers the liblinear kernel with "ovo" and the "max_samples" split
        criteria with "ovo".
        """
        X, y = load_wine(return_X_y=True)
        clf = Stree(kernel="liblinear", multiclass_strategy="ovo")
        with self.assertRaises(ValueError):
            clf.fit(X, y)
        clf = Stree(multiclass_strategy="ovo", split_criteria="max_samples")
        with self.assertRaises(ValueError):
            clf.fit(X, y)

    def test_version(self):
        """``Stree.version()`` returns the package ``__version__``."""
        clf = Stree()
        self.assertEqual(__version__, clf.version())