Mirror of https://github.com/Doctorado-ML/STree.git (synced 2025-08-15 15:36:00 +00:00)
#3 Rewrite some tests & remove use_predictions
Remove the use_predictions parameter; as of now, the model always uses predictions (the node classifier's decision_function).
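The gist of the change, sketched below with a plain scikit-learn LinearSVC instead of this repository's classes (the variable names and the sign convention for the "down" branch are illustrative assumptions, not taken from the diff): splitting a node's samples now always relies on the fitted classifier's decision_function, the path that use_predictions=True used to select, while the manual coefficient-based computation (_linear_function) is dropped.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

# Minimal sketch of the retained code path: the signed distance of each
# sample to the node classifier's hyperplane decides which subtree it goes to.
X, y = make_classification(n_samples=100, random_state=1)
clf = LinearSVC(random_state=1, max_iter=10000).fit(X, y)

# One column of signed distances per sample, mirroring the expand_dims call
# kept in the diff below.
distances = np.expand_dims(clf.decision_function(X), 1)

down = distances > 0  # assumed sign convention for the "down" partition
up = ~down
X_up, X_down = X[up[:, 0]], X[down[:, 0]]
print(distances.shape, X_up.shape, X_down.shape)

The sketch prints a (100, 1) distance matrix and two complementary sample partitions; the tree applies this split recursively at every node.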
@@ -126,14 +126,12 @@ class Stree(BaseEstimator, ClassifierMixin):
        random_state: int = None,
        max_depth: int = None,
        tol: float = 1e-4,
        use_predictions: bool = False,
        min_samples_split: int = 0,
    ):
        self.max_iter = max_iter
        self.C = C
        self.kernel = kernel
        self.random_state = random_state
        self.use_predictions = use_predictions
        self.max_depth = max_depth
        self.tol = tol
        self.min_samples_split = min_samples_split
@@ -172,6 +170,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        :rtype: list
        """
        up = ~down
        print(self.kernel, up.shape, down.shape)
        return (
            origin[up[:, 0]] if any(up) else None,
            origin[down[:, 0]] if any(down) else None,
@@ -188,14 +187,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        the hyperplane of the node
        :rtype: np.array
        """
        if self.use_predictions:
            res = np.expand_dims(node._clf.decision_function(data), 1)
        else:
            # doesn't work with multiclass as each sample has to do inner
            # product with its own coefficients computes positition of every
            # sample is w.r.t. the hyperplane
            res = self._linear_function(data, node)
        return res
        return np.expand_dims(node._clf.decision_function(data), 1)

    def _split_criteria(self, data: np.array) -> np.array:
        """Set the criteria to split arrays
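For readability, the simplified method affected by the hunk above can be read as a whole roughly as follows; the method name (_distances here), its signature and the docstring wording are assumptions reconstructed around the lines shown, not copied from the repository:

    def _distances(self, node, data: np.array) -> np.array:
        """Compute the distances of every sample to the hyperplane of the node

        :rtype: np.array
        """
        return np.expand_dims(node._clf.decision_function(data), 1)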
@@ -32,9 +32,7 @@ class Stree_grapher_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
        self._clf = Stree_grapher(
            dict(random_state=self._random_state, use_predictions=False)
        )
        self._clf = Stree_grapher(dict(random_state=self._random_state))
        self._clf.fit(*get_dataset(self._random_state, n_features=4))
        super().__init__(*args, **kwargs)
@@ -102,9 +100,7 @@ class Snode_graph_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
        self._clf = Stree_grapher(
            dict(random_state=self._random_state, use_predictions=False)
        )
        self._clf = Stree_grapher(dict(random_state=self._random_state))
        self._clf.fit(*get_dataset(self._random_state))
        super().__init__(*args, **kwargs)
@@ -28,10 +28,7 @@ class Stree_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
        self._clf = Stree(
            random_state=self._random_state, use_predictions=False
        )
        self._clf.fit(*get_dataset(self._random_state))
        self._kernels = ["linear", "rbf", "poly"]
        super().__init__(*args, **kwargs)

    @classmethod
@@ -82,7 +79,10 @@ class Stree_test(unittest.TestCase):
    def test_build_tree(self):
        """Check if the tree is built the same way as predictions of models
        """
        self._check_tree(self._clf.tree_)
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(*get_dataset(self._random_state))
            self._check_tree(clf.tree_)

    def _find_out(
        self, px: np.array, x_original: np.array, y_original
@@ -105,148 +105,85 @@ class Stree_test(unittest.TestCase):
        return res

    def test_single_prediction(self):
        probs = [0.29026400766, 0.73105613, 0.0307635]
        X, y = get_dataset(self._random_state)
        yp = self._clf.predict((X[0, :].reshape(-1, X.shape[1])))
        self.assertEqual(yp[0], y[0])
        for kernel, prob in zip(self._kernels, probs):
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
            self.assertEqual(yp[0], y[0])

    def test_multiple_prediction(self):
        # First 27 elements the predictions are the same as the truth
        num = 27
        X, y = get_dataset(self._random_state)
        yp = self._clf.predict(X[:num, :])
        self.assertListEqual(y[:num].tolist(), yp.tolist())
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict(X[:num, :])
            self.assertListEqual(y[:num].tolist(), yp.tolist())
    def test_score(self):
        X, y = get_dataset(self._random_state)
        for kernel in ["linear"]:
            clf = Stree(
                random_state=self._random_state,
                kernel=kernel,
                use_predictions=True,
            )
        for kernel, accuracy_expected in zip(
            self._kernels,
            [0.9506666666666667, 0.9606666666666667, 0.9433333333333334],
        ):
            clf = Stree(random_state=self._random_state, kernel=kernel,)
            clf.fit(X, y)
            accuracy_score = clf.score(X, y)
            yp = clf.predict(X)
            accuracy_computed = np.mean(yp == y)
            self.assertEqual(accuracy_score, accuracy_computed)
            self.assertGreater(accuracy_score, 0.9)
            self.assertAlmostEqual(accuracy_expected, accuracy_score)

    def test_single_predict_proba(self):
        """Check that element 28 has a prediction different that the current
        label
        """Check the element 28 probability of being 1
        """
        # Element 28 has a different prediction than the truth
        decimals = 5
        prob = 0.29026400766
        element = 28
        probs = [0.29026400766, 0.73105613, 0.0307635]
        X, y = get_dataset(self._random_state)
        yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
        self.assertEqual(
            np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
        )
        self.assertEqual(1, y[28])

        self.assertAlmostEqual(
            round(prob, decimals), round(yp[0, 1], decimals), decimals
        )
        self.assertEqual(1, y[element])
        for kernel, prob in zip(self._kernels, probs):
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict_proba(
                X[element, :].reshape(-1, X.shape[1])
            )
            self.assertAlmostEqual(
                np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
            )
            self.assertAlmostEqual(
                round(prob, decimals), round(yp[0, 1], decimals), decimals
            )
    def test_multiple_predict_proba(self):
        # First 27 elements the predictions are the same as the truth
        num = 27
        decimals = 5
        X, y = get_dataset(self._random_state)
        yp = self._clf.predict_proba(X[:num, :])
        self.assertListEqual(
            y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
        )
        expected_proba = [
            0.88395641,
            0.36746962,
            0.84158767,
            0.34106833,
            0.14269291,
            0.85193236,
            0.29876058,
            0.7282164,
            0.85958616,
            0.89517877,
            0.99745224,
            0.18860349,
            0.30756427,
            0.8318412,
            0.18981198,
            0.15564624,
            0.25740655,
            0.22923355,
            0.87365959,
            0.49928689,
            0.95574351,
            0.28761257,
            0.28906333,
            0.32643692,
            0.29788483,
            0.01657364,
            0.81149083,
        ]
        expected = np.round(expected_proba, decimals=decimals).tolist()
        computed = np.round(yp[:, 1], decimals=decimals).tolist()
        for i in range(len(expected)):
            self.assertAlmostEqual(expected[i], computed[i], decimals)
    def build_models(self):
        """Build and train two models, model_clf will use the sklearn
        classifier to compute predictions and split data. model_computed will
        use vector of coefficients to compute both predictions and splitted
        data
        """
        model_clf = Stree(
            random_state=self._random_state, use_predictions=True
        )
        model_computed = Stree(
            random_state=self._random_state, use_predictions=False
        )
        X, y = get_dataset(self._random_state)
        model_clf.fit(X, y)
        model_computed.fit(X, y)
        return model_clf, model_computed, X, y
    def test_use_model_predict(self):
        """Check that we get the same results wether we use the estimator in
        nodes to compute labels or we use the hyperplane and the position of
        samples wrt to it
        """
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict(X).tolist(), use_math.predict(X).tolist()
        )

    def test_use_model_score(self):
        use_clf, use_math, X, y = self.build_models()
        b = use_math.score(X, y)
        self.assertEqual(use_clf.score(X, y), b)
        self.assertGreater(b, 0.95)

    def test_use_model_predict_proba(self):
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict_proba(X).tolist(),
            use_math.predict_proba(X).tolist(),
        )
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(X, y)
            yp = clf.predict_proba(X[:num, :])
            self.assertListEqual(
                y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
            )
    def test_single_vs_multiple_prediction(self):
        """Check if predicting sample by sample gives the same result as
        predicting all samples at once
        """
        X, _ = get_dataset(self._random_state)
        # Compute prediction line by line
        yp_line = np.array([], dtype=int)
        for xp in X:
            yp_line = np.append(
                yp_line, self._clf.predict(xp.reshape(-1, X.shape[1]))
            )
        # Compute prediction at once
        yp_once = self._clf.predict(X)
        #
        self.assertListEqual(yp_line.tolist(), yp_once.tolist())
        X, y = get_dataset(self._random_state)
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(X, y)
            # Compute prediction line by line
            yp_line = np.array([], dtype=int)
            for xp in X:
                yp_line = np.append(
                    yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
                )
            # Compute prediction at once
            yp_once = clf.predict(X)
            self.assertListEqual(yp_line.tolist(), yp_once.tolist())

    def test_iterator_and_str(self):
        """Check preorder iterator
@@ -266,11 +203,13 @@ class Stree_test(unittest.TestCase):
        ]
        computed = []
        expected_string = ""
        for node in self._clf:
        clf = Stree(kernel="linear", random_state=self._random_state)
        clf.fit(*get_dataset(self._random_state))
        for node in clf:
            computed.append(str(node))
            expected_string += str(node) + "\n"
        self.assertListEqual(expected, computed)
        self.assertEqual(expected_string, str(self._clf))
        self.assertEqual(expected_string, str(clf))

    def test_is_a_sklearn_classifier(self):
        import warnings
@@ -323,9 +262,7 @@ class Snode_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
        self._clf = Stree(
            random_state=self._random_state, use_predictions=True
        )
        self._clf = Stree(random_state=self._random_state)
        self._clf.fit(*get_dataset(self._random_state))
        super().__init__(*args, **kwargs)