#3 Rewrite some tests & remove use_predictions

Remove the use_predictions parameter: as of now, the model always uses the node classifier's predictions (decision_function)
2020-06-08 01:51:21 +02:00
parent 05b462716e
commit 3a48d8b405
3 changed files with 64 additions and 139 deletions
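
The removed code path computed each sample's distance to the node hyperplane by hand from the SVM coefficients; after this commit the estimator's decision_function is always used instead. A minimal sketch (not from the repository) of why the two agree for a linear kernel:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import SVC

# Sketch: for a fitted linear SVC the hand-rolled hyperplane distance
# X @ coef_.T + intercept_ matches decision_function, which is why the
# use_predictions branch could be dropped. Non-linear kernels expose no
# coef_, so decision_function is the only option there anyway.
X, y = make_classification(random_state=1)
clf = SVC(kernel="linear", random_state=1).fit(X, y)
manual = X @ clf.coef_.T + clf.intercept_
delegated = np.expand_dims(clf.decision_function(X), 1)
assert np.allclose(manual, delegated)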

View File

@@ -126,14 +126,12 @@ class Stree(BaseEstimator, ClassifierMixin):
random_state: int = None,
max_depth: int = None,
tol: float = 1e-4,
use_predictions: bool = False,
min_samples_split: int = 0,
):
self.max_iter = max_iter
self.C = C
self.kernel = kernel
self.random_state = random_state
self.use_predictions = use_predictions
self.max_depth = max_depth
self.tol = tol
self.min_samples_split = min_samples_split
@@ -172,6 +170,7 @@ class Stree(BaseEstimator, ClassifierMixin):
:rtype: list
"""
up = ~down
print(self.kernel, up.shape, down.shape)
return (
origin[up[:, 0]] if any(up) else None,
origin[down[:, 0]] if any(down) else None,
@@ -188,14 +187,7 @@ class Stree(BaseEstimator, ClassifierMixin):
the hyperplane of the node
:rtype: np.array
"""
if self.use_predictions:
res = np.expand_dims(node._clf.decision_function(data), 1)
else:
# doesn't work with multiclass as each sample has to do an inner
# product with its own coefficients; computes the position of every
# sample w.r.t. the hyperplane
res = self._linear_function(data, node)
return res
return np.expand_dims(node._clf.decision_function(data), 1)
def _split_criteria(self, data: np.array) -> np.array:
"""Set the criteria to split arrays

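The split above partitions a node's samples by the side of the hyperplane they fall on: down holds one side, up = ~down the other, and an empty side becomes None. A hedged sketch of that pattern with hypothetical names (the down = decision > 0 criterion is an assumption for illustration, not the repository's code):

import numpy as np

def split_by_hyperplane(origin: np.ndarray, decision: np.ndarray):
    # decision is an (n, 1) column of signed distances to the hyperplane
    down = decision > 0  # assumed criterion, for illustration only
    up = ~down
    return (
        origin[up[:, 0]] if up.any() else None,
        origin[down[:, 0]] if down.any() else None,
    )

decision = np.array([[1.2], [-0.3], [0.7]])
data = np.arange(6).reshape(3, 2)
up_side, down_side = split_by_hyperplane(data, decision)  # 1 vs 2 samples
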
View File

@@ -32,9 +32,7 @@ class Stree_grapher_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ["TESTING"] = "1"
self._random_state = 1
self._clf = Stree_grapher(
dict(random_state=self._random_state, use_predictions=False)
)
self._clf = Stree_grapher(dict(random_state=self._random_state))
self._clf.fit(*get_dataset(self._random_state, n_features=4))
super().__init__(*args, **kwargs)
@@ -102,9 +100,7 @@ class Snode_graph_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ["TESTING"] = "1"
self._random_state = 1
self._clf = Stree_grapher(
dict(random_state=self._random_state, use_predictions=False)
)
self._clf = Stree_grapher(dict(random_state=self._random_state))
self._clf.fit(*get_dataset(self._random_state))
super().__init__(*args, **kwargs)

View File

@@ -28,10 +28,7 @@ class Stree_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ["TESTING"] = "1"
self._random_state = 1
self._clf = Stree(
random_state=self._random_state, use_predictions=False
)
self._clf.fit(*get_dataset(self._random_state))
self._kernels = ["linear", "rbf", "poly"]
super().__init__(*args, **kwargs)
@classmethod
@@ -82,7 +79,10 @@ class Stree_test(unittest.TestCase):
def test_build_tree(self):
"""Check if the tree is built the same way as predictions of models
"""
self._check_tree(self._clf.tree_)
for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state)
clf.fit(*get_dataset(self._random_state))
self._check_tree(clf.tree_)
def _find_out(
self, px: np.array, x_original: np.array, y_original
@@ -105,148 +105,85 @@ class Stree_test(unittest.TestCase):
return res
def test_single_prediction(self):
probs = [0.29026400766, 0.73105613, 0.0307635]
X, y = get_dataset(self._random_state)
yp = self._clf.predict((X[0, :].reshape(-1, X.shape[1])))
self.assertEqual(yp[0], y[0])
for kernel, prob in zip(self._kernels, probs):
clf = Stree(kernel=kernel, random_state=self._random_state)
yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
self.assertEqual(yp[0], y[0])
def test_multiple_prediction(self):
# For the first 27 elements the predictions are the same as the truth
num = 27
X, y = get_dataset(self._random_state)
yp = self._clf.predict(X[:num, :])
self.assertListEqual(y[:num].tolist(), yp.tolist())
for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state)
yp = clf.fit(X, y).predict(X[:num, :])
self.assertListEqual(y[:num].tolist(), yp.tolist())
def test_score(self):
X, y = get_dataset(self._random_state)
for kernel in ["linear"]:
clf = Stree(
random_state=self._random_state,
kernel=kernel,
use_predictions=True,
)
for kernel, accuracy_expected in zip(
self._kernels,
[0.9506666666666667, 0.9606666666666667, 0.9433333333333334],
):
clf = Stree(random_state=self._random_state, kernel=kernel)
clf.fit(X, y)
accuracy_score = clf.score(X, y)
yp = clf.predict(X)
accuracy_computed = np.mean(yp == y)
self.assertEqual(accuracy_score, accuracy_computed)
self.assertGreater(accuracy_score, 0.9)
self.assertAlmostEqual(accuracy_expected, accuracy_score)
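test_score relies on score being plain accuracy: for a scikit-learn ClassifierMixin, score is the mean of correct predictions, so computing it by hand with np.mean must match. A quick stand-in illustration (LogisticRegression instead of Stree):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# ClassifierMixin.score is mean accuracy, hence the assertEqual above
X, y = make_classification(random_state=1)
clf = LogisticRegression().fit(X, y)
assert clf.score(X, y) == np.mean(clf.predict(X) == y)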
def test_single_predict_proba(self):
"""Check that element 28 has a prediction different that the current
label
"""Check the element 28 probability of being 1
"""
# Element 28 has a different prediction than the truth
decimals = 5
prob = 0.29026400766
element = 28
probs = [0.29026400766, 0.73105613, 0.0307635]
X, y = get_dataset(self._random_state)
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
self.assertEqual(
np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
)
self.assertEqual(1, y[28])
self.assertAlmostEqual(
round(prob, decimals), round(yp[0, 1], decimals), decimals
)
self.assertEqual(1, y[element])
for kernel, prob in zip(self._kernels, probs):
clf = Stree(kernel=kernel, random_state=self._random_state)
yp = clf.fit(X, y).predict_proba(
X[element, :].reshape(-1, X.shape[1])
)
self.assertAlmostEqual(
np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
)
self.assertAlmostEqual(
round(prob, decimals), round(yp[0, 1], decimals), decimals
)
def test_multiple_predict_proba(self):
# For the first 27 elements the predictions are the same as the truth
num = 27
decimals = 5
X, y = get_dataset(self._random_state)
yp = self._clf.predict_proba(X[:num, :])
self.assertListEqual(
y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
)
expected_proba = [
0.88395641,
0.36746962,
0.84158767,
0.34106833,
0.14269291,
0.85193236,
0.29876058,
0.7282164,
0.85958616,
0.89517877,
0.99745224,
0.18860349,
0.30756427,
0.8318412,
0.18981198,
0.15564624,
0.25740655,
0.22923355,
0.87365959,
0.49928689,
0.95574351,
0.28761257,
0.28906333,
0.32643692,
0.29788483,
0.01657364,
0.81149083,
]
expected = np.round(expected_proba, decimals=decimals).tolist()
computed = np.round(yp[:, 1], decimals=decimals).tolist()
for i in range(len(expected)):
self.assertAlmostEqual(expected[i], computed[i], decimals)
def build_models(self):
"""Build and train two models, model_clf will use the sklearn
classifier to compute predictions and split data. model_computed will
use vector of coefficients to compute both predictions and splitted
data
"""
model_clf = Stree(
random_state=self._random_state, use_predictions=True
)
model_computed = Stree(
random_state=self._random_state, use_predictions=False
)
X, y = get_dataset(self._random_state)
model_clf.fit(X, y)
model_computed.fit(X, y)
return model_clf, model_computed, X, y
def test_use_model_predict(self):
"""Check that we get the same results wether we use the estimator in
nodes to compute labels or we use the hyperplane and the position of
samples wrt to it
"""
use_clf, use_math, X, _ = self.build_models()
self.assertListEqual(
use_clf.predict(X).tolist(), use_math.predict(X).tolist()
)
def test_use_model_score(self):
use_clf, use_math, X, y = self.build_models()
b = use_math.score(X, y)
self.assertEqual(use_clf.score(X, y), b)
self.assertGreater(b, 0.95)
def test_use_model_predict_proba(self):
use_clf, use_math, X, _ = self.build_models()
self.assertListEqual(
use_clf.predict_proba(X).tolist(),
use_math.predict_proba(X).tolist(),
)
for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state)
clf.fit(X, y)
yp = clf.predict_proba(X[:num, :])
self.assertListEqual(
y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
)
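The two predict_proba tests lean on binary-classification invariants: the probability columns are complementary, so column 0 equals 1 - column 1 (the 1 - prob assertion above), and the argmax over the columns reproduces the hard prediction. Illustrated with a stock classifier standing in for Stree:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(random_state=1)
clf = LogisticRegression().fit(X, y)
proba = clf.predict_proba(X)
assert np.allclose(proba.sum(axis=1), 1.0)  # column 0 is 1 - column 1
assert (np.argmax(proba, axis=1) == clf.predict(X)).all()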
def test_single_vs_multiple_prediction(self):
"""Check if predicting sample by sample gives the same result as
predicting all samples at once
"""
X, _ = get_dataset(self._random_state)
# Compute prediction line by line
yp_line = np.array([], dtype=int)
for xp in X:
yp_line = np.append(
yp_line, self._clf.predict(xp.reshape(-1, X.shape[1]))
)
# Compute prediction at once
yp_once = self._clf.predict(X)
#
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
X, y = get_dataset(self._random_state)
for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state)
clf.fit(X, y)
# Compute prediction line by line
yp_line = np.array([], dtype=int)
for xp in X:
yp_line = np.append(
yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
)
# Compute prediction at once
yp_once = clf.predict(X)
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
def test_iterator_and_str(self):
"""Check preorder iterator
@@ -266,11 +203,13 @@ class Stree_test(unittest.TestCase):
]
computed = []
expected_string = ""
for node in self._clf:
clf = Stree(kernel="linear", random_state=self._random_state)
clf.fit(*get_dataset(self._random_state))
for node in clf:
computed.append(str(node))
expected_string += str(node) + "\n"
self.assertListEqual(expected, computed)
self.assertEqual(expected_string, str(self._clf))
self.assertEqual(expected_string, str(clf))
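test_iterator_and_str walks the fitted tree in preorder and compares the visited nodes against str(clf). A minimal sketch of a preorder __iter__ for a binary node (hypothetical Node class; the child order is an assumption, not necessarily Snode's):

class Node:
    # hypothetical stand-in for a tree node with two children
    def __init__(self, title, down=None, up=None):
        self.title = title
        self.down = down
        self.up = up

    def __iter__(self):
        yield self  # preorder: the node itself comes first
        for child in (self.down, self.up):
            if child is not None:
                yield from child

root = Node("root", Node("leaf-down"), Node("leaf-up"))
print([n.title for n in root])  # ['root', 'leaf-down', 'leaf-up']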
def test_is_a_sklearn_classifier(self):
import warnings
@@ -323,9 +262,7 @@ class Snode_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
os.environ["TESTING"] = "1"
self._random_state = 1
self._clf = Stree(
random_state=self._random_state, use_predictions=True
)
self._clf = Stree(random_state=self._random_state)
self._clf.fit(*get_dataset(self._random_state))
super().__init__(*args, **kwargs)