mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-17 16:36:01 +00:00
#3 Rewrite some tests & remove use_predictions
Remove the use_predictions parameter; as of now, the model always uses it
This commit is contained in:
@@ -126,14 +126,12 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
random_state: int = None,
|
random_state: int = None,
|
||||||
max_depth: int = None,
|
max_depth: int = None,
|
||||||
tol: float = 1e-4,
|
tol: float = 1e-4,
|
||||||
use_predictions: bool = False,
|
|
||||||
min_samples_split: int = 0,
|
min_samples_split: int = 0,
|
||||||
):
|
):
|
||||||
self.max_iter = max_iter
|
self.max_iter = max_iter
|
||||||
self.C = C
|
self.C = C
|
||||||
self.kernel = kernel
|
self.kernel = kernel
|
||||||
self.random_state = random_state
|
self.random_state = random_state
|
||||||
self.use_predictions = use_predictions
|
|
||||||
self.max_depth = max_depth
|
self.max_depth = max_depth
|
||||||
self.tol = tol
|
self.tol = tol
|
||||||
self.min_samples_split = min_samples_split
|
self.min_samples_split = min_samples_split
|
||||||
@@ -172,6 +170,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
:rtype: list
|
:rtype: list
|
||||||
"""
|
"""
|
||||||
up = ~down
|
up = ~down
|
||||||
|
print(self.kernel, up.shape, down.shape)
|
||||||
return (
|
return (
|
||||||
origin[up[:, 0]] if any(up) else None,
|
origin[up[:, 0]] if any(up) else None,
|
||||||
origin[down[:, 0]] if any(down) else None,
|
origin[down[:, 0]] if any(down) else None,
|
||||||
@@ -188,14 +187,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
the hyperplane of the node
|
the hyperplane of the node
|
||||||
:rtype: np.array
|
:rtype: np.array
|
||||||
"""
|
"""
|
||||||
if self.use_predictions:
|
return np.expand_dims(node._clf.decision_function(data), 1)
|
||||||
res = np.expand_dims(node._clf.decision_function(data), 1)
|
|
||||||
else:
|
|
||||||
# doesn't work with multiclass as each sample has to do inner
|
|
||||||
# product with its own coefficients computes positition of every
|
|
||||||
# sample is w.r.t. the hyperplane
|
|
||||||
res = self._linear_function(data, node)
|
|
||||||
return res
|
|
||||||
|
|
||||||
def _split_criteria(self, data: np.array) -> np.array:
|
def _split_criteria(self, data: np.array) -> np.array:
|
||||||
"""Set the criteria to split arrays
|
"""Set the criteria to split arrays
|
||||||
|
@@ -32,9 +32,7 @@ class Stree_grapher_test(unittest.TestCase):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
os.environ["TESTING"] = "1"
|
os.environ["TESTING"] = "1"
|
||||||
self._random_state = 1
|
self._random_state = 1
|
||||||
self._clf = Stree_grapher(
|
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
||||||
dict(random_state=self._random_state, use_predictions=False)
|
|
||||||
)
|
|
||||||
self._clf.fit(*get_dataset(self._random_state, n_features=4))
|
self._clf.fit(*get_dataset(self._random_state, n_features=4))
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
@@ -102,9 +100,7 @@ class Snode_graph_test(unittest.TestCase):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
os.environ["TESTING"] = "1"
|
os.environ["TESTING"] = "1"
|
||||||
self._random_state = 1
|
self._random_state = 1
|
||||||
self._clf = Stree_grapher(
|
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
||||||
dict(random_state=self._random_state, use_predictions=False)
|
|
||||||
)
|
|
||||||
self._clf.fit(*get_dataset(self._random_state))
|
self._clf.fit(*get_dataset(self._random_state))
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
@@ -28,10 +28,7 @@ class Stree_test(unittest.TestCase):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
os.environ["TESTING"] = "1"
|
os.environ["TESTING"] = "1"
|
||||||
self._random_state = 1
|
self._random_state = 1
|
||||||
self._clf = Stree(
|
self._kernels = ["linear", "rbf", "poly"]
|
||||||
random_state=self._random_state, use_predictions=False
|
|
||||||
)
|
|
||||||
self._clf.fit(*get_dataset(self._random_state))
|
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -82,7 +79,10 @@ class Stree_test(unittest.TestCase):
|
|||||||
def test_build_tree(self):
|
def test_build_tree(self):
|
||||||
"""Check if the tree is built the same way as predictions of models
|
"""Check if the tree is built the same way as predictions of models
|
||||||
"""
|
"""
|
||||||
self._check_tree(self._clf.tree_)
|
for kernel in self._kernels:
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
clf.fit(*get_dataset(self._random_state))
|
||||||
|
self._check_tree(clf.tree_)
|
||||||
|
|
||||||
def _find_out(
|
def _find_out(
|
||||||
self, px: np.array, x_original: np.array, y_original
|
self, px: np.array, x_original: np.array, y_original
|
||||||
@@ -105,46 +105,52 @@ class Stree_test(unittest.TestCase):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def test_single_prediction(self):
|
def test_single_prediction(self):
|
||||||
|
probs = [0.29026400766, 0.73105613, 0.0307635]
|
||||||
X, y = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
yp = self._clf.predict((X[0, :].reshape(-1, X.shape[1])))
|
for kernel, prob in zip(self._kernels, probs):
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
|
||||||
self.assertEqual(yp[0], y[0])
|
self.assertEqual(yp[0], y[0])
|
||||||
|
|
||||||
def test_multiple_prediction(self):
|
def test_multiple_prediction(self):
|
||||||
# First 27 elements the predictions are the same as the truth
|
# First 27 elements the predictions are the same as the truth
|
||||||
num = 27
|
num = 27
|
||||||
X, y = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
yp = self._clf.predict(X[:num, :])
|
for kernel in self._kernels:
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
yp = clf.fit(X, y).predict(X[:num, :])
|
||||||
self.assertListEqual(y[:num].tolist(), yp.tolist())
|
self.assertListEqual(y[:num].tolist(), yp.tolist())
|
||||||
|
|
||||||
def test_score(self):
|
def test_score(self):
|
||||||
X, y = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
for kernel in ["linear"]:
|
for kernel, accuracy_expected in zip(
|
||||||
clf = Stree(
|
self._kernels,
|
||||||
random_state=self._random_state,
|
[0.9506666666666667, 0.9606666666666667, 0.9433333333333334],
|
||||||
kernel=kernel,
|
):
|
||||||
use_predictions=True,
|
clf = Stree(random_state=self._random_state, kernel=kernel,)
|
||||||
)
|
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
accuracy_score = clf.score(X, y)
|
accuracy_score = clf.score(X, y)
|
||||||
yp = clf.predict(X)
|
yp = clf.predict(X)
|
||||||
accuracy_computed = np.mean(yp == y)
|
accuracy_computed = np.mean(yp == y)
|
||||||
self.assertEqual(accuracy_score, accuracy_computed)
|
self.assertEqual(accuracy_score, accuracy_computed)
|
||||||
self.assertGreater(accuracy_score, 0.9)
|
self.assertAlmostEqual(accuracy_expected, accuracy_score)
|
||||||
|
|
||||||
def test_single_predict_proba(self):
|
def test_single_predict_proba(self):
|
||||||
"""Check that element 28 has a prediction different that the current
|
"""Check the element 28 probability of being 1
|
||||||
label
|
|
||||||
"""
|
"""
|
||||||
# Element 28 has a different prediction than the truth
|
|
||||||
decimals = 5
|
decimals = 5
|
||||||
prob = 0.29026400766
|
element = 28
|
||||||
|
probs = [0.29026400766, 0.73105613, 0.0307635]
|
||||||
X, y = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
|
self.assertEqual(1, y[element])
|
||||||
self.assertEqual(
|
for kernel, prob in zip(self._kernels, probs):
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
yp = clf.fit(X, y).predict_proba(
|
||||||
|
X[element, :].reshape(-1, X.shape[1])
|
||||||
|
)
|
||||||
|
self.assertAlmostEqual(
|
||||||
np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
|
np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
|
||||||
)
|
)
|
||||||
self.assertEqual(1, y[28])
|
|
||||||
|
|
||||||
self.assertAlmostEqual(
|
self.assertAlmostEqual(
|
||||||
round(prob, decimals), round(yp[0, 1], decimals), decimals
|
round(prob, decimals), round(yp[0, 1], decimals), decimals
|
||||||
)
|
)
|
||||||
@@ -152,100 +158,31 @@ class Stree_test(unittest.TestCase):
|
|||||||
def test_multiple_predict_proba(self):
|
def test_multiple_predict_proba(self):
|
||||||
# First 27 elements the predictions are the same as the truth
|
# First 27 elements the predictions are the same as the truth
|
||||||
num = 27
|
num = 27
|
||||||
decimals = 5
|
|
||||||
X, y = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
yp = self._clf.predict_proba(X[:num, :])
|
for kernel in self._kernels:
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
clf.fit(X, y)
|
||||||
|
yp = clf.predict_proba(X[:num, :])
|
||||||
self.assertListEqual(
|
self.assertListEqual(
|
||||||
y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
|
y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
|
||||||
)
|
)
|
||||||
expected_proba = [
|
|
||||||
0.88395641,
|
|
||||||
0.36746962,
|
|
||||||
0.84158767,
|
|
||||||
0.34106833,
|
|
||||||
0.14269291,
|
|
||||||
0.85193236,
|
|
||||||
0.29876058,
|
|
||||||
0.7282164,
|
|
||||||
0.85958616,
|
|
||||||
0.89517877,
|
|
||||||
0.99745224,
|
|
||||||
0.18860349,
|
|
||||||
0.30756427,
|
|
||||||
0.8318412,
|
|
||||||
0.18981198,
|
|
||||||
0.15564624,
|
|
||||||
0.25740655,
|
|
||||||
0.22923355,
|
|
||||||
0.87365959,
|
|
||||||
0.49928689,
|
|
||||||
0.95574351,
|
|
||||||
0.28761257,
|
|
||||||
0.28906333,
|
|
||||||
0.32643692,
|
|
||||||
0.29788483,
|
|
||||||
0.01657364,
|
|
||||||
0.81149083,
|
|
||||||
]
|
|
||||||
expected = np.round(expected_proba, decimals=decimals).tolist()
|
|
||||||
computed = np.round(yp[:, 1], decimals=decimals).tolist()
|
|
||||||
for i in range(len(expected)):
|
|
||||||
self.assertAlmostEqual(expected[i], computed[i], decimals)
|
|
||||||
|
|
||||||
def build_models(self):
|
|
||||||
"""Build and train two models, model_clf will use the sklearn
|
|
||||||
classifier to compute predictions and split data. model_computed will
|
|
||||||
use vector of coefficients to compute both predictions and splitted
|
|
||||||
data
|
|
||||||
"""
|
|
||||||
model_clf = Stree(
|
|
||||||
random_state=self._random_state, use_predictions=True
|
|
||||||
)
|
|
||||||
model_computed = Stree(
|
|
||||||
random_state=self._random_state, use_predictions=False
|
|
||||||
)
|
|
||||||
X, y = get_dataset(self._random_state)
|
|
||||||
model_clf.fit(X, y)
|
|
||||||
model_computed.fit(X, y)
|
|
||||||
return model_clf, model_computed, X, y
|
|
||||||
|
|
||||||
def test_use_model_predict(self):
|
|
||||||
"""Check that we get the same results wether we use the estimator in
|
|
||||||
nodes to compute labels or we use the hyperplane and the position of
|
|
||||||
samples wrt to it
|
|
||||||
"""
|
|
||||||
use_clf, use_math, X, _ = self.build_models()
|
|
||||||
self.assertListEqual(
|
|
||||||
use_clf.predict(X).tolist(), use_math.predict(X).tolist()
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_use_model_score(self):
|
|
||||||
use_clf, use_math, X, y = self.build_models()
|
|
||||||
b = use_math.score(X, y)
|
|
||||||
self.assertEqual(use_clf.score(X, y), b)
|
|
||||||
self.assertGreater(b, 0.95)
|
|
||||||
|
|
||||||
def test_use_model_predict_proba(self):
|
|
||||||
use_clf, use_math, X, _ = self.build_models()
|
|
||||||
self.assertListEqual(
|
|
||||||
use_clf.predict_proba(X).tolist(),
|
|
||||||
use_math.predict_proba(X).tolist(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_single_vs_multiple_prediction(self):
|
def test_single_vs_multiple_prediction(self):
|
||||||
"""Check if predicting sample by sample gives the same result as
|
"""Check if predicting sample by sample gives the same result as
|
||||||
predicting all samples at once
|
predicting all samples at once
|
||||||
"""
|
"""
|
||||||
X, _ = get_dataset(self._random_state)
|
X, y = get_dataset(self._random_state)
|
||||||
|
for kernel in self._kernels:
|
||||||
|
clf = Stree(kernel=kernel, random_state=self._random_state)
|
||||||
|
clf.fit(X, y)
|
||||||
# Compute prediction line by line
|
# Compute prediction line by line
|
||||||
yp_line = np.array([], dtype=int)
|
yp_line = np.array([], dtype=int)
|
||||||
for xp in X:
|
for xp in X:
|
||||||
yp_line = np.append(
|
yp_line = np.append(
|
||||||
yp_line, self._clf.predict(xp.reshape(-1, X.shape[1]))
|
yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
|
||||||
)
|
)
|
||||||
# Compute prediction at once
|
# Compute prediction at once
|
||||||
yp_once = self._clf.predict(X)
|
yp_once = clf.predict(X)
|
||||||
#
|
|
||||||
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
|
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
|
||||||
|
|
||||||
def test_iterator_and_str(self):
|
def test_iterator_and_str(self):
|
||||||
@@ -266,11 +203,13 @@ class Stree_test(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
computed = []
|
computed = []
|
||||||
expected_string = ""
|
expected_string = ""
|
||||||
for node in self._clf:
|
clf = Stree(kernel="linear", random_state=self._random_state)
|
||||||
|
clf.fit(*get_dataset(self._random_state))
|
||||||
|
for node in clf:
|
||||||
computed.append(str(node))
|
computed.append(str(node))
|
||||||
expected_string += str(node) + "\n"
|
expected_string += str(node) + "\n"
|
||||||
self.assertListEqual(expected, computed)
|
self.assertListEqual(expected, computed)
|
||||||
self.assertEqual(expected_string, str(self._clf))
|
self.assertEqual(expected_string, str(clf))
|
||||||
|
|
||||||
def test_is_a_sklearn_classifier(self):
|
def test_is_a_sklearn_classifier(self):
|
||||||
import warnings
|
import warnings
|
||||||
@@ -323,9 +262,7 @@ class Snode_test(unittest.TestCase):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
os.environ["TESTING"] = "1"
|
os.environ["TESTING"] = "1"
|
||||||
self._random_state = 1
|
self._random_state = 1
|
||||||
self._clf = Stree(
|
self._clf = Stree(random_state=self._random_state)
|
||||||
random_state=self._random_state, use_predictions=True
|
|
||||||
)
|
|
||||||
self._clf.fit(*get_dataset(self._random_state))
|
self._clf.fit(*get_dataset(self._random_state))
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user