#3 Rewrite some tests & remove use_predictions

Remove the use_predictions parameter; from now on the model always behaves as if it were enabled
2025-08-17 16:36:01 +00:00 · 2020-06-08 01:51:21 +02:00
parent 05b462716e
commit 3a48d8b405
3 changed files with 64 additions and 139 deletions
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -126,14 +126,12 @@ class Stree(BaseEstimator, ClassifierMixin):
        random_state: int = None,
        max_depth: int = None,
        tol: float = 1e-4,
        use_predictions: bool = False,
        min_samples_split: int = 0,
    ):
        self.max_iter = max_iter
        self.C = C
        self.kernel = kernel
        self.random_state = random_state
        self.use_predictions = use_predictions
        self.max_depth = max_depth
        self.tol = tol
        self.min_samples_split = min_samples_split
@@ -172,6 +170,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        :rtype: list
        """
        up = ~down
        print(self.kernel, up.shape, down.shape)
        return (
            origin[up[:, 0]] if any(up) else None,
            origin[down[:, 0]] if any(down) else None,
@@ -188,14 +187,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        the hyperplane of the node
        :rtype: np.array
        """
-        if self.use_predictions:
+        return np.expand_dims(node._clf.decision_function(data), 1)
            res = np.expand_dims(node._clf.decision_function(data), 1)
        else:
            # doesn't work with multiclass as each sample has to do inner
            # product with its own coefficients computes positition of every
            # sample is w.r.t. the hyperplane
            res = self._linear_function(data, node)
        return res
    def _split_criteria(self, data: np.array) -> np.array:
        """Set the criteria to split arrays
--- a/stree/tests/Strees_grapher_test.py
+++ b/stree/tests/Strees_grapher_test.py
@@ -32,9 +32,7 @@ class Stree_grapher_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
-        self._clf = Stree_grapher(
+        self._clf = Stree_grapher(dict(random_state=self._random_state))
            dict(random_state=self._random_state, use_predictions=False)
        )
        self._clf.fit(*get_dataset(self._random_state, n_features=4))
        super().__init__(*args, **kwargs)
@@ -102,9 +100,7 @@ class Snode_graph_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
-        self._clf = Stree_grapher(
+        self._clf = Stree_grapher(dict(random_state=self._random_state))
            dict(random_state=self._random_state, use_predictions=False)
        )
        self._clf.fit(*get_dataset(self._random_state))
        super().__init__(*args, **kwargs)
--- a/stree/tests/Strees_test.py
+++ b/stree/tests/Strees_test.py
@@ -28,10 +28,7 @@ class Stree_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
-        self._clf = Stree(
+        self._kernels = ["linear", "rbf", "poly"]
            random_state=self._random_state, use_predictions=False
        )
        self._clf.fit(*get_dataset(self._random_state))
        super().__init__(*args, **kwargs)
    @classmethod
@@ -82,7 +79,10 @@ class Stree_test(unittest.TestCase):
    def test_build_tree(self):
        """Check if the tree is built the same way as predictions of models
        """
-        self._check_tree(self._clf.tree_)
+        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(*get_dataset(self._random_state))
            self._check_tree(clf.tree_)
    def _find_out(
        self, px: np.array, x_original: np.array, y_original
@@ -105,46 +105,52 @@ class Stree_test(unittest.TestCase):
        return res
    def test_single_prediction(self):
        probs = [0.29026400766, 0.73105613, 0.0307635]
        X, y = get_dataset(self._random_state)
-        yp = self._clf.predict((X[0, :].reshape(-1, X.shape[1])))
+        for kernel, prob in zip(self._kernels, probs):
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
            self.assertEqual(yp[0], y[0])
    def test_multiple_prediction(self):
        # First 27 elements the predictions are the same as the truth
        num = 27
        X, y = get_dataset(self._random_state)
-        yp = self._clf.predict(X[:num, :])
+        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict(X[:num, :])
            self.assertListEqual(y[:num].tolist(), yp.tolist())
    def test_score(self):
        X, y = get_dataset(self._random_state)
-        for kernel in ["linear"]:
+        for kernel, accuracy_expected in zip(
-            clf = Stree(
+            self._kernels,
-                random_state=self._random_state,
+            [0.9506666666666667, 0.9606666666666667, 0.9433333333333334],
-                kernel=kernel,
+        ):
-                use_predictions=True,
+            clf = Stree(random_state=self._random_state, kernel=kernel,)
            )
            clf.fit(X, y)
            accuracy_score = clf.score(X, y)
            yp = clf.predict(X)
            accuracy_computed = np.mean(yp == y)
            self.assertEqual(accuracy_score, accuracy_computed)
-            self.assertGreater(accuracy_score, 0.9)
+            self.assertAlmostEqual(accuracy_expected, accuracy_score)
    def test_single_predict_proba(self):
-        """Check that element 28 has a prediction different that the current
+        """Check the element 28 probability of being 1
        label
        """
        # Element 28 has a different prediction than the truth
        decimals = 5
-        prob = 0.29026400766
+        element = 28
        probs = [0.29026400766, 0.73105613, 0.0307635]
        X, y = get_dataset(self._random_state)
-        yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
+        self.assertEqual(1, y[element])
-        self.assertEqual(
+        for kernel, prob in zip(self._kernels, probs):
            clf = Stree(kernel=kernel, random_state=self._random_state)
            yp = clf.fit(X, y).predict_proba(
                X[element, :].reshape(-1, X.shape[1])
            )
            self.assertAlmostEqual(
                np.round(1 - prob, decimals), np.round(yp[0:, 0], decimals)
            )
        self.assertEqual(1, y[28])
            self.assertAlmostEqual(
                round(prob, decimals), round(yp[0, 1], decimals), decimals
            )
@@ -152,100 +158,31 @@ class Stree_test(unittest.TestCase):
    def test_multiple_predict_proba(self):
        # First 27 elements the predictions are the same as the truth
        num = 27
        decimals = 5
        X, y = get_dataset(self._random_state)
-        yp = self._clf.predict_proba(X[:num, :])
+        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(X, y)
            yp = clf.predict_proba(X[:num, :])
            self.assertListEqual(
                y[:num].tolist(), np.argmax(yp[:num], axis=1).tolist()
            )
        expected_proba = [
            0.88395641,
            0.36746962,
            0.84158767,
            0.34106833,
            0.14269291,
            0.85193236,
            0.29876058,
            0.7282164,
            0.85958616,
            0.89517877,
            0.99745224,
            0.18860349,
            0.30756427,
            0.8318412,
            0.18981198,
            0.15564624,
            0.25740655,
            0.22923355,
            0.87365959,
            0.49928689,
            0.95574351,
            0.28761257,
            0.28906333,
            0.32643692,
            0.29788483,
            0.01657364,
            0.81149083,
        ]
        expected = np.round(expected_proba, decimals=decimals).tolist()
        computed = np.round(yp[:, 1], decimals=decimals).tolist()
        for i in range(len(expected)):
            self.assertAlmostEqual(expected[i], computed[i], decimals)
    def build_models(self):
        """Build and train two models, model_clf will use the sklearn
        classifier to compute predictions and split data. model_computed will
        use vector of coefficients to compute both predictions and splitted
        data
        """
        model_clf = Stree(
            random_state=self._random_state, use_predictions=True
        )
        model_computed = Stree(
            random_state=self._random_state, use_predictions=False
        )
        X, y = get_dataset(self._random_state)
        model_clf.fit(X, y)
        model_computed.fit(X, y)
        return model_clf, model_computed, X, y
    def test_use_model_predict(self):
        """Check that we get the same results wether we use the estimator in
        nodes to compute labels or we use the hyperplane and the position of
        samples wrt to it
        """
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict(X).tolist(), use_math.predict(X).tolist()
        )
    def test_use_model_score(self):
        use_clf, use_math, X, y = self.build_models()
        b = use_math.score(X, y)
        self.assertEqual(use_clf.score(X, y), b)
        self.assertGreater(b, 0.95)
    def test_use_model_predict_proba(self):
        use_clf, use_math, X, _ = self.build_models()
        self.assertListEqual(
            use_clf.predict_proba(X).tolist(),
            use_math.predict_proba(X).tolist(),
        )
    def test_single_vs_multiple_prediction(self):
        """Check if predicting sample by sample gives the same result as
        predicting all samples at once
        """
-        X, _ = get_dataset(self._random_state)
+        X, y = get_dataset(self._random_state)
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
            clf.fit(X, y)
            # Compute prediction line by line
            yp_line = np.array([], dtype=int)
            for xp in X:
                yp_line = np.append(
-                yp_line, self._clf.predict(xp.reshape(-1, X.shape[1]))
+                    yp_line, clf.predict(xp.reshape(-1, X.shape[1]))
                )
            # Compute prediction at once
-        yp_once = self._clf.predict(X)
+            yp_once = clf.predict(X)
        #
            self.assertListEqual(yp_line.tolist(), yp_once.tolist())
    def test_iterator_and_str(self):
@@ -266,11 +203,13 @@ class Stree_test(unittest.TestCase):
        ]
        computed = []
        expected_string = ""
-        for node in self._clf:
+        clf = Stree(kernel="linear", random_state=self._random_state)
        clf.fit(*get_dataset(self._random_state))
        for node in clf:
            computed.append(str(node))
            expected_string += str(node) + "\n"
        self.assertListEqual(expected, computed)
-        self.assertEqual(expected_string, str(self._clf))
+        self.assertEqual(expected_string, str(clf))
    def test_is_a_sklearn_classifier(self):
        import warnings
@@ -323,9 +262,7 @@ class Snode_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        os.environ["TESTING"] = "1"
        self._random_state = 1
-        self._clf = Stree(
+        self._clf = Stree(random_state=self._random_state)
            random_state=self._random_state, use_predictions=True
        )
        self._clf.fit(*get_dataset(self._random_state))
        super().__init__(*args, **kwargs)