Second try, finished?

2025-08-15 15:36:00 +00:00 · 2020-05-18 12:55:00 +02:00
parent e52cbbb192
commit 382ae921ab
2 changed files with 13 additions and 19 deletions
--- a/tests/Stree_test.py
+++ b/tests/Stree_test.py
@@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase):
        yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
        self.assertEqual(0, yp[0:, 0])
        self.assertEqual(1, y[28])
-        self.assertEqual(0.9282970550576184, yp[0:, 1])
+        self.assertEqual(0.29026400765649235, yp[0, 1])

    def test_multiple_predict_proba(self):
        # First 27 elements the predictions are the same as the truth
@@ -156,16 +156,16 @@ class Stree_test(unittest.TestCase):
        X, y = self._get_Xy()
        yp = self._clf.predict_proba(X[:num, :])
        self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist())
-        expected_proba = [0.9759887,  0.92829706, 0.9759887,  0.92829706, 0.92829706, 0.9759887, 
-                        0.92829706, 0.9759887,  0.9759887,  0.9759887,  0.9759887,  0.92829706, 
-                        0.92829706, 0.9759887,  0.92829706, 0.92829706, 0.92829706, 0.92829706, 
-                        0.9759887,  0.92829706, 0.9759887,  0.92829706, 0.92829706, 0.92829706,
-                        0.92829706, 0.92829706, 0.9759887]
+        expected_proba = [0.88395641, 0.36746962, 0.84158767, 0.34106833, 0.14269291, 0.85193236,
+                        0.29876058, 0.7282164,  0.85958616, 0.89517877, 0.99745224, 0.18860349,
+                        0.30756427, 0.8318412,  0.18981198, 0.15564624, 0.25740655, 0.22923355,
+                        0.87365959, 0.49928689, 0.95574351, 0.28761257, 0.28906333, 0.32643692,
+                        0.29788483, 0.01657364, 0.81149083]
        self.assertListEqual(expected_proba, np.round(yp[:, 1], decimals=8).tolist())

    def build_models(self):
-        """Build and train two models, model_clf will use the sklearn classifier to 
-        compute predictions and split data. model_computed will use vector of 
+        """Build and train two models, model_clf will use the sklearn classifier to
+        compute predictions and split data. model_computed will use vector of
        coefficients to compute both predictions and splitted data
        """
        model_clf = Stree(random_state=self._random_state,
--- a/trees/Stree.py
+++ b/trees/Stree.py
@@ -52,7 +52,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        if self.__use_predictions:
            yp = node._clf.predict(data)
            down = (yp == 1).reshape(-1, 1)
-            res = node._clf.decision_function(data)
+            res = np.expand_dims(node._clf.decision_function(data), 1)
        else:
            # doesn't work with multiclass as each sample has to do inner product with its own coeficients
            # computes positition of every sample is w.r.t. the hyperplane
@@ -65,10 +65,6 @@ class Stree(BaseEstimator, ClassifierMixin):
        data_up = data[up[:, 0]] if any(up) else None
        indices_up = indices[up[:, 0]] if any(up) else None
        res_up = res[up[:, 0]] if any(up) else None
-        #if any(up):
-        #    print("+++++up", data_up.shape, indices_up.shape, res_up.shape)
-        #if any(down):
-        #    print("+++++down", data_down.shape, indices_down.shape, res_down.shape )
        return [data_up, indices_up, data_down, indices_down, res_up, res_down]

    def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
@@ -141,13 +137,11 @@ class Stree(BaseEstimator, ClassifierMixin):
            if node.is_leaf():
                # set a class for every sample in dataset
                prediction = np.full((xp.shape[0], 1), node._class)
-                prediction_proba = np.full((xp.shape[0], 1), node._belief)
-                #prediction_proba = dist
-                #print("******", prediction.shape, prediction_proba.shape)
+                prediction_proba = dist
                return np.append(prediction, prediction_proba, axis=1), indices
            u, i_u, d, i_d, r_u, r_d = self._split_data(node, xp, indices)
-            k, l = predict_class(d, i_d, r_u, node.get_down())
-            m, n = predict_class(u, i_u, r_d, node.get_up())
+            k, l = predict_class(d, i_d, r_d, node.get_down())
+            m, n = predict_class(u, i_u, r_u, node.get_up())
            return np.append(k, m), np.append(l, n)
        # sklearn check
        check_is_fitted(self)
@@ -158,7 +152,7 @@ class Stree(BaseEstimator, ClassifierMixin):
        result, indices = predict_class(X, indices, [], self._tree)
        result = result.reshape(X.shape[0], 2)
        # Sigmoidize distance like in sklearn based on Platt(1999)
-        #result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
+        result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
        return self._reorder_results(result, indices)

    def score(self, X: np.array, y: np.array) -> float: