diff --git a/tests/Stree_test.py b/tests/Stree_test.py index 344e3bf..65cd82a 100644 --- a/tests/Stree_test.py +++ b/tests/Stree_test.py @@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase): yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1])) self.assertEqual(0, yp[0:, 0]) self.assertEqual(1, y[28]) - self.assertEqual(0.9282970550576184, yp[0:, 1]) + self.assertEqual(0.29026400765649235, yp[0, 1]) def test_multiple_predict_proba(self): # First 27 elements the predictions are the same as the truth @@ -156,16 +156,16 @@ class Stree_test(unittest.TestCase): X, y = self._get_Xy() yp = self._clf.predict_proba(X[:num, :]) self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist()) - expected_proba = [0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.9759887, - 0.92829706, 0.9759887, 0.9759887, 0.9759887, 0.9759887, 0.92829706, - 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706, 0.92829706, - 0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706, - 0.92829706, 0.92829706, 0.9759887] + expected_proba = [0.88395641, 0.36746962, 0.84158767, 0.34106833, 0.14269291, 0.85193236, + 0.29876058, 0.7282164, 0.85958616, 0.89517877, 0.99745224, 0.18860349, + 0.30756427, 0.8318412, 0.18981198, 0.15564624, 0.25740655, 0.22923355, + 0.87365959, 0.49928689, 0.95574351, 0.28761257, 0.28906333, 0.32643692, + 0.29788483, 0.01657364, 0.81149083] self.assertListEqual(expected_proba, np.round(yp[:, 1], decimals=8).tolist()) def build_models(self): - """Build and train two models, model_clf will use the sklearn classifier to - compute predictions and split data. model_computed will use vector of + """Build and train two models, model_clf will use the sklearn classifier to + compute predictions and split data. model_computed will use vector of coefficients to compute both predictions and splitted data """ model_clf = Stree(random_state=self._random_state, diff --git a/trees/Stree.py b/trees/Stree.py index 86156a6..efa0d17 100644 --- a/trees/Stree.py +++ b/trees/Stree.py @@ -52,7 +52,7 @@ class Stree(BaseEstimator, ClassifierMixin): if self.__use_predictions: yp = node._clf.predict(data) down = (yp == 1).reshape(-1, 1) - res = node._clf.decision_function(data) + res = np.expand_dims(node._clf.decision_function(data), 1) else: # doesn't work with multiclass as each sample has to do inner product with its own coeficients # computes positition of every sample is w.r.t. the hyperplane @@ -65,10 +65,6 @@ class Stree(BaseEstimator, ClassifierMixin): data_up = data[up[:, 0]] if any(up) else None indices_up = indices[up[:, 0]] if any(up) else None res_up = res[up[:, 0]] if any(up) else None - #if any(up): - # print("+++++up", data_up.shape, indices_up.shape, res_up.shape) - #if any(down): - # print("+++++down", data_down.shape, indices_down.shape, res_down.shape ) return [data_up, indices_up, data_down, indices_down, res_up, res_down] def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree': @@ -141,13 +137,11 @@ class Stree(BaseEstimator, ClassifierMixin): if node.is_leaf(): # set a class for every sample in dataset prediction = np.full((xp.shape[0], 1), node._class) - prediction_proba = np.full((xp.shape[0], 1), node._belief) - #prediction_proba = dist - #print("******", prediction.shape, prediction_proba.shape) + prediction_proba = dist return np.append(prediction, prediction_proba, axis=1), indices u, i_u, d, i_d, r_u, r_d = self._split_data(node, xp, indices) - k, l = predict_class(d, i_d, r_u, node.get_down()) - m, n = predict_class(u, i_u, r_d, node.get_up()) + k, l = predict_class(d, i_d, r_d, node.get_down()) + m, n = predict_class(u, i_u, r_u, node.get_up()) return np.append(k, m), np.append(l, n) # sklearn check check_is_fitted(self) @@ -158,7 +152,7 @@ class Stree(BaseEstimator, ClassifierMixin): result, indices = predict_class(X, indices, [], self._tree) result = result.reshape(X.shape[0], 2) # Sigmoidize distance like in sklearn based on Platt(1999) - #result[:, 1] = 1 / (1 + np.exp(-result[:, 1])) + result[:, 1] = 1 / (1 + np.exp(-result[:, 1])) return self._reorder_results(result, indices) def score(self, X: np.array, y: np.array) -> float: