Second try, finished?

This commit is contained in:
2020-05-18 12:55:00 +02:00
parent e52cbbb192
commit 382ae921ab
2 changed files with 13 additions and 19 deletions

View File

@@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase):
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
self.assertEqual(0, yp[0:, 0])
self.assertEqual(1, y[28])
self.assertEqual(0.9282970550576184, yp[0:, 1])
self.assertEqual(0.29026400765649235, yp[0, 1])
def test_multiple_predict_proba(self):
# For the first 27 elements the predictions are the same as the truth
@@ -156,16 +156,16 @@ class Stree_test(unittest.TestCase):
X, y = self._get_Xy()
yp = self._clf.predict_proba(X[:num, :])
self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist())
expected_proba = [0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.9759887,
0.92829706, 0.9759887, 0.9759887, 0.9759887, 0.9759887, 0.92829706,
0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706, 0.92829706,
0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706,
0.92829706, 0.92829706, 0.9759887]
expected_proba = [0.88395641, 0.36746962, 0.84158767, 0.34106833, 0.14269291, 0.85193236,
0.29876058, 0.7282164, 0.85958616, 0.89517877, 0.99745224, 0.18860349,
0.30756427, 0.8318412, 0.18981198, 0.15564624, 0.25740655, 0.22923355,
0.87365959, 0.49928689, 0.95574351, 0.28761257, 0.28906333, 0.32643692,
0.29788483, 0.01657364, 0.81149083]
self.assertListEqual(expected_proba, np.round(yp[:, 1], decimals=8).tolist())
def build_models(self):
"""Build and train two models, model_clf will use the sklearn classifier to
compute predictions and split data. model_computed will use vector of
"""Build and train two models, model_clf will use the sklearn classifier to
compute predictions and split data. model_computed will use vector of
coefficients to compute both predictions and split data
"""
model_clf = Stree(random_state=self._random_state,

View File

@@ -52,7 +52,7 @@ class Stree(BaseEstimator, ClassifierMixin):
if self.__use_predictions:
yp = node._clf.predict(data)
down = (yp == 1).reshape(-1, 1)
res = node._clf.decision_function(data)
res = np.expand_dims(node._clf.decision_function(data), 1)
else:
# doesn't work with multiclass, as each sample has to do the inner product with its own coefficients
# computes the position of every sample w.r.t. the hyperplane
@@ -65,10 +65,6 @@ class Stree(BaseEstimator, ClassifierMixin):
data_up = data[up[:, 0]] if any(up) else None
indices_up = indices[up[:, 0]] if any(up) else None
res_up = res[up[:, 0]] if any(up) else None
#if any(up):
# print("+++++up", data_up.shape, indices_up.shape, res_up.shape)
#if any(down):
# print("+++++down", data_down.shape, indices_down.shape, res_down.shape )
return [data_up, indices_up, data_down, indices_down, res_up, res_down]
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
@@ -141,13 +137,11 @@ class Stree(BaseEstimator, ClassifierMixin):
if node.is_leaf():
# set a class for every sample in dataset
prediction = np.full((xp.shape[0], 1), node._class)
prediction_proba = np.full((xp.shape[0], 1), node._belief)
#prediction_proba = dist
#print("******", prediction.shape, prediction_proba.shape)
prediction_proba = dist
return np.append(prediction, prediction_proba, axis=1), indices
u, i_u, d, i_d, r_u, r_d = self._split_data(node, xp, indices)
k, l = predict_class(d, i_d, r_u, node.get_down())
m, n = predict_class(u, i_u, r_d, node.get_up())
k, l = predict_class(d, i_d, r_d, node.get_down())
m, n = predict_class(u, i_u, r_u, node.get_up())
return np.append(k, m), np.append(l, n)
# sklearn check
check_is_fitted(self)
@@ -158,7 +152,7 @@ class Stree(BaseEstimator, ClassifierMixin):
result, indices = predict_class(X, indices, [], self._tree)
result = result.reshape(X.shape[0], 2)
# Sigmoidize distance like in sklearn based on Platt(1999)
#result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
return self._reorder_results(result, indices)
def score(self, X: np.array, y: np.array) -> float: