mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 23:46:02 +00:00
First approach to Platt scaling
This commit is contained in:
46
test2.ipynb
46
test2.ipynb
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -17,7 +17,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -29,9 +29,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
@@ -68,7 +74,7 @@
|
||||
"\n",
|
||||
"# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
|
||||
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
|
||||
"data = load_creditcard() # Take all the samples\n",
|
||||
"data = load_creditcard(-1000) # Take all the samples\n",
|
||||
"\n",
|
||||
"Xtrain = data[0]\n",
|
||||
"Xtest = data[1]\n",
|
||||
@@ -78,9 +84,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.983923 counts=(array([0, 1]), array([ 5, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945430 counts=(array([0, 1]), array([693, 40]))\n\n\n0.0277 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t = time.time()\n",
|
||||
"clf = Stree(C=.01, random_state=random_state)\n",
|
||||
@@ -90,6 +102,22 @@
|
||||
"print(f\"{time.time() - t:.4f} secs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n****** (311, 1) (311, 1)\n****** (733, 1) (733, 1)\n[[0. 0.94542974]\n [1. 0.98392283]\n [0. 0.94542974]\n ...\n [0. 0.94542974]\n [0. 0.94542974]\n [1. 0.98392283]]\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"k = clf.predict_proba(Xtrain)\n",
|
||||
"print(k)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -154,9 +182,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.7.6-final"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
@@ -63,8 +63,8 @@ class Stree_test(unittest.TestCase):
|
||||
# Is the partition made the same as the prediction?
|
||||
# as the node is not a leaf...
|
||||
_, count_yp = np.unique(y_prediction, return_counts=True)
|
||||
self.assertEqual(count_yp[1], y_up.shape[0])
|
||||
self.assertEqual(count_yp[0], y_down.shape[0])
|
||||
self.assertEqual(count_yp[0], y_up.shape[0])
|
||||
self.assertEqual(count_yp[1], y_down.shape[0])
|
||||
self._check_tree(node.get_down())
|
||||
self._check_tree(node.get_up())
|
||||
|
||||
@@ -154,35 +154,55 @@ class Stree_test(unittest.TestCase):
|
||||
# First 27 elements the predictions are the same as the truth
|
||||
num = 27
|
||||
X, y = self._get_Xy()
|
||||
yp = self._clf.predict_proba(X[:num,:])
|
||||
yp = self._clf.predict_proba(X[:num, :])
|
||||
self.assertListEqual(y[:num].tolist(), yp[:, 0].tolist())
|
||||
expected_proba = [0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.9759887,
|
||||
0.92829706, 0.9759887, 0.9759887, 0.9759887, 0.9759887, 0.92829706,
|
||||
0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706, 0.92829706,
|
||||
0.9759887, 0.92829706, 0.9759887, 0.92829706, 0.92829706, 0.92829706,
|
||||
0.92829706, 0.92829706, 0.9759887 ]
|
||||
0.92829706, 0.92829706, 0.9759887]
|
||||
self.assertListEqual(expected_proba, np.round(yp[:, 1], decimals=8).tolist())
|
||||
|
||||
def test_use_model_predictions(self):
|
||||
"""Check that we get the same results wether we use the estimator in nodes
|
||||
to compute labes or we use the hyperplane and the position of samples wrt to it
|
||||
def build_models(self):
|
||||
"""Build and train two models, model_clf will use the sklearn classifier to
|
||||
compute predictions and split data. model_computed will use vector of
|
||||
coefficients to compute both predictions and splitted data
|
||||
"""
|
||||
model_predictions = Stree(random_state=self._random_state,
|
||||
model_clf = Stree(random_state=self._random_state,
|
||||
use_predictions=True)
|
||||
model_hyperplane = Stree(random_state=self._random_state,
|
||||
model_computed = Stree(random_state=self._random_state,
|
||||
use_predictions=False)
|
||||
X, y = self._get_Xy()
|
||||
model_predictions.fit(X, y)
|
||||
model_hyperplane.fit(X, y)
|
||||
model_clf.fit(X, y)
|
||||
model_computed.fit(X, y)
|
||||
return model_clf, model_computed, X, y
|
||||
|
||||
def test_use_model_predict(self):
|
||||
"""Check that we get the same results wether we use the estimator in nodes
|
||||
to compute labels or we use the hyperplane and the position of samples wrt to it
|
||||
"""
|
||||
use_clf, use_math, X, _ = self.build_models()
|
||||
self.assertListEqual(
|
||||
model_predictions.predict(X).tolist(),
|
||||
model_hyperplane.predict(X).tolist()
|
||||
use_clf.predict(X).tolist(),
|
||||
use_math.predict(X).tolist()
|
||||
)
|
||||
|
||||
def test_use_model_score(self):
|
||||
use_clf, use_math, X, y = self.build_models()
|
||||
b = use_math.score(X, y)
|
||||
self.assertEqual(
|
||||
use_clf.score(X, y),
|
||||
b
|
||||
)
|
||||
a = model_predictions.score(X, y),
|
||||
b = model_hyperplane.score(X, y)
|
||||
self.assertEqual(a, b)
|
||||
self.assertGreater(b, .95)
|
||||
|
||||
def test_use_model_predict_proba(self):
|
||||
use_clf, use_math, X, _ = self.build_models()
|
||||
self.assertListEqual(
|
||||
use_clf.predict_proba(X).tolist(),
|
||||
use_math.predict_proba(X).tolist()
|
||||
)
|
||||
|
||||
def test_single_vs_multiple_prediction(self):
|
||||
"""Check if predicting sample by sample gives the same result as predicting
|
||||
all samples at once
|
||||
@@ -196,7 +216,6 @@ class Stree_test(unittest.TestCase):
|
||||
yp_once = self._clf.predict(X)
|
||||
#
|
||||
self.assertListEqual(yp_line.tolist(), yp_once.tolist())
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -52,6 +52,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
if self.__use_predictions:
|
||||
yp = node._clf.predict(data)
|
||||
down = (yp == 1).reshape(-1, 1)
|
||||
res = node._clf.decision_function(data)
|
||||
else:
|
||||
# doesn't work with multiclass as each sample has to do inner product with its own coeficients
|
||||
# computes positition of every sample is w.r.t. the hyperplane
|
||||
@@ -60,9 +61,15 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
up = ~down
|
||||
data_down = data[down[:, 0]] if any(down) else None
|
||||
indices_down = indices[down[:, 0]] if any(down) else None
|
||||
res_down = res[down[:, 0]] if any(down) else None
|
||||
data_up = data[up[:, 0]] if any(up) else None
|
||||
indices_up = indices[up[:, 0]] if any(up) else None
|
||||
return [data_down, indices_down, data_up, indices_up]
|
||||
res_up = res[up[:, 0]] if any(up) else None
|
||||
#if any(up):
|
||||
# print("+++++up", data_up.shape, indices_up.shape, res_up.shape)
|
||||
#if any(down):
|
||||
# print("+++++down", data_down.shape, indices_down.shape, res_down.shape )
|
||||
return [data_up, indices_up, data_down, indices_down, res_up, res_down]
|
||||
|
||||
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
|
||||
X, y = check_X_y(X, y.ravel())
|
||||
@@ -92,7 +99,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
random_state=self._random_state)
|
||||
clf.fit(X, y)
|
||||
tree = Snode(clf, X, y, title)
|
||||
X_U, y_u, X_D, y_d = self._split_data(tree, X, y)
|
||||
X_U, y_u, X_D, y_d, _, _ = self._split_data(tree, X, y)
|
||||
if X_U is None or X_D is None:
|
||||
# didn't part anything
|
||||
return Snode(clf, X, y, title + ', <cgaf>')
|
||||
@@ -100,20 +107,22 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
tree.set_down(self.train(X_D, y_d, title + ' - Down'))
|
||||
return tree
|
||||
|
||||
def _predict_values(self, X: np.array) -> np.array:
|
||||
def _reorder_results(self, y: np.array, indices: np.array) -> np.array:
|
||||
y_ordered = np.zeros(y.shape, dtype=int if y.ndim == 1 else float)
|
||||
indices = indices.astype(int)
|
||||
for i, index in enumerate(indices):
|
||||
y_ordered[index] = y[i]
|
||||
return y_ordered
|
||||
|
||||
def predict(self, X: np.array) -> np.array:
|
||||
def predict_class(xp: np.array, indices: np.array, node: Snode) -> np.array:
|
||||
if xp is None:
|
||||
return [], []
|
||||
if node.is_leaf():
|
||||
# set a class for every sample in dataset
|
||||
prediction = np.full((xp.shape[0], 1), node._class)
|
||||
if self.__proba:
|
||||
prediction_proba = np.full((xp.shape[0], 1), node._belief)
|
||||
#prediction_proba = self._linear_function(xp, node)
|
||||
return np.append(prediction, prediction_proba, axis=1), indices
|
||||
else:
|
||||
return prediction, indices
|
||||
u, i_u, d, i_d = self._split_data(node, xp, indices)
|
||||
return prediction, indices
|
||||
u, i_u, d, i_d, _, _ = self._split_data(node, xp, indices)
|
||||
k, l = predict_class(d, i_d, node.get_down())
|
||||
m, n = predict_class(u, i_u, node.get_up())
|
||||
return np.append(k, m), np.append(l, n)
|
||||
@@ -123,22 +132,30 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
X = check_array(X)
|
||||
# setup prediction & make it happen
|
||||
indices = np.arange(X.shape[0])
|
||||
return predict_class(X, indices, self._tree)
|
||||
|
||||
def _reorder_results(self, y: np.array, indices: np.array) -> np.array:
|
||||
y_ordered = np.zeros(y.shape, dtype=int if y.ndim == 1 else float)
|
||||
indices = indices.astype(int)
|
||||
for i, index in enumerate(indices):
|
||||
y_ordered[index] = y[i]
|
||||
return y_ordered
|
||||
|
||||
def predict(self, X: np.array) -> np.array:
|
||||
return self._reorder_results(*self._predict_values(X))
|
||||
return self._reorder_results(*predict_class(X, indices, self._tree))
|
||||
|
||||
def predict_proba(self, X: np.array) -> np.array:
|
||||
self.__proba = True
|
||||
result, indices = self._predict_values(X)
|
||||
self.__proba = False
|
||||
def predict_class(xp: np.array, indices: np.array, dist: np.array, node: Snode) -> np.array:
|
||||
if xp is None:
|
||||
return [], []
|
||||
if node.is_leaf():
|
||||
# set a class for every sample in dataset
|
||||
prediction = np.full((xp.shape[0], 1), node._class)
|
||||
prediction_proba = np.full((xp.shape[0], 1), node._belief)
|
||||
#prediction_proba = dist
|
||||
#print("******", prediction.shape, prediction_proba.shape)
|
||||
return np.append(prediction, prediction_proba, axis=1), indices
|
||||
u, i_u, d, i_d, r_u, r_d = self._split_data(node, xp, indices)
|
||||
k, l = predict_class(d, i_d, r_u, node.get_down())
|
||||
m, n = predict_class(u, i_u, r_d, node.get_up())
|
||||
return np.append(k, m), np.append(l, n)
|
||||
# sklearn check
|
||||
check_is_fitted(self)
|
||||
# Input validation
|
||||
X = check_array(X)
|
||||
# setup prediction & make it happen
|
||||
indices = np.arange(X.shape[0])
|
||||
result, indices = predict_class(X, indices, [], self._tree)
|
||||
result = result.reshape(X.shape[0], 2)
|
||||
# Sigmoidize distance like in sklearn based on Platt(1999)
|
||||
#result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
|
||||
|
Reference in New Issue
Block a user