test results & fix single predict_proba

This commit is contained in:
2020-05-18 17:08:05 +02:00
parent 382ae921ab
commit 68512b3d75
4 changed files with 113 additions and 49 deletions

10
main.py
View File

@@ -46,4 +46,12 @@ clf.fit(Xtrain, ytrain)
print(f"Took {time.time() - now:.2f} seconds to train")
print(clf)
print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
# Inspect the per-sample output of predict_proba on the test set.
# NOTE(review): column 0 appears to hold the predicted class label and
# column 1 the belief attached to that prediction -- confirm against Stree.
proba = clf.predict_proba(Xtest)
# Split the rows by predicted class. The second mask previously repeated
# `== 0`, so the "res1" report never contained any class-1 rows.
res0 = proba[proba[:, 0] == 0]
res1 = proba[proba[:, 0] == 1]
print("++++++++++res0++++++++++++")
# Rows predicted as class 0 with a belief above 0.8.
print(res0[res0[:, 1] > .8])
print("**********res1************")
# Rows predicted as class 1 with a belief below 0.4.
print(res1[res1[:, 1] < .4])
# Reuse the array computed above instead of running predict_proba again.
print(proba)

File diff suppressed because one or more lines are too long

View File

@@ -35,7 +35,7 @@
{
"output_type": "stream",
"name": "stdout",
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28) y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
}
],
"source": [
@@ -74,7 +74,7 @@
"\n",
"# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
"data = load_creditcard(-1000) # Take all the samples\n",
"data = load_creditcard() # Take all the samples\n",
"\n",
"Xtrain = data[0]\n",
"Xtest = data[1]\n",
@@ -90,7 +90,7 @@
{
"output_type": "stream",
"name": "stdout",
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.983923 counts=(array([0, 1]), array([ 5, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945430 counts=(array([0, 1]), array([693, 40]))\n\n\n0.0277 secs\n"
"text": "root\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief=0.941799 counts=(array([0, 1]), array([ 11, 178]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up - Up, <cgaf> - Leaf class=0 belief=0.952381 counts=(array([0, 1]), array([20, 1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <cgaf> - Leaf class=1 belief=0.902174 counts=(array([0, 1]), array([ 9, 83]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([14]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief=0.999598 counts=(array([0, 1]), array([198966, 80]))\n\n42.9141 secs\n"
}
],
"source": [
@@ -98,7 +98,23 @@
"clf = Stree(C=.01, random_state=random_state)\n",
"clf.fit(Xtrain, ytrain)\n",
"print(clf)\n",
"print()\n",
"print(f\"{time.time() - t:.4f} secs\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n0.2084 secs\n"
}
],
"source": [
"t = time.time()\n",
"print(clf.predict(Xtest)[:17])\n",
"print(f\"{time.time() - t:.4f} secs\")"
]
},
@@ -110,19 +126,26 @@
{
"output_type": "stream",
"name": "stdout",
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n****** (311, 1) (311, 1)\n****** (733, 1) (733, 1)\n[[0. 0.94542974]\n [1. 0.98392283]\n [0. 0.94542974]\n ...\n [0. 0.94542974]\n [0. 0.94542974]\n [1. 0.98392283]]\n"
"text": "[[0. 0.26356965]\n [0. 0.22665372]\n [0. 0.25678353]\n [0. 0.26056019]\n [0. 0.26583006]\n [0. 0.24360041]\n [0. 0.26366182]\n [0. 0.26012045]\n [0. 0.2298345 ]\n [0. 0.25726294]\n [0. 0.25909988]\n [0. 0.25940575]\n [0. 0.24256254]\n [0. 0.15094485]\n [0. 0.26327588]\n [0. 0.26382949]\n [0. 0.26290957]]\n0.2083 secs\n"
}
],
"source": [
"k = clf.predict_proba(Xtrain)\n",
"print(k)"
"t = time.time()\n",
"print(clf.predict_proba(Xtest)[:17, :])\n",
"print(f\"{time.time() - t:.4f} secs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Classifier's accuracy (train): 0.9995\nClassifier's accuracy (test) : 0.9995\n0.5074 secs\n"
}
],
"source": [
"t = time.time()\n",
"print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
@@ -130,20 +153,17 @@
"print(f\"{time.time() - t:.4f} secs\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# outcomes without optimization executing predict_proba. 87 seconds\n",
"(284807, 2)\n",
"87.5212 secs"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "0.9993211848834895\n13.3835 secs\n"
}
],
"source": [
"t = time.time()\n",
"clf2 = LinearSVC(C=.01, random_state=random_state)\n",
@@ -154,9 +174,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "0.9991573329588147\n22.7635 secs\n"
}
],
"source": [
"t = time.time()\n",
"clf3 = DecisionTreeClassifier(random_state=random_state)\n",

View File

@@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase):
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
self.assertEqual(0, yp[0:, 0])
self.assertEqual(1, y[28])
self.assertEqual(0.29026400765649235, yp[0, 1])
self.assertEqual(0.29026400766, round(yp[0, 1], 11))
def test_multiple_predict_proba(self):
# First 27 elements the predictions are the same as the truth