mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 23:46:02 +00:00
test results & fix single predict_proba
This commit is contained in:
10
main.py
10
main.py
@@ -46,4 +46,12 @@ clf.fit(Xtrain, ytrain)
|
||||
print(f"Took {time.time() - now:.2f} seconds to train")
|
||||
print(clf)
|
||||
print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
|
||||
print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
|
||||
print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
|
||||
proba = clf.predict_proba(Xtest)
|
||||
res0 = proba[proba[:, 0] == 0]
|
||||
# Select the rows predicted as class 1; the original filtered on == 0, which
# made res1 an exact duplicate of res0. Column 0 of predict_proba's result
# appears to hold the predicted class label -- TODO confirm against Stree.
res1 = proba[proba[:, 0] == 1]
|
||||
print("++++++++++res0++++++++++++")
|
||||
print(res0[res0[:, 1] > .8])
|
||||
print("**********res1************")
|
||||
print(res1[res1[:, 1] < .4])
|
||||
print(clf.predict_proba(Xtest))
|
80
test.ipynb
80
test.ipynb
File diff suppressed because one or more lines are too long
70
test2.ipynb
70
test2.ipynb
@@ -35,7 +35,7 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
|
||||
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28) y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -74,7 +74,7 @@
|
||||
"\n",
|
||||
"# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
|
||||
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
|
||||
"data = load_creditcard(-1000) # Take all the samples\n",
|
||||
"data = load_creditcard() # Take all the samples\n",
|
||||
"\n",
|
||||
"Xtrain = data[0]\n",
|
||||
"Xtest = data[1]\n",
|
||||
@@ -90,7 +90,7 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.983923 counts=(array([0, 1]), array([ 5, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945430 counts=(array([0, 1]), array([693, 40]))\n\n\n0.0277 secs\n"
|
||||
"text": "root\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief=0.941799 counts=(array([0, 1]), array([ 11, 178]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up - Up, <cgaf> - Leaf class=0 belief=0.952381 counts=(array([0, 1]), array([20, 1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <cgaf> - Leaf class=1 belief=0.902174 counts=(array([0, 1]), array([ 9, 83]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([14]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief=0.999598 counts=(array([0, 1]), array([198966, 80]))\n\n42.9141 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -98,7 +98,23 @@
|
||||
"clf = Stree(C=.01, random_state=random_state)\n",
|
||||
"clf.fit(Xtrain, ytrain)\n",
|
||||
"print(clf)\n",
|
||||
"print()\n",
|
||||
"print(f\"{time.time() - t:.4f} secs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n0.2084 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t = time.time()\n",
|
||||
"print(clf.predict(Xtest)[:17])\n",
|
||||
"print(f\"{time.time() - t:.4f} secs\")"
|
||||
]
|
||||
},
|
||||
@@ -110,19 +126,26 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n****** (311, 1) (311, 1)\n****** (733, 1) (733, 1)\n[[0. 0.94542974]\n [1. 0.98392283]\n [0. 0.94542974]\n ...\n [0. 0.94542974]\n [0. 0.94542974]\n [1. 0.98392283]]\n"
|
||||
"text": "[[0. 0.26356965]\n [0. 0.22665372]\n [0. 0.25678353]\n [0. 0.26056019]\n [0. 0.26583006]\n [0. 0.24360041]\n [0. 0.26366182]\n [0. 0.26012045]\n [0. 0.2298345 ]\n [0. 0.25726294]\n [0. 0.25909988]\n [0. 0.25940575]\n [0. 0.24256254]\n [0. 0.15094485]\n [0. 0.26327588]\n [0. 0.26382949]\n [0. 0.26290957]]\n0.2083 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"k = clf.predict_proba(Xtrain)\n",
|
||||
"print(k)"
|
||||
"t = time.time()\n",
|
||||
"print(clf.predict_proba(Xtest)[:17, :])\n",
|
||||
"print(f\"{time.time() - t:.4f} secs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Classifier's accuracy (train): 0.9995\nClassifier's accuracy (test) : 0.9995\n0.5074 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t = time.time()\n",
|
||||
"print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
|
||||
@@ -130,20 +153,17 @@
|
||||
"print(f\"{time.time() - t:.4f} secs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# outcomes without optimization executing predict_proba. 87 seconds\n",
|
||||
"(284807, 2)\n",
|
||||
"87.5212 secs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "0.9993211848834895\n13.3835 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t = time.time()\n",
|
||||
"clf2 = LinearSVC(C=.01, random_state=random_state)\n",
|
||||
@@ -154,9 +174,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "0.9991573329588147\n22.7635 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"t = time.time()\n",
|
||||
"clf3 = DecisionTreeClassifier(random_state=random_state)\n",
|
||||
|
@@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase):
|
||||
yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
|
||||
self.assertEqual(0, yp[0:, 0])
|
||||
self.assertEqual(1, y[28])
|
||||
self.assertEqual(0.29026400765649235, yp[0, 1])
|
||||
self.assertEqual(0.29026400766, round(yp[0, 1], 11))
|
||||
|
||||
def test_multiple_predict_proba(self):
|
||||
# First 27 elements the predictions are the same as the truth
|
||||
|
Reference in New Issue
Block a user