test results & fix single predict_proba

2025-08-15 23:46:02 +00:00 · 2020-05-18 17:08:05 +02:00
parent 382ae921ab
commit 68512b3d75
4 changed files with 113 additions and 49 deletions
--- a/main.py
+++ b/main.py
@@ -46,4 +46,12 @@ clf.fit(Xtrain, ytrain)
 print(f"Took {time.time() - now:.2f} seconds to train")
 print(clf)
 print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
-print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
+print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
+proba = clf.predict_proba(Xtest)  # col 0: predicted class, col 1: belief (as seen in the notebook output)
+res0 = proba[proba[:, 0] == 0]  # rows predicted as class 0
+res1 = proba[proba[:, 0] == 1]  # rows predicted as class 1 (previously reused the class-0 mask)
+print("++++++++++res0++++++++++++")
+print(res0[res0[:, 1] > .8])
+print("**********res1************")
+print(res1[res1[:, 1] < .4])
+print(proba)  # reuse the computed result instead of running predict_proba again
--- a/test.ipynb
+++ b/test.ipynb
--- a/test2.ipynb
+++ b/test2.ipynb
@@ -35,7 +35,7 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28)  y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
    }
   ],
   "source": [
@@ -74,7 +74,7 @@
    "\n",
    "# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
    "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
-    "data = load_creditcard(-1000) # Take all the samples\n",
+    "data = load_creditcard() # Take all the samples\n",
    "\n",
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
@@ -90,7 +90,7 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.983923 counts=(array([0, 1]), array([  5, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945430 counts=(array([0, 1]), array([693,  40]))\n\n\n0.0277 secs\n"
+     "text": "root\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief=0.941799 counts=(array([0, 1]), array([ 11, 178]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Down - Up - Up, <cgaf> - Leaf class=0 belief=0.952381 counts=(array([0, 1]), array([20,  1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <cgaf> - Leaf class=1 belief=0.902174 counts=(array([0, 1]), array([ 9, 83]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([14]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief=0.999598 counts=(array([0, 1]), array([198966,     80]))\n\n42.9141 secs\n"
    }
   ],
   "source": [
@@ -98,7 +98,23 @@
    "clf = Stree(C=.01, random_state=random_state)\n",
    "clf.fit(Xtrain, ytrain)\n",
    "print(clf)\n",
-    "print()\n",
+    "print(f\"{time.time() - t:.4f} secs\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n0.2084 secs\n"
+    }
+   ],
+   "source": [
+    "t = time.time()\n",
+    "print(clf.predict(Xtest)[:17])\n",
    "print(f\"{time.time() - t:.4f} secs\")"
   ]
  },
@@ -110,19 +126,26 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "+++++up (733, 28) (733,) (733, 1)\n+++++down (311, 28) (311,) (311, 1)\n****** (311, 1) (311, 1)\n****** (733, 1) (733, 1)\n[[0.         0.94542974]\n [1.         0.98392283]\n [0.         0.94542974]\n ...\n [0.         0.94542974]\n [0.         0.94542974]\n [1.         0.98392283]]\n"
+     "text": "[[0.         0.26356965]\n [0.         0.22665372]\n [0.         0.25678353]\n [0.         0.26056019]\n [0.         0.26583006]\n [0.         0.24360041]\n [0.         0.26366182]\n [0.         0.26012045]\n [0.         0.2298345 ]\n [0.         0.25726294]\n [0.         0.25909988]\n [0.         0.25940575]\n [0.         0.24256254]\n [0.         0.15094485]\n [0.         0.26327588]\n [0.         0.26382949]\n [0.         0.26290957]]\n0.2083 secs\n"
    }
   ],
   "source": [
-    "k = clf.predict_proba(Xtrain)\n",
-    "print(k)"
+    "t = time.time()\n",
+    "print(clf.predict_proba(Xtest)[:17, :])\n",
+    "print(f\"{time.time() - t:.4f} secs\")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "Classifier's accuracy (train): 0.9995\nClassifier's accuracy (test) : 0.9995\n0.5074 secs\n"
+    }
+   ],
   "source": [
    "t = time.time()\n",
    "print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
@@ -130,20 +153,17 @@
    "print(f\"{time.time() - t:.4f} secs\")"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# outcomes without optimization executing predict_proba. 87 seconds\n",
-    "(284807, 2)\n",
-    "87.5212 secs"
-   ]
-  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "0.9993211848834895\n13.3835 secs\n"
+    }
+   ],
   "source": [
    "t = time.time()\n",
    "clf2 = LinearSVC(C=.01, random_state=random_state)\n",
@@ -154,9 +174,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "0.9991573329588147\n22.7635 secs\n"
+    }
+   ],
   "source": [
    "t = time.time()\n",
    "clf3 = DecisionTreeClassifier(random_state=random_state)\n",
--- a/tests/Stree_test.py
+++ b/tests/Stree_test.py
@@ -148,7 +148,7 @@ class Stree_test(unittest.TestCase):
        yp = self._clf.predict_proba(X[28, :].reshape(-1, X.shape[1]))
        self.assertEqual(0, yp[0:, 0])
        self.assertEqual(1, y[28])
-        self.assertEqual(0.29026400765649235, yp[0, 1])
+        self.assertEqual(0.29026400766, round(yp[0, 1], 11))

    def test_multiple_predict_proba(self):
        # First 27 elements the predictions are the same as the truth