Change AdaBoost notebook

2025-08-16 07:56:06 +00:00 · 2020-06-27 23:34:15 +02:00
parent 554ec03c32
commit 5e3a8e3ec5
1 changed files with 16 additions and 38 deletions
--- a/notebooks/ensemble.ipynb
+++ b/notebooks/ensemble.ipynb
@@ -4,7 +4,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Test AdaBoost with different configurations"
+    "# Test Stree with AdaBoost and Bagging under different configurations"
   ]
  },
  {
@@ -57,12 +57,14 @@
  {
   "cell_type": "code",
   "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28)  y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
    }
   ],
   "source": [
@@ -97,8 +99,8 @@
    "\n",
    "# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
    "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
-    "# data = load_creditcard(0) # Take all the samples\n",
-    "data = load_creditcard(-100000)\n",
+    "data = load_creditcard(0) # Take all the samples\n",
+    "# data = load_creditcard(-100000)\n",
    "\n",
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
@@ -123,12 +125,14 @@
  {
   "cell_type": "code",
   "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Score Train:  0.9985499829409757\nScore Test:  0.998407854584052\nTook 39.45 seconds\n"
+     "text": "Score Train:  0.9994632932726069\nScore Test:  0.9994967405170698\nTook 140.74 seconds\n"
    }
   ],
   "source": [
@@ -161,46 +165,20 @@
  {
   "cell_type": "code",
   "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
+     "text": "Kernel: linear\tTime: 307.83 seconds\tScore Train: 0.9991924\tScore Test: 0.9994616\nKernel: rbf\tTime: 29.22 seconds\tScore Train: 0.9982745\tScore Test: 0.9982679\nKernel: poly\tTime: 207.48 seconds\tScore Train: 0.9988062\tScore Test: 0.9990403\n"
    }
   ],
   "source": [
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
-    "    clf.fit(Xtrain, ytrain)\n",
-    "    score_train = clf.score(Xtrain, ytrain)\n",
-    "    score_test = clf.score(Xtest, ytest)\n",
-    "    print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Test algorithm SAMME in AdaBoost to check speed/accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
-    }
-   ],
-   "source": [
-    "for kernel in ['linear', 'rbf', 'poly']:\n",
-    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    score_train = clf.score(Xtrain, ytrain)\n",
    "    score_test = clf.score(Xtest, ytest)\n",