From 5e3a8e3ec5282a19e448b7b6698c464710d2888f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Sat, 27 Jun 2020 23:34:15 +0200
Subject: [PATCH] Change adaboost notebook

---
 notebooks/{adaboost.ipynb => ensemble.ipynb} | 54 ++++++--------------
 1 file changed, 16 insertions(+), 38 deletions(-)
 rename notebooks/{adaboost.ipynb => ensemble.ipynb} (74%)

diff --git a/notebooks/adaboost.ipynb b/notebooks/ensemble.ipynb
similarity index 74%
rename from notebooks/adaboost.ipynb
rename to notebooks/ensemble.ipynb
index 751f9c4..a604b32 100644
--- a/notebooks/adaboost.ipynb
+++ b/notebooks/ensemble.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Test AdaBoost with different configurations"
+    "# Test Stree with AdaBoost and Bagging with different configurations"
    ]
   },
   {
@@ -57,12 +57,14 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
      "output_type": "stream",
      "name": "stdout",
-     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28)  y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
     }
    ],
    "source": [
@@ -97,8 +99,8 @@
     "\n",
     "# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
     "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
-    "# data = load_creditcard(0) # Take all the samples\n",
-    "data = load_creditcard(-100000)\n",
+    "data = load_creditcard(0) # Take all the samples\n",
+    "# data = load_creditcard(-100000)\n",
     "\n",
     "Xtrain = data[0]\n",
     "Xtest = data[1]\n",
@@ -123,12 +125,14 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
      "output_type": "stream",
      "name": "stdout",
-     "text": "Score Train:  0.9985499829409757\nScore Test:  0.998407854584052\nTook 39.45 seconds\n"
+     "text": "Score Train:  0.9994632932726069\nScore Test:  0.9994967405170698\nTook 140.74 seconds\n"
     }
    ],
    "source": [
@@ -161,46 +165,20 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
      "output_type": "stream",
      "name": "stdout",
-     "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
+     "text": "Kernel: linear\tTime: 307.83 seconds\tScore Train: 0.9991924\tScore Test: 0.9994616\nKernel: rbf\tTime: 29.22 seconds\tScore Train: 0.9982745\tScore Test: 0.9982679\nKernel: poly\tTime: 207.48 seconds\tScore Train: 0.9988062\tScore Test: 0.9990403\n"
     }
    ],
    "source": [
     "for kernel in ['linear', 'rbf', 'poly']:\n",
     "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
-    "    clf.fit(Xtrain, ytrain)\n",
-    "    score_train = clf.score(Xtrain, ytrain)\n",
-    "    score_test = clf.score(Xtest, ytest)\n",
-    "    print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Test algorithm SAMME in AdaBoost to check speed/accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
-    }
-   ],
-   "source": [
-    "for kernel in ['linear', 'rbf', 'poly']:\n",
-    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
     "    clf.fit(Xtrain, ytrain)\n",
     "    score_train = clf.score(Xtrain, ytrain)\n",
     "    score_test = clf.score(Xtest, ytest)\n",