From 5e3a8e3ec5282a19e448b7b6698c464710d2888f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 27 Jun 2020 23:34:15 +0200 Subject: [PATCH] Change adaboost notebook --- notebooks/{adaboost.ipynb => ensemble.ipynb} | 54 ++++++-------------- 1 file changed, 16 insertions(+), 38 deletions(-) rename notebooks/{adaboost.ipynb => ensemble.ipynb} (74%) diff --git a/notebooks/adaboost.ipynb b/notebooks/ensemble.ipynb similarity index 74% rename from notebooks/adaboost.ipynb rename to notebooks/ensemble.ipynb index 751f9c4..a604b32 100644 --- a/notebooks/adaboost.ipynb +++ b/notebooks/ensemble.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Test AdaBoost with different configurations" + "# Test Stree with AdaBoost and Bagging with different configurations" ] }, { @@ -57,12 +57,14 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "output_type": "stream", "name": "stdout", - "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28) y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n" + "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28) y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n" } ], "source": [ @@ -97,8 +99,8 @@ "\n", "# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n", "# data = load_creditcard(5000) # Take the first 5000 samples\n", - "# data = load_creditcard(0) # Take all the samples\n", - "data = load_creditcard(-100000)\n", + "data = load_creditcard(0) # Take all the samples\n", + "# data = load_creditcard(-100000)\n", "\n", "Xtrain = data[0]\n", "Xtest = data[1]\n", @@ -123,12 +125,14 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "output_type": "stream", "name": "stdout", - "text": "Score Train: 0.9985499829409757\nScore Test: 0.998407854584052\nTook 39.45 seconds\n" + "text": "Score Train: 0.9994632932726069\nScore Test: 0.9994967405170698\nTook 140.74 seconds\n" } ], "source": [ @@ -161,46 +165,20 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "output_type": "stream", "name": "stdout", - "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n" + "text": "Kernel: linear\tTime: 307.83 seconds\tScore Train: 0.9991924\tScore Test: 0.9994616\nKernel: rbf\tTime: 29.22 seconds\tScore Train: 0.9982745\tScore Test: 0.9982679\nKernel: poly\tTime: 207.48 seconds\tScore Train: 0.9988062\tScore Test: 0.9990403\n" } ], "source": [ "for kernel in ['linear', 'rbf', 'poly']:\n", " now = time.time()\n", - " clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n", - " clf.fit(Xtrain, ytrain)\n", - " score_train = clf.score(Xtrain, ytrain)\n", - " score_test = clf.score(Xtest, ytest)\n", - " print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test algorithm SAMME in AdaBoost to check speed/accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n" - } - ], - "source": [ - "for kernel in ['linear', 'rbf', 'poly']:\n", - " now = time.time()\n", - " clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n", + " clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n", " clf.fit(Xtrain, ytrain)\n", " score_train = clf.score(Xtrain, ytrain)\n", " score_test = clf.score(Xtest, ytest)\n",