#2 update benchmark notebook

2020-06-15 10:33:51 +02:00
parent c94bc068bd
commit 736ab7ef20
2 changed files with 48 additions and 210 deletions

@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -64,15 +64,13 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-06-14 23:45:42\n"
]
"name": "stdout",
"text": "2020-06-15 10:17:17\n"
}
],
"source": [
@@ -88,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -100,16 +98,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fraud: 0.173% 492\n",
"Valid: 99.827% 284,315\n"
]
"name": "stdout",
"text": "Fraud: 0.173% 492\nValid: 99.827% 284,315\n"
}
],
"source": [
@@ -119,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -131,16 +126,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X shape: (284807, 29)\n",
"y shape: (284807,)\n"
]
"name": "stdout",
"text": "X shape: (284807, 29)\ny shape: (284807,)\n"
}
],
"source": [
@@ -159,7 +151,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -170,7 +162,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -180,7 +172,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -190,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -200,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -210,7 +202,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -227,7 +219,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -252,179 +244,20 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"************************** Linear Tree **********************\n",
"Train Model Linear Tree took: 13.52 seconds\n",
"=========== Linear Tree - Train 199,364 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 1.000000 1.000000 1.000000 199020\n",
" 1 1.000000 1.000000 1.000000 344\n",
"\n",
" accuracy 1.000000 199364\n",
" macro avg 1.000000 1.000000 1.000000 199364\n",
"weighted avg 1.000000 1.000000 1.000000 199364\n",
"\n",
"=========== Linear Tree - Test 85,443 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999578 0.999613 0.999596 85295\n",
" 1 0.772414 0.756757 0.764505 148\n",
"\n",
" accuracy 0.999192 85443\n",
" macro avg 0.885996 0.878185 0.882050 85443\n",
"weighted avg 0.999184 0.999192 0.999188 85443\n",
"\n",
"Confusion Matrix in Train\n",
"[[199020 0]\n",
" [ 0 344]]\n",
"Confusion Matrix in Test\n",
"[[85262 33]\n",
" [ 36 112]]\n",
"************************** Random Forest **********************\n",
"Train Model Random Forest took: 152.5 seconds\n",
"=========== Random Forest - Train 199,364 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 1.000000 1.000000 1.000000 199020\n",
" 1 1.000000 1.000000 1.000000 344\n",
"\n",
" accuracy 1.000000 199364\n",
" macro avg 1.000000 1.000000 1.000000 199364\n",
"weighted avg 1.000000 1.000000 1.000000 199364\n",
"\n",
"=========== Random Forest - Test 85,443 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999660 0.999965 0.999812 85295\n",
" 1 0.975410 0.804054 0.881481 148\n",
"\n",
" accuracy 0.999625 85443\n",
" macro avg 0.987535 0.902009 0.940647 85443\n",
"weighted avg 0.999618 0.999625 0.999607 85443\n",
"\n",
"Confusion Matrix in Train\n",
"[[199020 0]\n",
" [ 0 344]]\n",
"Confusion Matrix in Test\n",
"[[85292 3]\n",
" [ 29 119]]\n",
"************************** Stree (SVM Tree) **********************\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/rmontanana/.virtualenvs/general/lib/python3.7/site-packages/sklearn/svm/_base.py:977: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" \"the number of iterations.\", ConvergenceWarning)\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.7/site-packages/sklearn/svm/_base.py:977: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" \"the number of iterations.\", ConvergenceWarning)\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.7/site-packages/sklearn/svm/_base.py:977: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" \"the number of iterations.\", ConvergenceWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train Model Stree (SVM Tree) took: 32.55 seconds\n",
"=========== Stree (SVM Tree) - Train 199,364 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999623 0.999864 0.999744 199020\n",
" 1 0.908784 0.781977 0.840625 344\n",
"\n",
" accuracy 0.999488 199364\n",
" macro avg 0.954204 0.890921 0.920184 199364\n",
"weighted avg 0.999467 0.999488 0.999469 199364\n",
"\n",
"=========== Stree (SVM Tree) - Test 85,443 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999637 0.999918 0.999777 85295\n",
" 1 0.943548 0.790541 0.860294 148\n",
"\n",
" accuracy 0.999555 85443\n",
" macro avg 0.971593 0.895229 0.930036 85443\n",
"weighted avg 0.999540 0.999555 0.999536 85443\n",
"\n",
"Confusion Matrix in Train\n",
"[[198993 27]\n",
" [ 75 269]]\n",
"Confusion Matrix in Test\n",
"[[85288 7]\n",
" [ 31 117]]\n",
"************************** AdaBoost model **********************\n",
"Train Model AdaBoost model took: 47.34 seconds\n",
"=========== AdaBoost model - Train 199,364 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999392 0.999678 0.999535 199020\n",
" 1 0.777003 0.648256 0.706815 344\n",
"\n",
" accuracy 0.999072 199364\n",
" macro avg 0.888198 0.823967 0.853175 199364\n",
"weighted avg 0.999008 0.999072 0.999030 199364\n",
"\n",
"=========== AdaBoost model - Test 85,443 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999484 0.999707 0.999596 85295\n",
" 1 0.806202 0.702703 0.750903 148\n",
"\n",
" accuracy 0.999192 85443\n",
" macro avg 0.902843 0.851205 0.875249 85443\n",
"weighted avg 0.999149 0.999192 0.999165 85443\n",
"\n",
"Confusion Matrix in Train\n",
"[[198956 64]\n",
" [ 121 223]]\n",
"Confusion Matrix in Test\n",
"[[85270 25]\n",
" [ 44 104]]\n",
"************************** Gradient Boost. **********************\n",
"Train Model Gradient Boost. took: 244.1 seconds\n",
"=========== Gradient Boost. - Train 199,364 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.999096 0.999854 0.999475 199020\n",
" 1 0.849741 0.476744 0.610801 344\n",
"\n",
" accuracy 0.998952 199364\n",
" macro avg 0.924419 0.738299 0.805138 199364\n",
"weighted avg 0.998839 0.998952 0.998804 199364\n",
"\n",
"=========== Gradient Boost. - Test 85,443 samples =============\n",
" precision recall f1-score support\n",
"\n",
" 0 0.998981 0.999730 0.999355 85295\n",
" 1 0.726190 0.412162 0.525862 148\n",
"\n",
" accuracy 0.998713 85443\n",
" macro avg 0.862586 0.705946 0.762609 85443\n",
"weighted avg 0.998508 0.998713 0.998535 85443\n",
"\n",
"Confusion Matrix in Train\n",
"[[198991 29]\n",
" [ 180 164]]\n",
"Confusion Matrix in Test\n",
"[[85272 23]\n",
" [ 87 61]]\n"
]
"text": "************************** Linear Tree **********************\nTrain Model Linear Tree took: 13.91 seconds\n=========== Linear Tree - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 1.000000 1.000000 1.000000 199020\n 1 1.000000 1.000000 1.000000 344\n\n accuracy 1.000000 199364\n macro avg 1.000000 1.000000 1.000000 199364\nweighted avg 1.000000 1.000000 1.000000 199364\n\n=========== Linear Tree - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999578 0.999613 0.999596 85295\n 1 0.772414 0.756757 0.764505 148\n\n accuracy 0.999192 85443\n macro avg 0.885996 0.878185 0.882050 85443\nweighted avg 0.999184 0.999192 0.999188 85443\n\nConfusion Matrix in Train\n[[199020 0]\n [ 0 344]]\nConfusion Matrix in Test\n[[85262 33]\n [ 36 112]]\n************************** Random Forest **********************\nTrain Model Random Forest took: 173.1 seconds\n=========== Random Forest - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 1.000000 1.000000 1.000000 199020\n 1 1.000000 1.000000 1.000000 344\n\n accuracy 1.000000 199364\n macro avg 1.000000 1.000000 1.000000 199364\nweighted avg 1.000000 1.000000 1.000000 199364\n\n=========== Random Forest - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999660 0.999965 0.999812 85295\n 1 0.975410 0.804054 0.881481 148\n\n accuracy 0.999625 85443\n macro avg 0.987535 0.902009 0.940647 85443\nweighted avg 0.999618 0.999625 0.999607 85443\n\nConfusion Matrix in Train\n[[199020 0]\n [ 0 344]]\nConfusion Matrix in Test\n[[85292 3]\n [ 29 119]]\n************************** Stree (SVM Tree) **********************\nTrain Model Stree (SVM Tree) took: 38.4 seconds\n=========== Stree (SVM Tree) - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 0.999623 0.999864 0.999744 199020\n 1 0.908784 0.781977 0.840625 344\n\n accuracy 0.999488 199364\n macro avg 0.954204 0.890921 0.920184 199364\nweighted avg 0.999467 0.999488 0.999469 199364\n\n=========== Stree (SVM Tree) - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999637 0.999918 0.999777 85295\n 1 0.943548 0.790541 0.860294 148\n\n accuracy 0.999555 85443\n macro avg 0.971593 0.895229 0.930036 85443\nweighted avg 0.999540 0.999555 0.999536 85443\n\nConfusion Matrix in Train\n[[198993 27]\n [ 75 269]]\nConfusion Matrix in Test\n[[85288 7]\n [ 31 117]]\n************************** AdaBoost model **********************\nTrain Model AdaBoost model took: 47.21 seconds\n=========== AdaBoost model - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 0.999392 0.999678 0.999535 199020\n 1 0.777003 0.648256 0.706815 344\n\n accuracy 0.999072 199364\n macro avg 0.888198 0.823967 0.853175 199364\nweighted avg 0.999008 0.999072 0.999030 199364\n\n=========== AdaBoost model - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999484 0.999707 0.999596 85295\n 1 0.806202 0.702703 0.750903 148\n\n accuracy 0.999192 85443\n macro avg 0.902843 0.851205 0.875249 85443\nweighted avg 0.999149 0.999192 0.999165 85443\n\nConfusion Matrix in Train\n[[198956 64]\n [ 121 223]]\nConfusion Matrix in Test\n[[85270 25]\n [ 44 104]]\n"
}
],
"source": [
"# Train & Test models\n",
"models = {\n",
" 'Linear Tree':linear_tree, 'Random Forest': random_forest, 'Stree (SVM Tree)': stree, \n",
" 'AdaBoost model': adaboost, 'Gradient Boost.': gradient\n",
" 'AdaBoost model': adaboost\n",
"}\n",
"\n",
"best_f1 = 0\n",
@@ -440,22 +273,13 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"**************************************************************************************************************\n",
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 152.54 seconds with 0.7 samples in train dataset\n",
"**************************************************************************************************************\n",
"Model: Linear Tree\t Time: 13.52 seconds\t f1: 0.7645\n",
"Model: Random Forest\t Time: 152.54 seconds\t f1: 0.8815\n",
"Model: Stree (SVM Tree)\t Time: 32.55 seconds\t f1: 0.8603\n",
"Model: AdaBoost model\t Time: 47.34 seconds\t f1: 0.7509\n",
"Model: Gradient Boost.\t Time: 244.12 seconds\t f1: 0.5259\n"
]
"name": "stdout",
"text": "**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 173.095 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time: 13.91 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 173.09 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time: 38.40 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time: 47.21 seconds\t f1: 0.7509\n"
}
],
"source": [
@@ -466,6 +290,20 @@
" print(f\"Model: {name}\\t Time: {time_spent:6.2f} seconds\\t f1: {f1:.4}\")"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"**************************************************************************************************************\n",
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 152.54 seconds with 0.7 samples in train dataset\n",
"**************************************************************************************************************\n",
"Model: Linear Tree\t Time: 13.52 seconds\t f1: 0.7645\n",
"Model: Random Forest\t Time: 152.54 seconds\t f1: 0.8815\n",
"Model: Stree (SVM Tree)\t Time: 32.55 seconds\t f1: 0.8603\n",
"Model: AdaBoost model\t Time: 47.34 seconds\t f1: 0.7509\n",
"Model: Gradient Boost.\t Time: 244.12 seconds\t f1: 0.5259"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -487,9 +325,9 @@
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.7.6 64-bit ('general': venv)",
"language": "python",
"name": "python3"
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
},
"language_info": {
"codemirror_mode": {
@@ -501,7 +339,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.6-final"
},
"toc": {
"base_numbering": 1,
@@ -555,4 +393,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}