From f438124057ad8f0f048567ba2c255c1c8fa4b3da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 23 Mar 2021 13:27:32 +0100 Subject: [PATCH] Fix mistakes (#24) (#28) Put pandas requirements in notebooks clean requirements.txt --- README.md | 8 +- notebooks/benchmark.ipynb | 302 ++++--------------------------------- notebooks/ensemble.ipynb | 84 +++-------- notebooks/features.ipynb | 257 ++++--------------------------- notebooks/gridsearch.ipynb | 149 +++--------------- requirements.txt | 5 +- 6 files changed, 104 insertions(+), 701 deletions(-) diff --git a/README.md b/README.md index 7c5d011..9485495 100644 --- a/README.md +++ b/README.md @@ -20,15 +20,13 @@ pip install git+https://github.com/doctorado-ml/stree - [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/notebooks/benchmark.ipynb) Benchmark -- [![Test](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/benchmark.ipynb) Benchmark +- [![benchmark](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/benchmark.ipynb) Benchmark -- [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/features.ipynb) Test features - -- [![Adaboost](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/adaboost.ipynb) Adaboost +- [![features](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/features.ipynb) Some features - [![Gridsearch](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/gridsearch.ipynb) Gridsearch -- [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics +- [![Ensemble](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/ensemble.ipynb) Ensembles ## Hyperparameters diff --git a/notebooks/benchmark.ipynb b/notebooks/benchmark.ipynb index b9739b5..edc3091 100644 --- a/notebooks/benchmark.ipynb +++ b/notebooks/benchmark.ipynb @@ -17,23 +17,25 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# Google Colab setup\n", "#\n", - "#!pip install git+https://github.com/doctorado-ml/stree" + "#!pip install git+https://github.com/doctorado-ml/stree\n", + "!pip install pandas" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import datetime, time\n", + "import os\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", @@ -47,11 +49,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import os\n", "if not os.path.isfile('data/creditcard.csv'):\n", " !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n", " !tar xzf creditcard.tgz" @@ -66,19 +67,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2021-01-14 11:30:51\n" - ] - } - ], + "outputs": [], "source": [ "print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))" ] @@ -92,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -104,20 +97,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraud: 0.173% 492\n", - "Valid: 99.827% 284,315\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n", "print(\"Valid: {0:.3f}% {1:,}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))" @@ -125,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -137,20 +121,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X shape: (284807, 29)\n", - "y shape: (284807,)\n" - ] - } - ], + "outputs": [], "source": [ "# Remove unneeded features\n", "y = df.Class.values\n", @@ -167,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -178,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -188,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -198,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -208,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -218,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -235,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -260,194 +235,15 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "************************** Linear Tree **********************\n", - "Train Model Linear Tree took: 10.25 seconds\n", - "=========== Linear Tree - Train 199,364 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 1.000000 1.000000 1.000000 199020\n", - " 1 1.000000 1.000000 1.000000 344\n", - "\n", - " accuracy 1.000000 199364\n", - " macro avg 1.000000 1.000000 1.000000 199364\n", - "weighted avg 1.000000 1.000000 1.000000 199364\n", - "\n", - "=========== Linear Tree - Test 85,443 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999578 0.999613 0.999596 85295\n", - " 1 0.772414 0.756757 0.764505 148\n", - "\n", - " accuracy 0.999192 85443\n", - " macro avg 0.885996 0.878185 0.882050 85443\n", - "weighted avg 0.999184 0.999192 0.999188 85443\n", - "\n", - "Confusion Matrix in Train\n", - "[[199020 0]\n", - " [ 0 344]]\n", - "Confusion Matrix in Test\n", - "[[85262 33]\n", - " [ 36 112]]\n", - "************************** Naive Bayes **********************\n", - "Train Model Naive Bayes took: 0.09943 seconds\n", - "=========== Naive Bayes - Train 199,364 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999692 0.978238 0.988849 199020\n", - " 1 0.061538 0.825581 0.114539 344\n", - "\n", - " accuracy 0.977975 199364\n", - " macro avg 0.530615 0.901910 0.551694 199364\n", - "weighted avg 0.998073 0.977975 0.987340 199364\n", - "\n", - "=========== Naive Bayes - Test 85,443 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999712 0.977994 0.988734 85295\n", - " 1 0.061969 0.837838 0.115403 148\n", - "\n", - " accuracy 0.977751 85443\n", - " macro avg 0.530841 0.907916 0.552068 85443\n", - "weighted avg 0.998088 0.977751 0.987221 85443\n", - "\n", - "Confusion Matrix in Train\n", - "[[194689 4331]\n", - " [ 60 284]]\n", - "Confusion Matrix in Test\n", - "[[83418 1877]\n", - " [ 24 124]]\n", - "************************** Stree (SVM Tree) **********************\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", - " warnings.warn(\"Liblinear failed to converge, increase \"\n", - "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", - " warnings.warn(\"Liblinear failed to converge, increase \"\n", - "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", - " warnings.warn(\"Liblinear failed to converge, increase \"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Model Stree (SVM Tree) took: 28.47 seconds\n", - "=========== Stree (SVM Tree) - Train 199,364 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999623 0.999864 0.999744 199020\n", - " 1 0.908784 0.781977 0.840625 344\n", - "\n", - " accuracy 0.999488 199364\n", - " macro avg 0.954204 0.890921 0.920184 199364\n", - "weighted avg 0.999467 0.999488 0.999469 199364\n", - "\n", - "=========== Stree (SVM Tree) - Test 85,443 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999637 0.999918 0.999777 85295\n", - " 1 0.943548 0.790541 0.860294 148\n", - "\n", - " accuracy 0.999555 85443\n", - " macro avg 0.971593 0.895229 0.930036 85443\n", - "weighted avg 0.999540 0.999555 0.999536 85443\n", - "\n", - "Confusion Matrix in Train\n", - "[[198993 27]\n", - " [ 75 269]]\n", - "Confusion Matrix in Test\n", - "[[85288 7]\n", - " [ 31 117]]\n", - "************************** Neural Network **********************\n", - "Train Model Neural Network took: 9.76 seconds\n", - "=========== Neural Network - Train 199,364 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999247 0.999844 0.999545 199020\n", - " 1 0.862222 0.563953 0.681898 344\n", - "\n", - " accuracy 0.999092 199364\n", - " macro avg 0.930734 0.781899 0.840722 199364\n", - "weighted avg 0.999010 0.999092 0.998997 199364\n", - "\n", - "=========== Neural Network - Test 85,443 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999356 0.999871 0.999613 85295\n", - " 1 0.894231 0.628378 0.738095 148\n", - "\n", - " accuracy 0.999228 85443\n", - " macro avg 0.946793 0.814125 0.868854 85443\n", - "weighted avg 0.999173 0.999228 0.999160 85443\n", - "\n", - "Confusion Matrix in Train\n", - "[[198989 31]\n", - " [ 150 194]]\n", - "Confusion Matrix in Test\n", - "[[85284 11]\n", - " [ 55 93]]\n", - "************************** SVC (linear) **********************\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", - " warnings.warn(\"Liblinear failed to converge, increase \"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Model SVC (linear) took: 8.207 seconds\n", - "=========== SVC (linear) - Train 199,364 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999237 0.999859 0.999548 199020\n", - " 1 0.872727 0.558140 0.680851 344\n", - "\n", - " accuracy 0.999097 199364\n", - " macro avg 0.935982 0.778999 0.840199 199364\n", - "weighted avg 0.999018 0.999097 0.998998 199364\n", - "\n", - "=========== SVC (linear) - Test 85,443 samples =============\n", - " precision recall f1-score support\n", - "\n", - " 0 0.999344 0.999894 0.999619 85295\n", - " 1 0.910891 0.621622 0.738956 148\n", - "\n", - " accuracy 0.999239 85443\n", - " macro avg 0.955117 0.810758 0.869287 85443\n", - "weighted avg 0.999191 0.999239 0.999168 85443\n", - "\n", - "Confusion Matrix in Train\n", - "[[198992 28]\n", - " [ 152 192]]\n", - "Confusion Matrix in Test\n", - "[[85286 9]\n", - " [ 56 92]]\n" - ] - } - ], + "outputs": [], "source": [ "# Train & Test models\n", "models = {\n", - " 'Linear Tree':linear_tree, 'Naive Bayes': naive_bayes, 'Stree (SVM Tree)': stree, \n", + " 'Linear Tree':linear_tree, 'Naive Bayes': naive_bayes, 'Stree ': stree, \n", " 'Neural Network': mlp, 'SVC (linear)': svc\n", "}\n", "\n", @@ -464,26 +260,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "**************************************************************************************************************\n", - "*The best f1 model is Stree (SVM Tree), with a f1 score: 0.8603 in 28.4743 seconds with 0.7 samples in train dataset\n", - "**************************************************************************************************************\n", - "Model: Linear Tree\t Time: 10.25 seconds\t f1: 0.7645\n", - "Model: Naive Bayes\t Time: 0.10 seconds\t f1: 0.1154\n", - "Model: Stree (SVM Tree)\t Time: 28.47 seconds\t f1: 0.8603\n", - "Model: Neural Network\t Time: 9.76 seconds\t f1: 0.7381\n", - "Model: SVC (linear)\t Time: 8.21 seconds\t f1: 0.739\n" - ] - } - ], + "outputs": [], "source": [ "print(\"*\"*110)\n", "print(f\"*The best f1 model is {best_model}, with a f1 score: {best_f1:.4} in {best_time:.6} seconds with {train_size:,} samples in train dataset\")\n", @@ -508,32 +289,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'C': 0.01,\n", - " 'criterion': 'entropy',\n", - " 'degree': 3,\n", - " 'gamma': 'scale',\n", - " 'kernel': 'linear',\n", - " 'max_depth': None,\n", - " 'max_features': None,\n", - " 'max_iter': 1000.0,\n", - " 'min_samples_split': 0,\n", - " 'random_state': 2020,\n", - " 'split_criteria': 'impurity',\n", - " 'splitter': 'random',\n", - " 'tol': 0.0001}" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "stree.get_params()" ] @@ -556,7 +314,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.2-final" }, "toc": { "base_numbering": 1, @@ -610,4 +368,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/ensemble.ipynb b/notebooks/ensemble.ipynb index f89a739..06dd23e 100644 --- a/notebooks/ensemble.ipynb +++ b/notebooks/ensemble.ipynb @@ -17,38 +17,43 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# Google Colab setup\n", "#\n", - "#!pip install git+https://github.com/doctorado-ml/stree" + "#!pip install git+https://github.com/doctorado-ml/stree\n", + "!pip install pandas" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time\n", + "import os\n", + "import random\n", "import warnings\n", + "import pandas as pd\n", + "import numpy as np\n", "from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.exceptions import ConvergenceWarning\n", "from stree import Stree\n", + "\n", "warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import os\n", "if not os.path.isfile('data/creditcard.csv'):\n", " !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n", " !tar xzf creditcard.tgz" @@ -56,30 +61,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraud: 0.173% 492\n", - "Valid: 99.827% 284315\n", - "X.shape (100492, 28) y.shape (100492,)\n", - "Fraud: 0.651% 654\n", - "Valid: 99.349% 99838\n" - ] - } - ], + "outputs": [], "source": [ "random_state=1\n", "\n", "def load_creditcard(n_examples=0):\n", - " import pandas as pd\n", - " import numpy as np\n", - " import random\n", " df = pd.read_csv('data/creditcard.csv')\n", " print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n", " print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n", @@ -130,21 +120,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Score Train: 0.9984504719663368\n", - "Score Test: 0.9983415151917209\n", - "Took 26.09 seconds\n" - ] - } - ], + "outputs": [], "source": [ "now = time.time()\n", "clf = Stree(max_depth=3, random_state=random_state, max_iter=1e3)\n", @@ -163,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -174,21 +154,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kernel: linear\tTime: 43.49 seconds\tScore Train: 0.9980098\tScore Test: 0.9980762\n", - "Kernel: rbf\tTime: 8.86 seconds\tScore Train: 0.9934891\tScore Test: 0.9934987\n", - "Kernel: poly\tTime: 41.14 seconds\tScore Train: 0.9972279\tScore Test: 0.9973133\n" - ] - } - ], + "outputs": [], "source": [ "for kernel in ['linear', 'rbf', 'poly']:\n", " now = time.time()\n", @@ -208,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -219,21 +189,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Kernel: linear\tTime: 187.51 seconds\tScore Train: 0.9984505\tScore Test: 0.9983083\n", - "Kernel: rbf\tTime: 73.65 seconds\tScore Train: 0.9993461\tScore Test: 0.9985074\n", - "Kernel: poly\tTime: 52.19 seconds\tScore Train: 0.9993461\tScore Test: 0.9987727\n" - ] - } - ], + "outputs": [], "source": [ "for kernel in ['linear', 'rbf', 'poly']:\n", " now = time.time()\n", @@ -261,9 +221,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.2-final" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/features.ipynb b/notebooks/features.ipynb index a7177fb..1b60745 100644 --- a/notebooks/features.ipynb +++ b/notebooks/features.ipynb @@ -17,24 +17,27 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# Google Colab setup\n", "#\n", - "#!pip install git+https://github.com/doctorado-ml/stree" + "#!pip install git+https://github.com/doctorado-ml/stree\n", + "!pip install pandas" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time\n", + "import random\n", "import warnings\n", + "import os\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.svm import SVC\n", @@ -42,6 +45,7 @@ "from sklearn.utils.estimator_checks import check_estimator\n", "from sklearn.datasets import make_classification, load_iris, load_wine\n", "from sklearn.model_selection import train_test_split\n", + "from sklearn.utils.class_weight import compute_sample_weight\n", "from sklearn.exceptions import ConvergenceWarning\n", "from stree import Stree\n", "warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)" @@ -49,13 +53,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ - "import os\n", "if not os.path.isfile('data/creditcard.csv'):\n", " !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n", " !tar xzf creditcard.tgz" @@ -63,31 +66,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraud: 0.173% 492\n", - "Valid: 99.827% 284315\n", - "X.shape (5492, 28) y.shape (5492,)\n", - "Fraud: 9.086% 499\n", - "Valid: 90.914% 4993\n", - "[0.09079084 0.09079084 0.09079084 0.09079084] [0.09101942 0.09101942 0.09101942 0.09101942]\n" - ] - } - ], + "outputs": [], "source": [ "random_state=1\n", "\n", "def load_creditcard(n_examples=0):\n", - " import pandas as pd\n", - " import numpy as np\n", - " import random\n", " df = pd.read_csv('data/creditcard.csv')\n", " print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n", " print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n", @@ -119,17 +106,8 @@ "Xtest = data[1]\n", "ytrain = data[2]\n", "ytest = data[3]\n", - "_, data = np.unique(ytrain, return_counts=True)\n", - "wtrain = (data[1] / np.sum(data), data[0] / np.sum(data))\n", - "_, data = np.unique(ytest, return_counts=True)\n", - "wtest = (data[1] / np.sum(data), data[0] / np.sum(data))\n", - "# Set weights inverse to its count class in dataset\n", - "weights = np.ones(Xtrain.shape[0],)\n", - "weights[ytrain==0] = wtrain[0]\n", - "weights[ytrain==1] = wtrain[1]\n", - "weights_test = np.ones(Xtest.shape[0],)\n", - "weights_test[ytest==0] = wtest[0]\n", - "weights_test[ytest==1] = wtest[1]\n", + "weights = compute_sample_weight(\"balanced\", ytrain)\n", + "weights_test = compute_sample_weight(\"balanced\", ytest)\n", "print(weights[:4], weights_test[:4])" ] }, @@ -150,22 +128,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy of Train without weights 0.9849115504682622\n", - "Accuracy of Train with weights 0.9849115504682622\n", - "Accuracy of Tests without weights 0.9848300970873787\n", - "Accuracy of Tests with weights 0.9805825242718447\n" - ] - } - ], + "outputs": [], "source": [ "C = 23\n", "print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n", @@ -184,21 +151,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time: 26.59s\tKernel: linear\tAccuracy_train: 0.9846514047866806\tAccuracy_test: 0.9848300970873787\n", - "Time: 0.56s\tKernel: rbf\tAccuracy_train: 0.9947970863683663\tAccuracy_test: 0.9866504854368932\n", - "Time: 0.23s\tKernel: poly\tAccuracy_train: 0.9955775234131113\tAccuracy_test: 0.9824029126213593\n" - ] - } - ], + "outputs": [], "source": [ "random_state=1\n", "for kernel in ['linear', 'rbf', 'poly']:\n", @@ -219,77 +176,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "************** C=0.001 ****************************\n", - "Classifier's accuracy (train): 0.9823\n", - "Classifier's accuracy (test) : 0.9836\n", - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down, - Leaf class=0 belief= 0.981455 impurity=0.1332 counts=(array([0, 1]), array([3493, 66]))\n", - "root - Up, - Leaf class=1 belief= 0.992982 impurity=0.0603 counts=(array([0, 1]), array([ 2, 283]))\n", - "\n", - "**************************************************\n", - "************** C=0.01 ****************************\n", - "Classifier's accuracy (train): 0.9834\n", - "Classifier's accuracy (test) : 0.9842\n", - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down, - Leaf class=0 belief= 0.982288 impurity=0.1284 counts=(array([0, 1]), array([3494, 63]))\n", - "root - Up, - Leaf class=1 belief= 0.996516 impurity=0.0335 counts=(array([0, 1]), array([ 1, 286]))\n", - "\n", - "**************************************************\n", - "************** C=1 ****************************\n", - "Classifier's accuracy (train): 0.9844\n", - "Classifier's accuracy (test) : 0.9848\n", - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n", - "root - Down - Down, - Leaf class=0 belief= 0.983108 impurity=0.1236 counts=(array([0, 1]), array([3492, 60]))\n", - "root - Down - Up, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n", - "root - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n", - "root - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n", - "\n", - "**************************************************\n", - "************** C=5 ****************************\n", - "Classifier's accuracy (train): 0.9847\n", - "Classifier's accuracy (test) : 0.9848\n", - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n", - "root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3492, 60]))\n", - "root - Down - Down - Down, - Leaf class=0 belief= 0.983385 impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n", - "root - Down - Down - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n", - "root - Down - Up, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n", - "root - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n", - "root - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n", - "\n", - "**************************************************\n", - "************** C=17 ****************************\n", - "Classifier's accuracy (train): 0.9847\n", - "Classifier's accuracy (test) : 0.9848\n", - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n", - "root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n", - "root - Down - Down - Down, - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n", - "root - Down - Down - Up, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n", - "root - Down - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n", - "root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n", - "root - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n", - "root - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n", - "\n", - "**************************************************\n", - "59.0161 secs\n" - ] - } - ], + "outputs": [], "source": [ "t = time.time()\n", "for C in (.001, .01, 1, 5, 17):\n", @@ -313,29 +204,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n", - "root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n", - "root - Down - Down - Down, - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n", - "root - Down - Down - Up, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n", - "root - Down - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n", - "root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n", - "root - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n", - "root - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n" - ] - } - ], + "outputs": [], "source": [ "#check iterator\n", "for i in list(clf):\n", @@ -344,29 +217,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n", - "root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n", - "root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n", - "root - Down - Down - Down, - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n", - "root - Down - Down - Up, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n", - "root - Down - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n", - "root - Down - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n", - "root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n", - "root - Up - Down, - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n", - "root - Up - Up, - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n" - ] - } - ], + "outputs": [], "source": [ "#check iterator again\n", "for i in clf:\n", @@ -382,75 +237,19 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 functools.partial(, 'Stree')\n", - "2 functools.partial(, 'Stree')\n", - "3 functools.partial(, 'Stree')\n", - "4 functools.partial(, 'Stree')\n", - "5 functools.partial(, 'Stree')\n", - "6 functools.partial(, 'Stree')\n", - "7 functools.partial(, 'Stree')\n", - "8 functools.partial(, 'Stree', kind='ones')\n", - "10 functools.partial(, 'Stree')\n", - "11 functools.partial(, 'Stree', readonly_memmap=True)\n", - "12 functools.partial(, 'Stree')\n", - "13 functools.partial(, 'Stree')\n", - "14 functools.partial(, 'Stree')\n", - "15 functools.partial(, 'Stree')\n", - "16 functools.partial(, 'Stree')\n", - "17 functools.partial(, 'Stree')\n", - "18 functools.partial(, 'Stree')\n", - "19 functools.partial(, 'Stree')\n", - "20 functools.partial(, 'Stree')\n", - "21 functools.partial(, 'Stree')\n", - "22 functools.partial(, 'Stree')\n", - "23 functools.partial(, 'Stree')\n", - "24 functools.partial(, 'Stree')\n", - "25 functools.partial(, 'Stree')\n", - "26 functools.partial(, 'Stree', readonly_memmap=True)\n", - "27 functools.partial(, 'Stree', readonly_memmap=True, X_dtype='float32')\n", - "28 functools.partial(, 'Stree')\n", - "29 functools.partial(, 'Stree')\n", - "30 functools.partial(, 'Stree')\n", - "31 functools.partial(, 'Stree')\n", - "32 functools.partial(, 'Stree')\n", - "33 functools.partial(, 'Stree')\n", - "34 functools.partial(, 'Stree')\n", - "35 functools.partial(, 'Stree')\n", - "36 functools.partial(, 'Stree')\n", - "37 functools.partial(, 'Stree')\n", - "38 functools.partial(, 'Stree')\n", - "39 functools.partial(, 'Stree')\n", - "40 functools.partial(, 'Stree')\n", - "41 functools.partial(, 'Stree')\n", - "42 functools.partial(, 'Stree')\n", - "43 functools.partial(, 'Stree')\n", - "44 functools.partial(, 'Stree')\n", - "45 functools.partial(, 'Stree')\n", - "46 functools.partial(, 'Stree')\n", - "47 functools.partial(, 'Stree')\n" - ] - } - ], + "outputs": [], "source": [ "# Make checks one by one\n", "c = 0\n", "checks = check_estimator(Stree(), generate_only=True)\n", "for check in checks:\n", " c += 1\n", - " if c == 9:\n", - " pass\n", - " else:\n", - " print(c, check[1])\n", - " check[1](check[0])" + " print(c, check[1])\n", + " check[1](check[0])" ] }, { @@ -552,9 +351,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.8.2-final" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/gridsearch.ipynb b/notebooks/gridsearch.ipynb index 3cf59a5..af3c741 100644 --- a/notebooks/gridsearch.ipynb +++ b/notebooks/gridsearch.ipynb @@ -18,19 +18,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# Google Colab setup\n", "#\n", - "#!pip install git+https://github.com/doctorado-ml/stree" + "#!pip install git+https://github.com/doctorado-ml/stree\n", + "!pip install pandas" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -38,6 +39,10 @@ }, "outputs": [], "source": [ + "import random\n", + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", "from sklearn.ensemble import AdaBoostClassifier\n", "from sklearn.svm import LinearSVC\n", "from sklearn.model_selection import GridSearchCV, train_test_split\n", @@ -46,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -54,7 +59,6 @@ }, "outputs": [], "source": [ - "import os\n", "if not os.path.isfile('data/creditcard.csv'):\n", " !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n", " !tar xzf creditcard.tgz" @@ -62,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -70,26 +74,11 @@ "outputId": "afc822fb-f16a-4302-8a67-2b9e2880159b", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fraud: 0.173% 492\n", - "Valid: 99.827% 284315\n", - "X.shape (1492, 28) y.shape (1492,)\n", - "Fraud: 33.177% 495\n", - "Valid: 66.823% 997\n" - ] - } - ], + "outputs": [], "source": [ "random_state=1\n", "\n", "def load_creditcard(n_examples=0):\n", - " import pandas as pd\n", - " import numpy as np\n", - " import random\n", " df = pd.read_csv('data/creditcard.csv')\n", " print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n", " print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n", @@ -132,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -176,39 +165,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'C': 1.0,\n", - " 'criterion': 'entropy',\n", - " 'degree': 3,\n", - " 'gamma': 'scale',\n", - " 'kernel': 'linear',\n", - " 'max_depth': None,\n", - " 'max_features': None,\n", - " 'max_iter': 100000.0,\n", - " 'min_samples_split': 0,\n", - " 'random_state': None,\n", - " 'split_criteria': 'impurity',\n", - " 'splitter': 'random',\n", - " 'tol': 0.0001}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "Stree().get_params()" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -216,69 +182,7 @@ "outputId": "7703413a-d563-4289-a13b-532f38f82762", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fitting 5 folds for each of 1008 candidates, totalling 5040 fits\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.\n", - "[Parallel(n_jobs=-1)]: Done 40 tasks | elapsed: 1.6s\n", - "[Parallel(n_jobs=-1)]: Done 130 tasks | elapsed: 3.1s\n", - "[Parallel(n_jobs=-1)]: Done 256 tasks | elapsed: 5.5s\n", - "[Parallel(n_jobs=-1)]: Done 418 tasks | elapsed: 9.3s\n", - "[Parallel(n_jobs=-1)]: Done 616 tasks | elapsed: 18.6s\n", - "[Parallel(n_jobs=-1)]: Done 850 tasks | elapsed: 28.2s\n", - "[Parallel(n_jobs=-1)]: Done 1120 tasks | elapsed: 35.4s\n", - "[Parallel(n_jobs=-1)]: Done 1426 tasks | elapsed: 43.5s\n", - "[Parallel(n_jobs=-1)]: Done 1768 tasks | elapsed: 51.3s\n", - "[Parallel(n_jobs=-1)]: Done 2146 tasks | elapsed: 1.0min\n", - "[Parallel(n_jobs=-1)]: Done 2560 tasks | elapsed: 1.2min\n", - "[Parallel(n_jobs=-1)]: Done 3010 tasks | elapsed: 1.4min\n", - "[Parallel(n_jobs=-1)]: Done 3496 tasks | elapsed: 1.7min\n", - "[Parallel(n_jobs=-1)]: Done 4018 tasks | elapsed: 2.1min\n", - "[Parallel(n_jobs=-1)]: Done 4576 tasks | elapsed: 2.6min\n", - "[Parallel(n_jobs=-1)]: Done 5040 out of 5040 | elapsed: 2.9min finished\n" - ] - }, - { - "data": { - "text/plain": [ - "GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=1),\n", - " n_jobs=-1,\n", - " param_grid=[{'base_estimator': [Stree(C=55, max_depth=7,\n", - " random_state=1,\n", - " split_criteria='max_samples',\n", - " tol=0.1)],\n", - " 'base_estimator__C': [1, 7, 55],\n", - " 'base_estimator__kernel': ['linear'],\n", - " 'base_estimator__max_depth': [3, 5, 7],\n", - " 'base_estimator__split_criteria': ['max_samples',\n", - " 'impuri...\n", - " {'base_estimator': [Stree(random_state=1)],\n", - " 'base_estimator__C': [1, 7, 55],\n", - " 'base_estimator__gamma': [0.1, 1, 10],\n", - " 'base_estimator__kernel': ['rbf'],\n", - " 'base_estimator__max_depth': [3, 5, 7],\n", - " 'base_estimator__split_criteria': ['max_samples',\n", - " 'impurity'],\n", - " 'base_estimator__tol': [0.1, 0.01],\n", - " 'learning_rate': [0.5, 1],\n", - " 'n_estimators': [10, 25]}],\n", - " return_train_score=True, verbose=5)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "clf = AdaBoostClassifier(random_state=random_state, algorithm=\"SAMME\")\n", "grid = GridSearchCV(clf, parameters, verbose=5, n_jobs=-1, return_train_score=True)\n", @@ -287,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -295,20 +199,7 @@ "outputId": "285163c8-fa33-4915-8ae7-61c4f7844344", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best estimator: AdaBoostClassifier(algorithm='SAMME',\n", - " base_estimator=Stree(C=55, max_depth=7, random_state=1,\n", - " split_criteria='max_samples', tol=0.1),\n", - " learning_rate=0.5, n_estimators=25, random_state=1)\n", - "Best hyperparameters: {'base_estimator': Stree(C=55, max_depth=7, random_state=1, split_criteria='max_samples', tol=0.1), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 7, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.1, 'learning_rate': 0.5, 'n_estimators': 25}\n", - "Best accuracy: 0.9511777695988222\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Best estimator: \", grid.best_estimator_)\n", "print(\"Best hyperparameters: \", grid.best_params_)\n", @@ -354,9 +245,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.8.2-final" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index dbfe578..b8202e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1 @@ -numpy -scikit-learn -pandas -ipympl \ No newline at end of file +scikit-learn>0.24 \ No newline at end of file