From 8bef3efc03e2665d0b9331f214e4fa55fd381628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 4 Nov 2021 10:21:38 +0100 Subject: [PATCH] Update requirements for tests --- notebooks/benchmark.ipynb | 81 ++++++++++++--------------------------- notebooks/wine_iris.ipynb | 77 ++++++++++++------------------------- requirements.txt | 2 +- 3 files changed, 51 insertions(+), 109 deletions(-) diff --git a/notebooks/benchmark.ipynb b/notebooks/benchmark.ipynb index 0063a25..a16d30d 100644 --- a/notebooks/benchmark.ipynb +++ b/notebooks/benchmark.ipynb @@ -17,20 +17,19 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#\n", "# Google Colab setup\n", "#\n", - "#!pip install git+https://github.com/doctorado-ml/odte\n", - "#!pip install git+https://github.com/doctorado-ml/stree" + "#!pip install git+https://github.com/doctorado-ml/odte\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -66,17 +65,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "2020-07-04 21:56:25\n" - } - ], + "outputs": [], "source": [ "print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))" ] @@ -90,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -102,17 +95,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Fraud: 0.173% 492\nValid: 99.827% 284,315\n" - } - ], + "outputs": [], "source": [ "print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n", "print(\"Valid: {0:.3f}% {1:,}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))" @@ -120,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -132,17 +119,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "X shape: (284807, 29)\ny shape: (284807,)\n" - } - ], + "outputs": [], "source": [ "# Remove unneeded features\n", "y = df.Class.values\n", @@ -159,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -170,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -180,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -200,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -210,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -220,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -237,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -262,17 +243,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "************************** Linear Tree **********************\nTrain Model Linear Tree took: 14.81 seconds\n=========== Linear Tree - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 1.000000 1.000000 1.000000 199020\n 1 1.000000 1.000000 1.000000 344\n\n accuracy 1.000000 199364\n macro avg 1.000000 1.000000 1.000000 199364\nweighted avg 1.000000 1.000000 1.000000 199364\n\n=========== Linear Tree - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999578 0.999613 0.999596 85295\n 1 0.772414 0.756757 0.764505 148\n\n accuracy 0.999192 85443\n macro avg 0.885996 0.878185 0.882050 85443\nweighted avg 0.999184 0.999192 0.999188 85443\n\nConfusion Matrix in Train\n[[199020 0]\n [ 0 344]]\nConfusion Matrix in Test\n[[85262 33]\n [ 36 112]]\n************************** Random Forest **********************\nTrain Model Random Forest took: 172.6 seconds\n=========== Random Forest - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 1.000000 1.000000 1.000000 199020\n 1 1.000000 1.000000 1.000000 344\n\n accuracy 1.000000 199364\n macro avg 1.000000 1.000000 1.000000 199364\nweighted avg 1.000000 1.000000 1.000000 199364\n\n=========== Random Forest - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999660 0.999965 0.999812 85295\n 1 0.975410 0.804054 0.881481 148\n\n accuracy 0.999625 85443\n macro avg 0.987535 0.902009 0.940647 85443\nweighted avg 0.999618 0.999625 0.999607 85443\n\nConfusion Matrix in Train\n[[199020 0]\n [ 0 344]]\nConfusion Matrix in Test\n[[85292 3]\n [ 29 119]]\n************************** Stree (SVM Tree) **********************\nTrain Model Stree (SVM Tree) took: 39.26 seconds\n=========== Stree (SVM Tree) - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 0.999623 0.999864 0.999744 199020\n 1 0.908784 0.781977 0.840625 344\n\n accuracy 0.999488 199364\n macro avg 0.954204 0.890921 0.920184 199364\nweighted avg 0.999467 0.999488 0.999469 199364\n\n=========== Stree (SVM Tree) - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999637 0.999918 0.999777 85295\n 1 0.943548 0.790541 0.860294 148\n\n accuracy 0.999555 85443\n macro avg 0.971593 0.895229 0.930036 85443\nweighted avg 0.999540 0.999555 0.999536 85443\n\nConfusion Matrix in Train\n[[198993 27]\n [ 75 269]]\nConfusion Matrix in Test\n[[85288 7]\n [ 31 117]]\n************************** AdaBoost model **********************\nTrain Model AdaBoost model took: 49.55 seconds\n=========== AdaBoost model - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 0.999392 0.999678 0.999535 199020\n 1 0.777003 0.648256 0.706815 344\n\n accuracy 0.999072 199364\n macro avg 0.888198 0.823967 0.853175 199364\nweighted avg 0.999008 0.999072 0.999030 199364\n\n=========== AdaBoost model - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.999484 0.999707 0.999596 85295\n 1 0.806202 0.702703 0.750903 148\n\n accuracy 0.999192 85443\n macro avg 0.902843 0.851205 0.875249 85443\nweighted avg 0.999149 0.999192 0.999165 85443\n\nConfusion Matrix in Train\n[[198956 64]\n [ 121 223]]\nConfusion Matrix in Test\n[[85270 25]\n [ 44 104]]\n************************** Odte model **********************\nTrain Model Odte model took: 5.758e+03 seconds\n=========== Odte model - Train 199,364 samples =============\n precision recall f1-score support\n\n 0 0.998725 0.999990 0.999357 199020\n 1 0.978261 0.261628 0.412844 344\n\n accuracy 0.998716 199364\n macro avg 0.988493 0.630809 0.706101 199364\nweighted avg 0.998690 0.998716 0.998345 199364\n\n=========== Odte model - Test 85,443 samples =============\n precision recall f1-score support\n\n 0 0.998794 0.999988 0.999391 85295\n 1 0.978261 0.304054 0.463918 148\n\n accuracy 0.998783 85443\n macro avg 0.988527 0.652021 0.731654 85443\nweighted avg 0.998758 0.998783 0.998463 85443\n\nConfusion Matrix in Train\n[[199018 2]\n [ 254 90]]\nConfusion Matrix in Test\n[[85294 1]\n [ 103 45]]\n" - } - ], + "outputs": [], "source": [ "# Train & Test models\n", "models = {\n", @@ -293,17 +268,11 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 172.611 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time: 14.81 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 172.61 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time: 39.26 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time: 49.55 seconds\t f1: 0.7509\nModel: Odte model\t Time: 5758.26 seconds\t f1: 0.4639\n" - } - ], + "outputs": [], "source": [ "print(\"*\"*110)\n", "print(f\"*The best f1 model is {best_model}, with a f1 score: {best_f1:.4} in {best_time:.6} seconds with {train_size:,} samples in train dataset\")\n", @@ -416,4 +385,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/wine_iris.ipynb b/notebooks/wine_iris.ipynb index 53157eb..f868ee7 100644 --- a/notebooks/wine_iris.ipynb +++ b/notebooks/wine_iris.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -47,17 +47,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.18 seconds\nTraining odte...\nScore: 100.000 in 1.33 seconds\nTraining adaboost...\nScore: 94.444 in 0.62 seconds\nTraining bagging...\nScore: 100.000 in 2.88 seconds\n" - } - ], + "outputs": [], "source": [ "print(\"*\"*30,\"Results for wine\", \"*\"*30)\n", "for clf_type, item in clf.items():\n", @@ -69,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -80,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -94,17 +88,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.12 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 100.000 in 0.13 seconds\n" - } - ], + "outputs": [], "source": [ "print(\"*\"*30,\"Results for iris\", \"*\"*30)\n", "for clf_type, item in clf.items():\n", @@ -116,17 +104,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "{'fit_time': array([0.22121811, 0.21985221, 0.19185114, 0.19187999, 0.20067477]), 'score_time': array([0.01268458, 0.01461887, 0.01160598, 0.01308703, 0.01070738]), 'test_score': array([1. , 1. , 1. , 0.93333333, 1. ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975 ])}\n98.667 +- 0.027\n" - } - ], + "outputs": [], "source": [ "cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n", "print(cross)\n", @@ -135,17 +117,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "{'fit_time': array([0.02130818, 0.02036786, 0.02829814, 0.02326989, 0.03807497]), 'score_time': array([0.00140715, 0.00173712, 0.00199389, 0.00132608, 0.00199199]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n" - } - ], + "outputs": [], "source": [ "cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n", "print(cross)\n", @@ -154,17 +130,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "1 functools.partial(, 'Odte')\n2 functools.partial(, 'Odte')\n3 functools.partial(, 'Odte')\n4 functools.partial(, 'Odte')\n5 functools.partial(, 'Odte')\n6 functools.partial(, 'Odte')\n7 functools.partial(, 'Odte')\n8 functools.partial(, 'Odte')\n9 functools.partial(, 'Odte')\n10 functools.partial(, 'Odte', readonly_memmap=True)\n11 functools.partial(, 'Odte')\n12 functools.partial(, 'Odte')\n13 functools.partial(, 'Odte')\n14 functools.partial(, 'Odte')\n15 functools.partial(, 'Odte')\n16 functools.partial(, 'Odte')\n17 functools.partial(, 'Odte')\n18 functools.partial(, 'Odte')\n19 functools.partial(, 'Odte')\n20 functools.partial(, 'Odte')\n21 functools.partial(, 'Odte')\n22 functools.partial(, 'Odte')\n23 functools.partial(, 'Odte')\n24 functools.partial(, 'Odte', readonly_memmap=True)\n25 functools.partial(, 'Odte', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(, 'Odte')\n27 functools.partial(, 'Odte')\n28 functools.partial(, 'Odte')\n29 functools.partial(, 'Odte')\n30 functools.partial(, 'Odte')\n31 functools.partial(, 'Odte')\n32 functools.partial(, 'Odte')\n33 functools.partial(, 'Odte')\n34 functools.partial(, 'Odte')\n35 functools.partial(, 'Odte')\n36 functools.partial(, 'Odte')\n37 functools.partial(, 'Odte')\n38 functools.partial(, 'Odte')\n39 functools.partial(, 'Odte')\n40 functools.partial(, 'Odte')\n41 functools.partial(, 'Odte')\n42 functools.partial(, 'Odte')\n" - } - ], + "outputs": [], "source": [ "from sklearn.utils.estimator_checks import check_estimator\n", "# Make checks one by one\n", @@ -178,6 +148,13 @@ } ], "metadata": { + "interpreter": { + "hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939" + }, + "kernelspec": { + "display_name": "Python 3.9.2 64-bit ('general': venv)", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -188,14 +165,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6-final" + "version": "3.9.7" }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39", - "display_name": "Python 3.7.6 64-bit ('general': venv)" - } + "orig_nbformat": 2 }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/requirements.txt b/requirements.txt index 35bba27..238c39c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -stree \ No newline at end of file +stree>1.2.2 \ No newline at end of file