From 8bef3efc03e2665d0b9331f214e4fa55fd381628 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Thu, 4 Nov 2021 10:21:38 +0100
Subject: [PATCH] Update requirements for tests

---
 notebooks/benchmark.ipynb | 81 ++++++++++++---------------------------
 notebooks/wine_iris.ipynb | 77 ++++++++++++-------------------------
 requirements.txt          |  2 +-
 3 files changed, 51 insertions(+), 109 deletions(-)

diff --git a/notebooks/benchmark.ipynb b/notebooks/benchmark.ipynb
index 0063a25..a16d30d 100644
--- a/notebooks/benchmark.ipynb
+++ b/notebooks/benchmark.ipynb
@@ -17,20 +17,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "#\n",
     "# Google Colab setup\n",
     "#\n",
-    "#!pip install git+https://github.com/doctorado-ml/odte\n",
-    "#!pip install git+https://github.com/doctorado-ml/stree"
+    "#!pip install git+https://github.com/doctorado-ml/odte\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -47,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,17 +65,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "2020-07-04 21:56:25\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
    ]
@@ -90,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -102,17 +95,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "Fraud: 0.173% 492\nValid: 99.827% 284,315\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
     "print(\"Valid: {0:.3f}% {1:,}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))"
@@ -120,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -132,17 +119,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "X shape: (284807, 29)\ny shape: (284807,)\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Remove unneeded features\n",
     "y = df.Class.values\n",
@@ -159,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -170,7 +151,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -180,7 +161,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -190,7 +171,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -200,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -210,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -220,7 +201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +218,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -262,17 +243,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "************************** Linear Tree **********************\nTrain Model Linear Tree took: 14.81 seconds\n=========== Linear Tree - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   1.000000  1.000000  1.000000    199020\n           1   1.000000  1.000000  1.000000       344\n\n    accuracy                       1.000000    199364\n   macro avg   1.000000  1.000000  1.000000    199364\nweighted avg   1.000000  1.000000  1.000000    199364\n\n=========== Linear Tree - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999578  0.999613  0.999596     85295\n           1   0.772414  0.756757  0.764505       148\n\n    accuracy                       0.999192     85443\n   macro avg   0.885996  0.878185  0.882050     85443\nweighted avg   0.999184  0.999192  0.999188     85443\n\nConfusion Matrix in Train\n[[199020      0]\n [     0    344]]\nConfusion Matrix in Test\n[[85262    33]\n [   36   112]]\n************************** Random Forest **********************\nTrain Model Random Forest took: 172.6 seconds\n=========== Random Forest - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   1.000000  1.000000  1.000000    199020\n           1   1.000000  1.000000  1.000000       344\n\n    accuracy                       1.000000    199364\n   macro avg   1.000000  1.000000  1.000000    199364\nweighted avg   1.000000  1.000000  1.000000    199364\n\n=========== Random Forest - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999660  0.999965  0.999812     85295\n           1   0.975410  0.804054  0.881481       148\n\n    accuracy                       0.999625     85443\n   macro avg   0.987535  0.902009  0.940647     85443\nweighted avg   0.999618  0.999625  0.999607     85443\n\nConfusion Matrix in Train\n[[199020      0]\n [     0    344]]\nConfusion Matrix in Test\n[[85292     3]\n [   29   119]]\n************************** Stree (SVM Tree) **********************\nTrain Model Stree (SVM Tree) took: 39.26 seconds\n=========== Stree (SVM Tree) - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999623  0.999864  0.999744    199020\n           1   0.908784  0.781977  0.840625       344\n\n    accuracy                       0.999488    199364\n   macro avg   0.954204  0.890921  0.920184    199364\nweighted avg   0.999467  0.999488  0.999469    199364\n\n=========== Stree (SVM Tree) - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999637  0.999918  0.999777     85295\n           1   0.943548  0.790541  0.860294       148\n\n    accuracy                       0.999555     85443\n   macro avg   0.971593  0.895229  0.930036     85443\nweighted avg   0.999540  0.999555  0.999536     85443\n\nConfusion Matrix in Train\n[[198993     27]\n [    75    269]]\nConfusion Matrix in Test\n[[85288     7]\n [   31   117]]\n************************** AdaBoost model **********************\nTrain Model AdaBoost model took: 49.55 seconds\n=========== AdaBoost model - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999392  0.999678  0.999535    199020\n           1   0.777003  0.648256  0.706815       344\n\n    accuracy                       0.999072    199364\n   macro avg   0.888198  0.823967  0.853175    199364\nweighted avg   0.999008  0.999072  0.999030    199364\n\n=========== AdaBoost model - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999484  0.999707  0.999596     85295\n           1   0.806202  0.702703  0.750903       148\n\n    accuracy                       0.999192     85443\n   macro avg   0.902843  0.851205  0.875249     85443\nweighted avg   0.999149  0.999192  0.999165     85443\n\nConfusion Matrix in Train\n[[198956     64]\n [   121    223]]\nConfusion Matrix in Test\n[[85270    25]\n [   44   104]]\n************************** Odte model **********************\nTrain Model Odte model took: 5.758e+03 seconds\n=========== Odte model - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   0.998725  0.999990  0.999357    199020\n           1   0.978261  0.261628  0.412844       344\n\n    accuracy                       0.998716    199364\n   macro avg   0.988493  0.630809  0.706101    199364\nweighted avg   0.998690  0.998716  0.998345    199364\n\n=========== Odte model - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.998794  0.999988  0.999391     85295\n           1   0.978261  0.304054  0.463918       148\n\n    accuracy                       0.998783     85443\n   macro avg   0.988527  0.652021  0.731654     85443\nweighted avg   0.998758  0.998783  0.998463     85443\n\nConfusion Matrix in Train\n[[199018      2]\n [   254     90]]\nConfusion Matrix in Test\n[[85294     1]\n [  103    45]]\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Train & Test models\n",
     "models = {\n",
@@ -293,17 +268,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 172.611 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time:  14.81 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 172.61 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time:  39.26 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time:  49.55 seconds\t f1: 0.7509\nModel: Odte model\t Time: 5758.26 seconds\t f1: 0.4639\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(\"*\"*110)\n",
     "print(f\"*The best f1 model is {best_model}, with a f1 score: {best_f1:.4} in {best_time:.6} seconds with {train_size:,} samples in train dataset\")\n",
@@ -416,4 +385,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/notebooks/wine_iris.ipynb b/notebooks/wine_iris.ipynb
index 53157eb..f868ee7 100644
--- a/notebooks/wine_iris.ipynb
+++ b/notebooks/wine_iris.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -47,17 +47,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.18 seconds\nTraining odte...\nScore: 100.000 in 1.33 seconds\nTraining adaboost...\nScore: 94.444 in 0.62 seconds\nTraining bagging...\nScore: 100.000 in 2.88 seconds\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
     "for clf_type, item in clf.items():\n",
@@ -69,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -80,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -94,17 +88,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 100.000 in 0.12 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 100.000 in 0.13 seconds\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
     "for clf_type, item in clf.items():\n",
@@ -116,17 +104,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "{'fit_time': array([0.22121811, 0.21985221, 0.19185114, 0.19187999, 0.20067477]), 'score_time': array([0.01268458, 0.01461887, 0.01160598, 0.01308703, 0.01070738]), 'test_score': array([1.        , 1.        , 1.        , 0.93333333, 1.        ]), 'train_score': array([0.98333333, 0.96666667, 0.99166667, 0.99166667, 0.975     ])}\n98.667 +- 0.027\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
     "print(cross)\n",
@@ -135,17 +117,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "{'fit_time': array([0.02130818, 0.02036786, 0.02829814, 0.02326989, 0.03807497]), 'score_time': array([0.00140715, 0.00173712, 0.00199389, 0.00132608, 0.00199199]), 'test_score': array([1.        , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95      ])}\n95.333 +- 0.027\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
     "print(cross)\n",
@@ -154,17 +130,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12b593290>, 'Odte')\n2 functools.partial(<function check_estimators_dtypes at 0x12b58d3b0>, 'Odte')\n3 functools.partial(<function check_fit_score_takes_y at 0x12b58d290>, 'Odte')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12b586b90>, 'Odte')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12b586cb0>, 'Odte')\n6 functools.partial(<function check_sample_weights_list at 0x12b586dd0>, 'Odte')\n7 functools.partial(<function check_sample_weights_shape at 0x12b586ef0>, 'Odte')\n8 functools.partial(<function check_sample_weights_invariance at 0x12b58a050>, 'Odte')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12b5913b0>, 'Odte')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12b5913b0>, 'Odte', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12b58a200>, 'Odte')\n12 functools.partial(<function check_dtype_object at 0x12b58a170>, 'Odte')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12b58d4d0>, 'Odte')\n14 functools.partial(<function check_pipeline_consistency at 0x12b58d170>, 'Odte')\n15 functools.partial(<function check_estimators_nan_inf at 0x12b58d5f0>, 'Odte')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12b593170>, 'Odte')\n17 functools.partial(<function check_estimator_sparse_data at 0x12b586a70>, 'Odte')\n18 functools.partial(<function check_estimators_pickle at 0x12b58d830>, 'Odte')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12b5934d0>, 'Odte')\n20 functools.partial(<function check_classifiers_one_label at 0x12b58def0>, 'Odte')\n21 functools.partial(<function check_classifiers_classes at 0x12b591950>, 'Odte')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12b58d950>, 'Odte')\n23 functools.partial(<function check_classifiers_train at 0x12b591050>, 'Odte')\n24 functools.partial(<function check_classifiers_train at 0x12b591050>, 'Odte', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12b591050>, 'Odte', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12b593f80>, 'Odte')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12b57eb90>, 'Odte')\n28 functools.partial(<function check_supervised_y_2d at 0x12b5915f0>, 'Odte')\n29 functools.partial(<function check_estimators_unfitted at 0x12b5914d0>, 'Odte')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12b593b00>, 'Odte')\n31 functools.partial(<function check_decision_proba_consistency at 0x12b5970e0>, 'Odte')\n32 functools.partial(<function check_fit2d_predict1d at 0x12b58a710>, 'Odte')\n33 functools.partial(<function check_methods_subset_invariance at 0x12b58a8c0>, 'Odte')\n34 functools.partial(<function check_fit2d_1sample at 0x12b58a9e0>, 'Odte')\n35 functools.partial(<function check_fit2d_1feature at 0x12b58ab00>, 'Odte')\n36 functools.partial(<function check_fit1d at 0x12b58ac20>, 'Odte')\n37 functools.partial(<function check_get_params_invariance at 0x12b593d40>, 'Odte')\n38 functools.partial(<function check_set_params at 0x12b593e60>, 'Odte')\n39 functools.partial(<function check_dict_unchanged at 0x12b58a320>, 'Odte')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12b58a5f0>, 'Odte')\n41 functools.partial(<function check_fit_idempotent at 0x12b597290>, 'Odte')\n42 functools.partial(<function check_n_features_in at 0x12b597320>, 'Odte')\n"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from sklearn.utils.estimator_checks import check_estimator\n",
     "# Make checks one by one\n",
@@ -178,6 +148,13 @@
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "da86226729227d0e8962a5ec29ea906307507ca2c30ceaaf651c09a617630939"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.2 64-bit ('general': venv)",
+   "name": "python3"
+  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
@@ -188,14 +165,10 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6-final"
+   "version": "3.9.7"
   },
-  "orig_nbformat": 2,
-  "kernelspec": {
-   "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
-   "display_name": "Python 3.7.6 64-bit ('general': venv)"
-  }
+  "orig_nbformat": 2
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/requirements.txt b/requirements.txt
index 35bba27..238c39c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-stree
\ No newline at end of file
+stree>1.2.2
\ No newline at end of file