From d9f5bfee6c1ec9248d52333e5b390a189a0d5bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 27 Apr 2021 09:50:02 +0200 Subject: [PATCH] Add ticks to report_score --- FeatureSelection.ipynb | 586 +++++++++++++++++++++++++++-------------- report_score.py | 60 ++++- 2 files changed, 446 insertions(+), 200 deletions(-) diff --git a/FeatureSelection.ipynb b/FeatureSelection.ipynb index f2278e5..f3b5428 100644 --- a/FeatureSelection.ipynb +++ b/FeatureSelection.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 59, - "id": "intimate-convergence", + "execution_count": 1, + "id": "celtic-handbook", "metadata": {}, "outputs": [], "source": [ @@ -23,8 +23,8 @@ }, { "cell_type": "code", - "execution_count": 26, - "id": "specified-western", + "execution_count": 2, + "id": "ordinary-banana", "metadata": {}, "outputs": [ { @@ -45,7 +45,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "governing-botswana", + "id": "utility-masters", "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "warming-corner", + "id": "collaborative-baghdad", "metadata": {}, "outputs": [ { @@ -71,10 +71,37 @@ "output_type": "stream", "text": [ "low-res-spect : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.44 seconds\n", - "KBest-10 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.544225 in 2.89 seconds\n", - "KBest-50 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862511 in 16.75 seconds\n", - "KBest-75 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 20.00 seconds\n", - "KBest-76 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898237 in 20.59 seconds\n" + "KBest-10 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.544225 in 2.88 seconds\n", + "KBest-50 : 57, 31, 1714, " + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mXt\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnum_features\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"all\"\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mSelectKBest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{label:15s}: \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflush\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mspent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mnow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{accuracy:.6f} in {spent:5.2f} seconds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(X, y, kernel, C)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mkfold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkfold\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"end: \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflush\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,\n\u001b[0m\u001b[1;32m 441\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'score'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36mcross_validate\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)\u001b[0m\n\u001b[1;32m 244\u001b[0m parallel = Parallel(n_jobs=n_jobs, verbose=verbose,\n\u001b[1;32m 245\u001b[0m pre_dispatch=pre_dispatch)\n\u001b[0;32m--> 246\u001b[0;31m results = parallel(\n\u001b[0m\u001b[1;32m 247\u001b[0m delayed(_fit_and_score)(\n\u001b[1;32m 248\u001b[0m \u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1043\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1044\u001b[0;31m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1045\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1046\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 775\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 777\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 778\u001b[0m \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 779\u001b[0m \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 208\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 209\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 572\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 573\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;31m# change the default number of processes to -1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m return [func(*args, **kwargs)\n\u001b[0m\u001b[1;32m 263\u001b[0m for func, args, kwargs in self.items]\n\u001b[1;32m 264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;31m# change the default number of processes to -1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m return [func(*args, **kwargs)\n\u001b[0m\u001b[1;32m 263\u001b[0m for func, args, kwargs in self.items]\n\u001b[1;32m 264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/fixes.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mconfig_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 593\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 594\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Code/STree/stree/Strees.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_features_in_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_features_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_initialize_max_features\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"root\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Code/STree/stree/Strees.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, X, y, sample_weight, depth, title)\u001b[0m\n\u001b[1;32m 710\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 712\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 713\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_impurity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplitter_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpartition_impurity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/svm/_classes.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_intercept\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintercept_scaling\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpenalty\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/svm/_base.py\u001b[0m in \u001b[0;36m_fit_liblinear\u001b[0;34m(X, y, C, fit_intercept, intercept_scaling, class_weight, penalty, dual, verbose, max_iter, tol, random_state, multi_class, loss, epsilon, sample_weight)\u001b[0m\n\u001b[1;32m 973\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 974\u001b[0m \u001b[0msolver_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_liblinear_solver_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpenalty\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdual\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 975\u001b[0;31m raw_coef_, n_iter_ = liblinear.train_wrap(\n\u001b[0m\u001b[1;32m 976\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_ind\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misspmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msolver_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0mclass_weight_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_iter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrnd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'i'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -97,33 +124,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "confidential-opportunity", + "execution_count": null, + "id": "liquid-hotel", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "KBest-60: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 16.53 seconds\n", - "KBest-61: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898272 in 17.12 seconds\n", - "KBest-62: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.902028 in 17.24 seconds\n", - "KBest-63: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898254 in 17.25 seconds\n", - "KBest-64: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.901993 in 17.96 seconds\n", - "KBest-65: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896367 in 17.54 seconds\n", - "KBest-66: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 17.63 seconds\n", - "KBest-67: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883160 in 18.05 seconds\n", - "KBest-68: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.887004 in 18.01 seconds\n", - "KBest-69: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890760 in 18.71 seconds\n", - "KBest-70: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 18.11 seconds\n", - "KBest-71: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 19.67 seconds\n", - "KBest-72: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 19.16 seconds\n", - "KBest-73: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896350 in 19.47 seconds\n", - "KBest-74: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 19.93 seconds\n", - "KBest-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 19.95 seconds\n" - ] - } - ], + "outputs": [], "source": [ "for num_features in range(60, 76):\n", " now = time.time()\n", @@ -136,32 +140,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "recent-treasury", + "execution_count": null, + "id": "likely-single", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "KBest-30: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.849356 in 11.43 seconds\n", - "KBest-31: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855017 in 11.54 seconds\n", - "KBest-32: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.90 seconds\n", - "KBest-33: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.98 seconds\n", - "KBest-34: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 12.07 seconds\n", - "KBest-35: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855052 in 12.56 seconds\n", - "KBest-36: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 12.59 seconds\n", - "KBest-37: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864468 in 13.06 seconds\n", - "KBest-38: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 13.55 seconds\n", - "KBest-39: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 13.75 seconds\n", - "KBest-40: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864415 in 13.58 seconds\n", - "KBest-41: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.858790 in 14.13 seconds\n", - "KBest-42: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862546 in 14.87 seconds\n", - "KBest-43: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.856886 in 15.23 seconds\n", - "KBest-44: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.853095 in 15.66 seconds\n" - ] - } - ], + "outputs": [], "source": [ "for num_features in range(30, 45):\n", " now = time.time()\n", @@ -174,28 +156,10 @@ }, { "cell_type": "code", - "execution_count": 30, - "id": "indirect-poker", + "execution_count": null, + "id": "defensive-affiliate", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "KBest-linear-84: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.51 seconds\n", - "KBest-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.58 seconds\n", - "KBest-linear-86: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 22.06 seconds\n", - "KBest-linear-87: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.44 seconds\n", - "KBest-linear-88: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.84 seconds\n", - "KBest-linear-89: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 22.40 seconds\n", - "KBest-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.31 seconds\n", - "KBest-linear-91: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.49 seconds\n", - "KBest-linear-92: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.48 seconds\n", - "KBest-linear-93: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.19 seconds\n", - "KBest-linear-94: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 23.67 seconds\n" - ] - } - ], + "outputs": [], "source": [ "kernel=\"linear\"\n", "for num_features in range(84, 95):\n", @@ -209,21 +173,10 @@ }, { "cell_type": "code", - "execution_count": 31, - "id": "moving-hamburg", + "execution_count": null, + "id": "catholic-bulletin", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mui-linear-50: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 12.37 seconds\n", - "mui-linear-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 19.47 seconds\n", - "mui-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.892576 in 21.31 seconds\n", - "mui-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.06 seconds\n" - ] - } - ], + "outputs": [], "source": [ "kernel=\"linear\"\n", "mui = mutual_info_classif(Xt, yt, random_state=random_state)\n", @@ -239,21 +192,10 @@ }, { "cell_type": "code", - "execution_count": 49, - "id": "closed-policy", + "execution_count": null, + "id": "raised-supplier", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "From model-linear-C=0.01-X.shape=(531, 16): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886898 in 1.91 seconds\n", - "From model-linear-C=0.10-X.shape=(531, 62): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883213 in 10.30 seconds\n", - "From model-linear-C=1.00-X.shape=(531, 93): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890725 in 20.57 seconds\n", - "From model-linear-C=5.00-X.shape=(531, 100): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.83 seconds\n" - ] - } - ], + "outputs": [], "source": [ "# From model\n", "warnings.filterwarnings(\"ignore\",message=\"Liblinear failed to converge\")\n", @@ -270,21 +212,10 @@ }, { "cell_type": "code", - "execution_count": 104, - "id": "chubby-patio", + "execution_count": null, + "id": "complimentary-external", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(12,)" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "lsvc = LinearSVC(C=.01, penalty=\"l1\", dual=False, random_state=random_state).fit(Xt, yt)\n", "res = np.linalg.norm(lsvc.coef_, axis=0, ord=1)\n", @@ -293,28 +224,10 @@ }, { "cell_type": "code", - "execution_count": 115, - "id": "refined-display", + "execution_count": null, + "id": "broadband-mouth", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(531, 48)\n" - ] - }, - { - "data": { - "text/plain": [ - "(100,)" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "warnings.filterwarnings(\"ignore\",message=\"lbfgs failed to converge\")\n", "logi = LogisticRegression(C=1, dual=False, random_state=random_state).fit(Xt, yt)\n", @@ -329,29 +242,18 @@ }, { "cell_type": "code", - "execution_count": 118, - "id": "hydraulic-labor", + "execution_count": null, + "id": "finite-election", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 1, 2, 3, 4, 5, 6]" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "list(range(7))" ] }, { "cell_type": "code", - "execution_count": 127, - "id": "settled-rolling", + "execution_count": null, + "id": "waiting-picking", "metadata": {}, "outputs": [], "source": [ @@ -360,50 +262,352 @@ }, { "cell_type": "code", - "execution_count": 128, - "id": "instructional-democracy", + "execution_count": null, + "id": "psychological-customs", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([90, 92, 93, 94, 95])" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "select" ] }, { "cell_type": "code", - "execution_count": 126, - "id": "detailed-optimum", + "execution_count": null, + "id": "floating-software", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([90, 92, 93, 94, 95])" - ] - }, - "execution_count": 126, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "select.get_support(indices=True)" ] }, + { + "cell_type": "code", + "execution_count": 9, + "id": "attractive-painting", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_selection import f_classif" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "coral-teaching", + "metadata": {}, + "outputs": [], + "source": [ + "fanova, pvalue = f_classif(Xt, yt)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "gothic-romantic", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.09308168, 1.62547111, 0.69978451, 6.20443793,\n", + " 6.18658597, 7.79765435, 9.90395874, 69.43497462,\n", + " 72.01132136, 78.22168481, 85.92297455, 90.04448857,\n", + " 89.86335217, 88.31587961, 86.87025456, 87.14549907,\n", + " 110.58770486, 137.43089999, 176.43757295, 183.31628291,\n", + " 190.60432118, 193.36310136, 192.22003186, 196.29320987,\n", + " 200.99032733, 189.23476493, 178.80842033, 184.2189575 ,\n", + " 179.72263542, 170.99562705, 150.10468598, 129.17773187,\n", + " 113.53668942, 101.68225009, 84.17902557, 76.03181091,\n", + " 59.47574346, 42.59668791, 33.55829903, 32.91344733,\n", + " 29.49142934, 33.57357444, 36.87268606, 38.44410931,\n", + " 45.49741621, 44.48105541, 37.48298365, 41.47147282,\n", + " 39.88554644, 43.54150269, 45.96034055, 168.0734794 ,\n", + " 173.33864515, 132.98785049, 74.22525889, 31.35844666,\n", + " 30.41731535, 43.95412897, 54.16843899, 61.6216788 ,\n", + " 80.30044086, 92.87208486, 96.59526304, 102.89702793,\n", + " 107.36928433, 114.34040823, 116.86370901, 119.36305571,\n", + " 120.14502511, 127.7704721 , 131.22937066, 135.71075677,\n", + " 139.37315355, 140.72463693, 146.35335 , 155.10945271,\n", + " 162.08444927, 174.68679112, 187.9929742 , 192.26347333,\n", + " 195.97748723, 206.21832653, 234.3930169 , 227.20791092,\n", + " 234.70071448, 246.7266392 , 257.46797395, 267.18172811,\n", + " 270.07487349, 284.47388239, 289.76578077, 284.68419857,\n", + " 293.37447937, 295.09981245, 293.87971764, 290.1120576 ,\n", + " 281.75957225, 286.10121621, 268.19098519, 264.15298645])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fanova" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "comic-employment", + "metadata": {}, + "outputs": [], + "source": [ + "var1 = np.var(X[:,0])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "necessary-desire", + "metadata": {}, + "outputs": [], + "source": [ + "vart = np.var(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "rural-compact", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9999999469373468" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var1/vart" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "stock-pakistan", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.000000053062656" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vart / var1" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "enormous-potential", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(178, 13)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.datasets import load_wine\n", + "X, y = load_wine(return_X_y=True)\n", + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "competent-lebanon", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import stats" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "honest-astrology", + "metadata": {}, + "outputs": [], + "source": [ + "args = [X[y==k] for k in np.unique(y)]\n", + "resf, resp =stats.f_oneway(*args)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "instrumental-france", + "metadata": {}, + "outputs": [], + "source": [ + "res2f, res2p = f_classif(X, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "offensive-stanley", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n", + " 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n", + " 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n", + " 207.9203739 ])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2f" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "behind-harvey", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3.31950380e-36, 4.12722880e-14, 4.14996797e-06, 9.44447294e-14,\n", + " 8.96339544e-06, 2.13767002e-28, 3.59858583e-50, 3.88804090e-11,\n", + " 5.12535874e-12, 1.16200802e-33, 5.91766222e-30, 1.39310496e-44,\n", + " 5.78316836e-47])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2p" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "electronic-crazy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n", + " 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n", + " 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n", + " 207.9203739 ])" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2f" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "successful-envelope", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3.31950380e-36, 4.12722880e-14, 4.14996797e-06, 9.44447294e-14,\n", + " 8.96339544e-06, 2.13767002e-28, 3.59858583e-50, 3.88804090e-11,\n", + " 5.12535874e-12, 1.16200802e-33, 5.91766222e-30, 1.39310496e-44,\n", + " 5.78316836e-47])" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2p" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "forced-replacement", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n", + " 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n", + " 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n", + " 207.9203739 ])" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sel=SelectKBest(k=4)\n", + "sel.fit(X, y)\n", + "sel.scores_" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "intimate-procedure", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 6, 11, 12])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sel.get_support(indices=True)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "recent-paraguay", + "id": "earned-petite", "metadata": {}, "outputs": [], "source": [] @@ -430,4 +634,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/report_score.py b/report_score.py index ba699e1..16e536b 100644 --- a/report_score.py +++ b/report_score.py @@ -10,6 +10,7 @@ from sklearn.model_selection import KFold, cross_validate from experimentation.Sets import Datasets from experimentation.Database import MySQL from wodt import TreeClassifier +from experimentation.Utils import TextColor def parse_arguments(): @@ -178,7 +179,36 @@ def store_string( return result +def compute_status(dbh, name, model, accuracy): + better_default = "\N{heavy check mark}" + better_stree = TextColor.GREEN + "\N{heavy check mark}" + TextColor.ENDC + best = TextColor.RED + "\N{black star}" + TextColor.ENDC + best_default, _ = get_best_score(dbh, name, model) + best_stree, _ = get_best_score(dbh, name, "stree") + best_all, _ = get_best_score(dbh, name, models_tree) + status = better_default if accuracy >= best_default else " " + status = better_stree if accuracy >= best_stree else status + status = best if accuracy >= best_all else status + return status + + +def get_best_score(dbh, name, model): + record = dbh.find_best(name, model, "crossval") + accuracy = record[5] if record is not None else 0.0 + acc_std = record[11] if record is not None else 0.0 + return accuracy, acc_std + + random_seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] +models_tree = [ + "stree", + "stree_default", + "wodt", + "j48svm", + "oc1", + "cart", + "baseRaF", +] standardize = False (set_of_files, model, dataset, sql, normalize, parameters) = parse_arguments() dbh = MySQL() @@ -206,17 +236,22 @@ if dataset == "all": "Parameters", ] header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 15, 10] + parameters = json.dumps(json.loads(parameters)) + if parameters != "{}" and len(parameters) > 10: + header_lengths.pop() + header_lengths.append(len(parameters)) line_col = "" for field, underscore in zip(header_cols, header_lengths): print(f"{field:{underscore}s} ", end="") line_col += "=" * underscore + " " print(f"\n{line_col}") for dataset in dt: - X, y = dt.load(dataset[0]) # type: ignore + name = dataset[0] + X, y = dt.load(name) # type: ignore samples, features = X.shape classes = len(np.unique(y)) print( - f"{dataset[0]:30s} {samples:5d} {features:3d} {classes:3d} ", + f"{name:30s} {samples:5d} {features:3d} {classes:3d} ", end="", ) scores, times, hyperparameters, nodes, leaves, depth = process_dataset( @@ -232,28 +267,35 @@ if dataset == "all": f"{nodes_item:7.2f} {leaves_item:7.2f} {depth_item:7.2f} ", end="", ) - print(f"{np.mean(scores):8.6f}±{np.std(scores):6.4f} ", end="") + accuracy = np.mean(scores) + status = ( + compute_status(dbh, name, model, accuracy) + if model == "stree_default" + else " " + ) + print(f"{accuracy:8.6f}±{np.std(scores):6.4f}{status}", end="") print(f"{np.mean(times):8.6f}±{np.std(times):6.4f} {hyperparameters}") if sql: command = store_string( - dataset[0], model, scores, times, hyperparameters, complexity + name, model, scores, times, hyperparameters, complexity ) print(command, file=sql_output) else: scores, times, hyperparameters, nodes, leaves, depth = process_dataset( dataset, verbose=True, model=model, params=parameters ) - record = dbh.find_best(dataset, model, "crossval") + best_accuracy, acc_best_std = get_best_score(dbh, dataset, model) accuracy = np.mean(scores) - accuracy_best = record[5] if record is not None else 0.0 - acc_best_std = record[11] if record is not None else 0.0 print(f"* Normalize/Standard.: {normalize} / {standardize}") print( f"* Accuracy Computed .: {accuracy:6.4f}±{np.std(scores):6.4f} " f"{np.mean(times):5.3f}s" ) - print(f"* Accuracy Best .....: {accuracy_best:6.4f}±{acc_best_std:6.4f}") - print(f"* Difference ........: {accuracy_best - accuracy:6.4f}") + print(f"* Best Accuracy model: {best_accuracy:6.4f}±{acc_best_std:6.4f}") + print(f"* Difference ........: {best_accuracy - accuracy:6.4f}") + best_accuracy, acc_best_std = get_best_score(dbh, dataset, models_tree) + print(f"* Best Accuracy .....: {best_accuracy:6.4f}±{acc_best_std:6.4f}") + print(f"* Difference ........: {best_accuracy - accuracy:6.4f}") print( f"* Nodes/Leaves/Depth : {np.mean(nodes):.2f} {np.mean(leaves):.2f} " f"{np.mean(depth):.2f} "