Add ticks to report_score

This commit is contained in:
2021-04-27 09:50:02 +02:00
parent b061d40355
commit d9f5bfee6c
2 changed files with 446 additions and 200 deletions

View File

@@ -2,8 +2,8 @@
"cells": [
{
"cell_type": "code",
"execution_count": 59,
"id": "intimate-convergence",
"execution_count": 1,
"id": "celtic-handbook",
"metadata": {},
"outputs": [],
"source": [
@@ -23,8 +23,8 @@
},
{
"cell_type": "code",
"execution_count": 26,
"id": "specified-western",
"execution_count": 2,
"id": "ordinary-banana",
"metadata": {},
"outputs": [
{
@@ -45,7 +45,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "governing-botswana",
"id": "utility-masters",
"metadata": {},
"outputs": [],
"source": [
@@ -63,7 +63,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "warming-corner",
"id": "collaborative-baghdad",
"metadata": {},
"outputs": [
{
@@ -71,10 +71,37 @@
"output_type": "stream",
"text": [
"low-res-spect : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.44 seconds\n",
"KBest-10 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.544225 in 2.89 seconds\n",
"KBest-50 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862511 in 16.75 seconds\n",
"KBest-75 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 20.00 seconds\n",
"KBest-76 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898237 in 20.59 seconds\n"
"KBest-10 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.544225 in 2.88 seconds\n",
"KBest-50 : 57, 31, 1714, "
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-28dc57ba2f31>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mXt\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnum_features\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"all\"\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mSelectKBest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{label:15s}: \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflush\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0myt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mspent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mnow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{accuracy:.6f} in {spent:5.2f} seconds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-3-18608dfb1f5a>\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(X, y, kernel, C)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mkfold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkfold\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"end: \"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflush\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,\n\u001b[0m\u001b[1;32m 441\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'score'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36mcross_validate\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)\u001b[0m\n\u001b[1;32m 244\u001b[0m parallel = Parallel(n_jobs=n_jobs, verbose=verbose,\n\u001b[1;32m 245\u001b[0m pre_dispatch=pre_dispatch)\n\u001b[0;32m--> 246\u001b[0;31m results = parallel(\n\u001b[0m\u001b[1;32m 247\u001b[0m delayed(_fit_and_score)(\n\u001b[1;32m 248\u001b[0m \u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1043\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1044\u001b[0;31m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1045\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1046\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 775\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 777\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 778\u001b[0m \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 779\u001b[0m \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 208\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 209\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 572\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 573\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;31m# change the default number of processes to -1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m return [func(*args, **kwargs)\n\u001b[0m\u001b[1;32m 263\u001b[0m for func, args, kwargs in self.items]\n\u001b[1;32m 264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;31m# change the default number of processes to -1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m return [func(*args, **kwargs)\n\u001b[0m\u001b[1;32m 263\u001b[0m for func, args, kwargs in self.items]\n\u001b[1;32m 264\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/utils/fixes.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mconfig_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 593\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 594\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Code/STree/stree/Strees.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_features_in_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_features_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_initialize_max_features\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 656\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"root\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 657\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Code/STree/stree/Strees.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, X, y, sample_weight, depth, title)\u001b[0m\n\u001b[1;32m 710\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mXs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 712\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 713\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_impurity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplitter_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpartition_impurity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/svm/_classes.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_intercept\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintercept_scaling\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclass_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpenalty\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/py392/lib/python3.9/site-packages/sklearn/svm/_base.py\u001b[0m in \u001b[0;36m_fit_liblinear\u001b[0;34m(X, y, C, fit_intercept, intercept_scaling, class_weight, penalty, dual, verbose, max_iter, tol, random_state, multi_class, loss, epsilon, sample_weight)\u001b[0m\n\u001b[1;32m 973\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 974\u001b[0m \u001b[0msolver_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_liblinear_solver_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_class\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpenalty\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdual\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 975\u001b[0;31m raw_coef_, n_iter_ = liblinear.train_wrap(\n\u001b[0m\u001b[1;32m 976\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_ind\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misspmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msolver_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0mclass_weight_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_iter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrnd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'i'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
@@ -97,33 +124,10 @@
},
{
"cell_type": "code",
"execution_count": 5,
"id": "confidential-opportunity",
"execution_count": null,
"id": "liquid-hotel",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-60: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 16.53 seconds\n",
"KBest-61: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898272 in 17.12 seconds\n",
"KBest-62: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.902028 in 17.24 seconds\n",
"KBest-63: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898254 in 17.25 seconds\n",
"KBest-64: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.901993 in 17.96 seconds\n",
"KBest-65: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896367 in 17.54 seconds\n",
"KBest-66: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 17.63 seconds\n",
"KBest-67: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883160 in 18.05 seconds\n",
"KBest-68: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.887004 in 18.01 seconds\n",
"KBest-69: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890760 in 18.71 seconds\n",
"KBest-70: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 18.11 seconds\n",
"KBest-71: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 19.67 seconds\n",
"KBest-72: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 19.16 seconds\n",
"KBest-73: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896350 in 19.47 seconds\n",
"KBest-74: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 19.93 seconds\n",
"KBest-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 19.95 seconds\n"
]
}
],
"outputs": [],
"source": [
"for num_features in range(60, 76):\n",
" now = time.time()\n",
@@ -136,32 +140,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "recent-treasury",
"execution_count": null,
"id": "likely-single",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-30: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.849356 in 11.43 seconds\n",
"KBest-31: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855017 in 11.54 seconds\n",
"KBest-32: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.90 seconds\n",
"KBest-33: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.98 seconds\n",
"KBest-34: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 12.07 seconds\n",
"KBest-35: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855052 in 12.56 seconds\n",
"KBest-36: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 12.59 seconds\n",
"KBest-37: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864468 in 13.06 seconds\n",
"KBest-38: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 13.55 seconds\n",
"KBest-39: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 13.75 seconds\n",
"KBest-40: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864415 in 13.58 seconds\n",
"KBest-41: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.858790 in 14.13 seconds\n",
"KBest-42: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862546 in 14.87 seconds\n",
"KBest-43: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.856886 in 15.23 seconds\n",
"KBest-44: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.853095 in 15.66 seconds\n"
]
}
],
"outputs": [],
"source": [
"for num_features in range(30, 45):\n",
" now = time.time()\n",
@@ -174,28 +156,10 @@
},
{
"cell_type": "code",
"execution_count": 30,
"id": "indirect-poker",
"execution_count": null,
"id": "defensive-affiliate",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-linear-84: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.51 seconds\n",
"KBest-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.58 seconds\n",
"KBest-linear-86: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 22.06 seconds\n",
"KBest-linear-87: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.44 seconds\n",
"KBest-linear-88: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.84 seconds\n",
"KBest-linear-89: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 22.40 seconds\n",
"KBest-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.31 seconds\n",
"KBest-linear-91: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.49 seconds\n",
"KBest-linear-92: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.48 seconds\n",
"KBest-linear-93: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.19 seconds\n",
"KBest-linear-94: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 23.67 seconds\n"
]
}
],
"outputs": [],
"source": [
"kernel=\"linear\"\n",
"for num_features in range(84, 95):\n",
@@ -209,21 +173,10 @@
},
{
"cell_type": "code",
"execution_count": 31,
"id": "moving-hamburg",
"execution_count": null,
"id": "catholic-bulletin",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mui-linear-50: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 12.37 seconds\n",
"mui-linear-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 19.47 seconds\n",
"mui-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.892576 in 21.31 seconds\n",
"mui-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.06 seconds\n"
]
}
],
"outputs": [],
"source": [
"kernel=\"linear\"\n",
"mui = mutual_info_classif(Xt, yt, random_state=random_state)\n",
@@ -239,21 +192,10 @@
},
{
"cell_type": "code",
"execution_count": 49,
"id": "closed-policy",
"execution_count": null,
"id": "raised-supplier",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From model-linear-C=0.01-X.shape=(531, 16): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886898 in 1.91 seconds\n",
"From model-linear-C=0.10-X.shape=(531, 62): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883213 in 10.30 seconds\n",
"From model-linear-C=1.00-X.shape=(531, 93): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890725 in 20.57 seconds\n",
"From model-linear-C=5.00-X.shape=(531, 100): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.83 seconds\n"
]
}
],
"outputs": [],
"source": [
"# From model\n",
"warnings.filterwarnings(\"ignore\",message=\"Liblinear failed to converge\")\n",
@@ -270,21 +212,10 @@
},
{
"cell_type": "code",
"execution_count": 104,
"id": "chubby-patio",
"execution_count": null,
"id": "complimentary-external",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(12,)"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"lsvc = LinearSVC(C=.01, penalty=\"l1\", dual=False, random_state=random_state).fit(Xt, yt)\n",
"res = np.linalg.norm(lsvc.coef_, axis=0, ord=1)\n",
@@ -293,28 +224,10 @@
},
{
"cell_type": "code",
"execution_count": 115,
"id": "refined-display",
"execution_count": null,
"id": "broadband-mouth",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(531, 48)\n"
]
},
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"warnings.filterwarnings(\"ignore\",message=\"lbfgs failed to converge\")\n",
"logi = LogisticRegression(C=1, dual=False, random_state=random_state).fit(Xt, yt)\n",
@@ -329,29 +242,18 @@
},
{
"cell_type": "code",
"execution_count": 118,
"id": "hydraulic-labor",
"execution_count": null,
"id": "finite-election",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4, 5, 6]"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"list(range(7))"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "settled-rolling",
"execution_count": null,
"id": "waiting-picking",
"metadata": {},
"outputs": [],
"source": [
@@ -360,50 +262,352 @@
},
{
"cell_type": "code",
"execution_count": 128,
"id": "instructional-democracy",
"execution_count": null,
"id": "psychological-customs",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([90, 92, 93, 94, 95])"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"select"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "detailed-optimum",
"execution_count": null,
"id": "floating-software",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([90, 92, 93, 94, 95])"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"select.get_support(indices=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "attractive-painting",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.feature_selection import f_classif"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "coral-teaching",
"metadata": {},
"outputs": [],
"source": [
"fanova, pvalue = f_classif(Xt, yt)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "gothic-romantic",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.09308168, 1.62547111, 0.69978451, 6.20443793,\n",
" 6.18658597, 7.79765435, 9.90395874, 69.43497462,\n",
" 72.01132136, 78.22168481, 85.92297455, 90.04448857,\n",
" 89.86335217, 88.31587961, 86.87025456, 87.14549907,\n",
" 110.58770486, 137.43089999, 176.43757295, 183.31628291,\n",
" 190.60432118, 193.36310136, 192.22003186, 196.29320987,\n",
" 200.99032733, 189.23476493, 178.80842033, 184.2189575 ,\n",
" 179.72263542, 170.99562705, 150.10468598, 129.17773187,\n",
" 113.53668942, 101.68225009, 84.17902557, 76.03181091,\n",
" 59.47574346, 42.59668791, 33.55829903, 32.91344733,\n",
" 29.49142934, 33.57357444, 36.87268606, 38.44410931,\n",
" 45.49741621, 44.48105541, 37.48298365, 41.47147282,\n",
" 39.88554644, 43.54150269, 45.96034055, 168.0734794 ,\n",
" 173.33864515, 132.98785049, 74.22525889, 31.35844666,\n",
" 30.41731535, 43.95412897, 54.16843899, 61.6216788 ,\n",
" 80.30044086, 92.87208486, 96.59526304, 102.89702793,\n",
" 107.36928433, 114.34040823, 116.86370901, 119.36305571,\n",
" 120.14502511, 127.7704721 , 131.22937066, 135.71075677,\n",
" 139.37315355, 140.72463693, 146.35335 , 155.10945271,\n",
" 162.08444927, 174.68679112, 187.9929742 , 192.26347333,\n",
" 195.97748723, 206.21832653, 234.3930169 , 227.20791092,\n",
" 234.70071448, 246.7266392 , 257.46797395, 267.18172811,\n",
" 270.07487349, 284.47388239, 289.76578077, 284.68419857,\n",
" 293.37447937, 295.09981245, 293.87971764, 290.1120576 ,\n",
" 281.75957225, 286.10121621, 268.19098519, 264.15298645])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fanova"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "comic-employment",
"metadata": {},
"outputs": [],
"source": [
"var1 = np.var(X[:,0])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "necessary-desire",
"metadata": {},
"outputs": [],
"source": [
"vart = np.var(X)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "rural-compact",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9999999469373468"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"var1/vart"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "stock-pakistan",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.000000053062656"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vart / var1"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "enormous-potential",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(178, 13)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.datasets import load_wine\n",
"X, y = load_wine(return_X_y=True)\n",
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "competent-lebanon",
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "honest-astrology",
"metadata": {},
"outputs": [],
"source": [
"args = [X[y==k] for k in np.unique(y)]\n",
"resf, resp =stats.f_oneway(*args)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "instrumental-france",
"metadata": {},
"outputs": [],
"source": [
"res2f, res2p = f_classif(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "offensive-stanley",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n",
" 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n",
" 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n",
" 207.9203739 ])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res2f"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "behind-harvey",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([3.31950380e-36, 4.12722880e-14, 4.14996797e-06, 9.44447294e-14,\n",
" 8.96339544e-06, 2.13767002e-28, 3.59858583e-50, 3.88804090e-11,\n",
" 5.12535874e-12, 1.16200802e-33, 5.91766222e-30, 1.39310496e-44,\n",
" 5.78316836e-47])"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res2p"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "electronic-crazy",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n",
" 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n",
" 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n",
" 207.9203739 ])"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res2f"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "successful-envelope",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([3.31950380e-36, 4.12722880e-14, 4.14996797e-06, 9.44447294e-14,\n",
" 8.96339544e-06, 2.13767002e-28, 3.59858583e-50, 3.88804090e-11,\n",
" 5.12535874e-12, 1.16200802e-33, 5.91766222e-30, 1.39310496e-44,\n",
" 5.78316836e-47])"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res2p"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "forced-replacement",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([135.07762424, 36.94342496, 13.3129012 , 35.77163741,\n",
" 12.42958434, 93.73300962, 233.92587268, 27.57541715,\n",
" 30.27138317, 120.66401844, 101.31679539, 189.97232058,\n",
" 207.9203739 ])"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sel=SelectKBest(k=4)\n",
"sel.fit(X, y)\n",
"sel.scores_"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "intimate-procedure",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 6, 11, 12])"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sel.get_support(indices=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "recent-paraguay",
"id": "earned-petite",
"metadata": {},
"outputs": [],
"source": []
@@ -430,4 +634,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View File

@@ -10,6 +10,7 @@ from sklearn.model_selection import KFold, cross_validate
from experimentation.Sets import Datasets
from experimentation.Database import MySQL
from wodt import TreeClassifier
from experimentation.Utils import TextColor
def parse_arguments():
@@ -178,7 +179,36 @@ def store_string(
return result
def compute_status(dbh, name, model, accuracy):
    """Return a one-character marker ranking ``accuracy`` against stored results.

    Three reference scores are fetched through ``get_best_score`` for the
    dataset ``name``: the best one stored for ``model``, the best one stored
    for ``"stree"``, and the best one over every entry of ``models_tree``.

    The marker returned is the one attached to the *last* reference, in that
    order, that ``accuracy`` equals or exceeds:

    * plain check mark  -> at least as good as ``model``'s best
    * green check mark  -> at least as good as ``"stree"``'s best
    * red star          -> at least as good as the best of ``models_tree``
    * single space      -> none of the above

    Note that ``get_best_score`` yields 0.0 when nothing is stored, so a
    missing reference is matched by any non-negative accuracy.
    """
    check_mark = "\N{heavy check mark}"
    # (reference handed to get_best_score, marker), weakest tier first.
    tiers = (
        (model, check_mark),
        ("stree", TextColor.GREEN + check_mark + TextColor.ENDC),
        (models_tree, TextColor.RED + "\N{black star}" + TextColor.ENDC),
    )
    # All lookups are issued up front, in tier order, before any comparison.
    thresholds = [get_best_score(dbh, name, reference)[0] for reference, _ in tiers]
    status = " "
    for threshold, (_, marker) in zip(thresholds, tiers):
        if accuracy >= threshold:
            # A later tier overrides an earlier one.
            status = marker
    return status
def get_best_score(dbh, name, model):
    """Look up the best stored "crossval" result for a dataset and model.

    Parameters
    ----------
    dbh : object exposing ``find_best(name, model, "crossval")``
    name : dataset identifier forwarded to ``find_best``
    model : model identifier forwarded unchanged to ``find_best``

    Returns
    -------
    tuple
        ``(accuracy, acc_std)`` taken from positions 5 and 11 of the record
        returned by ``find_best``, or ``(0.0, 0.0)`` when it returns ``None``.
    """
    best = dbh.find_best(name, model, "crossval")
    if best is None:
        # Nothing stored yet: report a zero score and zero deviation.
        return 0.0, 0.0
    return best[5], best[11]
# Fixed seeds for the experiment runs.
# NOTE(review): the consumer of this list is not visible in this chunk;
# presumably one evaluation is run per seed -- confirm against process_dataset.
random_seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
# Model names handed as a group to get_best_score()/dbh.find_best() to obtain
# the best stored score across all of them (used for the "best overall" status).
models_tree = [
"stree",
"stree_default",
"wodt",
"j48svm",
"oc1",
"cart",
"baseRaF",
]
# Reported next to `normalize` in the single-dataset summary output.
standardize = False
(set_of_files, model, dataset, sql, normalize, parameters) = parse_arguments()
dbh = MySQL()
@@ -206,17 +236,22 @@ if dataset == "all":
"Parameters",
]
header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 15, 10]
parameters = json.dumps(json.loads(parameters))
if parameters != "{}" and len(parameters) > 10:
header_lengths.pop()
header_lengths.append(len(parameters))
line_col = ""
for field, underscore in zip(header_cols, header_lengths):
print(f"{field:{underscore}s} ", end="")
line_col += "=" * underscore + " "
print(f"\n{line_col}")
for dataset in dt:
X, y = dt.load(dataset[0]) # type: ignore
name = dataset[0]
X, y = dt.load(name) # type: ignore
samples, features = X.shape
classes = len(np.unique(y))
print(
f"{dataset[0]:30s} {samples:5d} {features:3d} {classes:3d} ",
f"{name:30s} {samples:5d} {features:3d} {classes:3d} ",
end="",
)
scores, times, hyperparameters, nodes, leaves, depth = process_dataset(
@@ -232,28 +267,35 @@ if dataset == "all":
f"{nodes_item:7.2f} {leaves_item:7.2f} {depth_item:7.2f} ",
end="",
)
print(f"{np.mean(scores):8.6f}±{np.std(scores):6.4f} ", end="")
accuracy = np.mean(scores)
status = (
compute_status(dbh, name, model, accuracy)
if model == "stree_default"
else " "
)
print(f"{accuracy:8.6f}±{np.std(scores):6.4f}{status}", end="")
print(f"{np.mean(times):8.6f}±{np.std(times):6.4f} {hyperparameters}")
if sql:
command = store_string(
dataset[0], model, scores, times, hyperparameters, complexity
name, model, scores, times, hyperparameters, complexity
)
print(command, file=sql_output)
else:
scores, times, hyperparameters, nodes, leaves, depth = process_dataset(
dataset, verbose=True, model=model, params=parameters
)
record = dbh.find_best(dataset, model, "crossval")
best_accuracy, acc_best_std = get_best_score(dbh, dataset, model)
accuracy = np.mean(scores)
accuracy_best = record[5] if record is not None else 0.0
acc_best_std = record[11] if record is not None else 0.0
print(f"* Normalize/Standard.: {normalize} / {standardize}")
print(
f"* Accuracy Computed .: {accuracy:6.4f}±{np.std(scores):6.4f} "
f"{np.mean(times):5.3f}s"
)
print(f"* Accuracy Best .....: {accuracy_best:6.4f}±{acc_best_std:6.4f}")
print(f"* Difference ........: {accuracy_best - accuracy:6.4f}")
print(f"* Best Accuracy model: {best_accuracy:6.4f}±{acc_best_std:6.4f}")
print(f"* Difference ........: {best_accuracy - accuracy:6.4f}")
best_accuracy, acc_best_std = get_best_score(dbh, dataset, models_tree)
print(f"* Best Accuracy .....: {best_accuracy:6.4f}±{acc_best_std:6.4f}")
print(f"* Difference ........: {best_accuracy - accuracy:6.4f}")
print(
f"* Nodes/Leaves/Depth : {np.mean(nodes):.2f} {np.mean(leaves):.2f} "
f"{np.mean(depth):.2f} "