From b14edf430352fe5ffd3c1bcedfb9c48bc7ba01d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 24 Nov 2020 01:42:41 +0100 Subject: [PATCH] Add results report Add directory of datasets Add validation test --- analysis_mysql.py | 115 ++++++++ dir_data.py | 32 ++ kite_tutorial.ipynb | 182 ++++++++++++ test_mysql.ipynb => mysql_import.ipynb | 70 +---- param_analysis.ipynb | 386 +++++++++++++++++++++++++ report_mysql.ipynb | 290 ------------------- test_validation.ipynb | 323 +++++++++++++++++---- 7 files changed, 985 insertions(+), 413 deletions(-) create mode 100644 analysis_mysql.py create mode 100644 dir_data.py create mode 100644 kite_tutorial.ipynb rename test_mysql.ipynb => mysql_import.ipynb (91%) create mode 100644 param_analysis.ipynb delete mode 100644 report_mysql.ipynb diff --git a/analysis_mysql.py b/analysis_mysql.py new file mode 100644 index 0000000..76ada06 --- /dev/null +++ b/analysis_mysql.py @@ -0,0 +1,115 @@ +from experimentation.Sets import Datasets +from experimentation.Utils import TextColor, MySQL + +models = ["stree", "odte", "adaBoost", "bagging"] +title = "Best model results" +lengths = (30, 9, 11, 11, 11, 11) + + +def find_best(dataset, classifier): + cursor = database.cursor(buffered=True) + if classifier == "any": + command = ( + f"select * from results r inner join reference e on " + f"r.dataset=e.dataset where r.dataset='{dataset}' " + ) + else: + command = ( + f"select * from results r inner join reference e on " + f"r.dataset=e.dataset where r.dataset='{dataset}' and classifier" + f"='{classifier}'" + ) + command += ( + " order by r.dataset, accuracy desc, classifier desc, type, date, time" + ) + cursor.execute(command) + return cursor.fetchone() + + +def report_header_content(title): + length = sum(lengths) + len(lengths) - 1 + output = "\n" + "*" * length + "\n" + num = (length - len(title) - 2) // 2 + num2 = length - len(title) - 2 - 2 * num + output += "*" + " " * num + title + " " * (num + num2) + "*\n" + output += "*" * length + "\n\n" + lines = "" + for item, data in enumerate(fields): + output += f"{fields[item]:{lengths[item]}} " + lines += "=" * lengths[item] + " " + output += f"\n{lines}" + return output + + +def report_header(exclude_params): + print(TextColor.HEADER + report_header_content(title) + TextColor.ENDC) + + +def report_line(line): + output = f"{line['dataset']:{lengths[0] + 5}s} " + data = models.copy() + data.insert(0, "reference") + for key, model in enumerate(data): + output += f"{line[model]:{lengths[key + 1]}s} " + return output + + +def report_footer(agg): + print(TextColor.GREEN + f"we have better results {agg['better']:2d} times") + print(TextColor.RED + f"we have worse results {agg['worse']:2d} times") + color = TextColor.LINE1 + for item in models: + print(color + f"{item:10s} used {agg[item]:2d} times") + color = ( + TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1 + ) + + +database = MySQL.get_connection() +dt = Datasets(False, False, "tanveer") +fields = ("Dataset", "Reference") +for model in models: + fields += (f"{model}",) +report_header(title) +color = TextColor.LINE1 +agg = {} +for item in [ + "better", + "worse", +] + models: + agg[item] = 0 +for dataset in dt: + find_one = False + line = {"dataset": color + dataset[0]} + record = find_best(dataset[0], "any") + max_accuracy = 0.0 if record is None else record[5] + for model in models: + record = find_best(dataset[0], model) + if record is None: + line[model] = color + "-" * 9 + " " + else: + reference = record[10] + accuracy = record[5] + find_one = True + agg[model] += 1 + if accuracy > reference: + sign = "+" + agg["better"] += 1 + else: + sign = "-" + agg["worse"] += 1 + item = f"{accuracy:9.7} {sign}" + line["reference"] = f"{reference:9.7}" + line[model] = ( + TextColor.GREEN + TextColor.BOLD + item + TextColor.ENDC + if accuracy == max_accuracy + else color + item + ) + if not find_one: + print(TextColor.FAIL + f"*No results found for {dataset[0]}") + else: + color = ( + TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1 + ) + print(report_line(line)) +report_footer(agg) diff --git a/dir_data.py b/dir_data.py new file mode 100644 index 0000000..591c6ac --- /dev/null +++ b/dir_data.py @@ -0,0 +1,32 @@ +import os +import pandas as pd +import numpy as np +from experimentation.Utils import TextColor +from experimentation.Sets import Datasets + +path = os.path.join(os.getcwd(), "data/tanveer") +color = TextColor.LINE1 +dt = np.array(list(Datasets(False, False, "tanveer")), dtype="object") +dt = dt[:, 0] +good = bad = 0 +for folder in sorted(os.listdir(path)): + file_name = os.path.join(path, folder, f"{folder}_R.dat") + try: + data = pd.read_csv( + file_name, + sep="\t", + index_col=0, + ) + X = data.drop("clase", axis=1).to_numpy() + y = data["clase"].to_numpy() + sign = "*" if folder in dt else "-" + print(color + f"{folder:30s} {str(X.shape):>10s} {sign}") + color = ( + TextColor.LINE1 if color == TextColor.LINE2 else TextColor.LINE2 + ) + good += 1 + except FileNotFoundError: + print(TextColor.FAIL + f"{folder} not found.") + bad += 1 +print(TextColor.SUCCESS + f"{good:3d} datasets Ok.") +print(TextColor.FAIL + f"{bad:3d} datasets Wrong.") diff --git a/kite_tutorial.ipynb b/kite_tutorial.ipynb new file mode 100644 index 0000000..ddc3764 --- /dev/null +++ b/kite_tutorial.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Logo](https://kite.com/kite-public/kite-plus-jlab-scaled.png)\n", + "\n", + "### Welcome to Kite's JupyterLab extension tutorial\n", + "\n", + "Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n", + "\n", + "> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n", + "\n", + "#### Before we start...\n", + "\n", + "Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n", + "\n", + "![Kite icon](https://kite.com/kite-public/kite-status.png)\n", + "\n", + "* If it says `Kite: not running`, please start the Kite Engine first.\n", + "* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Part 1: Autocompletions\n", + "\n", + "**Step 1a**
\n", + "Run the code cell below with all the necessary imports 👇" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Run me!\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Step 1b**
\n", + "Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n", + "\n", + "![Autocompletions](https://www.kite.com/kite-public/kite-jlab-autocompletions.gif)\n", + "\n", + "> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n", + "\n", + "> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n", + "\n", + "Try typing out the code yourself to see Kite's autocompletions in action.
\n", + "\n", + "```python\n", + "x = np.linspace(-np.pi, np.pi, 50)\n", + "y = np.sin(x)\n", + "plt.plot(x, y)\n", + "```\n", + "\n", + "Type this code in the cell below 👇" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Put code in me\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Part 2: Manual completions\n", + "\n", + "You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n", + "\n", + "**Step 2a**
\n", + "First, run the code cell below to get some sample data to store in a `DataFrame` 👇" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run me!\n", + "url = 'https://kite.com/kite-public/iris.csv'\n", + "df = pd.read_csv(url)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Step 2b**
\n", + "Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n", + "\n", + "![Manual completions](https://www.kite.com/kite-public/kite-jlab-manual-completions.gif)\n", + "\n", + "Try requesting kernel completions yourself.\n", + "\n", + "```python\n", + "plt.scatter(df['sepal_length'], df['sepal_width'])\n", + "```\n", + "\n", + "Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Put code in me\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Part 3: Copilot Documentation\n", + "\n", + "If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n", + "\n", + "![Autosearch](https://www.kite.com/kite-public/kite-jlab-autosearch.gif)\n", + "\n", + "**Step 3a**
\n", + "Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The End\n", + "\n", + "Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n", + "\n", + "In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n", + "\n", + "Happy coding!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test_mysql.ipynb b/mysql_import.ipynb similarity index 91% rename from test_mysql.ipynb rename to mysql_import.ipynb index e32234b..f891424 100644 --- a/test_mysql.ipynb +++ b/mysql_import.ipynb @@ -30,65 +30,9 @@ "import json\n", "import sqlite3\n", "import mysql.connector\n", + "from experimentation.Utils import MySQL\n", "\n", - "database = mysql.connector.connect(\n", - " host=\"atenea.rmontanana.es\",\n", - " port=31428,\n", - " user=\"stree\",\n", - " password=\"xtree\",\n", - " database=\"stree\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s) ('2020-11-18', '12:03:17', 0.99786, '', 'stree', True, False, '{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}')\n" - ] - } - ], - "source": [ - "cursor = database.cursor()\n", - "date = '2020-11-18'\n", - "time = '12:03:17'\n", - "accuracy = 0.99786\n", - "dataset = ''\n", - "clf = 'stree'\n", - "norm = True\n", - "stand = False\n", - "parameters = {\"C\": .15, \"degree\": 6, \"gamma\": .7, \"kernel\": \"poly\", \"max_features\": None, \"max_iter\": 100000.0, \"random_state\": 0}\n", - "command_insert = \"replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)\"\n", - "values = (date, time, accuracy, dataset, clf, norm, stand, json.dumps(parameters))\n", - "print(command_insert, values)\n", - "#cursor.execute(command_insert, values)\n", - "database.commit()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}'" - ] - }, - "metadata": {}, - "execution_count": 3 - } - ], - "source": [ - "import json\n", - "json.dumps(parameters)" + "database = MySQL.get_connection()" ] }, { @@ -473,8 +417,7 @@ " values =(date, time, record_type, record['test_score'], record['dataset'], model, record['normalize'], record['standardize'], record['parameters'])\n", " print(f\"{date} - {time} - {record['dataset']}\")\n", " cursor.execute(command_insert, values)\n", - " database.commit()\n", - "\n" + " database.commit()" ] }, { @@ -566,13 +509,6 @@ " cursor.execute(command, values)\n", "database.commit()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ] } \ No newline at end of file diff --git a/param_analysis.ipynb b/param_analysis.ipynb new file mode 100644 index 0000000..31ac230 --- /dev/null +++ b/param_analysis.ipynb @@ -0,0 +1,386 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import sqlite3\n", + "import mysql.connector\n", + "from experimentation.Utils import MySQL\n", + "from experimentation.Sets import Datasets\n", + "\n", + "database = MySQL.get_connection()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "classifier = 'bagging'\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def find_best(dataset):\n", + " cursor = database.cursor(buffered=True)\n", + " if classifier == \"any\":\n", + " command = (\n", + " f\"select * from results r inner join reference e on \"\n", + " f\"r.dataset=e.dataset where r.dataset='{dataset}' \"\n", + " )\n", + " else:\n", + " command = (\n", + " f\"select * from results r inner join reference e on \"\n", + " f\"r.dataset=e.dataset where r.dataset='{dataset}' and classifier\"\n", + " f\"='{classifier}'\"\n", + " )\n", + " command += (\n", + " \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n", + " )\n", + " cursor.execute(command)\n", + " return cursor.fetchone()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def find_values(parameter, kernel_chosen):\n", + " result = []\n", + " for data in agg[kernel_chosen]:\n", + " base_parameter = f\"base_estimator__{parameter}\"\n", + " if parameter in data.keys():\n", + " result.append(data[parameter])\n", + " if base_parameter in data.keys():\n", + " result.append(data[base_parameter])\n", + " try:\n", + " result_ordered = sorted(result)\n", + " return result_ordered\n", + " except TypeError:\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Aggregating data ..................................................\n", + "stree has 0 results\n", + "adaBoost has 0 results\n", + "bagging has 43 results\n", + "odte has 0 results\n" + ] + } + ], + "source": [ + "dt = Datasets(False, False, 'tanveer')\n", + "models = ['stree', 'adaBoost', 'bagging', 'odte']\n", + "agg_models = {}\n", + "for i in models:\n", + " agg_models[i] = 0\n", + "agg = {'linear': [], 'rbf': [], 'poly': []}\n", + "print(\"Aggregating data .\", end='')\n", + "for dataset in dt:\n", + " result = find_best(dataset[0])\n", + " print('.', end='')\n", + " if result:\n", + " agg_models[result[3]] += 1\n", + " json_result = json.loads(result[8])\n", + " key = json_result['kernel'] if 'kernel' in json_result.keys() else 'linear'\n", + " agg[key].append(json_result)\n", + "print('')\n", + "for i in models:\n", + " print(f\"{i:10} has {agg_models[i]:2} results\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Used kernel linear: 43 times\nUsed kernel poly: 0 times\nUsed kernel rbf: 0 times\n" + ] + } + ], + "source": [ + "print(\"Used kernel linear: \", len(agg['linear']), ' times')\n", + "print(\"Used kernel poly: \", len(agg['poly']), ' times')\n", + "print(\"Used kernel rbf: \", len(agg['rbf']), ' times')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "find_values('gamma', 'poly')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.05,\n", + " 0.05,\n", + " 0.05,\n", + " 0.05,\n", + " 0.05,\n", + " 0.05,\n", + " 0.2,\n", + " 0.2,\n", + " 0.2,\n", + " 0.2,\n", + " 0.2,\n", + " 0.2,\n", + " 0.2,\n", + " 0.55,\n", + " 0.55,\n", + " 0.55,\n", + " 1.0,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 7,\n", + " 55,\n", + " 55,\n", + " 55,\n", + " 55,\n", + " 55,\n", + " 55,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0,\n", + " 10000.0]" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "find_values('C', 'linear')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "find_values('C', 'poly')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "find_values('C', 'rbf')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.2,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.2,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.2,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.2,\n", + " None,\n", + " 0.6,\n", + " None,\n", + " 0.6,\n", + " 'auto',\n", + " 0.6,\n", + " 'auto']" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "find_values('max_features', 'linear')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/report_mysql.ipynb b/report_mysql.ipynb deleted file mode 100644 index 67e6ed8..0000000 --- a/report_mysql.ipynb +++ /dev/null @@ -1,290 +0,0 @@ -{ - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2-final" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "nbformat": 4, - "nbformat_minor": 2, - "cells": [ - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import sqlite3\n", - "import mysql.connector\n", - "from experimentation.Sets import Datasets\n", - "from experimentation.Utils import TextColor\n", - "\n", - "\n", - "database = mysql.connector.connect(\n", - " host=\"atenea.rmontanana.es\",\n", - " port=31428,\n", - " user=\"stree\",\n", - " password=\"xtree\",\n", - " database=\"stree\",\n", - ")\n", - "dt = Datasets(False, False, 'tanveer')\n", - "\n", - "#\n", - "# CONFIGURABLE REPORT PARAMETERS\n", - "#\n", - "exclude_parameters = True\n", - "# classififer can be {any, stree, bagging, adaBoost, odte}\n", - "classifier = \"stree\"\n", - "title = \"Best Hyperparameters found for datasets\"" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "def find_best(dataset):\n", - " cursor = database.cursor(buffered=True)\n", - " if classifier == \"any\":\n", - " command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' \"\n", - " else:\n", - " command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' and classifier='{classifier}'\" \n", - " command += \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n", - " cursor.execute(command)\n", - " return cursor.fetchone()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n", - "if exclude_parameters:\n", - " fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference')\n", - " lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n", - "else:\n", - " fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference', 'Parameters')\n", - " lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11, 30)\n", - "def report_header_content(title):\n", - " length = sum(lengths) + len(lengths) - 1\n", - " output = \"\\n\" + \"*\" * length + \"\\n\"\n", - " title = title + f\" -- {classifier} classifier --\"\n", - " num = (length - len(title) - 2) // 2\n", - " num2 = length - len(title) - 2 - 2 * num\n", - " output += \"*\" + \" \" * num + title + \" \" * (num + num2) + \"*\\n\"\n", - " output += \"*\" * length + \"\\n\\n\"\n", - " lines = \"\"\n", - " for item, data in enumerate(fields):\n", - " output += f\"{fields[item]:{lengths[item]}} \"\n", - " lines += \"=\" * lengths[item] + \" \"\n", - " output += f\"\\n{lines}\"\n", - " return output\n", - "def report_header(exclude_params):\n", - " print(\n", - " TextColor.HEADER\n", - " + report_header_content(title)\n", - " + TextColor.ENDC\n", - " )\n", - "def report_line(record, agg):\n", - " accuracy = record[5]\n", - " expected = record[10]\n", - " if accuracy < expected: \n", - " agg['worse'] += 1\n", - " sign = '-'\n", - " elif accuracy > expected:\n", - " agg['better'] += 1\n", - " sign = '+'\n", - " else:\n", - " agg['equal'] +=1\n", - " sign = '='\n", - " output = f\"{record[0]:%Y-%m-%d} {str(record[1]):>8s} {record[2]:10s} {record[3]:10s} {record[4]:30s}\"\n", - " output += f\" {record[6]:3d} {record[7]:3d} {accuracy:8.7f} {expected:8.7f} {sign}\"\n", - " if not exclude_parameters:\n", - " output += f\" {record[8]}\"\n", - " return output\n", - "def report_footer(agg):\n", - " print(\n", - " TextColor.GREEN\n", - " + f\"we have better results {agg['better']:2d} times\"\n", - " )\n", - " print(\n", - " TextColor.RED\n", - " + f\"we have worse results {agg['worse']:2d} times\"\n", - " )\n", - " print(\n", - " TextColor.MAGENTA\n", - " + f\"we have equal results {agg['equal']:2d} times\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[95m\n", - "******************************************************************************************************\n", - "* Best Hyperparameters found for datasets -- stree classifier -- *\n", - "******************************************************************************************************\n", - "\n", - "Date Time Type Classifier Dataset Nor Std Accuracy Reference \n", - "========== ======== ========== ========== ============================== === === ========= =========== \u001b[0m\n", - "\u001b[95m2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280 +\n", - "\u001b[94m2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000 +\n", - "\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-diag 1 0 0.9789320 0.9743450 +\n", - "\u001b[94m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-prog 1 0 0.8284620 0.7993400 +\n", - "\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc 1 0 0.9656940 0.9702560 -\n", - "\u001b[94m2020-11-13 12:04:26 crossval stree breast-cancer 1 0 0.7308530 0.7382400 -\n", - "\u001b[95m2020-11-13 12:04:26 crossval stree cardiotocography-10clases 1 0 0.6665220 0.8277610 -\n", - "\u001b[94m2020-11-13 12:04:27 crossval stree cardiotocography-3clases 1 0 0.8480740 0.9201340 -\n", - "\u001b[91m*No results found for conn-bench-sonar-mines-rocks\n", - "\u001b[95m2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410 -\n", - "\u001b[94m2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780 +\n", - "\u001b[95m2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270 -\n", - "\u001b[94m2020-11-13 12:04:27 crossval stree fertility 1 0 0.8800000 0.8840000 -\n", - "\u001b[95m2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540 +\n", - "\u001b[94m2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750 +\n", - "\u001b[95m2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030 +\n", - "\u001b[94m2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280 +\n", - "\u001b[95m2020-11-13 12:04:28 crossval stree ionosphere 1 0 0.9487320 0.9442150 +\n", - "\u001b[94m2020-11-13 12:04:28 crossval stree iris 0 0 0.9800000 0.9786560 +\n", - "\u001b[95m2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000 +\n", - "\u001b[91m*No results found for libras\n", - "\u001b[91m*No results found for low-res-spect\n", - "\u001b[94m2020-11-13 12:15:39 crossval stree lymphography 1 0 0.8648280 0.8554050 +\n", - "\u001b[95m2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720 +\n", - "\u001b[91m*No results found for molec-biol-promoter\n", - "\u001b[91m*No results found for musk-1\n", - "\u001b[94m2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630 -\n", - "\u001b[95m2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630 -\n", - "\u001b[94m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330 -\n", - "\u001b[95m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_states_5b 1 0 0.8453610 0.9315790 -\n", - "\u001b[94m2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210 -\n", - "\u001b[95m2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880 +\n", - "\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860 +\n", - "\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-REL-L 1 0 0.6761900 0.6959290 -\n", - "\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-SPAN 1 0 0.6771930 0.6891300 -\n", - "\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700 +\n", - "\u001b[94m2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790 -\n", - "\u001b[95m2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420 +\n", - "\u001b[94m2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030 +\n", - "\u001b[95m2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810 +\n", - "\u001b[94m2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000 +\n", - "\u001b[95m2020-11-13 12:21:08 crossval stree statlog-heart 1 0 0.8481480 0.8422990 +\n", - "\u001b[94m2020-11-13 12:21:12 crossval stree statlog-image 1 0 0.9593070 0.9761940 -\n", - "\u001b[95m2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730 +\n", - "\u001b[91m*No results found for synthetic-control\n", - "\u001b[94m2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850 +\n", - "\u001b[95m2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530 -\n", - "\u001b[94m2020-11-13 12:21:14 crossval stree wine 1 0 0.9777780 0.9932810 -\n", - "\u001b[95m2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850 +\n", - "\u001b[92mwe have better results 25 times\n", - "\u001b[91mwe have worse results 18 times\n", - "\u001b[95mwe have equal results 0 times\n" - ] - } - ], - "source": [ - "report_header(title)\n", - "color = TextColor.LINE1\n", - "agg = {'equal': 0, 'better': 0, 'worse': 0}\n", - "for dataset in dt:\n", - " record = find_best(dataset[0])\n", - " if record is None:\n", - " print(TextColor.FAIL + f\"*No results found for {dataset[0]}\")\n", - " else:\n", - " color = TextColor.MAGENTA if color == TextColor.LINE1 else TextColor.LINE1\n", - " print(color + report_line(record, agg))\n", - "report_footer(agg)" - ] - }, - { - "source": [ - "******************************************************************************************************\n", - "* Best Hyperparameters found for datasets -- any classifier -- *\n", - "******************************************************************************************************\n", - "\n", - "Date Time Type Classifier Dataset Nor Std Accuracy Reference \n", - "========== ======== ========== ========== ============================== === === ========= =========== \n", - "\n", - "2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280 +\n", - "2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000 +\n", - "2020-11-16 13:34:47 gridsearch bagging breast-cancer-wisc-diag 1 1 0.9806860 0.9743450 +\n", - "2020-11-11 23:10:52 gridsearch bagging breast-cancer-wisc-prog 1 1 0.8337180 0.7993400 +\n", - "2020-11-16 13:34:49 gridsearch bagging breast-cancer-wisc 1 1 0.9699790 0.9702560 -\n", - "2020-11-18 23:04:30 gridsearch odte breast-cancer 1 1 0.7449490 0.7382400 +\n", - "2020-11-16 13:37:13 gridsearch bagging cardiotocography-10clases 1 1 0.7022580 0.8277610 -\n", - "2020-11-16 13:35:23 gridsearch bagging cardiotocography-3clases 1 1 0.8490220 0.9201340 -\n", - "*No results found for conn-bench-sonar-mines-rocks\n", - "2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410 -\n", - "2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780 +\n", - "2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270 -\n", - "2020-11-19 15:22:26 gridsearch bagging fertility 1 1 0.8900000 0.8840000 +\n", - "2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540 +\n", - "2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750 +\n", - "2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030 +\n", - "2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280 +\n", - "2020-11-16 13:34:59 gridsearch bagging ionosphere 1 1 0.9515490 0.9442150 +\n", - "2020-11-03 18:52:15 gridsearch odte iris 1 0 0.9933330 0.9786560 +\n", - "2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000 +\n", - "*No results found for libras\n", - "*No results found for low-res-spect\n", - "2020-11-16 13:35:08 gridsearch bagging lymphography 1 1 0.8781610 0.8554050 +\n", - "2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720 +\n", - "2020-11-16 22:17:03 gridsearch bagging molec-biol-promoter 1 1 0.9060610 0.8182690 +\n", - "*No results found for musk-1\n", - "2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630 -\n", - "2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630 -\n", - "2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330 -\n", - "2020-11-16 13:35:41 gridsearch bagging oocytes_trisopterus_states_5b 1 1 0.8540800 0.9315790 -\n", - "2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210 -\n", - "2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880 +\n", - "2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860 +\n", - "2020-11-16 13:36:41 gridsearch bagging pittsburg-bridges-REL-L 1 1 0.6766670 0.6959290 -\n", - "2020-11-16 13:22:50 gridsearch adaBoost pittsburg-bridges-SPAN 1 1 0.7210530 0.6891300 +\n", - "2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700 +\n", - "2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790 -\n", - "2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420 +\n", - "2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030 +\n", - "2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810 +\n", - "2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000 +\n", - "2020-11-16 13:36:12 gridsearch bagging statlog-heart 1 1 0.8518520 0.8422990 +\n", - "2020-11-16 14:04:13 gridsearch bagging statlog-image 1 1 0.9627710 0.9761940 -\n", - "2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730 +\n", - "*No results found for synthetic-control\n", - "2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850 +\n", - "2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530 -\n", - "2020-06-26 11:03:03 gridsearch odte wine 0 0 0.9800000 0.9932810 -\n", - "2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850 +\n", - "we have better results 29 times\n", - "we have worse results 15 times\n", - "we have equal results 0 times" - ], - "cell_type": "markdown", - "metadata": {} - } - ] -} \ No newline at end of file diff --git a/test_validation.ipynb b/test_validation.ipynb index ec985ad..27851e1 100644 --- a/test_validation.ipynb +++ b/test_validation.ipynb @@ -1,25 +1,4 @@ { - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2-final" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "nbformat": 4, - "nbformat_minor": 2, "cells": [ { "cell_type": "code", @@ -63,7 +42,16 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n" + ] + } + ], "source": [ "clf = Stree(**parameters)\n", "results = cross_validate(clf, X, y, n_jobs=1)" @@ -75,16 +63,16 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ - "{'fit_time': array([0.00772715, 0.03221297, 0.01418114, 0.06252027, 0.05369782]),\n", - " 'score_time': array([0.00048399, 0.00044394, 0.00045371, 0.00051093, 0.00044894]),\n", + "{'fit_time': array([0.0078361 , 0.03171897, 0.01422501, 0.06850815, 0.05387974]),\n", + " 'score_time': array([0.0005939 , 0.00044203, 0.00043583, 0.00050902, 0.00044012]),\n", " 'test_score': array([0.4047619 , 0.61904762, 0.66666667, 0.92682927, 0.58536585])}" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -97,14 +85,14 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.640534262485482" ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ @@ -118,11 +106,11 @@ "outputs": [], "source": [ "# 864 modelos por cada dataset\n", - "C = [0.05, 0.2, 0.55, 7, 55, 1e4]\n", - "max_iter = [1e4, 1e5, 1e6]\n", - "gamma = [1e-1, 1, 1e1]\n", - "max_features = [None, \"auto\"]\n", - "split_criteria = [\"impurity\", \"max_samples\"]\n", + "C = [0.2, 7, 55]\n", + "max_iter = [1e4]\n", + "gamma = [\"scale\"] #[1e-1, 1, 1e1]\n", + "max_features = [\"auto\"]\n", + "split_criteria = [\"impurity\"]\n", "param_grid = [\n", " {\n", " \"random_state\": [1],\n", @@ -159,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "param_grid = [\n", + "param2_grid = [\n", " {\n", " \"random_state\": [1],\n", " \"kernel\": [\"poly\"],\n", @@ -177,33 +165,235 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 2 folds for each of 12 candidates, totalling 24 fits\n", + "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.548, total= 6.4min\n", + "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 6.4min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.346, total= 9.2min\n", + "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 15.7min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.673, total=17.0min\n", + "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 32.7min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.356, total=10.9min\n", + "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 43.6min remaining: 0.0s\n", + "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 7.3min\n", + "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 50.9min remaining: 0.0s\n", + "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n", + "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n", + "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", + " warnings.warn(\"Liblinear failed to converge, increase \"\n", + "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 55.8min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.317, total= 4.9min\n", + "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total=33.4min\n", + "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 89.2min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 2.8min\n", + "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 92.0min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.529, total= 1.9min\n", + "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 93.8min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.433, total= 2.8min\n", + "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.519, total= 55.6s\n", + "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.394, total= 55.7s\n", + "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.567, total= 5.5min\n", + "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 3.7min\n", + "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 5.5min\n", + "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.442, total= 4.6min\n", + "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.577, total= 2.7min\n", + "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.510, total=15.6min\n", + "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.452, total= 2.8min\n", + "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total= 4.7min\n", + "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.596, total= 56.0s\n", + "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.404, total= 2.8min\n", + "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 1.8min\n", + "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n", + "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.375, total= 2.7min\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 24 out of 24 | elapsed: 151.8min finished\n" + ] + }, + { "data": { "text/plain": [ - "GridSearchCV(estimator=Stree(), n_jobs=1,\n", - " param_grid=[{'C': [55], 'degree': [5, 7], 'kernel': ['poly'],\n", - " 'max_features': [None, 'auto'], 'random_state': [1],\n", - " 'split_criteria': ['impurity', 'max_samples']}])" + "GridSearchCV(cv=2, estimator=Stree(), n_jobs=1,\n", + " param_grid=[{'C': [0.2, 7, 55], 'max_features': ['auto'],\n", + " 'max_iter': [10000.0], 'random_state': [1],\n", + " 'split_criteria': ['impurity']},\n", + " {'C': [0.2, 7, 55], 'gamma': ['scale'],\n", + " 'kernel': ['rbf'], 'max_features': ['auto'],\n", + " 'max_iter': [10000.0], 'random_state': [1],\n", + " 'split_criteria': ['impurity']},\n", + " {'C': [0.2, 7, 55], 'degree': [3, 5],\n", + " 'gamma': ['scale'], 'kernel': ['poly'],\n", + " 'max_features': ['auto'], 'max_iter': [10000.0],\n", + " 'random_state': [1],\n", + " 'split_criteria': ['impurity']}],\n", + " verbose=10)" ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ "clf = Stree()\n", - "model = GridSearchCV(clf, n_jobs=1, param_grid=param_grid)\n", + "model = GridSearchCV(clf, n_jobs=1, verbose=10, param_grid=param_grid, cv=2)\n", "model.fit(X, y)" ] }, { - "source": [ - "print(model.cv_results_['params'][model.best_index_])" - ], - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 10, "metadata": { "tags": [] - } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'C': 7, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly', 'max_features': 'auto', 'max_iter': 10000.0, 'random_state': 1, 'split_criteria': 'impurity'}\n" + ] + } + ], + "source": [ + "print(model.cv_results_['params'][model.best_index_])" + ] }, { "cell_type": "code", @@ -211,10 +401,10 @@ "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "0.6448315911730547\n" + "0.5432692307692307\n" ] } ], @@ -224,12 +414,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n" ] @@ -410,7 +600,6 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[{'n_estimators': [50],\n", @@ -441,8 +630,9 @@ " 'base_estimator__max_features': [None, 'auto']}]" ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -455,8 +645,8 @@ "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "{'n_estimators': [50], 'algorithm': ['SAMME'], 'base_estimator__C': 7, 'base_estimator__degree': 7, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'poly', 'base_estimator__max_features': 'auto', 'base_estimator__max_iter': 10000.0, 'base_estimator__random_state': 1, 'base_estimator__split_criteria': 'impurity'}\n" ] @@ -473,14 +663,14 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "'stree'" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -494,5 +684,26 @@ "outputs": [], "source": [] } - ] + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } \ No newline at end of file