diff --git a/analysis_mysql.py b/analysis_mysql.py
new file mode 100644
index 0000000..76ada06
--- /dev/null
+++ b/analysis_mysql.py
@@ -0,0 +1,120 @@
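+"""Compare each model's best stored accuracy with the reference for every dataset."""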
+from experimentation.Sets import Datasets
+from experimentation.Utils import TextColor, MySQL
+
+models = ["stree", "odte", "adaBoost", "bagging"]
+title = "Best model results"
+lengths = (30, 9, 11, 11, 11, 11)
+
+
+def find_best(dataset, classifier):
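+    """Return the best (highest accuracy) result for the dataset and classifier."""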
+    cursor = database.cursor(buffered=True)
+    if classifier == "any":
+        command = (
+            f"select * from results r inner join reference e on "
+            f"r.dataset=e.dataset where r.dataset='{dataset}' "
+        )
+    else:
+        command = (
+            f"select * from results r inner join reference e on "
+            f"r.dataset=e.dataset where r.dataset='{dataset}' and classifier"
+            f"='{classifier}'"
+        )
+    command += (
+        " order by r.dataset, accuracy desc, classifier desc, type, date, time"
+    )
+    cursor.execute(command)
+    return cursor.fetchone()
+
+
+def report_header_content(title):
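+    """Build the report banner and column header lines for the given title."""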
+    length = sum(lengths) + len(lengths) - 1
+    output = "\n" + "*" * length + "\n"
+    num = (length - len(title) - 2) // 2
+    num2 = length - len(title) - 2 - 2 * num
+    output += "*" + " " * num + title + " " * (num + num2) + "*\n"
+    output += "*" * length + "\n\n"
+    lines = ""
+    for item, data in enumerate(fields):
+        output += f"{fields[item]:{lengths[item]}} "
+        lines += "=" * lengths[item] + " "
+    output += f"\n{lines}"
+    return output
+
+
+def report_header(title):
+    print(TextColor.HEADER + report_header_content(title) + TextColor.ENDC)
+
+
+def report_line(line):
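+    """Format one row: dataset name, reference accuracy and one column per model."""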
+ output = f"{line['dataset']:{lengths[0] + 5}s} "
+ data = models.copy()
+ data.insert(0, "reference")
+ for key, model in enumerate(data):
+ output += f"{line[model]:{lengths[key + 1]}s} "
+ return output
+
+
+def report_footer(agg):
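+    """Print better/worse totals and how often each model gave the best result."""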
+    print(TextColor.GREEN + f"we have better results {agg['better']:2d} times")
+    print(TextColor.RED + f"we have worse results {agg['worse']:2d} times")
+    color = TextColor.LINE1
+    for item in models:
+        print(color + f"{item:10s} used {agg[item]:2d} times")
+        color = (
+            TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
+        )
+
+
+database = MySQL.get_connection()
+dt = Datasets(False, False, "tanveer")
+fields = ("Dataset", "Reference")
+for model in models:
+    fields += (f"{model}",)
+report_header(title)
+color = TextColor.LINE1
+agg = {}
+for item in [
+ "better",
+ "worse",
+] + models:
+ agg[item] = 0
+for dataset in dt:
+    find_one = False
+    line = {"dataset": color + dataset[0]}
+    record = find_best(dataset[0], "any")
+    max_accuracy = 0.0 if record is None else record[5]
+    for model in models:
+        record = find_best(dataset[0], model)
+        if record is None:
+            line[model] = color + "-" * 9 + " "
+        else:
+            reference = record[10]
+            accuracy = record[5]
+            find_one = True
+            agg[model] += 1
+            if accuracy > reference:
+                sign = "+"
+                agg["better"] += 1
+            else:
+                sign = "-"
+                agg["worse"] += 1
+            item = f"{accuracy:9.7} {sign}"
+            line["reference"] = f"{reference:9.7}"
+            line[model] = (
+                TextColor.GREEN + TextColor.BOLD + item + TextColor.ENDC
+                if accuracy == max_accuracy
+                else color + item
+            )
+    if not find_one:
+        print(TextColor.FAIL + f"*No results found for {dataset[0]}")
+    else:
+        color = (
+            TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
+        )
+        print(report_line(line))
+report_footer(agg)
diff --git a/dir_data.py b/dir_data.py
new file mode 100644
index 0000000..591c6ac
--- /dev/null
+++ b/dir_data.py
@@ -0,0 +1,33 @@
+import os
+import pandas as pd
+import numpy as np
+from experimentation.Utils import TextColor
+from experimentation.Sets import Datasets
+
+path = os.path.join(os.getcwd(), "data/tanveer")
+color = TextColor.LINE1
+dt = np.array(list(Datasets(False, False, "tanveer")), dtype="object")
+dt = dt[:, 0]
+good = bad = 0
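+# Print each dataset's data shape; "*" marks folders in the tanveer selection.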
+for folder in sorted(os.listdir(path)):
+    file_name = os.path.join(path, folder, f"{folder}_R.dat")
+    try:
+        data = pd.read_csv(
+            file_name,
+            sep="\t",
+            index_col=0,
+        )
+        X = data.drop("clase", axis=1).to_numpy()
+        y = data["clase"].to_numpy()
+        sign = "*" if folder in dt else "-"
+        print(color + f"{folder:30s} {str(X.shape):>10s} {sign}")
+        color = (
+            TextColor.LINE1 if color == TextColor.LINE2 else TextColor.LINE2
+        )
+        good += 1
+    except FileNotFoundError:
+        print(TextColor.FAIL + f"{folder} not found.")
+        bad += 1
+print(TextColor.SUCCESS + f"{good:3d} datasets Ok.")
+print(TextColor.FAIL + f"{bad:3d} datasets Wrong.")
diff --git a/kite_tutorial.ipynb b/kite_tutorial.ipynb
new file mode 100644
index 0000000..ddc3764
--- /dev/null
+++ b/kite_tutorial.ipynb
@@ -0,0 +1,182 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "### Welcome to Kite's JupyterLab extension tutorial\n",
+ "\n",
+ "Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n",
+ "\n",
+ "> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n",
+ "\n",
+ "#### Before we start...\n",
+ "\n",
+ "Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n",
+ "\n",
+ "\n",
+ "\n",
+ "* If it says `Kite: not running`, please start the Kite Engine first.\n",
+ "* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Part 1: Autocompletions\n",
+ "\n",
+ "**Step 1a**
\n",
+ "Run the code cell below with all the necessary imports 👇"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run me!\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Step 1b**
\n",
+ "Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n",
+ "\n",
+ "\n",
+ "\n",
+ "> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n",
+ "\n",
+ "> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n",
+ "\n",
+ "Try typing out the code yourself to see Kite's autocompletions in action.
\n",
+ "\n",
+ "```python\n",
+ "x = np.linspace(-np.pi, np.pi, 50)\n",
+ "y = np.sin(x)\n",
+ "plt.plot(x, y)\n",
+ "```\n",
+ "\n",
+ "Type this code in the cell below 👇"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Put code in me\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Part 2: Manual completions\n",
+ "\n",
+ "You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n",
+ "\n",
+ "**Step 2a**
\n",
+ "First, run the code cell below to get some sample data to store in a `DataFrame` 👇"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run me!\n",
+ "url = 'https://kite.com/kite-public/iris.csv'\n",
+ "df = pd.read_csv(url)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Step 2b**
\n",
+ "Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n",
+ "\n",
+ "\n",
+ "\n",
+ "Try requesting kernel completions yourself.\n",
+ "\n",
+ "```python\n",
+ "plt.scatter(df['sepal_length'], df['sepal_width'])\n",
+ "```\n",
+ "\n",
+ "Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Put code in me\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Part 3: Copilot Documentation\n",
+ "\n",
+ "If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n",
+ "\n",
+ "\n",
+ "\n",
+ "**Step 3a**
\n",
+ "Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### The End\n",
+ "\n",
+ "Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n",
+ "\n",
+ "In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n",
+ "\n",
+ "Happy coding!"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/test_mysql.ipynb b/mysql_import.ipynb
similarity index 91%
rename from test_mysql.ipynb
rename to mysql_import.ipynb
index e32234b..f891424 100644
--- a/test_mysql.ipynb
+++ b/mysql_import.ipynb
@@ -30,65 +30,9 @@
"import json\n",
"import sqlite3\n",
"import mysql.connector\n",
+ "from experimentation.Utils import MySQL\n",
"\n",
- "database = mysql.connector.connect(\n",
- " host=\"atenea.rmontanana.es\",\n",
- " port=31428,\n",
- " user=\"stree\",\n",
- " password=\"xtree\",\n",
- " database=\"stree\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s) ('2020-11-18', '12:03:17', 0.99786, '', 'stree', True, False, '{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}')\n"
- ]
- }
- ],
- "source": [
- "cursor = database.cursor()\n",
- "date = '2020-11-18'\n",
- "time = '12:03:17'\n",
- "accuracy = 0.99786\n",
- "dataset = ''\n",
- "clf = 'stree'\n",
- "norm = True\n",
- "stand = False\n",
- "parameters = {\"C\": .15, \"degree\": 6, \"gamma\": .7, \"kernel\": \"poly\", \"max_features\": None, \"max_iter\": 100000.0, \"random_state\": 0}\n",
- "command_insert = \"replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)\"\n",
- "values = (date, time, accuracy, dataset, clf, norm, stand, json.dumps(parameters))\n",
- "print(command_insert, values)\n",
- "#cursor.execute(command_insert, values)\n",
- "database.commit()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "'{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}'"
- ]
- },
- "metadata": {},
- "execution_count": 3
- }
- ],
- "source": [
- "import json\n",
- "json.dumps(parameters)"
+ "database = MySQL.get_connection()"
]
},
{
@@ -473,8 +417,7 @@
" values =(date, time, record_type, record['test_score'], record['dataset'], model, record['normalize'], record['standardize'], record['parameters'])\n",
" print(f\"{date} - {time} - {record['dataset']}\")\n",
" cursor.execute(command_insert, values)\n",
- " database.commit()\n",
- "\n"
+ " database.commit()"
]
},
{
@@ -566,13 +509,6 @@
" cursor.execute(command, values)\n",
"database.commit()"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
]
}
\ No newline at end of file
diff --git a/param_analysis.ipynb b/param_analysis.ipynb
new file mode 100644
index 0000000..31ac230
--- /dev/null
+++ b/param_analysis.ipynb
@@ -0,0 +1,387 @@
+{
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2-final"
+ },
+ "orig_nbformat": 2,
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import sqlite3\n",
+ "import mysql.connector\n",
+ "from experimentation.Utils import MySQL\n",
+ "from experimentation.Sets import Datasets\n",
+ "\n",
+ "database = MySQL.get_connection()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "classifier = 'bagging'\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def find_best(dataset):\n",
+ " cursor = database.cursor(buffered=True)\n",
+ " if classifier == \"any\":\n",
+ " command = (\n",
+ " f\"select * from results r inner join reference e on \"\n",
+ " f\"r.dataset=e.dataset where r.dataset='{dataset}' \"\n",
+ " )\n",
+ " else:\n",
+ " command = (\n",
+ " f\"select * from results r inner join reference e on \"\n",
+ " f\"r.dataset=e.dataset where r.dataset='{dataset}' and classifier\"\n",
+ " f\"='{classifier}'\"\n",
+ " )\n",
+ " command += (\n",
+ " \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n",
+ " )\n",
+ " cursor.execute(command)\n",
+ " return cursor.fetchone()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def find_values(parameter, kernel_chosen):\n",
+ " result = []\n",
+ " for data in agg[kernel_chosen]:\n",
+ " base_parameter = f\"base_estimator__{parameter}\"\n",
+ " if parameter in data.keys():\n",
+ " result.append(data[parameter])\n",
+ " if base_parameter in data.keys():\n",
+ " result.append(data[base_parameter])\n",
+ " try:\n",
+ " result_ordered = sorted(result)\n",
+ " return result_ordered\n",
+ " except TypeError:\n",
+ " return result"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Aggregating data ..................................................\n",
+ "stree has 0 results\n",
+ "adaBoost has 0 results\n",
+ "bagging has 43 results\n",
+ "odte has 0 results\n"
+ ]
+ }
+ ],
+ "source": [
+ "dt = Datasets(False, False, 'tanveer')\n",
+ "models = ['stree', 'adaBoost', 'bagging', 'odte']\n",
+ "agg_models = {}\n",
+ "for i in models:\n",
+ " agg_models[i] = 0\n",
+ "agg = {'linear': [], 'rbf': [], 'poly': []}\n",
+ "print(\"Aggregating data .\", end='')\n",
+ "for dataset in dt:\n",
+ " result = find_best(dataset[0])\n",
+ " print('.', end='')\n",
+ " if result:\n",
+ " agg_models[result[3]] += 1\n",
+ " json_result = json.loads(result[8])\n",
+ " key = json_result['kernel'] if 'kernel' in json_result.keys() else 'linear'\n",
+ " agg[key].append(json_result)\n",
+ "print('')\n",
+ "for i in models:\n",
+ " print(f\"{i:10} has {agg_models[i]:2} results\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Used kernel linear: 43 times\nUsed kernel poly: 0 times\nUsed kernel rbf: 0 times\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Used kernel linear: \", len(agg['linear']), ' times')\n",
+ "print(\"Used kernel poly: \", len(agg['poly']), ' times')\n",
+ "print(\"Used kernel rbf: \", len(agg['rbf']), ' times')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ],
+ "source": [
+ "find_values('gamma', 'poly')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[0.05,\n",
+ " 0.05,\n",
+ " 0.05,\n",
+ " 0.05,\n",
+ " 0.05,\n",
+ " 0.05,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.2,\n",
+ " 0.55,\n",
+ " 0.55,\n",
+ " 0.55,\n",
+ " 1.0,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 7,\n",
+ " 55,\n",
+ " 55,\n",
+ " 55,\n",
+ " 55,\n",
+ " 55,\n",
+ " 55,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0,\n",
+ " 10000.0]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ],
+ "source": [
+ "find_values('C', 'linear')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ],
+ "source": [
+ "find_values('C', 'poly')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ],
+ "source": [
+ "find_values('C', 'rbf')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.2,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.2,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.2,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.2,\n",
+ " None,\n",
+ " 0.6,\n",
+ " None,\n",
+ " 0.6,\n",
+ " 'auto',\n",
+ " 0.6,\n",
+ " 'auto']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ],
+ "source": [
+ "find_values('max_features', 'linear')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/report_mysql.ipynb b/report_mysql.ipynb
deleted file mode 100644
index 67e6ed8..0000000
--- a/report_mysql.ipynb
+++ /dev/null
@@ -1,290 +0,0 @@
-{
- "metadata": {
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.2-final"
- },
- "orig_nbformat": 2,
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2,
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "import sqlite3\n",
- "import mysql.connector\n",
- "from experimentation.Sets import Datasets\n",
- "from experimentation.Utils import TextColor\n",
- "\n",
- "\n",
- "database = mysql.connector.connect(\n",
- " host=\"atenea.rmontanana.es\",\n",
- " port=31428,\n",
- " user=\"stree\",\n",
- " password=\"xtree\",\n",
- " database=\"stree\",\n",
- ")\n",
- "dt = Datasets(False, False, 'tanveer')\n",
- "\n",
- "#\n",
- "# CONFIGURABLE REPORT PARAMETERS\n",
- "#\n",
- "exclude_parameters = True\n",
- "# classififer can be {any, stree, bagging, adaBoost, odte}\n",
- "classifier = \"stree\"\n",
- "title = \"Best Hyperparameters found for datasets\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "def find_best(dataset):\n",
- " cursor = database.cursor(buffered=True)\n",
- " if classifier == \"any\":\n",
- " command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' \"\n",
- " else:\n",
- " command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' and classifier='{classifier}'\" \n",
- " command += \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n",
- " cursor.execute(command)\n",
- " return cursor.fetchone()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n",
- "if exclude_parameters:\n",
- " fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference')\n",
- " lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n",
- "else:\n",
- " fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference', 'Parameters')\n",
- " lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11, 30)\n",
- "def report_header_content(title):\n",
- " length = sum(lengths) + len(lengths) - 1\n",
- " output = \"\\n\" + \"*\" * length + \"\\n\"\n",
- " title = title + f\" -- {classifier} classifier --\"\n",
- " num = (length - len(title) - 2) // 2\n",
- " num2 = length - len(title) - 2 - 2 * num\n",
- " output += \"*\" + \" \" * num + title + \" \" * (num + num2) + \"*\\n\"\n",
- " output += \"*\" * length + \"\\n\\n\"\n",
- " lines = \"\"\n",
- " for item, data in enumerate(fields):\n",
- " output += f\"{fields[item]:{lengths[item]}} \"\n",
- " lines += \"=\" * lengths[item] + \" \"\n",
- " output += f\"\\n{lines}\"\n",
- " return output\n",
- "def report_header(exclude_params):\n",
- " print(\n",
- " TextColor.HEADER\n",
- " + report_header_content(title)\n",
- " + TextColor.ENDC\n",
- " )\n",
- "def report_line(record, agg):\n",
- " accuracy = record[5]\n",
- " expected = record[10]\n",
- " if accuracy < expected: \n",
- " agg['worse'] += 1\n",
- " sign = '-'\n",
- " elif accuracy > expected:\n",
- " agg['better'] += 1\n",
- " sign = '+'\n",
- " else:\n",
- " agg['equal'] +=1\n",
- " sign = '='\n",
- " output = f\"{record[0]:%Y-%m-%d} {str(record[1]):>8s} {record[2]:10s} {record[3]:10s} {record[4]:30s}\"\n",
- " output += f\" {record[6]:3d} {record[7]:3d} {accuracy:8.7f} {expected:8.7f}Â {sign}\"\n",
- " if not exclude_parameters:\n",
- " output += f\" {record[8]}\"\n",
- " return output\n",
- "def report_footer(agg):\n",
- " print(\n",
- " TextColor.GREEN\n",
- " + f\"we have better results {agg['better']:2d} times\"\n",
- " )\n",
- " print(\n",
- " TextColor.RED\n",
- " + f\"we have worse results {agg['worse']:2d} times\"\n",
- " )\n",
- " print(\n",
- " TextColor.MAGENTA\n",
- " + f\"we have equal results {agg['equal']:2d} times\"\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[95m\n",
- "******************************************************************************************************\n",
- "* Best Hyperparameters found for datasets -- stree classifier -- *\n",
- "******************************************************************************************************\n",
- "\n",
- "Date Time Type Classifier Dataset Nor Std Accuracy Reference \n",
- "========== ======== ========== ========== ============================== === === ========= =========== \u001b[0m\n",
- "\u001b[95m2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280Â +\n",
- "\u001b[94m2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000Â +\n",
- "\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-diag 1 0 0.9789320 0.9743450Â +\n",
- "\u001b[94m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-prog 1 0 0.8284620 0.7993400Â +\n",
- "\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc 1 0 0.9656940 0.9702560Â -\n",
- "\u001b[94m2020-11-13 12:04:26 crossval stree breast-cancer 1 0 0.7308530 0.7382400Â -\n",
- "\u001b[95m2020-11-13 12:04:26 crossval stree cardiotocography-10clases 1 0 0.6665220 0.8277610Â -\n",
- "\u001b[94m2020-11-13 12:04:27 crossval stree cardiotocography-3clases 1 0 0.8480740 0.9201340Â -\n",
- "\u001b[91m*No results found for conn-bench-sonar-mines-rocks\n",
- "\u001b[95m2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410Â -\n",
- "\u001b[94m2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780Â +\n",
- "\u001b[95m2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270Â -\n",
- "\u001b[94m2020-11-13 12:04:27 crossval stree fertility 1 0 0.8800000 0.8840000Â -\n",
- "\u001b[95m2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540Â +\n",
- "\u001b[94m2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750Â +\n",
- "\u001b[95m2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030Â +\n",
- "\u001b[94m2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280Â +\n",
- "\u001b[95m2020-11-13 12:04:28 crossval stree ionosphere 1 0 0.9487320 0.9442150Â +\n",
- "\u001b[94m2020-11-13 12:04:28 crossval stree iris 0 0 0.9800000 0.9786560Â +\n",
- "\u001b[95m2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000Â +\n",
- "\u001b[91m*No results found for libras\n",
- "\u001b[91m*No results found for low-res-spect\n",
- "\u001b[94m2020-11-13 12:15:39 crossval stree lymphography 1 0 0.8648280 0.8554050Â +\n",
- "\u001b[95m2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720Â +\n",
- "\u001b[91m*No results found for molec-biol-promoter\n",
- "\u001b[91m*No results found for musk-1\n",
- "\u001b[94m2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630Â -\n",
- "\u001b[95m2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630Â -\n",
- "\u001b[94m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330Â -\n",
- "\u001b[95m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_states_5b 1 0 0.8453610 0.9315790Â -\n",
- "\u001b[94m2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210Â -\n",
- "\u001b[95m2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880Â +\n",
- "\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860Â +\n",
- "\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-REL-L 1 0 0.6761900 0.6959290Â -\n",
- "\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-SPAN 1 0 0.6771930 0.6891300Â -\n",
- "\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700Â +\n",
- "\u001b[94m2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790Â -\n",
- "\u001b[95m2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420Â +\n",
- "\u001b[94m2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030Â +\n",
- "\u001b[95m2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810Â +\n",
- "\u001b[94m2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000Â +\n",
- "\u001b[95m2020-11-13 12:21:08 crossval stree statlog-heart 1 0 0.8481480 0.8422990Â +\n",
- "\u001b[94m2020-11-13 12:21:12 crossval stree statlog-image 1 0 0.9593070 0.9761940Â -\n",
- "\u001b[95m2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730Â +\n",
- "\u001b[91m*No results found for synthetic-control\n",
- "\u001b[94m2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850Â +\n",
- "\u001b[95m2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530Â -\n",
- "\u001b[94m2020-11-13 12:21:14 crossval stree wine 1 0 0.9777780 0.9932810Â -\n",
- "\u001b[95m2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850Â +\n",
- "\u001b[92mwe have better results 25 times\n",
- "\u001b[91mwe have worse results 18 times\n",
- "\u001b[95mwe have equal results 0 times\n"
- ]
- }
- ],
- "source": [
- "report_header(title)\n",
- "color = TextColor.LINE1\n",
- "agg = {'equal': 0, 'better': 0, 'worse': 0}\n",
- "for dataset in dt:\n",
- " record = find_best(dataset[0])\n",
- " if record is None:\n",
- " print(TextColor.FAIL + f\"*No results found for {dataset[0]}\")\n",
- " else:\n",
- " color = TextColor.MAGENTA if color == TextColor.LINE1 else TextColor.LINE1\n",
- " print(color + report_line(record, agg))\n",
- "report_footer(agg)"
- ]
- },
- {
- "source": [
- "******************************************************************************************************\n",
- "* Best Hyperparameters found for datasets -- any classifier -- *\n",
- "******************************************************************************************************\n",
- "\n",
- "Date Time Type Classifier Dataset Nor Std Accuracy Reference \n",
- "========== ======== ========== ========== ============================== === === ========= =========== \n",
- "\n",
- "2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280 +\n",
- "2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000 +\n",
- "2020-11-16 13:34:47 gridsearch bagging breast-cancer-wisc-diag 1 1 0.9806860 0.9743450 +\n",
- "2020-11-11 23:10:52 gridsearch bagging breast-cancer-wisc-prog 1 1 0.8337180 0.7993400 +\n",
- "2020-11-16 13:34:49 gridsearch bagging breast-cancer-wisc 1 1 0.9699790 0.9702560 -\n",
- "2020-11-18 23:04:30 gridsearch odte breast-cancer 1 1 0.7449490 0.7382400 +\n",
- "2020-11-16 13:37:13 gridsearch bagging cardiotocography-10clases 1 1 0.7022580 0.8277610 -\n",
- "2020-11-16 13:35:23 gridsearch bagging cardiotocography-3clases 1 1 0.8490220 0.9201340 -\n",
- "*No results found for conn-bench-sonar-mines-rocks\n",
- "2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410 -\n",
- "2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780 +\n",
- "2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270 -\n",
- "2020-11-19 15:22:26 gridsearch bagging fertility 1 1 0.8900000 0.8840000 +\n",
- "2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540 +\n",
- "2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750 +\n",
- "2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030 +\n",
- "2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280 +\n",
- "2020-11-16 13:34:59 gridsearch bagging ionosphere 1 1 0.9515490 0.9442150 +\n",
- "2020-11-03 18:52:15 gridsearch odte iris 1 0 0.9933330 0.9786560 +\n",
- "2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000 +\n",
- "*No results found for libras\n",
- "*No results found for low-res-spect\n",
- "2020-11-16 13:35:08 gridsearch bagging lymphography 1 1 0.8781610 0.8554050 +\n",
- "2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720 +\n",
- "2020-11-16 22:17:03 gridsearch bagging molec-biol-promoter 1 1 0.9060610 0.8182690 +\n",
- "*No results found for musk-1\n",
- "2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630 -\n",
- "2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630 -\n",
- "2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330 -\n",
- "2020-11-16 13:35:41 gridsearch bagging oocytes_trisopterus_states_5b 1 1 0.8540800 0.9315790 -\n",
- "2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210 -\n",
- "2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880 +\n",
- "2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860 +\n",
- "2020-11-16 13:36:41 gridsearch bagging pittsburg-bridges-REL-L 1 1 0.6766670 0.6959290 -\n",
- "2020-11-16 13:22:50 gridsearch adaBoost pittsburg-bridges-SPAN 1 1 0.7210530 0.6891300 +\n",
- "2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700 +\n",
- "2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790 -\n",
- "2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420 +\n",
- "2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030 +\n",
- "2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810 +\n",
- "2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000 +\n",
- "2020-11-16 13:36:12 gridsearch bagging statlog-heart 1 1 0.8518520 0.8422990 +\n",
- "2020-11-16 14:04:13 gridsearch bagging statlog-image 1 1 0.9627710 0.9761940 -\n",
- "2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730 +\n",
- "*No results found for synthetic-control\n",
- "2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850 +\n",
- "2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530 -\n",
- "2020-06-26 11:03:03 gridsearch odte wine 0 0 0.9800000 0.9932810 -\n",
- "2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850 +\n",
- "we have better results 29 times\n",
- "we have worse results 15 times\n",
- "we have equal results 0 times"
- ],
- "cell_type": "markdown",
- "metadata": {}
- }
- ]
-}
\ No newline at end of file
diff --git a/test_validation.ipynb b/test_validation.ipynb
index ec985ad..27851e1 100644
--- a/test_validation.ipynb
+++ b/test_validation.ipynb
@@ -1,25 +1,4 @@
{
- "metadata": {
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.2-final"
- },
- "orig_nbformat": 2,
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
@@ -63,7 +42,16 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+ " warnings.warn(\"Liblinear failed to converge, increase \"\n"
+ ]
+ }
+ ],
"source": [
"clf = Stree(**parameters)\n",
"results = cross_validate(clf, X, y, n_jobs=1)"
@@ -75,16 +63,16 @@
"metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
- "{'fit_time': array([0.00772715, 0.03221297, 0.01418114, 0.06252027, 0.05369782]),\n",
- " 'score_time': array([0.00048399, 0.00044394, 0.00045371, 0.00051093, 0.00044894]),\n",
+ "{'fit_time': array([0.0078361 , 0.03171897, 0.01422501, 0.06850815, 0.05387974]),\n",
+ " 'score_time': array([0.0005939 , 0.00044203, 0.00043583, 0.00050902, 0.00044012]),\n",
" 'test_score': array([0.4047619 , 0.61904762, 0.66666667, 0.92682927, 0.58536585])}"
]
},
+ "execution_count": 5,
"metadata": {},
- "execution_count": 5
+ "output_type": "execute_result"
}
],
"source": [
@@ -97,14 +85,14 @@
"metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"0.640534262485482"
]
},
+ "execution_count": 6,
"metadata": {},
- "execution_count": 6
+ "output_type": "execute_result"
}
],
"source": [
@@ -118,11 +106,11 @@
"outputs": [],
"source": [
"# 864 modelos por cada dataset\n",
- "C = [0.05, 0.2, 0.55, 7, 55, 1e4]\n",
- "max_iter = [1e4, 1e5, 1e6]\n",
- "gamma = [1e-1, 1, 1e1]\n",
- "max_features = [None, \"auto\"]\n",
- "split_criteria = [\"impurity\", \"max_samples\"]\n",
+ "C = [0.2, 7, 55]\n",
+ "max_iter = [1e4]\n",
+ "gamma = [\"scale\"] #[1e-1, 1, 1e1]\n",
+ "max_features = [\"auto\"]\n",
+ "split_criteria = [\"impurity\"]\n",
"param_grid = [\n",
" {\n",
" \"random_state\": [1],\n",
@@ -159,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
- "param_grid = [\n",
+ "param2_grid = [\n",
" {\n",
" \"random_state\": [1],\n",
" \"kernel\": [\"poly\"],\n",
@@ -177,33 +165,235 @@
"metadata": {},
"outputs": [
{
- "output_type": "execute_result",
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fitting 2 folds for each of 12 candidates, totalling 24 fits\n",
+ "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.548, total= 6.4min\n",
+ "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 6.4min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.346, total= 9.2min\n",
+ "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 15.7min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.673, total=17.0min\n",
+ "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 32.7min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.356, total=10.9min\n",
+ "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 43.6min remaining: 0.0s\n",
+ "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+ " warnings.warn(\"Liblinear failed to converge, increase \"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 7.3min\n",
+ "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 50.9min remaining: 0.0s\n",
+ "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+ " warnings.warn(\"Liblinear failed to converge, increase \"\n",
+ "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+ " warnings.warn(\"Liblinear failed to converge, increase \"\n",
+ "/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+ " warnings.warn(\"Liblinear failed to converge, increase \"\n",
+ "[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 55.8min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.317, total= 4.9min\n",
+ "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total=33.4min\n",
+ "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 89.2min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 2.8min\n",
+ "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 92.0min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.529, total= 1.9min\n",
+ "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 93.8min remaining: 0.0s\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.433, total= 2.8min\n",
+ "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.519, total= 55.6s\n",
+ "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.394, total= 55.7s\n",
+ "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.567, total= 5.5min\n",
+ "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 3.7min\n",
+ "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 5.5min\n",
+ "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.442, total= 4.6min\n",
+ "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.577, total= 2.7min\n",
+ "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.510, total=15.6min\n",
+ "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.452, total= 2.8min\n",
+ "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total= 4.7min\n",
+ "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.596, total= 56.0s\n",
+ "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.404, total= 2.8min\n",
+ "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 1.8min\n",
+ "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
+ "[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.375, total= 2.7min\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=1)]: Done 24 out of 24 | elapsed: 151.8min finished\n"
+ ]
+ },
+ {
"data": {
"text/plain": [
- "GridSearchCV(estimator=Stree(), n_jobs=1,\n",
- " param_grid=[{'C': [55], 'degree': [5, 7], 'kernel': ['poly'],\n",
- " 'max_features': [None, 'auto'], 'random_state': [1],\n",
- " 'split_criteria': ['impurity', 'max_samples']}])"
+ "GridSearchCV(cv=2, estimator=Stree(), n_jobs=1,\n",
+ " param_grid=[{'C': [0.2, 7, 55], 'max_features': ['auto'],\n",
+ " 'max_iter': [10000.0], 'random_state': [1],\n",
+ " 'split_criteria': ['impurity']},\n",
+ " {'C': [0.2, 7, 55], 'gamma': ['scale'],\n",
+ " 'kernel': ['rbf'], 'max_features': ['auto'],\n",
+ " 'max_iter': [10000.0], 'random_state': [1],\n",
+ " 'split_criteria': ['impurity']},\n",
+ " {'C': [0.2, 7, 55], 'degree': [3, 5],\n",
+ " 'gamma': ['scale'], 'kernel': ['poly'],\n",
+ " 'max_features': ['auto'], 'max_iter': [10000.0],\n",
+ " 'random_state': [1],\n",
+ " 'split_criteria': ['impurity']}],\n",
+ " verbose=10)"
]
},
+ "execution_count": 9,
"metadata": {},
- "execution_count": 9
+ "output_type": "execute_result"
}
],
"source": [
"clf = Stree()\n",
- "model = GridSearchCV(clf, n_jobs=1, param_grid=param_grid)\n",
+ "model = GridSearchCV(clf, n_jobs=1, verbose=10, param_grid=param_grid, cv=2)\n",
"model.fit(X, y)"
]
},
{
- "source": [
- "print(model.cv_results_['params'][model.best_index_])"
- ],
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 10,
"metadata": {
"tags": []
- }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'C': 7, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly', 'max_features': 'auto', 'max_iter': 10000.0, 'random_state': 1, 'split_criteria': 'impurity'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(model.cv_results_['params'][model.best_index_])"
+ ]
},
{
"cell_type": "code",
@@ -211,10 +401,10 @@
"metadata": {},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
- "0.6448315911730547\n"
+ "0.5432692307692307\n"
]
}
],
@@ -224,12 +414,12 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"\n"
]
@@ -410,7 +600,6 @@
"metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"[{'n_estimators': [50],\n",
@@ -441,8 +630,9 @@
" 'base_estimator__max_features': [None, 'auto']}]"
]
},
+ "execution_count": 3,
"metadata": {},
- "execution_count": 3
+ "output_type": "execute_result"
}
],
"source": [
@@ -455,8 +645,8 @@
"metadata": {},
"outputs": [
{
- "output_type": "stream",
"name": "stdout",
+ "output_type": "stream",
"text": [
"{'n_estimators': [50], 'algorithm': ['SAMME'], 'base_estimator__C': 7, 'base_estimator__degree': 7, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'poly', 'base_estimator__max_features': 'auto', 'base_estimator__max_iter': 10000.0, 'base_estimator__random_state': 1, 'base_estimator__split_criteria': 'impurity'}\n"
]
@@ -473,14 +663,14 @@
"metadata": {},
"outputs": [
{
- "output_type": "execute_result",
"data": {
"text/plain": [
"'stree'"
]
},
+ "execution_count": 5,
"metadata": {},
- "execution_count": 5
+ "output_type": "execute_result"
}
],
"source": [
@@ -494,5 +684,26 @@
"outputs": [],
"source": []
}
- ]
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
}
\ No newline at end of file