Add results report

Add directory of datasets
Add validation test
2020-11-24 01:42:41 +01:00
parent 5611e5bc01
commit b14edf4303
7 changed files with 985 additions and 413 deletions

115
analysis_mysql.py Normal file

@@ -0,0 +1,115 @@
# Report the best stored result of each model per dataset, compared against the reference accuracy.
from experimentation.Sets import Datasets
from experimentation.Utils import TextColor, MySQL

models = ["stree", "odte", "adaBoost", "bagging"]
title = "Best model results"
lengths = (30, 9, 11, 11, 11, 11)


def find_best(dataset, classifier):
    # Return the best stored result for a dataset, optionally restricted to one classifier.
    cursor = database.cursor(buffered=True)
    if classifier == "any":
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' "
        )
    else:
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' and classifier"
            f"='{classifier}'"
        )
    command += (
        " order by r.dataset, accuracy desc, classifier desc, type, date, time"
    )
    cursor.execute(command)
    return cursor.fetchone()


def report_header_content(title):
    length = sum(lengths) + len(lengths) - 1
    output = "\n" + "*" * length + "\n"
    num = (length - len(title) - 2) // 2
    num2 = length - len(title) - 2 - 2 * num
    output += "*" + " " * num + title + " " * (num + num2) + "*\n"
    output += "*" * length + "\n\n"
    lines = ""
    for item, data in enumerate(fields):
        output += f"{fields[item]:{lengths[item]}} "
        lines += "=" * lengths[item] + " "
    output += f"\n{lines}"
    return output


def report_header(title):
    print(TextColor.HEADER + report_header_content(title) + TextColor.ENDC)


def report_line(line):
    # The dataset cell carries a color escape sequence, hence the extra width.
    output = f"{line['dataset']:{lengths[0] + 5}s} "
    data = models.copy()
    data.insert(0, "reference")
    for key, model in enumerate(data):
        output += f"{line[model]:{lengths[key + 1]}s} "
    return output


def report_footer(agg):
    print(TextColor.GREEN + f"we have better results {agg['better']:2d} times")
    print(TextColor.RED + f"we have worse results {agg['worse']:2d} times")
    color = TextColor.LINE1
    for item in models:
        print(color + f"{item:10s} used {agg[item]:2d} times")
        color = (
            TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
        )


database = MySQL.get_connection()
dt = Datasets(False, False, "tanveer")
fields = ("Dataset", "Reference")
for model in models:
    fields += (f"{model}",)
report_header(title)
color = TextColor.LINE1
agg = {}
for item in [
    "better",
    "worse",
] + models:
    agg[item] = 0
for dataset in dt:
    find_one = False
    line = {"dataset": color + dataset[0]}
    record = find_best(dataset[0], "any")
    max_accuracy = 0.0 if record is None else record[5]
    for model in models:
        record = find_best(dataset[0], model)
        if record is None:
            line[model] = color + "-" * 9 + " "
        else:
            reference = record[10]
            accuracy = record[5]
            find_one = True
            agg[model] += 1
            if accuracy > reference:
                sign = "+"
                agg["better"] += 1
            else:
                sign = "-"
                agg["worse"] += 1
            item = f"{accuracy:9.7} {sign}"
            line["reference"] = f"{reference:9.7}"
            line[model] = (
                TextColor.GREEN + TextColor.BOLD + item + TextColor.ENDC
                if accuracy == max_accuracy
                else color + item
            )
    if not find_one:
        print(TextColor.FAIL + f"*No results found for {dataset[0]}")
    else:
        color = (
            TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
        )
        print(report_line(line))
report_footer(agg)
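Both scripts in this commit rely on the MySQL and TextColor helpers from experimentation.Utils, which are not part of the diff. A minimal sketch of what they could look like, assuming MySQL.get_connection() simply wraps the connection that the notebook diff further down replaces, and that TextColor is a palette of ANSI escape constants (the exact codes are assumptions):

# Hypothetical sketch of the experimentation/Utils.py helpers used above; not the committed code.
import mysql.connector


class MySQL:
    @staticmethod
    def get_connection():
        # Assumed to wrap the connection that was previously inlined in the notebooks.
        return mysql.connector.connect(
            host="atenea.rmontanana.es",
            port=31428,
            user="stree",
            password="xtree",
            database="stree",
        )


class TextColor:
    # ANSI escape constants; these particular codes are assumptions.
    HEADER = MAGENTA = "\033[95m"
    LINE1 = "\033[94m"
    LINE2 = "\033[96m"
    GREEN = SUCCESS = "\033[92m"
    RED = FAIL = "\033[91m"
    BOLD = "\033[1m"
    ENDC = "\033[0m"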

32
dir_data.py Normal file

@@ -0,0 +1,32 @@
# Validate that every folder under data/tanveer contains a <name>_R.dat file
# and report each dataset's feature-matrix shape.
import os

import pandas as pd
import numpy as np

from experimentation.Utils import TextColor
from experimentation.Sets import Datasets

path = os.path.join(os.getcwd(), "data/tanveer")
color = TextColor.LINE1
dt = np.array(list(Datasets(False, False, "tanveer")), dtype="object")
dt = dt[:, 0]
good = bad = 0
for folder in sorted(os.listdir(path)):
    file_name = os.path.join(path, folder, f"{folder}_R.dat")
    try:
        data = pd.read_csv(
            file_name,
            sep="\t",
            index_col=0,
        )
        X = data.drop("clase", axis=1).to_numpy()
        y = data["clase"].to_numpy()
        # Mark datasets that belong to the "tanveer" selection with "*".
        sign = "*" if folder in dt else "-"
        print(color + f"{folder:30s} {str(X.shape):>10s} {sign}")
        color = (
            TextColor.LINE1 if color == TextColor.LINE2 else TextColor.LINE2
        )
        good += 1
    except FileNotFoundError:
        print(TextColor.FAIL + f"{folder} not found.")
        bad += 1
print(TextColor.SUCCESS + f"{good:3d} datasets Ok.")
print(TextColor.FAIL + f"{bad:3d} datasets Wrong.")
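dir_data.py expects each dataset at data/tanveer/<name>/<name>_R.dat as a tab-separated file with an index column and a class column named clase. A quick way to exercise the check with a synthetic dataset (the folder name and values below are made up for illustration):

# Create a dummy dataset in the layout dir_data.py expects (illustration only).
import os

import pandas as pd

folder = os.path.join(os.getcwd(), "data", "tanveer", "dummy")  # hypothetical dataset name
os.makedirs(folder, exist_ok=True)
frame = pd.DataFrame({"f1": [0.1, 0.2], "f2": [1.0, 2.0], "clase": [0, 1]})
frame.to_csv(os.path.join(folder, "dummy_R.dat"), sep="\t")
# Running dir_data.py should now list "dummy" with shape (2, 2) and a "-" mark,
# because "dummy" is not part of the Datasets "tanveer" selection.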

182
kite_tutorial.ipynb Normal file

@@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Logo](https://kite.com/kite-public/kite-plus-jlab-scaled.png)\n",
"\n",
"### Welcome to Kite's JupyterLab extension tutorial\n",
"\n",
"Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n",
"\n",
"> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n",
"\n",
"#### Before we start...\n",
"\n",
"Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n",
"\n",
"![Kite icon](https://kite.com/kite-public/kite-status.png)\n",
"\n",
"* If it says `Kite: not running`, please start the Kite Engine first.\n",
"* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 1: Autocompletions\n",
"\n",
"**Step 1a**<br/>\n",
"Run the code cell below with all the necessary imports 👇"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Run me!\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Step 1b**<br/>\n",
"Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n",
"\n",
"![Autocompletions](https://www.kite.com/kite-public/kite-jlab-autocompletions.gif)\n",
"\n",
"> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n",
"\n",
"> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n",
"\n",
"Try typing out the code yourself to see Kite's autocompletions in action.<br/>\n",
"\n",
"```python\n",
"x = np.linspace(-np.pi, np.pi, 50)\n",
"y = np.sin(x)\n",
"plt.plot(x, y)\n",
"```\n",
"\n",
"Type this code in the cell below 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Put code in me\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 2: Manual completions\n",
"\n",
"You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n",
"\n",
"**Step 2a**<br/>\n",
"First, run the code cell below to get some sample data to store in a `DataFrame` 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run me!\n",
"url = 'https://kite.com/kite-public/iris.csv'\n",
"df = pd.read_csv(url)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Step 2b**<br/>\n",
"Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n",
"\n",
"![Manual completions](https://www.kite.com/kite-public/kite-jlab-manual-completions.gif)\n",
"\n",
"Try requesting kernel completions yourself.\n",
"\n",
"```python\n",
"plt.scatter(df['sepal_length'], df['sepal_width'])\n",
"```\n",
"\n",
"Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Put code in me\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 3: Copilot Documentation\n",
"\n",
"If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n",
"\n",
"![Autosearch](https://www.kite.com/kite-public/kite-jlab-autosearch.gif)\n",
"\n",
"**Step 3a**<br/>\n",
"Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### The End\n",
"\n",
"Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n",
"\n",
"In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n",
"\n",
"Happy coding!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -30,65 +30,9 @@
"import json\n",
"import sqlite3\n",
"import mysql.connector\n",
"from experimentation.Utils import MySQL\n",
"\n",
"database = mysql.connector.connect(\n",
" host=\"atenea.rmontanana.es\",\n",
" port=31428,\n",
" user=\"stree\",\n",
" password=\"xtree\",\n",
" database=\"stree\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s) ('2020-11-18', '12:03:17', 0.99786, '', 'stree', True, False, '{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}')\n"
]
}
],
"source": [
"cursor = database.cursor()\n",
"date = '2020-11-18'\n",
"time = '12:03:17'\n",
"accuracy = 0.99786\n",
"dataset = ''\n",
"clf = 'stree'\n",
"norm = True\n",
"stand = False\n",
"parameters = {\"C\": .15, \"degree\": 6, \"gamma\": .7, \"kernel\": \"poly\", \"max_features\": None, \"max_iter\": 100000.0, \"random_state\": 0}\n",
"command_insert = \"replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)\"\n",
"values = (date, time, accuracy, dataset, clf, norm, stand, json.dumps(parameters))\n",
"print(command_insert, values)\n",
"#cursor.execute(command_insert, values)\n",
"database.commit()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'{\"C\": 0.15, \"degree\": 6, \"gamma\": 0.7, \"kernel\": \"poly\", \"max_features\": null, \"max_iter\": 100000.0, \"random_state\": 0}'"
]
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"import json\n",
"json.dumps(parameters)"
"database = MySQL.get_connection()"
]
},
{
@@ -473,8 +417,7 @@
" values =(date, time, record_type, record['test_score'], record['dataset'], model, record['normalize'], record['standardize'], record['parameters'])\n",
" print(f\"{date} - {time} - {record['dataset']}\")\n",
" cursor.execute(command_insert, values)\n",
" database.commit()\n",
"\n"
" database.commit()"
]
},
{
@@ -566,13 +509,6 @@
" cursor.execute(command, values)\n",
"database.commit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}

386
param_analysis.ipynb Normal file

@@ -0,0 +1,386 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sqlite3\n",
"import mysql.connector\n",
"from experimentation.Utils import MySQL\n",
"from experimentation.Sets import Datasets\n",
"\n",
"database = MySQL.get_connection()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"classifier = 'bagging'\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def find_best(dataset):\n",
" cursor = database.cursor(buffered=True)\n",
" if classifier == \"any\":\n",
" command = (\n",
" f\"select * from results r inner join reference e on \"\n",
" f\"r.dataset=e.dataset where r.dataset='{dataset}' \"\n",
" )\n",
" else:\n",
" command = (\n",
" f\"select * from results r inner join reference e on \"\n",
" f\"r.dataset=e.dataset where r.dataset='{dataset}' and classifier\"\n",
" f\"='{classifier}'\"\n",
" )\n",
" command += (\n",
" \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n",
" )\n",
" cursor.execute(command)\n",
" return cursor.fetchone()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def find_values(parameter, kernel_chosen):\n",
" result = []\n",
" for data in agg[kernel_chosen]:\n",
" base_parameter = f\"base_estimator__{parameter}\"\n",
" if parameter in data.keys():\n",
" result.append(data[parameter])\n",
" if base_parameter in data.keys():\n",
" result.append(data[base_parameter])\n",
" try:\n",
" result_ordered = sorted(result)\n",
" return result_ordered\n",
" except TypeError:\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Aggregating data ..................................................\n",
"stree has 0 results\n",
"adaBoost has 0 results\n",
"bagging has 43 results\n",
"odte has 0 results\n"
]
}
],
"source": [
"dt = Datasets(False, False, 'tanveer')\n",
"models = ['stree', 'adaBoost', 'bagging', 'odte']\n",
"agg_models = {}\n",
"for i in models:\n",
" agg_models[i] = 0\n",
"agg = {'linear': [], 'rbf': [], 'poly': []}\n",
"print(\"Aggregating data .\", end='')\n",
"for dataset in dt:\n",
" result = find_best(dataset[0])\n",
" print('.', end='')\n",
" if result:\n",
" agg_models[result[3]] += 1\n",
" json_result = json.loads(result[8])\n",
" key = json_result['kernel'] if 'kernel' in json_result.keys() else 'linear'\n",
" agg[key].append(json_result)\n",
"print('')\n",
"for i in models:\n",
" print(f\"{i:10} has {agg_models[i]:2} results\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Used kernel linear: 43 times\nUsed kernel poly: 0 times\nUsed kernel rbf: 0 times\n"
]
}
],
"source": [
"print(\"Used kernel linear: \", len(agg['linear']), ' times')\n",
"print(\"Used kernel poly: \", len(agg['poly']), ' times')\n",
"print(\"Used kernel rbf: \", len(agg['rbf']), ' times')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"find_values('gamma', 'poly')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.05,\n",
" 0.05,\n",
" 0.05,\n",
" 0.05,\n",
" 0.05,\n",
" 0.05,\n",
" 0.2,\n",
" 0.2,\n",
" 0.2,\n",
" 0.2,\n",
" 0.2,\n",
" 0.2,\n",
" 0.2,\n",
" 0.55,\n",
" 0.55,\n",
" 0.55,\n",
" 1.0,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 7,\n",
" 55,\n",
" 55,\n",
" 55,\n",
" 55,\n",
" 55,\n",
" 55,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0,\n",
" 10000.0]"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"find_values('C', 'linear')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"find_values('C', 'poly')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"find_values('C', 'rbf')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.2,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.2,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.2,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.2,\n",
" None,\n",
" 0.6,\n",
" None,\n",
" 0.6,\n",
" 'auto',\n",
" 0.6,\n",
" 'auto']"
]
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"find_values('max_features', 'linear')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}
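The unsorted max_features list in the last output above is the TypeError fallback in find_values at work: that parameter mixes floats, None and the string 'auto', which Python 3 cannot order. A minimal illustration of the fallback:

# Mixed parameter values, such as those returned for max_features.
values = [0.6, None, "auto"]
try:
    values = sorted(values)
except TypeError:
    # float, NoneType and str are not mutually comparable in Python 3,
    # so find_values returns the values in query order instead.
    pass
print(values)  # [0.6, None, 'auto']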

View File

@@ -1,290 +0,0 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sqlite3\n",
"import mysql.connector\n",
"from experimentation.Sets import Datasets\n",
"from experimentation.Utils import TextColor\n",
"\n",
"\n",
"database = mysql.connector.connect(\n",
" host=\"atenea.rmontanana.es\",\n",
" port=31428,\n",
" user=\"stree\",\n",
" password=\"xtree\",\n",
" database=\"stree\",\n",
")\n",
"dt = Datasets(False, False, 'tanveer')\n",
"\n",
"#\n",
"# CONFIGURABLE REPORT PARAMETERS\n",
"#\n",
"exclude_parameters = True\n",
"# classififer can be {any, stree, bagging, adaBoost, odte}\n",
"classifier = \"stree\"\n",
"title = \"Best Hyperparameters found for datasets\""
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def find_best(dataset):\n",
" cursor = database.cursor(buffered=True)\n",
" if classifier == \"any\":\n",
" command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' \"\n",
" else:\n",
" command = f\"select * from results r inner join reference e on r.dataset=e.dataset where r.dataset='{dataset}' and classifier='{classifier}'\" \n",
" command += \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n",
" cursor.execute(command)\n",
" return cursor.fetchone()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n",
"if exclude_parameters:\n",
" fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference')\n",
" lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11)\n",
"else:\n",
" fields = ('Date', 'Time', 'Type', 'Classifier', 'Dataset', 'Nor', 'Std', 'Accuracy', 'Reference', 'Parameters')\n",
" lengths = (10, 8, 10, 10, 30, 3, 3, 9, 11, 30)\n",
"def report_header_content(title):\n",
" length = sum(lengths) + len(lengths) - 1\n",
" output = \"\\n\" + \"*\" * length + \"\\n\"\n",
" title = title + f\" -- {classifier} classifier --\"\n",
" num = (length - len(title) - 2) // 2\n",
" num2 = length - len(title) - 2 - 2 * num\n",
" output += \"*\" + \" \" * num + title + \" \" * (num + num2) + \"*\\n\"\n",
" output += \"*\" * length + \"\\n\\n\"\n",
" lines = \"\"\n",
" for item, data in enumerate(fields):\n",
" output += f\"{fields[item]:{lengths[item]}} \"\n",
" lines += \"=\" * lengths[item] + \" \"\n",
" output += f\"\\n{lines}\"\n",
" return output\n",
"def report_header(exclude_params):\n",
" print(\n",
" TextColor.HEADER\n",
" + report_header_content(title)\n",
" + TextColor.ENDC\n",
" )\n",
"def report_line(record, agg):\n",
" accuracy = record[5]\n",
" expected = record[10]\n",
" if accuracy < expected: \n",
" agg['worse'] += 1\n",
" sign = '-'\n",
" elif accuracy > expected:\n",
" agg['better'] += 1\n",
" sign = '+'\n",
" else:\n",
" agg['equal'] +=1\n",
" sign = '='\n",
" output = f\"{record[0]:%Y-%m-%d} {str(record[1]):>8s} {record[2]:10s} {record[3]:10s} {record[4]:30s}\"\n",
" output += f\" {record[6]:3d} {record[7]:3d} {accuracy:8.7f} {expected:8.7f} {sign}\"\n",
" if not exclude_parameters:\n",
" output += f\" {record[8]}\"\n",
" return output\n",
"def report_footer(agg):\n",
" print(\n",
" TextColor.GREEN\n",
" + f\"we have better results {agg['better']:2d} times\"\n",
" )\n",
" print(\n",
" TextColor.RED\n",
" + f\"we have worse results {agg['worse']:2d} times\"\n",
" )\n",
" print(\n",
" TextColor.MAGENTA\n",
" + f\"we have equal results {agg['equal']:2d} times\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"tags": []
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[95m\n",
"******************************************************************************************************\n",
"* Best Hyperparameters found for datasets -- stree classifier -- *\n",
"******************************************************************************************************\n",
"\n",
"Date Time Type Classifier Dataset Nor Std Accuracy Reference \n",
"========== ======== ========== ========== ============================== === === ========= =========== \u001b[0m\n",
"\u001b[95m2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280 +\n",
"\u001b[94m2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000 +\n",
"\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-diag 1 0 0.9789320 0.9743450 +\n",
"\u001b[94m2020-11-13 12:04:25 crossval stree breast-cancer-wisc-prog 1 0 0.8284620 0.7993400 +\n",
"\u001b[95m2020-11-13 12:04:25 crossval stree breast-cancer-wisc 1 0 0.9656940 0.9702560 -\n",
"\u001b[94m2020-11-13 12:04:26 crossval stree breast-cancer 1 0 0.7308530 0.7382400 -\n",
"\u001b[95m2020-11-13 12:04:26 crossval stree cardiotocography-10clases 1 0 0.6665220 0.8277610 -\n",
"\u001b[94m2020-11-13 12:04:27 crossval stree cardiotocography-3clases 1 0 0.8480740 0.9201340 -\n",
"\u001b[91m*No results found for conn-bench-sonar-mines-rocks\n",
"\u001b[95m2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410 -\n",
"\u001b[94m2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780 +\n",
"\u001b[95m2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270 -\n",
"\u001b[94m2020-11-13 12:04:27 crossval stree fertility 1 0 0.8800000 0.8840000 -\n",
"\u001b[95m2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540 +\n",
"\u001b[94m2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750 +\n",
"\u001b[95m2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030 +\n",
"\u001b[94m2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280 +\n",
"\u001b[95m2020-11-13 12:04:28 crossval stree ionosphere 1 0 0.9487320 0.9442150 +\n",
"\u001b[94m2020-11-13 12:04:28 crossval stree iris 0 0 0.9800000 0.9786560 +\n",
"\u001b[95m2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000 +\n",
"\u001b[91m*No results found for libras\n",
"\u001b[91m*No results found for low-res-spect\n",
"\u001b[94m2020-11-13 12:15:39 crossval stree lymphography 1 0 0.8648280 0.8554050 +\n",
"\u001b[95m2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720 +\n",
"\u001b[91m*No results found for molec-biol-promoter\n",
"\u001b[91m*No results found for musk-1\n",
"\u001b[94m2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630 -\n",
"\u001b[95m2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630 -\n",
"\u001b[94m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330 -\n",
"\u001b[95m2020-11-13 12:15:40 crossval stree oocytes_trisopterus_states_5b 1 0 0.8453610 0.9315790 -\n",
"\u001b[94m2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210 -\n",
"\u001b[95m2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880 +\n",
"\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860 +\n",
"\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-REL-L 1 0 0.6761900 0.6959290 -\n",
"\u001b[94m2020-11-13 12:15:41 crossval stree pittsburg-bridges-SPAN 1 0 0.6771930 0.6891300 -\n",
"\u001b[95m2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700 +\n",
"\u001b[94m2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790 -\n",
"\u001b[95m2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420 +\n",
"\u001b[94m2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030 +\n",
"\u001b[95m2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810 +\n",
"\u001b[94m2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000 +\n",
"\u001b[95m2020-11-13 12:21:08 crossval stree statlog-heart 1 0 0.8481480 0.8422990 +\n",
"\u001b[94m2020-11-13 12:21:12 crossval stree statlog-image 1 0 0.9593070 0.9761940 -\n",
"\u001b[95m2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730 +\n",
"\u001b[91m*No results found for synthetic-control\n",
"\u001b[94m2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850 +\n",
"\u001b[95m2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530 -\n",
"\u001b[94m2020-11-13 12:21:14 crossval stree wine 1 0 0.9777780 0.9932810 -\n",
"\u001b[95m2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850 +\n",
"\u001b[92mwe have better results 25 times\n",
"\u001b[91mwe have worse results 18 times\n",
"\u001b[95mwe have equal results 0 times\n"
]
}
],
"source": [
"report_header(title)\n",
"color = TextColor.LINE1\n",
"agg = {'equal': 0, 'better': 0, 'worse': 0}\n",
"for dataset in dt:\n",
" record = find_best(dataset[0])\n",
" if record is None:\n",
" print(TextColor.FAIL + f\"*No results found for {dataset[0]}\")\n",
" else:\n",
" color = TextColor.MAGENTA if color == TextColor.LINE1 else TextColor.LINE1\n",
" print(color + report_line(record, agg))\n",
"report_footer(agg)"
]
},
{
"source": [
"******************************************************************************************************\n",
"* Best Hyperparameters found for datasets -- any classifier -- *\n",
"******************************************************************************************************\n",
"\n",
"Date Time Type Classifier Dataset Nor Std Accuracy Reference \n",
"========== ======== ========== ========== ============================== === === ========= =========== \n",
"\n",
"2020-11-13 12:04:24 crossval stree balance-scale 1 0 0.9488000 0.9046280 +\n",
"2020-11-13 12:04:25 crossval stree balloons 1 0 0.8666670 0.6625000 +\n",
"2020-11-16 13:34:47 gridsearch bagging breast-cancer-wisc-diag 1 1 0.9806860 0.9743450 +\n",
"2020-11-11 23:10:52 gridsearch bagging breast-cancer-wisc-prog 1 1 0.8337180 0.7993400 +\n",
"2020-11-16 13:34:49 gridsearch bagging breast-cancer-wisc 1 1 0.9699790 0.9702560 -\n",
"2020-11-18 23:04:30 gridsearch odte breast-cancer 1 1 0.7449490 0.7382400 +\n",
"2020-11-16 13:37:13 gridsearch bagging cardiotocography-10clases 1 1 0.7022580 0.8277610 -\n",
"2020-11-16 13:35:23 gridsearch bagging cardiotocography-3clases 1 1 0.8490220 0.9201340 -\n",
"*No results found for conn-bench-sonar-mines-rocks\n",
"2020-11-13 12:04:27 crossval stree cylinder-bands 1 0 0.6326670 0.7691410 -\n",
"2020-11-13 12:04:27 crossval stree dermatology 1 0 0.9754540 0.9732780 +\n",
"2020-11-13 12:04:27 crossval stree echocardiogram 1 0 0.8472930 0.8485270 -\n",
"2020-11-19 15:22:26 gridsearch bagging fertility 1 1 0.8900000 0.8840000 +\n",
"2020-11-13 12:04:28 crossval stree haberman-survival 1 0 0.7646750 0.7392540 +\n",
"2020-11-13 12:04:28 crossval stree heart-hungarian 1 0 0.8299240 0.8204750 +\n",
"2020-11-13 12:04:28 crossval stree hepatitis 1 0 0.8645160 0.8232030 +\n",
"2020-11-13 12:04:28 crossval stree ilpd-indian-liver 1 0 0.7426910 0.7150280 +\n",
"2020-11-16 13:34:59 gridsearch bagging ionosphere 1 1 0.9515490 0.9442150 +\n",
"2020-11-03 18:52:15 gridsearch odte iris 1 0 0.9933330 0.9786560 +\n",
"2020-11-13 12:15:39 crossval stree led-display 1 0 0.7120000 0.7102000 +\n",
"*No results found for libras\n",
"*No results found for low-res-spect\n",
"2020-11-16 13:35:08 gridsearch bagging lymphography 1 1 0.8781610 0.8554050 +\n",
"2020-11-13 12:15:39 crossval stree mammographic 1 0 0.8293720 0.8274720 +\n",
"2020-11-16 22:17:03 gridsearch bagging molec-biol-promoter 1 1 0.9060610 0.8182690 +\n",
"*No results found for musk-1\n",
"2020-11-13 12:15:39 crossval stree oocytes_merluccius_nucleus_4d 1 0 0.8082210 0.8399630 -\n",
"2020-11-13 12:15:39 crossval stree oocytes_merluccius_states_2f 1 0 0.9119030 0.9299630 -\n",
"2020-11-13 12:15:40 crossval stree oocytes_trisopterus_nucleus_2f 1 0 0.7476910 0.8333330 -\n",
"2020-11-16 13:35:41 gridsearch bagging oocytes_trisopterus_states_5b 1 1 0.8540800 0.9315790 -\n",
"2020-11-13 12:15:40 crossval stree parkinsons 1 0 0.8461540 0.9202210 -\n",
"2020-11-13 12:15:41 crossval stree pima 1 0 0.7800020 0.7671880 +\n",
"2020-11-13 12:15:41 crossval stree pittsburg-bridges-MATERIAL 1 0 0.8861470 0.8642860 +\n",
"2020-11-16 13:36:41 gridsearch bagging pittsburg-bridges-REL-L 1 1 0.6766670 0.6959290 -\n",
"2020-11-16 13:22:50 gridsearch adaBoost pittsburg-bridges-SPAN 1 1 0.7210530 0.6891300 +\n",
"2020-11-13 12:15:41 crossval stree pittsburg-bridges-T-OR-D 1 0 0.9023810 0.8743700 +\n",
"2020-11-13 12:15:41 crossval stree planning 1 0 0.7255250 0.7255790 -\n",
"2020-11-13 12:15:41 crossval stree post-operative 1 0 0.7222220 0.7117420 +\n",
"2020-11-13 12:15:41 crossval stree seeds 1 0 0.9619050 0.9563030 +\n",
"2020-11-13 12:15:41 crossval stree statlog-australian-credit 1 0 0.6797100 0.6782810 +\n",
"2020-11-13 12:21:08 crossval stree statlog-german-credit 1 0 0.7620000 0.7562000 +\n",
"2020-11-16 13:36:12 gridsearch bagging statlog-heart 1 1 0.8518520 0.8422990 +\n",
"2020-11-16 14:04:13 gridsearch bagging statlog-image 1 1 0.9627710 0.9761940 -\n",
"2020-11-13 12:21:13 crossval stree statlog-vehicle 1 0 0.8014130 0.8006730 +\n",
"*No results found for synthetic-control\n",
"2020-11-13 12:21:13 crossval stree tic-tac-toe 1 0 0.9874350 0.9853850 +\n",
"2020-11-13 12:21:14 crossval stree vertebral-column-2clases 1 0 0.8290320 0.8491530 -\n",
"2020-06-26 11:03:03 gridsearch odte wine 0 0 0.9800000 0.9932810 -\n",
"2020-11-13 12:21:14 crossval stree zoo 1 0 0.9704760 0.9603850 +\n",
"we have better results 29 times\n",
"we have worse results 15 times\n",
"we have equal results 0 times"
],
"cell_type": "markdown",
"metadata": {}
}
]
}

View File

@@ -1,25 +1,4 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
@@ -63,7 +42,16 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" warnings.warn(\"Liblinear failed to converge, increase \"\n"
]
}
],
"source": [
"clf = Stree(**parameters)\n",
"results = cross_validate(clf, X, y, n_jobs=1)"
@@ -75,16 +63,16 @@
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'fit_time': array([0.00772715, 0.03221297, 0.01418114, 0.06252027, 0.05369782]),\n",
" 'score_time': array([0.00048399, 0.00044394, 0.00045371, 0.00051093, 0.00044894]),\n",
"{'fit_time': array([0.0078361 , 0.03171897, 0.01422501, 0.06850815, 0.05387974]),\n",
" 'score_time': array([0.0005939 , 0.00044203, 0.00043583, 0.00050902, 0.00044012]),\n",
" 'test_score': array([0.4047619 , 0.61904762, 0.66666667, 0.92682927, 0.58536585])}"
]
},
"execution_count": 5,
"metadata": {},
"execution_count": 5
"output_type": "execute_result"
}
],
"source": [
@@ -97,14 +85,14 @@
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.640534262485482"
]
},
"execution_count": 6,
"metadata": {},
"execution_count": 6
"output_type": "execute_result"
}
],
"source": [
@@ -118,11 +106,11 @@
"outputs": [],
"source": [
"# 864 modelos por cada dataset\n",
"C = [0.05, 0.2, 0.55, 7, 55, 1e4]\n",
"max_iter = [1e4, 1e5, 1e6]\n",
"gamma = [1e-1, 1, 1e1]\n",
"max_features = [None, \"auto\"]\n",
"split_criteria = [\"impurity\", \"max_samples\"]\n",
"C = [0.2, 7, 55]\n",
"max_iter = [1e4]\n",
"gamma = [\"scale\"] #[1e-1, 1, 1e1]\n",
"max_features = [\"auto\"]\n",
"split_criteria = [\"impurity\"]\n",
"param_grid = [\n",
" {\n",
" \"random_state\": [1],\n",
@@ -159,7 +147,7 @@
"metadata": {},
"outputs": [],
"source": [
"param_grid = [\n",
"param2_grid = [\n",
" {\n",
" \"random_state\": [1],\n",
" \"kernel\": [\"poly\"],\n",
@@ -177,33 +165,235 @@
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 2 folds for each of 12 candidates, totalling 24 fits\n",
"[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.548, total= 6.4min\n",
"[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 6.4min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=0.2, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.346, total= 9.2min\n",
"[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 15.7min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.673, total=17.0min\n",
"[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 32.7min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=7, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.356, total=10.9min\n",
"[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 43.6min remaining: 0.0s\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" warnings.warn(\"Liblinear failed to converge, increase \"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 7.3min\n",
"[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 50.9min remaining: 0.0s\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" warnings.warn(\"Liblinear failed to converge, increase \"\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" warnings.warn(\"Liblinear failed to converge, increase \"\n",
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
" warnings.warn(\"Liblinear failed to converge, increase \"\n",
"[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 55.8min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=55, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.317, total= 4.9min\n",
"[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total=33.4min\n",
"[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 89.2min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=0.2, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 2.8min\n",
"[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 92.0min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.529, total= 1.9min\n",
"[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 93.8min remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] C=7, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.433, total= 2.8min\n",
"[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.519, total= 55.6s\n",
"[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, gamma=scale, kernel=rbf, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.394, total= 55.7s\n",
"[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.567, total= 5.5min\n",
"[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.413, total= 3.7min\n",
"[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 5.5min\n",
"[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=0.2, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.442, total= 4.6min\n",
"[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.577, total= 2.7min\n",
"[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=7, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.510, total=15.6min\n",
"[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.452, total= 2.8min\n",
"[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=7, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.462, total= 4.7min\n",
"[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.596, total= 56.0s\n",
"[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, degree=3, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.404, total= 2.8min\n",
"[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.538, total= 1.8min\n",
"[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity \n",
"[CV] C=55, degree=5, gamma=scale, kernel=poly, max_features=auto, max_iter=10000.0, random_state=1, split_criteria=impurity, score=0.375, total= 2.7min\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 24 out of 24 | elapsed: 151.8min finished\n"
]
},
{
"data": {
"text/plain": [
"GridSearchCV(estimator=Stree(), n_jobs=1,\n",
" param_grid=[{'C': [55], 'degree': [5, 7], 'kernel': ['poly'],\n",
" 'max_features': [None, 'auto'], 'random_state': [1],\n",
" 'split_criteria': ['impurity', 'max_samples']}])"
"GridSearchCV(cv=2, estimator=Stree(), n_jobs=1,\n",
" param_grid=[{'C': [0.2, 7, 55], 'max_features': ['auto'],\n",
" 'max_iter': [10000.0], 'random_state': [1],\n",
" 'split_criteria': ['impurity']},\n",
" {'C': [0.2, 7, 55], 'gamma': ['scale'],\n",
" 'kernel': ['rbf'], 'max_features': ['auto'],\n",
" 'max_iter': [10000.0], 'random_state': [1],\n",
" 'split_criteria': ['impurity']},\n",
" {'C': [0.2, 7, 55], 'degree': [3, 5],\n",
" 'gamma': ['scale'], 'kernel': ['poly'],\n",
" 'max_features': ['auto'], 'max_iter': [10000.0],\n",
" 'random_state': [1],\n",
" 'split_criteria': ['impurity']}],\n",
" verbose=10)"
]
},
"execution_count": 9,
"metadata": {},
"execution_count": 9
"output_type": "execute_result"
}
],
"source": [
"clf = Stree()\n",
"model = GridSearchCV(clf, n_jobs=1, param_grid=param_grid)\n",
"model = GridSearchCV(clf, n_jobs=1, verbose=10, param_grid=param_grid, cv=2)\n",
"model.fit(X, y)"
]
},
{
"source": [
"print(model.cv_results_['params'][model.best_index_])"
],
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 10,
"metadata": {
"tags": []
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'C': 7, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly', 'max_features': 'auto', 'max_iter': 10000.0, 'random_state': 1, 'split_criteria': 'impurity'}\n"
]
}
],
"source": [
"print(model.cv_results_['params'][model.best_index_])"
]
},
{
"cell_type": "code",
@@ -211,10 +401,10 @@
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"0.6448315911730547\n"
"0.5432692307692307\n"
]
}
],
@@ -224,12 +414,12 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
@@ -410,7 +600,6 @@
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'n_estimators': [50],\n",
@@ -441,8 +630,9 @@
" 'base_estimator__max_features': [None, 'auto']}]"
]
},
"execution_count": 3,
"metadata": {},
"execution_count": 3
"output_type": "execute_result"
}
],
"source": [
@@ -455,8 +645,8 @@
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"{'n_estimators': [50], 'algorithm': ['SAMME'], 'base_estimator__C': 7, 'base_estimator__degree': 7, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'poly', 'base_estimator__max_features': 'auto', 'base_estimator__max_iter': 10000.0, 'base_estimator__random_state': 1, 'base_estimator__split_criteria': 'impurity'}\n"
]
@@ -473,14 +663,14 @@
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'stree'"
]
},
"execution_count": 5,
"metadata": {},
"execution_count": 5
"output_type": "execute_result"
}
],
"source": [
@@ -494,5 +684,26 @@
"outputs": [],
"source": []
}
]
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}