mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-15 23:46:03 +00:00
Refactor MySQL class and develop param_analysis
This commit is contained in:
@@ -7,26 +7,6 @@ title = "Best model results"
|
|||||||
lengths = (30, 9, 11, 11, 11, 11)
|
lengths = (30, 9, 11, 11, 11, 11)
|
||||||
|
|
||||||
|
|
||||||
def find_best(dataset, classifier):
    """Return the single best stored result row for *dataset*.

    Queries the ``results`` table joined with ``reference`` and returns
    the first row ordered by accuracy (descending).  ``classifier ==
    "any"`` searches across every classifier; otherwise the search is
    restricted to the given classifier name.

    NOTE(review): relies on a module-level ``database`` connection, and
    builds the SQL with f-string interpolation -- dataset/classifier
    values containing quotes would break the query; confirm inputs are
    trusted.
    """
    # buffered=True so fetchone() can be called without draining the set.
    cursor = database.cursor(buffered=True)
    if classifier == "any":
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' "
        )
    else:
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' and classifier"
            f"='{classifier}'"
        )
    # Best row first: highest accuracy wins, remaining keys break ties.
    command += (
        " order by r.dataset, accuracy desc, classifier desc, type, date, time"
    )
    cursor.execute(command)
    return cursor.fetchone()
|
|
||||||
|
|
||||||
|
|
||||||
def report_header_content(title):
|
def report_header_content(title):
|
||||||
length = sum(lengths) + len(lengths) - 1
|
length = sum(lengths) + len(lengths) - 1
|
||||||
output = "\n" + "*" * length + "\n"
|
output = "\n" + "*" * length + "\n"
|
||||||
@@ -99,10 +79,10 @@ for item in [
|
|||||||
for dataset in dt:
|
for dataset in dt:
|
||||||
find_one = False
|
find_one = False
|
||||||
line = {"dataset": color + dataset[0]}
|
line = {"dataset": color + dataset[0]}
|
||||||
record = find_best(dataset[0], "any")
|
record = dbh.find_best(dataset[0], "any")
|
||||||
max_accuracy = 0.0 if record is None else record[5]
|
max_accuracy = 0.0 if record is None else record[5]
|
||||||
for model in models:
|
for model in models:
|
||||||
record = find_best(dataset[0], model)
|
record = dbh.find_best(dataset[0], model)
|
||||||
if record is None:
|
if record is None:
|
||||||
line[model] = color + "-" * 9 + " "
|
line[model] = color + "-" * 9 + " "
|
||||||
else:
|
else:
|
||||||
|
@@ -12,7 +12,7 @@ from .Utils import TextColor
|
|||||||
|
|
||||||
class MySQL:
|
class MySQL:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.server = None
|
self._server = None
|
||||||
|
|
||||||
def get_connection(self):
|
def get_connection(self):
|
||||||
config_db = dict()
|
config_db = dict()
|
||||||
@@ -32,14 +32,35 @@ class MySQL:
|
|||||||
config_tunnel["ssh_address_or_host"] = make_tuple(
|
config_tunnel["ssh_address_or_host"] = make_tuple(
|
||||||
config_tunnel["ssh_address_or_host"]
|
config_tunnel["ssh_address_or_host"]
|
||||||
)
|
)
|
||||||
self.server = SSHTunnelForwarder(**config_tunnel)
|
self._server = SSHTunnelForwarder(**config_tunnel)
|
||||||
self.server.daemon_forward_servers = True
|
self._server.daemon_forward_servers = True
|
||||||
self.server.start()
|
self._server.start()
|
||||||
config_db["port"] = self.server.local_bind_port
|
config_db["port"] = self._server.local_bind_port
|
||||||
return mysql.connector.connect(**config_db)
|
self._database = mysql.connector.connect(**config_db)
|
||||||
|
return self._database
|
||||||
|
|
||||||
|
def find_best(self, dataset, classifier="any"):
    """Return the best stored result row for *dataset*.

    Joins ``results`` with ``reference`` and returns the first row
    ordered by accuracy (descending), i.e. the best result.  With the
    default ``classifier == "any"`` every classifier is considered;
    otherwise only rows for the given classifier are searched.

    Returns None when no row matches.
    """
    # buffered=True so fetchone() works without draining the result set.
    cursor = self._database.cursor(buffered=True)
    # Use driver-side parameter binding instead of f-string
    # interpolation: values containing quotes no longer break the
    # statement, and the query text is constant.
    command = (
        "select * from results r inner join reference e on "
        "r.dataset=e.dataset where r.dataset=%s "
    )
    parameters = [dataset]
    if classifier != "any":
        command += "and classifier=%s "
        parameters.append(classifier)
    # Best row first: highest accuracy wins, remaining keys break ties.
    command += (
        "order by r.dataset, accuracy desc, classifier desc, "
        "type, date, time"
    )
    cursor.execute(command, parameters)
    return cursor.fetchone()
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
self.server.close()
|
self._server.close()
|
||||||
|
|
||||||
|
|
||||||
class BD(ABC):
|
class BD(ABC):
|
||||||
|
@@ -1,395 +0,0 @@
|
|||||||
{
|
|
||||||
"metadata": {
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.2-final"
|
|
||||||
},
|
|
||||||
"orig_nbformat": 2,
|
|
||||||
"kernelspec": {
|
|
||||||
"name": "python3",
|
|
||||||
"display_name": "Python 3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2,
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import json\n",
|
|
||||||
"import sqlite3\n",
|
|
||||||
"import mysql.connector\n",
|
|
||||||
"from experimentation.Database import MySQL\n",
|
|
||||||
"from experimentation.Sets import Datasets\n",
|
|
||||||
"dbh = MySQL()\n",
|
|
||||||
"database = dbh.get_connection()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"classifier = 'bagging'\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def find_best(dataset):\n",
|
|
||||||
" cursor = database.cursor(buffered=True)\n",
|
|
||||||
" if classifier == \"any\":\n",
|
|
||||||
" command = (\n",
|
|
||||||
" f\"select * from results r inner join reference e on \"\n",
|
|
||||||
" f\"r.dataset=e.dataset where r.dataset='{dataset}' \"\n",
|
|
||||||
" )\n",
|
|
||||||
" else:\n",
|
|
||||||
" command = (\n",
|
|
||||||
" f\"select * from results r inner join reference e on \"\n",
|
|
||||||
" f\"r.dataset=e.dataset where r.dataset='{dataset}' and classifier\"\n",
|
|
||||||
" f\"='{classifier}'\"\n",
|
|
||||||
" )\n",
|
|
||||||
" command += (\n",
|
|
||||||
" \" order by r.dataset, accuracy desc, classifier desc, type, date, time\"\n",
|
|
||||||
" )\n",
|
|
||||||
" cursor.execute(command)\n",
|
|
||||||
" return cursor.fetchone()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def find_values(parameter, kernel_chosen):\n",
|
|
||||||
" result = []\n",
|
|
||||||
" for data in agg[kernel_chosen]:\n",
|
|
||||||
" base_parameter = f\"base_estimator__{parameter}\"\n",
|
|
||||||
" if parameter in data.keys():\n",
|
|
||||||
" result.append(data[parameter])\n",
|
|
||||||
" if base_parameter in data.keys():\n",
|
|
||||||
" result.append(data[base_parameter])\n",
|
|
||||||
" try:\n",
|
|
||||||
" result_ordered = sorted(result)\n",
|
|
||||||
" return result_ordered\n",
|
|
||||||
" except TypeError:\n",
|
|
||||||
" return result"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "stream",
|
|
||||||
"name": "stdout",
|
|
||||||
"text": [
|
|
||||||
"Aggregating data ..................................................\n",
|
|
||||||
"stree has 0 results\n",
|
|
||||||
"adaBoost has 0 results\n",
|
|
||||||
"bagging has 43 results\n",
|
|
||||||
"odte has 0 results\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"dt = Datasets(False, False, 'tanveer')\n",
|
|
||||||
"models = ['stree', 'adaBoost', 'bagging', 'odte']\n",
|
|
||||||
"agg_models = {}\n",
|
|
||||||
"for i in models:\n",
|
|
||||||
" agg_models[i] = 0\n",
|
|
||||||
"agg = {'linear': [], 'rbf': [], 'poly': []}\n",
|
|
||||||
"print(\"Aggregating data .\", end='')\n",
|
|
||||||
"for dataset in dt:\n",
|
|
||||||
" result = find_best(dataset[0])\n",
|
|
||||||
" print('.', end='')\n",
|
|
||||||
" if result:\n",
|
|
||||||
" agg_models[result[3]] += 1\n",
|
|
||||||
" json_result = json.loads(result[8])\n",
|
|
||||||
" key = json_result['kernel'] if 'kernel' in json_result.keys() else 'linear'\n",
|
|
||||||
" agg[key].append(json_result)\n",
|
|
||||||
"print('')\n",
|
|
||||||
"for i in models:\n",
|
|
||||||
" print(f\"{i:10} has {agg_models[i]:2} results\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "stream",
|
|
||||||
"name": "stdout",
|
|
||||||
"text": [
|
|
||||||
"Used kernel linear: 43 times\nUsed kernel poly: 0 times\nUsed kernel rbf: 0 times\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"print(\"Used kernel linear: \", len(agg['linear']), ' times')\n",
|
|
||||||
"print(\"Used kernel poly: \", len(agg['poly']), ' times')\n",
|
|
||||||
"print(\"Used kernel rbf: \", len(agg['rbf']), ' times')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "execute_result",
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"execution_count": 7
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"find_values('gamma', 'poly')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "execute_result",
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[0.05,\n",
|
|
||||||
" 0.05,\n",
|
|
||||||
" 0.05,\n",
|
|
||||||
" 0.05,\n",
|
|
||||||
" 0.05,\n",
|
|
||||||
" 0.05,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 0.55,\n",
|
|
||||||
" 0.55,\n",
|
|
||||||
" 0.55,\n",
|
|
||||||
" 1.0,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 7,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 55,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0,\n",
|
|
||||||
" 10000.0]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"execution_count": 8
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"find_values('C', 'linear')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 9,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "execute_result",
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"execution_count": 9
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"find_values('C', 'poly')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 10,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "execute_result",
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"execution_count": 10
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"find_values('C', 'rbf')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 11,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "execute_result",
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.2,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" None,\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto',\n",
|
|
||||||
" 0.6,\n",
|
|
||||||
" 'auto']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"execution_count": 11
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"find_values('max_features', 'linear')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dbh.close()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
156
param_analysis.py
Normal file
156
param_analysis.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import collections
|
||||||
|
from typing import Tuple
|
||||||
|
from experimentation.Database import MySQL
|
||||||
|
from experimentation.Sets import Datasets
|
||||||
|
from experimentation.Utils import TextColor
|
||||||
|
|
||||||
|
kernel_names = ["linear", "rbf", "poly"]
|
||||||
|
|
||||||
|
|
||||||
|
class Aggregation:
    """Aggregate the best hyperparameter values found per dataset.

    Loads, through a database handler, the best stored result of every
    dataset and reports how often each value of a given hyperparameter
    appears, with per-model and per-kernel summary counts.
    """

    def __init__(self, dbh):
        # dbh: database handler exposing find_best(dataset), e.g. MySQL.
        self._dbh = dbh
        # dataset name -> {"model", "kernel", "parameters", "best"}
        self._report = {}
        self._model_names = ["stree", "adaBoost", "bagging", "odte"]
        self._kernel_names = kernel_names

    def find_values(self, dataset, parameter):
        """Collect values of *parameter* recorded for *dataset*.

        Looks for both the plain name and the ensemble-prefixed
        ``base_estimator__`` variant.  Returns the values sorted when
        comparable, unsorted on mixed types.

        NOTE(review): load() stores a dict per dataset, so iterating
        ``self._report[dataset]`` yields keys, not parameter dicts --
        this method looks vestigial; confirm before relying on it.
        """
        result = []
        for data in self._report[dataset]:
            base_parameter = f"base_estimator__{parameter}"
            if parameter in data.keys():
                result.append(data[parameter])
            if base_parameter in data.keys():
                result.append(data[base_parameter])
        try:
            return sorted(result)
        except TypeError:
            # Non-comparable mixed types (e.g. float and str).
            return result

    def load(self):
        """Fill self._report with the best result of every dataset."""
        dt = Datasets(False, False, "tanveer")
        print("Aggregating data of best results ...")
        for dataset in dt:
            # Skip datasets that have no stored result at all.
            if result := self._dbh.find_best(dataset[0]):
                accuracy = result[5]
                expected = result[10]  # reference accuracy to beat
                model = result[3]
                json_result = json.loads(result[8])  # hyperparameters
                if "kernel" in json_result.keys():
                    kernel = json_result["kernel"]
                elif "base_estimator__kernel" in json_result.keys():
                    kernel = json_result["base_estimator__kernel"]
                else:
                    # No kernel recorded -> the default is linear.
                    kernel = "linear"
                best = accuracy > expected
                self._report[dataset[0]] = {
                    "model": model,
                    "kernel": kernel,
                    "parameters": json_result,
                    "best": best,
                }

    @staticmethod
    def report_header(title, lengths, fields, parameter):
        """Build the framed report banner and the column header line.

        *lengths* gives each column width; columns are separated by one
        space, which the total frame width accounts for.
        """
        length = sum(lengths) + len(lengths) - 1
        output = "\n" + "*" * length + "\n"
        title = title + f" -- {parameter} parameter --"
        # Center the title inside the asterisk frame; num2 absorbs the
        # leftover character when the padding does not split evenly.
        num = (length - len(title) - 2) // 2
        num2 = length - len(title) - 2 - 2 * num
        output += "*" + " " * num + title + " " * (num + num2) + "*\n"
        output += "*" * length + "\n\n"
        lines = ""
        for field, width in zip(fields, lengths):
            output += f"{field:{width}} "
            lines += "=" * width + " "
        output += f"\n{lines}"
        return output

    def report(self, parameter):
        """Print per-dataset values of *parameter* plus summary counts."""
        agg = {}
        agg_result = collections.OrderedDict()
        title = "Best Hyperparameters found for datasets"
        lengths = (32, 10, 7, 20)
        fields = (
            "Dataset",
            "Classifier",
            "Kernel",
            "Parameter Value",
        )
        print(Aggregation.report_header(title, lengths, fields, parameter))
        for i in self._kernel_names + self._model_names:
            agg[i] = {}
            agg[i]["total"] = 0
            agg[i]["better"] = 0
            agg[i]["worse"] = 0
        for dataset, data in self._report.items():
            kernel = data["kernel"]
            model = data["model"]
            # "+" marks results that beat the reference accuracy.
            if data["best"]:
                key = "better"
                sign = "+"
            else:
                key = "worse"
                sign = "-"
            base_parameter = f"base_estimator__{parameter}"
            result = ""
            if parameter in data["parameters"]:
                result = data["parameters"][parameter]
                agg_result[result] = agg_result.get(result, 0) + 1
            elif base_parameter in data["parameters"]:
                result = data["parameters"][base_parameter]
                agg_result[result] = agg_result.get(result, 0) + 1
            print(f"{sign} {dataset:30s} {model:10s} {kernel:7s} {result}")
            agg[kernel]["total"] += 1
            agg[kernel][key] += 1
            agg[model]["total"] += 1
            agg[model][key] += 1
        print(TextColor.BOLD, "Models", TextColor.ENDC)
        for i in self._model_names:
            print(
                f"{i:10} has {agg[i]['total']:2} results {agg[i]['better']:2} "
                f"better {agg[i]['worse']:2} worse"
            )
        print(TextColor.BOLD, "Kernels", TextColor.ENDC)
        for i in self._kernel_names:
            print(
                f"{i:10} has {agg[i]['total']:2} results {agg[i]['better']:2} "
                f"better {agg[i]['worse']:2} worse"
            )
        print(TextColor.BOLD, f"{parameter} Values:", TextColor.ENDC)
        try:
            # Pad string values to the longest one; numeric values raise
            # TypeError on len() and fall back to a fixed float format.
            max_len = f"{len(max(agg_result.keys(), key=len))}s"
        except TypeError:
            max_len = "10.2f"
        for key in sorted(agg_result):
            # BUG FIX: interpolate the computed spec -- the previous
            # literal spec ("44,160") raised ValueError for str keys.
            print(f"{key:{max_len}} -> {agg_result[key]:2d} times")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments() -> Tuple[str]:
    """Parse the command line of the param_analysis script.

    Returns:
        A 1-tuple holding the hyperparameter name to analyze
        (``-p``/``--param``, default ``"C"``).
    """
    # BUG FIX: the annotation claimed Tuple[str, str, str, bool, bool]
    # (copied from another script) but only a 1-tuple is returned.
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-p",
        "--param",
        type=str,
        default="C",
    )
    args = ap.parse_args()
    return (args.param,)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: read the hyperparameter name from the command
# line, open the database connection, then build and print the report.
(param,) = parse_arguments()
dbh = MySQL()
dbh.get_connection()  # opens the SSH tunnel / MySQL connection
agg = Aggregation(dbh)
agg.load()  # cache the best stored result of every dataset
agg.report(param)
dbh.close()  # shut the tunnel down again
|
@@ -32,26 +32,6 @@ def parse_arguments() -> Tuple[str, str, str, bool, bool]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def find_best(dataset):
    """Return the single best stored result row for *dataset*.

    Queries ``results`` joined with ``reference`` and returns the first
    row ordered by accuracy (descending).

    NOTE(review): depends on module-level globals ``database`` and
    ``classifier``, and interpolates values into the SQL with f-strings
    -- quotes in the values would break the query; confirm inputs are
    trusted.
    """
    # buffered=True so fetchone() can be called without draining the set.
    cursor = database.cursor(buffered=True)
    if classifier == "any":
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' "
        )
    else:
        command = (
            f"select * from results r inner join reference e on "
            f"r.dataset=e.dataset where r.dataset='{dataset}' and classifier"
            f"='{classifier}'"
        )
    # Best row first: highest accuracy wins, remaining keys break ties.
    command += (
        " order by r.dataset, accuracy desc, classifier desc, type, date, time"
    )
    cursor.execute(command)
    return cursor.fetchone()
|
|
||||||
|
|
||||||
|
|
||||||
def report_header_content(title):
|
def report_header_content(title):
|
||||||
length = sum(lengths) + len(lengths) - 1
|
length = sum(lengths) + len(lengths) - 1
|
||||||
output = "\n" + "*" * length + "\n"
|
output = "\n" + "*" * length + "\n"
|
||||||
@@ -144,7 +124,7 @@ for item in [
|
|||||||
] + models:
|
] + models:
|
||||||
agg[item] = 0
|
agg[item] = 0
|
||||||
for dataset in dt:
|
for dataset in dt:
|
||||||
record = find_best(dataset[0])
|
record = dbh.find_best(dataset[0], classifier)
|
||||||
if record is None:
|
if record is None:
|
||||||
print(TextColor.FAIL + f"*No results found for {dataset[0]}")
|
print(TextColor.FAIL + f"*No results found for {dataset[0]}")
|
||||||
else:
|
else:
|
||||||
|
Reference in New Issue
Block a user