Merge branch 'main' of github.com:doctorado-ml/benchmark into main

2022-03-11 11:51:50 +01:00
5 changed files with 82 additions and 27 deletions

View File

@@ -1,5 +1,11 @@
 [
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             1.0
         ],
@@ -11,6 +17,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.001,
             0.0275,
@@ -32,6 +44,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.05,
             1.0,
@@ -60,6 +78,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.05,
             0.2,
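
The added "n_jobs" and "n_estimators" entries sit next to the existing "base_estimator__" keys in each grid entry, so the ensemble-level settings are now part of the search space alongside the nested estimator's parameters. As a hedged sketch of how such a list of parameter dictionaries is typically consumed (the actual estimator used by this benchmark is not shown in the diff; scikit-learn's BaggingClassifier over an SVC is only a stand-in with the same double-underscore parameter routing, using the older "base_estimator" constructor name current at the time of this commit):

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Two entries shaped like the ones in the diff: ensemble-level keys plus
# "base_estimator__" keys that scikit-learn routes to the nested estimator.
param_grid = [
    {"n_jobs": [-1], "n_estimators": [100], "base_estimator__C": [1.0]},
    {"n_jobs": [-1], "n_estimators": [100], "base_estimator__C": [0.001, 0.0275]},
]

search = GridSearchCV(BaggingClassifier(base_estimator=SVC()), param_grid=param_grid)
X, y = load_iris(return_X_y=True)
search.fit(X, y)
print(search.best_params_)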

View File

@@ -279,6 +279,7 @@ class Experiment:
         output["results"] = self.results
         with open(self.output_file, "w") as f:
             json.dump(output, f)
+            f.flush()
 
     def do_experiment(self):
         now = time.time()
@@ -287,6 +288,7 @@
             position=0,
             disable=not self.progress_bar,
         )
+        self.duration = 0.0
         for name in loop:
             loop.set_description(f"{name:30s}")
             X, y = self.datasets.load(name)
@@ -296,6 +298,7 @@
                 self._init_experiment()
                 self._n_fold_crossval(X, y, hyperparameters)
                 self._add_results(name, hyperparameters, samp, feat, n_classes)
+                self._output_results()
         self.duration = time.time() - now
         self._output_results()
         if self.progress_bar:
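
The three additions above change when results reach disk: self.duration is initialised before the loop so the serialised output is always well formed, _output_results() is now called after every dataset, and the explicit f.flush() pushes the JSON out immediately. A minimal sketch of the resulting pattern, with the surrounding Experiment class reduced to the attributes that appear in the diff (the class and method names below are stand-ins, not the real ones):

import json
import time


class IncrementalRun:
    """Hypothetical stand-in for Experiment, reduced to what the diff shows."""

    def __init__(self, output_file):
        self.output_file = output_file
        self.results = []
        self.duration = 0.0  # defined before the loop so partial files are complete

    def _output_results(self):
        output = {"duration": self.duration, "results": self.results}
        with open(self.output_file, "w") as f:
            json.dump(output, f)
            f.flush()  # push the JSON to disk before the next dataset starts

    def run(self, datasets):
        now = time.time()
        for name in datasets:
            self.results.append({"dataset": name})  # stand-in for the real scores
            self._output_results()  # partial results survive an interrupted run
        self.duration = time.time() - now
        self._output_results()  # final write includes the total duration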

View File

@@ -183,18 +183,23 @@ class Report(BaseReport):
 class ReportBest(BaseReport):
-    header_lengths = [30, 8, 50, 35]
+    header_lengths = [30, 8, 76, 45]
     header_cols = [
         "Dataset",
         "Score",
-        "File",
+        "File/Message",
         "Hyperparameters",
     ]
 
-    def __init__(self, score, model):
-        file_name = os.path.join(
-            Folders.results, Files.best_results(score, model)
+    def __init__(self, score, model, best, grid):
+        name = (
+            Files.best_results(score, model)
+            if best
+            else Files.grid_output(score, model)
         )
+        self.best = best
+        self.grid = grid
+        file_name = os.path.join(Folders.results, name)
         super().__init__(file_name, best_file=True)
         self.compare = False
         self.score_name = score
@@ -222,9 +227,10 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
+        kind = "Best" if self.best else "Grid"
         self.header_line(
-            f" Report Best {self.score_name} Scores with {self.model} in any "
-            "platform"
+            f" Report {kind} {self.score_name} Scores with {self.model} "
+            "in any platform"
         )
         self.header_line("*")
         print("")

View File

@@ -14,7 +14,8 @@ data = [
     '{"kernel": "rbf"}',
     '{"C": 1.05, "gamma": "auto","kernel": "rbf"}',
     '{"splitter": "random", "max_features": "auto"}',
-    '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", "multiclass_strategy": "ovr"}',
+    '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", '
+    '"multiclass_strategy": "ovr"}',
     '{"kernel": "rbf", "C": 0.05}',
     '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
@@ -27,13 +28,15 @@ data = [
     '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}',
     '{"kernel": "rbf"}',
     '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}',
-    '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": "ovr"}',
+    '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": '
+    '"ovr"}',
     '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C":57, "kernel": "rbf"}',
     '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": "ovr"}',
     '{"C": 5, "kernel": "rbf", "gamma": "auto"}',
-    '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
+    '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", '
+    '"multiclass_strategy": "ovr"}',
     '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": "ovr"}',
     '{"kernel": "rbf", "gamma": 0.001}',
@@ -59,16 +62,14 @@ for kernel in kernels:
     for item in hyper:
         results[kernel][item] = []
 # load data
-for item in data:
-    line = json.loads(item)
+for sample in data:
+    line = json.loads(sample)
     if "kernel" not in line:
         line["kernel"] = "linear"
     kernel = line["kernel"]
     for item in hyper:
-        if item in line:
-            results[kernel][item].append(line[item]) if line[
-                item
-            ] not in results[kernel][item] else None
+        if item in line and line[item] not in results[kernel][item]:
+            results[kernel][item].append(line[item])
 
 # Add default values and remove inconsistent values
 results["linear"]["multiclass_strategy"] = ["ovo"]
@@ -81,12 +82,12 @@ for kernel in kernels:
         results[kernel]["C"].append(1.0)
 
 for item in results:
-    results_tmp = {}
+    results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
     for key, value in results[item].items():
         new_key = f"base_estimator__{key}"
         try:
             results_tmp[new_key] = sorted(value)
-        except:
+        except TypeError:
            t1 = sorted(
                [
                    x
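
The rewritten loading loop drops the conditional-expression-with-None trick in favour of a plain if that also deduplicates values per kernel, and the bare except is narrowed to TypeError (raised when sorted() meets mixed str/float values such as "auto" next to numbers). A self-contained sketch of the dedupe-and-prefix flow under those assumptions; the input is a small illustrative subset, the hyper list is assumed, and the TypeError branch is simplified to keeping the values unsorted:

import json

# Illustrative subset of the script's inputs; "hyper" here is an assumption,
# the real list in the script is not fully shown in the diff.
data = [
    '{"kernel": "rbf", "C": 0.05}',
    '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"kernel": "rbf", "C": 0.05}',  # duplicate: must be recorded only once
]
hyper = ["C", "gamma", "multiclass_strategy"]
kernels = ("linear", "rbf", "poly", "liblinear")
results = {kernel: {item: [] for item in hyper} for kernel in kernels}

for sample in data:
    line = json.loads(sample)
    line.setdefault("kernel", "linear")
    kernel = line["kernel"]
    for item in hyper:
        # same dedupe condition as the rewritten line in the diff
        if item in line and line[item] not in results[kernel][item]:
            results[kernel][item].append(line[item])

grid = []
for item in results:
    # fixed ensemble settings, as added in the diff
    results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
    for key, value in results[item].items():
        try:
            results_tmp[f"base_estimator__{key}"] = sorted(value)
        except TypeError:
            # mixed str/float values cannot be sorted together; the real
            # script splits them by type, here they are kept as-is
            results_tmp[f"base_estimator__{key}"] = value
    grid.append(results_tmp)

print(json.dumps(grid, indent=4))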

View File

@@ -3,7 +3,8 @@ import argparse
 import numpy as np
 from Experiments import Datasets
 from Results import Report, Excel, SQL, ReportBest
-from Utils import Files, TextColor
+from Utils import Files, TextColor, EnvDefault
 
 """Build report on screen of a result file, optionally generate excel and sql
 file, and can compare results of report with best results obtained by model
@@ -49,12 +50,29 @@ def parse_arguments():
help="best results of models", help="best results of models",
) )
ap.add_argument( ap.add_argument(
"-s", "-g",
"--score", "--grid",
type=str, type=str,
required=False, required=False,
default="accuracy", help="grid results of model",
help="score used in best results model", )
ap.add_argument(
"-m",
"--model",
action=EnvDefault,
envvar="model",
type=str,
required=True,
help="model name",
)
ap.add_argument(
"-s",
"--score",
action=EnvDefault,
envvar="score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
) )
args = ap.parse_args() args = ap.parse_args()
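
Both new arguments rely on an EnvDefault action imported from Utils, which is not part of this diff. A common implementation of that pattern, sketched here under the assumption that Utils.EnvDefault follows it, lets an environment variable satisfy a required flag:

import argparse
import os


class EnvDefault(argparse.Action):
    """Argparse action that falls back to an environment variable (assumed implementation)."""

    def __init__(self, envvar, required=True, default=None, **kwargs):
        if envvar in os.environ:
            default = os.environ[envvar]  # the environment supplies the default
        if required and default is not None:
            required = False  # a usable default already satisfies the flag
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)

With such an action, setting the model and score environment variables satisfies the required -m/-s flags without passing them explicitly on the command line.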
@@ -64,7 +82,9 @@ def parse_arguments():
args.sql, args.sql,
args.compare, args.compare,
args.best, args.best,
args.grid,
args.score, args.score,
args.model,
) )
@@ -88,13 +108,14 @@ def default_report():
     )
 
 
-(file, excel, sql, compare, best, score) = parse_arguments()
+(file, excel, sql, compare, best, grid, score, model) = parse_arguments()
+if grid:
+    best = False
 if file is None and best is None:
     default_report()
 else:
-    if best is not None:
-        report = ReportBest(score, best)
+    if best is not None or grid is not None:
+        report = ReportBest(score, model, best, grid)
         report.report()
     else:
         report = Report(file, compare)