Merge branch 'main' of github.com:doctorado-ml/benchmark into main

2022-03-11 11:51:50 +01:00
5 changed files with 82 additions and 27 deletions

View File

@@ -1,5 +1,11 @@
 [
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             1.0
         ],
@@ -11,6 +17,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.001,
             0.0275,
@@ -32,6 +44,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.05,
             1.0,
@@ -60,6 +78,12 @@
         ]
     },
     {
+        "n_jobs": [
+            -1
+        ],
+        "n_estimators": [
+            100
+        ],
         "base_estimator__C": [
             0.05,
             0.2,

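The hunks above add explicit "n_jobs" and "n_estimators" entries to every parameter set in this grid-definition file. For orientation, a list of dictionaries like this can be passed straight to scikit-learn's GridSearchCV; the ensemble, base estimator, and file name below are assumptions made for the example, not taken from the repository, but they show how the "base_estimator__" prefix reaches the inner estimator while "n_jobs" and "n_estimators" stay at the ensemble level.

# Hedged illustration (not repository code).
import json

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

with open("grid_input.json") as f:      # hypothetical file name
    param_grid = json.load(f)           # list of dicts, as edited above

X, y = load_iris(return_X_y=True)
# "base_estimator__C" reaches the C of the inner SVC, while "n_jobs" and
# "n_estimators" are parameters of the ensemble itself.
clf = BaggingClassifier(base_estimator=SVC())
search = GridSearchCV(clf, param_grid, scoring="accuracy", cv=5)
search.fit(X, y)
print(search.best_params_)
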
View File

@@ -279,6 +279,7 @@ class Experiment:
         output["results"] = self.results
         with open(self.output_file, "w") as f:
             json.dump(output, f)
+            f.flush()

     def do_experiment(self):
         now = time.time()
@@ -287,6 +288,7 @@ class Experiment:
             position=0,
             disable=not self.progress_bar,
         )
+        self.duration = 0.0
         for name in loop:
             loop.set_description(f"{name:30s}")
             X, y = self.datasets.load(name)
@@ -296,6 +298,7 @@ class Experiment:
             self._init_experiment()
             self._n_fold_crossval(X, y, hyperparameters)
             self._add_results(name, hyperparameters, samp, feat, n_classes)
+            self._output_results()
         self.duration = time.time() - now
         self._output_results()
         if self.progress_bar:

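The three hunks above make the experiment write its results file after every dataset (and flush it) instead of only once at the end, which in turn requires self.duration to exist before the loop finishes. A minimal sketch of that pattern follows; the function and variable names are illustrative, not the repository's API.

import json
import time

def dump_partial(path, duration, results):
    with open(path, "w") as f:
        json.dump({"duration": duration, "results": results}, f)
        f.flush()   # make the partial file readable on disk right away

def run(datasets, output_file):
    results = []
    now = time.time()
    duration = 0.0                     # defined up front so early dumps are valid
    for name, (X, y) in datasets.items():
        results.append({"dataset": name, "samples": len(X)})
        dump_partial(output_file, duration, results)   # after every dataset
    duration = time.time() - now
    dump_partial(output_file, duration, results)       # final write with the real duration

# toy usage
run({"toy": ([0, 1, 2], [0, 1, 1])}, "partial_results.json")
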
View File

@@ -183,18 +183,23 @@ class Report(BaseReport):
 class ReportBest(BaseReport):
-    header_lengths = [30, 8, 50, 35]
+    header_lengths = [30, 8, 76, 45]
     header_cols = [
         "Dataset",
         "Score",
-        "File",
+        "File/Message",
         "Hyperparameters",
     ]

-    def __init__(self, score, model):
-        file_name = os.path.join(
-            Folders.results, Files.best_results(score, model)
+    def __init__(self, score, model, best, grid):
+        name = (
+            Files.best_results(score, model)
+            if best
+            else Files.grid_output(score, model)
         )
+        self.best = best
+        self.grid = grid
+        file_name = os.path.join(Folders.results, name)
         super().__init__(file_name, best_file=True)
         self.compare = False
         self.score_name = score
@@ -222,9 +227,10 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
+        kind = "Best" if self.best else "Grid"
         self.header_line(
-            f" Report Best {self.score_name} Scores with {self.model} in any "
-            "platform"
+            f" Report {kind} {self.score_name} Scores with {self.model} "
+            "in any platform"
         )
         self.header_line("*")
         print("")

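With this change ReportBest can render either the best-results file or the grid-output file for a model. A hedged usage sketch based only on the signatures visible in this diff; the boolean flags and the model name "ODTE" are assumptions.

from Results import ReportBest

best_report = ReportBest("accuracy", "ODTE", best=True, grid=False)
best_report.report()   # header: " Report Best accuracy Scores with ODTE in any platform"

grid_report = ReportBest("accuracy", "ODTE", best=False, grid=True)
grid_report.report()   # header: " Report Grid accuracy Scores with ODTE in any platform"
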
View File

@@ -14,7 +14,8 @@ data = [
     '{"kernel": "rbf"}',
     '{"C": 1.05, "gamma": "auto","kernel": "rbf"}',
     '{"splitter": "random", "max_features": "auto"}',
-    '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", "multiclass_strategy": "ovr"}',
+    '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", '
+    '"multiclass_strategy": "ovr"}',
     '{"kernel": "rbf", "C": 0.05}',
     '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
@@ -27,13 +28,15 @@ data = [
     '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}',
     '{"kernel": "rbf"}',
     '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}',
-    '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": "ovr"}',
+    '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": '
+    '"ovr"}',
     '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C":57, "kernel": "rbf"}',
     '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": "ovr"}',
     '{"C": 5, "kernel": "rbf", "gamma": "auto"}',
-    '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
+    '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", '
+    '"multiclass_strategy": "ovr"}',
     '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
     '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": "ovr"}',
     '{"kernel": "rbf", "gamma": 0.001}',
@@ -59,16 +62,14 @@ for kernel in kernels:
     for item in hyper:
         results[kernel][item] = []
 # load data
-for item in data:
-    line = json.loads(item)
+for sample in data:
+    line = json.loads(sample)
     if "kernel" not in line:
         line["kernel"] = "linear"
     kernel = line["kernel"]
     for item in hyper:
-        if item in line:
-            results[kernel][item].append(line[item]) if line[
-                item
-            ] not in results[kernel][item] else None
+        if item in line and line[item] not in results[kernel][item]:
+            results[kernel][item].append(line[item])

 # Add default values and remove inconsistent values
 results["linear"]["multiclass_strategy"] = ["ovo"]
@@ -81,12 +82,12 @@ for kernel in kernels:
         results[kernel]["C"].append(1.0)

 for item in results:
-    results_tmp = {}
+    results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
     for key, value in results[item].items():
         new_key = f"base_estimator__{key}"
         try:
             results_tmp[new_key] = sorted(value)
-        except:
+        except TypeError:
             t1 = sorted(
                 [
                     x

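The refactor above replaces a conditional expression used only for its side effect with a plain if, and narrows the bare except to TypeError. That exception arises because a hyperparameter list can mix numbers and strings (gamma appears both as 0.1 and as "auto" in the data), and sorted() refuses to compare them. A small sketch of the failure and one possible fallback; the merge order is an assumption, not necessarily the script's exact rule.

values = ["auto", 0.1, 10.0, 0.001]
try:
    ordered = sorted(values)
except TypeError:
    # mixed str/float comparison is not allowed, so sort each kind separately
    numbers = sorted(x for x in values if not isinstance(x, str))
    strings = sorted(x for x in values if isinstance(x, str))
    ordered = numbers + strings
print(ordered)   # [0.001, 0.1, 10.0, 'auto']
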
View File

@@ -3,7 +3,8 @@ import argparse
 import numpy as np
 from Experiments import Datasets
 from Results import Report, Excel, SQL, ReportBest
-from Utils import Files, TextColor
+from Utils import Files, TextColor, EnvDefault

 """Build report on screen of a result file, optionally generate excel and sql
 file, and can compare results of report with best results obtained by model
@@ -49,12 +50,29 @@ def parse_arguments():
         help="best results of models",
     )
     ap.add_argument(
-        "-s",
-        "--score",
+        "-g",
+        "--grid",
         type=str,
         required=False,
-        default="accuracy",
-        help="score used in best results model",
+        help="grid results of model",
     )
+    ap.add_argument(
+        "-m",
+        "--model",
+        action=EnvDefault,
+        envvar="model",
+        type=str,
+        required=True,
+        help="model name",
+    )
+    ap.add_argument(
+        "-s",
+        "--score",
+        action=EnvDefault,
+        envvar="score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     args = ap.parse_args()
@@ -64,7 +82,9 @@ def parse_arguments():
         args.sql,
         args.compare,
         args.best,
+        args.grid,
         args.score,
+        args.model,
     )
@@ -88,13 +108,14 @@ def default_report():
     )

-(file, excel, sql, compare, best, score) = parse_arguments()
+(file, excel, sql, compare, best, grid, score, model) = parse_arguments()
+if grid:
+    best = False
 if file is None and best is None:
     default_report()
 else:
-    if best is not None:
-        report = ReportBest(score, best)
+    if best is not None or grid is not None:
+        report = ReportBest(score, model, best, grid)
         report.report()
     else:
         report = Report(file, compare)
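
The new -m and -s options use an EnvDefault action imported from Utils, which is not shown in this diff. A common way to implement such an action is sketched below as an assumption about its behavior, not the repository's code: it takes the default from an environment variable and drops the required constraint when that variable is set.

import argparse
import os

class EnvDefault(argparse.Action):
    def __init__(self, envvar, required=True, default=None, **kwargs):
        if envvar in os.environ:
            default = os.environ[envvar]   # take the default from the environment
        if required and default is not None:
            required = False               # the flag may then be omitted
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)

With an action like this, exporting model and score in the shell (for example model=STree score=accuracy, both values only examples) would satisfy the required -m and -s options without repeating them on the command line.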