Mirror of https://github.com/Doctorado-ML/benchmark.git (synced 2025-08-15 07:25:53 +00:00)
Change scripts model of setup
This commit is contained in:
13  .coveragerc
@@ -1,15 +1,10 @@
[run]
branch = True
source = benchmark

[report]
exclude_lines =
    if self.debug:
    pragma: no cover
    raise NotImplementedError
    if __name__ == .__main__.:
ignore_errors = True
omit =
omit =
    benchmark/__init__.py
    benchmark/scripts/__init__.py
    benchmark/_version.py
    benchmark/tests/*
    benchmark/tests/*
@@ -11,3 +11,274 @@ ALL_METRICS = (
)


class EnvData:
    @staticmethod
    def load():
        args = {}
        with open(Files.dot_env) as f:
            for line in f.read().splitlines():
                if line == "" or line.startswith("#"):
                    continue
                key, value = line.split("=")
                args[key] = value
        return args
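A note on the file EnvData.load() reads: Files.dot_env is expected to point to a plain KEY=VALUE file, one pair per line, with blank lines and lines starting with # skipped. A minimal hypothetical sketch (the variable names match the envvar entries used by EnvDefault below; the values are only illustrative):

    # .env
    score=accuracy
    model=ODTE
    platform=iMac27
    n_folds=5
    stratified=0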
class EnvDefault(argparse.Action):
    # Thanks to https://stackoverflow.com/users/445507/russell-heilling
    def __init__(self, envvar, required=True, default=None, **kwargs):
        self._args = EnvData.load()
        default = self._args[envvar]
        required = False
        super(EnvDefault, self).__init__(
            default=default, required=required, **kwargs
        )

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)


class Arguments:
    def __init__(self):
        self.ap = argparse.ArgumentParser()
        models_data = Models.define_models(random_state=0)
        self.parameters = {
            "best": [
                ("-b", "--best"),
                {
                    "type": str,
                    "required": False,
                    "help": "best results of models",
                },
            ],
            "color": [
                ("-c", "--color"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "use colors for the tree",
                },
            ],
            "compare": [
                ("-c", "--compare"),
                {
                    "type": bool,
                    "required": False,
                    "help": "Compare accuracy with best results",
                },
            ],
            "dataset": [
                ("-d", "--dataset"),
                {
                    "type": str,
                    "required": False,
                    "help": "dataset to work with",
                },
            ],
            "excel": [
                ("-x", "--excel"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Generate Excel File",
                },
            ],
            "file": [
                ("-f", "--file"),
                {"type": str, "required": False, "help": "Result file"},
            ],
            "grid": [
                ("-g", "--grid"),
                {
                    "type": str,
                    "required": False,
                    "help": "grid results of model",
                },
            ],
            "grid_paramfile": [
                ("-g", "--grid_paramfile"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Use best hyperparams file?",
                },
            ],
            "hidden": [
                ("--hidden",),
                {
                    "type": str,
                    "required": False,
                    "default": False,
                    "help": "Show hidden results",
                },
            ],
            "hyperparameters": [
                ("-p", "--hyperparameters"),
                {"type": str, "required": False, "default": "{}"},
            ],
            "key": [
                ("-k", "--key"),
                {
                    "type": str,
                    "required": False,
                    "default": "date",
                    "help": "key to sort results",
                },
            ],
            "lose": [
                ("-l", "--lose"),
                {
                    "type": bool,
                    "default": False,
                    "required": False,
                    "help": "show lose results",
                },
            ],
            "model": [
                ("-m", "--model"),
                {
                    "type": str,
                    "required": True,
                    "choices": list(models_data),
                    "action": EnvDefault,
                    "envvar": "model",
                    "help": "model name",
                },
            ],
            "model1": [
                ("-m1", "--model1"),
                {
                    "type": str,
                    "required": True,
                    "choices": list(models_data),
                    "action": EnvDefault,
                    "envvar": "model",
                    "help": "model name",
                },
            ],
            "model2": [
                ("-m2", "--model2"),
                {
                    "type": str,
                    "required": True,
                    "choices": list(models_data),
                    "action": EnvDefault,
                    "envvar": "model",
                    "help": "model name",
                },
            ],
            "nan": [
                ("--nan",),
                {
                    "type": bool,
                    "required": False,
                    "help": "Move nan results to hidden folder",
                },
            ],
            "number": [
                ("-n", "--number"),
                {
                    "type": int,
                    "required": False,
                    "default": 0,
                    "help": "number of results to show, 0 to any",
                },
            ],
            "n_folds": [
                ("-n", "--n_folds"),
                {
                    "action": EnvDefault,
                    "envvar": "n_folds",
                    "type": int,
                    "required": True,
                    "help": "number of folds",
                },
            ],
            "paramfile": [
                ("-f", "--paramfile"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Use best hyperparams file?",
                },
            ],
            "platform": [],
            "quiet": [
                ("-q", "--quiet"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                },
            ],
            "report": [
                ("-r", "--report"),
                {
                    "type": bool,
                    "default": False,
                    "required": False,
                    "help": "Report results",
                },
            ],
            "score": [
                ("-s", "--score"),
                {
                    "action": EnvDefault,
                    "envvar": "score",
                    "type": str,
                    "required": True,
                    "choices": ALL_METRICS,
                },
            ],
            "sql": [
                ("-q", "--sql"),
                {"type": bool, "required": False, "help": "Generate SQL File"},
            ],
            "stratified": [
                ("-t", "--stratified"),
                {
                    "action": EnvDefault,
                    "envvar": "stratified",
                    "type": str,
                    "required": True,
                    "help": "Stratified",
                },
            ],
            "tex_output": [
                ("-t", "--tex-output"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Generate Tex file with the table",
                },
            ],
            "title": [
                ("--title",),
                {"type": str, "required": True, "help": "experiment title"},
            ],
            "win": [
                ("-w", "--win"),
                {
                    "type": bool,
                    "default": False,
                    "required": False,
                    "help": "show win results",
                },
            ],
        }

    def xset(self, *arg_name, **kwargs):
        names, default = self.parameters[arg_name[0]]
        self.ap.add_argument(
            *names,
            **{**default, **kwargs},
        )
        return self

    def parse(self):
        return self.ap.parse_args()
@@ -14,7 +14,8 @@ from sklearn.model_selection import (
    cross_validate,
)
from .Utils import Folders, Files
from .Models import Models, EnvData
from .Models import Models
from .Arguments import EnvData


class Randomized:
@@ -1,4 +1,3 @@
import argparse
from statistics import mean
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import (
@@ -12,15 +11,6 @@ from stree import Stree
from wodt import Wodt
from odte import Odte
from xgboost import XGBClassifier
from .Utils import Files

ALL_METRICS = (
    "accuracy",
    "f1-macro",
    "f1-micro",
    "f1-weighted",
    "roc-auc-ovr",
)


class Models:
@@ -99,139 +89,3 @@ class Models:
        nodes, leaves = result.nodes_leaves()
        depth = result.depth_ if hasattr(result, "depth_") else 0
        return nodes, leaves, depth


class EnvData:
    @staticmethod
    def load():
        args = {}
        with open(Files.dot_env) as f:
            for line in f.read().splitlines():
                if line == "" or line.startswith("#"):
                    continue
                key, value = line.split("=")
                args[key] = value
        return args


class EnvDefault(argparse.Action):
    # Thanks to https://stackoverflow.com/users/445507/russell-heilling
    def __init__(self, envvar, required=True, default=None, **kwargs):
        self._args = EnvData.load()
        default = self._args[envvar]
        required = False
        super(EnvDefault, self).__init__(
            default=default, required=required, **kwargs
        )

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)


class Arguments:
    def __init__(self):
        self.ap = argparse.ArgumentParser()
        models_data = Models.define_models(random_state=0)
        self.parameters = {
            "best": [
                ("-b", "--best"),
                {
                    "type": str,
                    "required": False,
                    "help": "best results of models",
                },
            ],
            "color": [],
            "compare": [
                ("-c", "--compare"),
                {
                    "type": bool,
                    "required": False,
                    "help": "Compare accuracy with best results",
                },
            ],
            "dataset": [],
            "excel": [
                ("-x", "--excel"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Generate Excel File",
                },
            ],
            "file": [
                ("-f", "--file"),
                {"type": str, "required": False, "help": "Result file"},
            ],
            "grid": [
                ("-g", "--grid"),
                {
                    "type": str,
                    "required": False,
                    "help": "grid results of model",
                },
            ],
            "grid_paramfile": [],
            "hidden": [],
            "hyperparameters": [],
            "key": [],
            "lose": [],
            "model": [
                ("-m", "--model"),
                {
                    "type": str,
                    "required": True,
                    "choices": list(models_data),
                    "action": EnvDefault,
                    "envvar": "model",
                    "help": f"model name",
                },
            ],
            "model1": [],
            "model2": [],
            "nan": [],
            "number": [],
            "n_folds": [],
            "paramfile": [],
            "platform": [],
            "quiet": [],
            "report": [],
            "score": [
                ("-s", "--score"),
                {
                    "action": EnvDefault,
                    "envvar": "score",
                    "type": str,
                    "required": True,
                    "choices": ALL_METRICS,
                },
            ],
            "sql": [
                ("-q", "--sql"),
                {"type": bool, "required": False, "help": "Generate SQL File"},
            ],
            "stratified": [],
            "tex_output": [
                ("-t", "--tex-output"),
                {
                    "type": bool,
                    "required": False,
                    "default": False,
                    "help": "Generate Tex file with the table",
                },
            ],
            "title": [],
            "win": [],
        }

    def xset(self, *arg_name, **kwargs):
        names, default = self.parameters[arg_name[0]]
        self.ap.add_argument(
            *names,
            **{**default, **kwargs},
        )
        return self

    def parse(self):
        return self.ap.parse_args()

@@ -1,6 +1,6 @@
from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer
from .Results import Report, Summary
from .Models import EnvDefault
from .Arguments import EnvDefault

__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez"
0  benchmark/scripts/__init__.py  Normal file
@@ -1,19 +0,0 @@
#!/usr/bin/env python
from benchmark.Results import Benchmark
from benchmark.Utils import Files
from benchmark.Models import Arguments


arguments = Arguments()
arguments.xset("score").xset("excel").xset("tex_output")
ar = arguments.parse()
benchmark = Benchmark(score=ar.score, visualize=True)
benchmark.compile_results()
benchmark.save_results()
benchmark.report(ar.tex_output)
benchmark.exreport()
if ar.excel:
    benchmark.excel()
    Files.open(benchmark.get_excel_file_name())
if ar.tex_output:
    print(f"File {benchmark.get_tex_file()} generated")
20  benchmark/scripts/be_benchmark.py  Executable file
@@ -0,0 +1,20 @@
#!/usr/bin/env python
from benchmark.Results import Benchmark
from benchmark.Utils import Files
from benchmark.Arguments import Arguments


def main():
    arguments = Arguments()
    arguments.xset("score").xset("excel").xset("tex_output")
    ar = arguments.parse()
    benchmark = Benchmark(score=ar.score, visualize=True)
    benchmark.compile_results()
    benchmark.save_results()
    benchmark.report(ar.tex_output)
    benchmark.exreport()
    if ar.excel:
        benchmark.excel()
        Files.open(benchmark.get_excel_file_name())
    if ar.tex_output:
        print(f"File {benchmark.get_tex_file()} generated")
@@ -1,30 +0,0 @@
#!/usr/bin/env python
import json
from benchmark.Results import Summary
from benchmark.Utils import ALL_METRICS, Arguments


arguments = Arguments()
metrics = list(ALL_METRICS)
metrics.append("all")
arguments.xset("score", choices=metrics)
args = arguments.parse()


metrics = ALL_METRICS if args.score == "all" else [args.score]

summary = Summary()
summary.acquire()

nl = 50
num = 100
for metric in metrics:
    title = f"BEST RESULTS of {metric} for datasets"
    best = summary.best_results_datasets(score=metric)
    for key, item in best.items():
        print(f"{key:30s} {item[2]:{nl}s}")
        print("-" * num)
        print(f"{item[0]:30.7f} {json.dumps(item[1]):{nl}s}")
        print("-" * num)
        print(f"{item[3]:{nl+num}s}")
        print("*" * num)
26  benchmark/scripts/be_best.py  Executable file
@@ -0,0 +1,26 @@
#!/usr/bin/env python
import json
from benchmark.Results import Summary
from benchmark.Arguments import ALL_METRICS, Arguments


def main():
    arguments = Arguments()
    metrics = list(ALL_METRICS)
    metrics.append("all")
    arguments.xset("score", choices=metrics)
    args = arguments.parse()
    metrics = ALL_METRICS if args.score == "all" else [args.score]
    summary = Summary()
    summary.acquire()
    nl = 50
    num = 100
    for metric in metrics:
        best = summary.best_results_datasets(score=metric)
        for key, item in best.items():
            print(f"{key:30s} {item[2]:{nl}s}")
            print("-" * num)
            print(f"{item[0]:30.7f} {json.dumps(item[1]):{nl}s}")
            print("-" * num)
            print(f"{item[3]:{nl+num}s}")
            print("*" * num)
@@ -1,49 +0,0 @@
#!/usr/bin/env python
import argparse
from benchmark.Results import ReportBest
from benchmark.Experiments import Datasets, BestResults
from benchmark.Utils import ALL_METRICS, EnvDefault

"""Build a json file with the best results of a model and its hyperparameters
"""


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-s",
        "--score",
        action=EnvDefault,
        envvar="score",
        type=str,
        required=True,
        choices=ALL_METRICS,
        help="score name {accuracy, f1_macro, ...}",
    )
    ap.add_argument(
        "-m",
        "--model",
        action=EnvDefault,
        envvar="model",
        type=str,
        required=True,
        help="model name.",
    )
    ap.add_argument(
        "-r",
        "--report",
        type=bool,
        required=False,
        help="Generate Report",
    )
    args = ap.parse_args()
    return (args.score, args.model, args.report)


(score, model, report) = parse_arguments()
datasets = Datasets()
best = BestResults(score, model, datasets)
best.build()
if report:
    report = ReportBest(score, model, best=True, grid=False)
    report.report()
19  benchmark/scripts/be_build_best.py  Executable file
@@ -0,0 +1,19 @@
#!/usr/bin/env python
from benchmark.Results import ReportBest
from benchmark.Experiments import Datasets, BestResults
from benchmark.Arguments import Arguments

"""Build a json file with the best results of a model and its hyperparameters
"""


def main():
    arguments = Arguments()
    arguments.xset("score").xset("report").xset("model")
    args = arguments.parse()
    datasets = Datasets()
    best = BestResults(args.score, args.model, datasets)
    best.build()
    if args.report:
        report = ReportBest(args.score, args.model, best=True, grid=False)
        report.report()
@@ -1,107 +0,0 @@
#!/usr/bin/env python
import os
import json
from benchmark.Utils import Files, Folders

data = [
    '{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}',
    '{"C": 7, "gamma": 0.14, "kernel": "rbf"}',
    '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 0.95, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"kernel": "rbf"}',
    '{"kernel": "rbf"}',
    '{"C": 1.05, "gamma": "auto","kernel": "rbf"}',
    '{"splitter": "random", "max_features": "auto"}',
    '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", '
    '"multiclass_strategy": "ovr"}',
    '{"kernel": "rbf", "C": 0.05}',
    '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
    '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
    '{"C": 0.25, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 0.08, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 0.001, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}',
    '{"kernel": "rbf"}',
    '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}',
    '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": '
    '"ovr"}',
    '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C":57, "kernel": "rbf"}',
    '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": "ovr"}',
    '{"C": 5, "kernel": "rbf", "gamma": "auto"}',
    '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", '
    '"multiclass_strategy": "ovr"}',
    '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": "ovr"}',
    '{"kernel": "rbf", "gamma": 0.001}',
    '{"C": 1e4, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 7, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 2.83, "kernel": "rbf", "gamma": "auto"}',
    '{"C": 0.2, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": "ovr"}',
    '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
    '{"C": 2, "gamma": "auto", "kernel": "rbf"}',
    '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
]

results = {}
output = []
hyper = ["C", "gamma", "kernel", "multiclass_strategy"]
kernels = ["linear", "liblinear", "rbf", "poly"]

# initialize results
for kernel in kernels:
    results[kernel] = {}
    for item in hyper:
        results[kernel][item] = []
# load data
for sample in data:
    line = json.loads(sample)
    if "kernel" not in line:
        line["kernel"] = "linear"
    kernel = line["kernel"]
    for item in hyper:
        if item in line and line[item] not in results[kernel][item]:
            results[kernel][item].append(line[item])

# Add default values and remove inconsistent values
results["linear"]["multiclass_strategy"] = ["ovo"]
del results["linear"]["gamma"]
del results["liblinear"]["gamma"]
results["rbf"]["gamma"].append("scale")
results["poly"]["gamma"].append("scale")
results["poly"]["multiclass_strategy"].append("ovo")
for kernel in kernels:
    results[kernel]["C"].append(1.0)

for item in results:
    results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
    for key, value in results[item].items():
        new_key = f"base_estimator__{key}"
        try:
            results_tmp[new_key] = sorted(value)
        except TypeError:
            t1 = sorted(
                [
                    x
                    for x in value
                    if isinstance(x, int) or isinstance(x, float)
                ]
            )
            t2 = sorted([x for x in value if isinstance(x, str)])
            results_tmp[new_key] = t1 + t2
    output.append(results_tmp)

# save results
file_name = Files.grid_input("accuracy", "ODTE")
file_output = os.path.join(Folders.results, file_name)
with open(file_output, "w") as f:
    json.dump(output, f, indent=4)
print(f"Grid values saved to {file_output}")
112  benchmark/scripts/be_build_grid.py  Executable file
@@ -0,0 +1,112 @@
#!/usr/bin/env python
import os
import json
from benchmark.Utils import Files, Folders


def main():
    data = [
        '{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}',
        '{"C": 7, "gamma": 0.14, "kernel": "rbf"}',
        '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 0.95, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"kernel": "rbf"}',
        '{"kernel": "rbf"}',
        '{"C": 1.05, "gamma": "auto","kernel": "rbf"}',
        '{"splitter": "random", "max_features": "auto"}',
        '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", '
        '"multiclass_strategy": "ovr"}',
        '{"kernel": "rbf", "C": 0.05}',
        '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
        '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 7, "gamma": 0.1, "kernel": "rbf"}',
        '{"C": 0.25, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 0.08, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 0.001, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}',
        '{"kernel": "rbf"}',
        '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}',
        '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": '
        '"ovr"}',
        '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C":57, "kernel": "rbf"}',
        '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": '
        '"ovr"}',
        '{"C": 5, "kernel": "rbf", "gamma": "auto"}',
        '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", '
        '"multiclass_strategy": "ovr"}',
        '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": '
        '"ovr"}',
        '{"kernel": "rbf", "gamma": 0.001}',
        '{"C": 1e4, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 7, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 2.83, "kernel": "rbf", "gamma": "auto"}',
        '{"C": 0.2, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": '
        '"ovr"}',
        '{"kernel": "liblinear", "multiclass_strategy": "ovr"}',
        '{"C": 2, "gamma": "auto", "kernel": "rbf"}',
        '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
    ]

    results = {}
    output = []
    hyper = ["C", "gamma", "kernel", "multiclass_strategy"]
    kernels = ["linear", "liblinear", "rbf", "poly"]

    # initialize results
    for kernel in kernels:
        results[kernel] = {}
        for item in hyper:
            results[kernel][item] = []
    # load data
    for sample in data:
        line = json.loads(sample)
        if "kernel" not in line:
            line["kernel"] = "linear"
        kernel = line["kernel"]
        for item in hyper:
            if item in line and line[item] not in results[kernel][item]:
                results[kernel][item].append(line[item])

    # Add default values and remove inconsistent values
    results["linear"]["multiclass_strategy"] = ["ovo"]
    del results["linear"]["gamma"]
    del results["liblinear"]["gamma"]
    results["rbf"]["gamma"].append("scale")
    results["poly"]["gamma"].append("scale")
    results["poly"]["multiclass_strategy"].append("ovo")
    for kernel in kernels:
        results[kernel]["C"].append(1.0)

    for item in results:
        results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
        for key, value in results[item].items():
            new_key = f"base_estimator__{key}"
            try:
                results_tmp[new_key] = sorted(value)
            except TypeError:
                t1 = sorted(
                    [
                        x
                        for x in value
                        if isinstance(x, int) or isinstance(x, float)
                    ]
                )
                t2 = sorted([x for x in value if isinstance(x, str)])
                results_tmp[new_key] = t1 + t2
        output.append(results_tmp)

    # save results
    file_name = Files.grid_input("accuracy", "ODTE")
    file_output = os.path.join(Folders.results, file_name)
    with open(file_output, "w") as f:
        json.dump(output, f, indent=4)
    print(f"Grid values saved to {file_output}")
@@ -1,102 +0,0 @@
#!/usr/bin/env python
import argparse
from benchmark.Experiments import GridSearch, Datasets
from benchmark.Utils import EnvDefault, ALL_METRICS

"""Do experiment and build result file, optionally print report with results
"""


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-s",
        "--score",
        action=EnvDefault,
        envvar="score",
        type=str,
        required=True,
        choices=ALL_METRICS,
        help="score name {accuracy, f1_macro, ...}",
    )
    ap.add_argument(
        "-P",
        "--platform",
        action=EnvDefault,
        envvar="platform",
        type=str,
        required=True,
        help="Platform where the test is run",
    )
    ap.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        help="model name",
    )
    ap.add_argument(
        "-n",
        "--n_folds",
        action=EnvDefault,
        envvar="n_folds",
        type=int,
        required=True,
        help="number of folds",
    )
    ap.add_argument(
        "-q",
        "--quiet",
        type=bool,
        default=False,
        required=False,
        help="Wether to show progress bar or not",
    )
    ap.add_argument(
        "-t",
        "--stratified",
        action=EnvDefault,
        envvar="stratified",
        type=str,
        required=True,
        help="Stratified",
    )
    ap.add_argument(
        "-d",
        "--dataset",
        type=str,
        required=True,
        default=None,
        help="Gridsearch on this dataset",
    )
    args = ap.parse_args()
    return (
        args.stratified,
        args.score,
        args.model,
        args.n_folds,
        args.platform,
        args.quiet,
        args.dataset,
    )


(
    stratified,
    score,
    model,
    folds,
    platform,
    quiet,
    dataset,
) = parse_arguments()
job = GridSearch(
    score_name=score,
    model_name=model,
    stratified=stratified,
    datasets=Datasets(dataset_name=dataset),
    progress_bar=not quiet,
    platform=platform,
    folds=folds,
)
job.do_gridsearch()
23  benchmark/scripts/be_grid.py  Executable file
@@ -0,0 +1,23 @@
#!/usr/bin/env python
from benchmark.Experiments import GridSearch, Datasets
from benchmark.Arguments import Arguments

"""Do experiment and build result file, optionally print report with results
"""


def main():
    arguments = Arguments()
    arguments.xset("score").xset("platform").xset("model").xset("n_folds")
    arguments.xset("quiet").xset("stratified").xset("dataset")
    args = arguments.parse()
    job = GridSearch(
        score_name=args.score,
        model_name=args.model,
        stratified=args.stratified,
        datasets=Datasets(dataset_name=args.dataset),
        progress_bar=not args.quiet,
        platform=args.platform,
        folds=args.folds,
    )
    job.do_gridsearch()
@@ -1,103 +0,0 @@
#! /usr/bin/env python
import os
import argparse
from benchmark.Experiments import Models
from benchmark.Results import Summary
from benchmark.Utils import ALL_METRICS, Folders

"""List experiments of a model
"""


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--nan",
        type=bool,
        required=False,
        help="Move nan results to hidden folder",
    )
    ap.add_argument(
        "-s",
        "--score",
        type=str,
        required=False,
        choices=ALL_METRICS,
        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
    )
    models_data = Models.define_models(0)
    models = "{" + ", ".join(models_data) + "}"
    ap.add_argument(
        "-m",
        "--model",
        type=str,
        required=False,
        choices=list(models_data),
        help=f"model name: {models}",
    )
    ap.add_argument(
        "-k",
        "--key",
        type=str,
        required=False,
        default="date",
        help="key to sort results",
    )
    ap.add_argument(
        "--hidden",
        type=str,
        required=False,
        default=False,
        help="Show hidden results",
    )
    ap.add_argument(
        "-n",
        "--number",
        type=int,
        required=False,
        default=0,
        help="number of results to show, 0 to any",
    )
    args = ap.parse_args()

    return (
        args.nan,
        args.score,
        args.model,
        args.key,
        args.number,
        args.hidden,
    )


if __name__ == "__main__":
    (nan, score, model, key, number, hidden) = parse_arguments()
    data = Summary(hidden=hidden)
    data.acquire()
    data.list_results(score=score, model=model, sort_key=key, number=number)
    if nan:
        results_nan = []
        results = data.get_results_criteria(
            score=score,
            model=model,
            input_data=None,
            sort_key=key,
            number=number,
        )
        for result in results:
            if result["metric"] != result["metric"]:
                results_nan.append(result)
        if results_nan != []:
            print(
                "\n"
                + "*" * 30
                + " Results with nan moved to hidden "
                + "*" * 30
            )
            data.list_results(input_data=results_nan)
            for result in results_nan:
                name = result["file"]
                os.rename(
                    os.path.join(Folders.results, name),
                    os.path.join(Folders.hidden_results, name),
                )
49  benchmark/scripts/be_list.py  Executable file
@@ -0,0 +1,49 @@
#! /usr/bin/env python
import os
from benchmark.Results import Summary
from benchmark.Utils import Folders
from benchmark.Arguments import Arguments

"""List experiments of a model
"""


def main():
    arguments = Arguments()
    arguments.xset("number").xset("model", required=False).xset("score")
    arguments.xset("hidden").xset("nan").xset("key")
    args = arguments.parse()
    data = Summary(hidden=args.hidden)
    data.acquire()
    data.list_results(
        score=args.score,
        model=args.model,
        sort_key=args.key,
        number=args.number,
    )
    if args.nan:
        results_nan = []
        results = data.get_results_criteria(
            score=args.score,
            model=args.model,
            input_data=None,
            sort_key=args.key,
            number=args.number,
        )
        for result in results:
            if result["metric"] != result["metric"]:
                results_nan.append(result)
        if results_nan != []:
            print(
                "\n"
                + "*" * 30
                + " Results with nan moved to hidden "
                + "*" * 30
            )
            data.list_results(input_data=results_nan)
            for result in results_nan:
                name = result["file"]
                os.rename(
                    os.path.join(Folders.results, name),
                    os.path.join(Folders.hidden_results, name),
                )
@@ -1,165 +0,0 @@
#!/usr/bin/env python
import os
import argparse
from benchmark.Experiments import Experiment, Datasets, Models
from benchmark.Results import Report
from benchmark.Utils import EnvDefault, ALL_METRICS

"""Do experiment and build result file, optionally print report with results
"""


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-s",
        "--score",
        action=EnvDefault,
        envvar="score",
        type=str,
        choices=ALL_METRICS,
        required=True,
        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
    )
    ap.add_argument(
        "-P",
        "--platform",
        action=EnvDefault,
        envvar="platform",
        type=str,
        required=True,
        help="Platform where the test is run",
    )
    models_data = Models.define_models(0)
    models = "{" + ", ".join(models_data) + "}"
    ap.add_argument(
        "-m",
        "--model",
        type=str,
        required=True,
        choices=list(models_data),
        help=f"model name: {models}",
    )
    ap.add_argument(
        "-n",
        "--n_folds",
        action=EnvDefault,
        envvar="n_folds",
        type=int,
        required=True,
        help="number of folds",
    )
    ap.add_argument(
        "-p", "--hyperparameters", type=str, required=False, default="{}"
    )
    ap.add_argument(
        "-f",
        "--paramfile",
        type=bool,
        required=False,
        default=False,
        help="Use best hyperparams file?",
    )
    ap.add_argument(
        "-g",
        "--grid_paramfile",
        type=bool,
        required=False,
        default=False,
        help="Use grid searched hyperparams file?",
    )
    ap.add_argument(
        "--title", type=str, required=True, help="experiment title"
    )
    ap.add_argument(
        "-q",
        "--quiet",
        type=bool,
        default=False,
        required=False,
        help="Wether to show progress bar or not",
    )
    ap.add_argument(
        "-r",
        "--report",
        type=bool,
        default=False,
        required=False,
        help="Report results",
    )
    ap.add_argument(
        "-t",
        "--stratified",
        action=EnvDefault,
        envvar="stratified",
        type=str,
        required=True,
        help="Stratified",
    )
    ap.add_argument(
        "-d",
        "--dataset",
        type=str,
        required=False,
        default=None,
        help="Experiment with only this dataset",
    )
    args = ap.parse_args()
    return (
        args.stratified,
        args.score,
        args.model,
        args.n_folds,
        args.platform,
        args.quiet,
        args.hyperparameters,
        args.paramfile,
        args.grid_paramfile,
        args.report,
        args.title,
        args.dataset,
    )


if __name__ == "__main__":
    (
        stratified,
        score,
        model,
        folds,
        platform,
        quiet,
        hyperparameters,
        paramfile,
        grid_paramfile,
        report,
        experiment_title,
        dataset,
    ) = parse_arguments()
    report = report or dataset is not None
    if grid_paramfile:
        paramfile = False
    job = Experiment(
        score_name=score,
        model_name=model,
        stratified=stratified,
        datasets=Datasets(dataset_name=dataset),
        hyperparams_dict=hyperparameters,
        hyperparams_file=paramfile,
        grid_paramfile=grid_paramfile,
        progress_bar=not quiet,
        platform=platform,
        title=experiment_title,
        folds=folds,
    )
    job.do_experiment()
    if report:
        result_file = job.get_output_file()
        report = Report(result_file)
        report.report()

        if dataset is not None:
            print(f"Partial result file removed: {result_file}")
            os.remove(result_file)
    else:
        print(f"Results in {job.get_output_file()}")
44  benchmark/scripts/be_main.py  Executable file
@@ -0,0 +1,44 @@
#!/usr/bin/env python
import os
from benchmark.Experiments import Experiment, Datasets
from benchmark.Results import Report
from Arguments import Arguments

"""Do experiment and build result file, optionally print report with results
"""


def main():
    arguments = Arguments()
    arguments.xset("stratified").xset("score").xset("model").xset("dataset")
    arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
    arguments.xset("hyperparameters").xset("paramfile").xset("report")
    arguments.xset("grid_paramfile")
    args = arguments.parse()
    report = args.report or args.dataset is not None
    if args.grid_paramfile:
        args.paramfile = False
    job = Experiment(
        score_name=args.score,
        model_name=args.model,
        stratified=args.stratified,
        datasets=Datasets(dataset_name=args.dataset),
        hyperparams_dict=args.hyperparameters,
        hyperparams_file=args.paramfile,
        grid_paramfile=args.grid_paramfile,
        progress_bar=not args.quiet,
        platform=args.platform,
        title=args.experiment_title,
        folds=args.folds,
    )
    job.do_experiment()
    if report:
        result_file = job.get_output_file()
        report = Report(result_file)
        report.report()

        if args.dataset is not None:
            print(f"Partial result file removed: {result_file}")
            os.remove(result_file)
    else:
        print(f"Results in {job.get_output_file()}")
@@ -1,72 +0,0 @@
#!/usr/bin/env python
import argparse
from benchmark.Results import PairCheck
from benchmark.Utils import ALL_METRICS, EnvDefault

"""Check best results of two models giving scores and win-tie-loose results
"""


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-s",
        "--score",
        action=EnvDefault,
        envvar="score",
        type=str,
        required=True,
        choices=ALL_METRICS,
        help="score name {accuracy, f1_macro, ...}",
    )
    ap.add_argument(
        "-m1",
        "--model1",
        type=str,
        required=True,
        help="model 1 name",
    )
    ap.add_argument(
        "-m2",
        "--model2",
        type=str,
        required=True,
        help="model 2 name",
    )
    ap.add_argument(
        "-w",
        "--win",
        type=bool,
        default=False,
        required=False,
        help="show win results",
    )
    ap.add_argument(
        "-l",
        "--lose",
        type=bool,
        default=False,
        required=False,
        help="show lose results",
    )
    args = ap.parse_args()
    return (
        args.score,
        args.model1,
        args.model2,
        args.win,
        args.lose,
    )


if __name__ == "__main__":
    (
        score,
        model1,
        model2,
        win_results,
        lose_results,
    ) = parse_arguments()
    pair_check = PairCheck(score, model1, model2, win_results, lose_results)
    pair_check.compute()
    pair_check.report()
22  benchmark/scripts/be_pair_check.py  Executable file
@@ -0,0 +1,22 @@
#!/usr/bin/env python
from benchmark.Results import PairCheck
from Arguments import Arguments

"""Check best results of two models giving scores and win-tie-loose results
"""


def main():
    arguments = Arguments()
    arguments.xset("score").xset("win").xset("model1").xset("model2")
    arguments.xset("lose")
    args = arguments.parse()
    pair_check = PairCheck(
        args.score,
        args.model1,
        args.model2,
        args.win_results,
        args.lose_results,
    )
    pair_check.compute()
    pair_check.report()
@@ -1,42 +1,12 @@
#!/usr/bin/env python
import os
import subprocess
import argparse
import json
from stree import Stree
from graphviz import Source
from benchmark.Experiments import Datasets
from benchmark.Utils import Files, Folders


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-c",
        "--color",
        type=bool,
        required=False,
        default=False,
        help="use colors for the tree",
    )
    ap.add_argument(
        "-d",
        "--dataset",
        type=str,
        required=False,
        default="all",
        help="dataset to print or all",
    )
    ap.add_argument(
        "-q",
        "--quiet",
        type=bool,
        required=False,
        default=False,
        help="don't print generated tree(s)",
    )
    args = ap.parse_args()
    return (args.color, args.dataset, args.quiet)
from Arguments import Arguments


def compute_stree(X, y, random_state):
@@ -112,13 +82,15 @@ def print_stree(clf, dataset, X, y, color, quiet):
        subprocess.run([cmd_open, f"{file_name}.png"])


if __name__ == "__main__":
    (color, dataset_chosen, quiet) = parse_arguments()
def main():
    arguments = Arguments()
    arguments.xset("color").xset("dataset", default="all").xset("quiet")
    args = arguments.parse()
    hyperparameters = load_hyperparams("accuracy", "ODTE")
    random_state = 57
    dt = Datasets()
    for dataset in dt:
        if dataset == dataset_chosen or dataset_chosen == "all":
        if dataset == args.dataset or args.dataset == "all":
            X, y = dt.load(dataset)
            clf = Stree(random_state=random_state)
            hyperparams_dataset = hyperparam_filter(
@@ -126,4 +98,4 @@ if __name__ == "__main__":
            )
            clf.set_params(**hyperparams_dataset)
            clf.fit(X, y)
            print_stree(clf, dataset, X, y, color, quiet)
            print_stree(clf, dataset, X, y, args.color, args.quiet)
@@ -1,23 +0,0 @@
#!/usr/bin/env python
import os
import json
from benchmark.Experiments import Files, Folders


versions = dict(SVC="-", STree="1.2.3", ODTE="0.3.2")

results = Files().get_all_results(hidden=False)
for result in results:
    print(result)
    file_name = os.path.join(Folders.results, result)
    with open(file_name) as f:
        data = json.load(f)
    if "title" not in data:
        print(f"Repairing title in {result}")
        data["title"] = "default"
    if "version" not in data:
        print(f"Repairing version in {result}")
        model = data["model"]
        data["version"] = versions[model] if model in versions else "-"
    with open(file_name, "w") as f:
        json.dump(data, f, indent=4)
23  benchmark/scripts/be_repara.py  Executable file
@@ -0,0 +1,23 @@
#!/usr/bin/env python
import os
import json
from benchmark.Experiments import Files, Folders


def main():
    versions = dict(SVC="-", STree="1.2.3", ODTE="0.3.2")
    results = Files().get_all_results(hidden=False)
    for result in results:
        print(result)
        file_name = os.path.join(Folders.results, result)
        with open(file_name) as f:
            data = json.load(f)
        if "title" not in data:
            print(f"Repairing title in {result}")
            data["title"] = "default"
        if "version" not in data:
            print(f"Repairing version in {result}")
            model = data["model"]
            data["version"] = versions[model] if model in versions else "-"
        with open(file_name, "w") as f:
            json.dump(data, f, indent=4)
@@ -6,7 +6,7 @@ from benchmark.Utils import (
    Files,
    TextColor,
)
from benchmark.Models import Arguments
from benchmark.Arguments import Arguments


"""Build report on screen of a result file, optionally generate excel and sql
@@ -41,7 +41,7 @@ def default_report():
    )


if __name__ == "__main__":
def main():
    arguments = Arguments()
    arguments.xset("file").xset("excel").xset("sql").xset("compare")
    arguments.xset("best").xset("grid").xset("model", required=False).xset(
@@ -1,53 +0,0 @@
#!/usr/bin/env python
import argparse
from benchmark.Results import Summary
from benchmark.Utils import EnvDefault, ALL_METRICS


def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-m",
        "--model",
        type=str,
        action=EnvDefault,
        envvar="model",
        required=True,
        help="model name",
    )
    ap.add_argument(
        "-s",
        "--score",
        type=str,
        action=EnvDefault,
        envvar="score",
        required=True,
        choices=ALL_METRICS,
        help="score name {accuracy, f1_micro, f1_macro, all}",
    )
    args = ap.parse_args()
    return (
        args.score,
        args.model,
    )


if __name__ == "__main__":
    (
        score,
        model,
    ) = parse_arguments()
    all_metrics = ["accuracy", "f1-macro", "f1-micro"]
    metrics = all_metrics if score == "all" else [score]
    summary = Summary()
    summary.acquire()
    for metric in metrics:
        title = f"BEST RESULT of {metric} for {model}"
        best = summary.best_result(
            criterion="model", value=model, score=metric
        )
        summary.show_result(data=best, title=title)
        summary.show_result(
            summary.best_result(score=metric), title=f"BEST RESULT of {metric}"
        )
        summary.show_top(score=metric, n=10)
24  benchmark/scripts/be_summary.py  Executable file
@@ -0,0 +1,24 @@
#!/usr/bin/env python
from benchmark.Results import Summary
from benchmark.Arguments import ALL_METRICS, Arguments


def main():
    arguments = Arguments()
    metrics = list(ALL_METRICS)
    metrics.append("all")
    arguments.xset("score", choices=metrics).xset("model", required=False)
    args = arguments.parse()
    metrics = ALL_METRICS if args.score == "all" else [args.score]
    summary = Summary()
    summary.acquire()
    for metric in metrics:
        title = f"BEST RESULT of {metric} for {args.model}"
        best = summary.best_result(
            criterion="model", value=args.model, score=metric
        )
        summary.show_result(data=best, title=title)
        summary.show_result(
            summary.best_result(score=metric), title=f"BEST RESULT of {metric}"
        )
        summary.show_top(score=metric, n=10)
@@ -1,46 +0,0 @@
#!/usr/bin/env python
import sys
import time
from benchmark.Experiments import Datasets
from mufs import MUFS

mufs_i = MUFS()
mufs_c = MUFS()
mufs_f = MUFS()
datasets = Datasets()
iwss_t = iwss_tl = cfs_t = cfs_tl = fcbf_t = fcbf_tl = 0
for i in datasets:
    X, y = datasets.load(i)
    now = time.time()
    mufs_i.iwss(X, y, float(sys.argv[1]))
    iwss = time.time() - now
    iwss_r = len(mufs_i.get_results())
    now = time.time()
    mufs_c.cfs(X, y)
    cfs = time.time() - now
    cfs_r = len(mufs_c.get_results())
    now = time.time()
    mufs_f.fcbf(X, y, 1e-5)
    fcbf = time.time() - now
    fcbf_r = len(mufs_f.get_results())
    print(
        f"{i:30s} {iwss:.4f}({iwss_r:2d}) {cfs:.4f}({cfs_r:2d}) {fcbf:.4f}"
        f"({fcbf_r:2d})"
    )
    iwss_t += iwss
    iwss_tl += iwss_r
    cfs_t += cfs
    cfs_tl += cfs_r
    fcbf_t += fcbf
    fcbf_tl += fcbf_r
num = len(list(datasets))
iwss_t /= num
iwss_tl /= num
cfs_t /= num
cfs_tl /= num
fcbf_t /= num
fcbf_tl /= num
print(
    f"{'Average ..: ':30s} {iwss_t:.4f}({iwss_tl:.2f}) {cfs_t:.4f}"
    f"({cfs_tl:.2f}) {fcbf_t:.4f}({fcbf_tl:.2f})"
)
48  benchmark/scripts/be_td.py  Executable file
@@ -0,0 +1,48 @@
#!/usr/bin/env python
import sys
import time
from benchmark.Experiments import Datasets
from mufs import MUFS


def main():
    mufs_i = MUFS()
    mufs_c = MUFS()
    mufs_f = MUFS()
    datasets = Datasets()
    iwss_t = iwss_tl = cfs_t = cfs_tl = fcbf_t = fcbf_tl = 0
    for i in datasets:
        X, y = datasets.load(i)
        now = time.time()
        mufs_i.iwss(X, y, float(sys.argv[1]))
        iwss = time.time() - now
        iwss_r = len(mufs_i.get_results())
        now = time.time()
        mufs_c.cfs(X, y)
        cfs = time.time() - now
        cfs_r = len(mufs_c.get_results())
        now = time.time()
        mufs_f.fcbf(X, y, 1e-5)
        fcbf = time.time() - now
        fcbf_r = len(mufs_f.get_results())
        print(
            f"{i:30s} {iwss:.4f}({iwss_r:2d}) {cfs:.4f}({cfs_r:2d}) {fcbf:.4f}"
            f"({fcbf_r:2d})"
        )
        iwss_t += iwss
        iwss_tl += iwss_r
        cfs_t += cfs
        cfs_tl += cfs_r
        fcbf_t += fcbf
        fcbf_tl += fcbf_r
    num = len(list(datasets))
    iwss_t /= num
    iwss_tl /= num
    cfs_t /= num
    cfs_tl /= num
    fcbf_t /= num
    fcbf_tl /= num
    print(
        f"{'Average ..: ':30s} {iwss_t:.4f}({iwss_tl:.2f}) {cfs_t:.4f}"
        f"({cfs_tl:.2f}) {fcbf_t:.4f}({fcbf_tl:.2f})"
    )
@@ -1,5 +1,4 @@
import os
import shutil
from io import StringIO
from unittest.mock import patch
from openpyxl import load_workbook
@@ -22,14 +21,8 @@ class BenchmarkTest(TestBase):
            "Rplots.pdf",
            benchmark.get_tex_file(),
        ]
        for file_name in files:
            if os.path.exists(file_name):
                os.remove(file_name)
            file_name = os.path.join(Folders.exreport, file_name)
            if os.path.exists(file_name):
                os.remove(file_name)
        if os.path.exists(Folders.report):
            shutil.rmtree(Folders.report)
        self.remove_files(files, Folders.exreport)
        self.remove_files(files, ".")
        return super().tearDown()

    def test_csv(self):

@@ -13,10 +13,7 @@ class ExcelTest(TestBase):
            "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.xlsx",
            "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.xlsx",
        ]
        for file_name in files:
            file_name = os.path.join(Folders.results, file_name)
            if os.path.exists(file_name):
                os.remove(file_name)
        self.remove_files(files, Folders.results)
        return super().tearDown()

    def test_report_excel_compared(self):

@@ -1,4 +1,3 @@
import os
import json
from .TestBase import TestBase
from ..Experiments import Experiment, Datasets
@@ -25,8 +24,12 @@ class ExperimentTest(TestBase):
        return Experiment(**params)

    def tearDown(self) -> None:
        if os.path.exists(self.exp.get_output_file()):
            os.remove(self.exp.get_output_file())
        self.remove_files(
            [
                self.exp.get_output_file(),
            ],
            ".",
        )
        return super().tearDown()

    def test_build_hyperparams_file(self):

@@ -1,4 +1,3 @@
import os
import json
from .TestBase import TestBase
from ..Experiments import GridSearch, Datasets
@@ -32,8 +31,12 @@ class GridSearchTest(TestBase):

    def test_out_file_not_exits(self):
        file_name = self.grid.get_output_file()
        if os.path.exists(file_name):
            os.remove(file_name)
        self.remove_files(
            [
                file_name,
            ],
            ".",
        )
        _ = self.build_exp()
        # check the output file is initialized
        with open(file_name) as f:

@@ -9,10 +9,7 @@ class SQLTest(TestBase):
        files = [
            "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql",
        ]
        for file_name in files:
            file_name = os.path.join(Folders.results, file_name)
            if os.path.exists(file_name):
                os.remove(file_name)
        self.remove_files(files, Folders.results)
        return super().tearDown()

    def test_report_SQL(self):

@@ -10,6 +10,12 @@ class TestBase(unittest.TestCase):
        self.output = "sys.stdout"
        super().__init__(*args, **kwargs)

    def remove_files(self, files, folder):
        for file_name in files:
            file_name = os.path.join(folder, file_name)
            if os.path.exists(file_name):
                os.remove(file_name)

    def generate_excel_sheet(self, sheet, file_name):
        with open(os.path.join(self.test_files, file_name), "w") as f:
            for row in range(1, sheet.max_row + 1):

@@ -2,7 +2,8 @@ import os
import sys
import argparse
from .TestBase import TestBase
from ..Utils import Folders, Files, Symbols, TextColor, EnvData, EnvDefault
from ..Utils import Folders, Files, Symbols, TextColor
from ..Arguments import EnvData, EnvDefault


class UtilTest(TestBase):
@@ -266,8 +267,3 @@ class UtilTest(TestBase):
        self.assertEqual(TextColor.ENDC, "\033[0m")
        self.assertEqual(TextColor.BOLD, "\033[1m")
        self.assertEqual(TextColor.UNDERLINE, "\033[4m")

    def test_Arguments(self):
        arguments = Arguments()
        arg_list = ["score", "excel", "tex_output"]
        arguments.set_arguments(arg_list)

@@ -24,4 +24,5 @@ all = [
    "BenchmarkTest",
    "SummaryTest",
    "PairCheckTest",
    "be_list",
]
16  setup.py
@@ -59,5 +59,19 @@ setuptools.setup(
        "tqdm",
    ],
    zip_safe=False,
    scripts=import_scripts(),
    entry_points={
        "console_scripts": [
            "be_list=benchmark.scripts.be_list:main",
            "be_report=benchmark.scripts.be_report:main",
            "be_main=benchmark.scripts.be_main:main",
            "be_benchmark=benchmark.scripts.be_benchmark:main",
            "be_best=benchmark.scripts.be_best:main",
            "be_build_best=benchmark.scripts.be_build_best:main",
            "be_grid=benchmark.scripts.be_grid:main",
            "be_pair_check=benchmark.scripts.be_pair_check:main",
            "be_print_strees=benchmark.scripts.be_print_strees:main",
            "be_repara=benchmark.scripts.be_repara:main",
            "be_summary=benchmark.scripts.be_summary:main",
        ],
    },
)
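Note: once the package is installed (for example with pip install -e .), each entry point above exposes the corresponding script's main() function as a console command, replacing the old standalone scripts. A hypothetical invocation, with option values borrowed from elsewhere in this diff purely for illustration:

    be_main -m ODTE -s accuracy --title "sample run"
    be_list -n 10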