diff --git a/.coveragerc b/.coveragerc index 232a67e..ed67cd8 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,15 +1,10 @@ [run] branch = True source = benchmark - [report] -exclude_lines = - if self.debug: - pragma: no cover - raise NotImplementedError - if __name__ == .__main__.: -ignore_errors = True -omit = +omit = benchmark/__init__.py + benchmark/scripts/__init__.py benchmark/_version.py - benchmark/tests/* \ No newline at end of file + benchmark/tests/* + \ No newline at end of file diff --git a/benchmark/Arguments.py b/benchmark/Arguments.py index 8e1fa57..333bcd6 100644 --- a/benchmark/Arguments.py +++ b/benchmark/Arguments.py @@ -11,3 +11,274 @@ ALL_METRICS = ( ) +class EnvData: + @staticmethod + def load(): + args = {} + with open(Files.dot_env) as f: + for line in f.read().splitlines(): + if line == "" or line.startswith("#"): + continue + key, value = line.split("=") + args[key] = value + return args + + +class EnvDefault(argparse.Action): + # Thanks to https://stackoverflow.com/users/445507/russell-heilling + def __init__(self, envvar, required=True, default=None, **kwargs): + self._args = EnvData.load() + default = self._args[envvar] + required = False + super(EnvDefault, self).__init__( + default=default, required=required, **kwargs + ) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, values) + + +class Arguments: + def __init__(self): + self.ap = argparse.ArgumentParser() + models_data = Models.define_models(random_state=0) + self.parameters = { + "best": [ + ("-b", "--best"), + { + "type": str, + "required": False, + "help": "best results of models", + }, + ], + "color": [ + ("-c", "--color"), + { + "type": bool, + "required": False, + "default": False, + "help": "use colors for the tree", + }, + ], + "compare": [ + ("-c", "--compare"), + { + "type": bool, + "required": False, + "help": "Compare accuracy with best results", + }, + ], + "dataset": [ + ("-d", "--dataset"), + { + "type": str, + "required": False, + "help": "dataset to work with", + }, + ], + "excel": [ + ("-x", "--excel"), + { + "type": bool, + "required": False, + "default": False, + "help": "Generate Excel File", + }, + ], + "file": [ + ("-f", "--file"), + {"type": str, "required": False, "help": "Result file"}, + ], + "grid": [ + ("-g", "--grid"), + { + "type": str, + "required": False, + "help": "grid results of model", + }, + ], + "grid_paramfile": [ + ("-g", "--grid_paramfile"), + { + "type": bool, + "required": False, + "default": False, + "help": "Use best hyperparams file?", + }, + ], + "hidden": [ + ("--hidden",), + { + "type": str, + "required": False, + "default": False, + "help": "Show hidden results", + }, + ], + "hyperparameters": [ + ("-p", "--hyperparameters"), + {"type": str, "required": False, "default": "{}"}, + ], + "key": [ + ("-k", "--key"), + { + "type": str, + "required": False, + "default": "date", + "help": "key to sort results", + }, + ], + "lose": [ + ("-l", "--lose"), + { + "type": bool, + "default": False, + "required": False, + "help": "show lose results", + }, + ], + "model": [ + ("-m", "--model"), + { + "type": str, + "required": True, + "choices": list(models_data), + "action": EnvDefault, + "envvar": "model", + "help": "model name", + }, + ], + "model1": [ + ("-m1", "--model1"), + { + "type": str, + "required": True, + "choices": list(models_data), + "action": EnvDefault, + "envvar": "model", + "help": "model name", + }, + ], + "model2": [ + ("-m2", "--model2"), + { + "type": str, + "required": True, + "choices": 
list(models_data), + "action": EnvDefault, + "envvar": "model", + "help": "model name", + }, + ], + "nan": [ + ("--nan",), + { + "type": bool, + "required": False, + "help": "Move nan results to hidden folder", + }, + ], + "number": [ + ("-n", "--number"), + { + "type": int, + "required": False, + "default": 0, + "help": "number of results to show, 0 to any", + }, + ], + "n_folds": [ + ("-n", "--n_folds"), + { + "action": EnvDefault, + "envvar": "n_folds", + "type": int, + "required": True, + "help": "number of folds", + }, + ], + "paramfile": [ + ("-f", "--paramfile"), + { + "type": bool, + "required": False, + "default": False, + "help": "Use best hyperparams file?", + }, + ], + "platform": [], + "quiet": [ + ("-q", "--quiet"), + { + "type": bool, + "required": False, + "default": False, + }, + ], + "report": [ + ("-r", "--report"), + { + "type": bool, + "default": False, + "required": False, + "help": "Report results", + }, + ], + "score": [ + ("-s", "--score"), + { + "action": EnvDefault, + "envvar": "score", + "type": str, + "required": True, + "choices": ALL_METRICS, + }, + ], + "sql": [ + ("-q", "--sql"), + {"type": bool, "required": False, "help": "Generate SQL File"}, + ], + "stratified": [ + ("-t", "--stratified"), + { + "action": EnvDefault, + "envvar": "stratified", + "type": str, + "required": True, + "help": "Stratified", + }, + ], + "tex_output": [ + ("-t", "--tex-output"), + { + "type": bool, + "required": False, + "default": False, + "help": "Generate Tex file with the table", + }, + ], + "title": [ + ("--title",), + {"type": str, "required": True, "help": "experiment title"}, + ], + "win": [ + ("-w", "--win"), + { + "type": bool, + "default": False, + "required": False, + "help": "show win results", + }, + ], + } + + def xset(self, *arg_name, **kwargs): + names, default = self.parameters[arg_name[0]] + self.ap.add_argument( + *names, + **{**default, **kwargs}, + ) + return self + + def parse(self): + return self.ap.parse_args() diff --git a/benchmark/Experiments.py b/benchmark/Experiments.py index d94a47b..cf4714a 100644 --- a/benchmark/Experiments.py +++ b/benchmark/Experiments.py @@ -14,7 +14,8 @@ from sklearn.model_selection import ( cross_validate, ) from .Utils import Folders, Files -from .Models import Models, EnvData +from .Models import Models +from .Arguments import EnvData class Randomized: diff --git a/benchmark/Models.py b/benchmark/Models.py index 70d0951..9b363c3 100644 --- a/benchmark/Models.py +++ b/benchmark/Models.py @@ -1,4 +1,3 @@ -import argparse from statistics import mean from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier from sklearn.ensemble import ( @@ -12,15 +11,6 @@ from stree import Stree from wodt import Wodt from odte import Odte from xgboost import XGBClassifier -from .Utils import Files - -ALL_METRICS = ( - "accuracy", - "f1-macro", - "f1-micro", - "f1-weighted", - "roc-auc-ovr", -) class Models: @@ -99,139 +89,3 @@ class Models: nodes, leaves = result.nodes_leaves() depth = result.depth_ if hasattr(result, "depth_") else 0 return nodes, leaves, depth - - -class EnvData: - @staticmethod - def load(): - args = {} - with open(Files.dot_env) as f: - for line in f.read().splitlines(): - if line == "" or line.startswith("#"): - continue - key, value = line.split("=") - args[key] = value - return args - - -class EnvDefault(argparse.Action): - # Thanks to https://stackoverflow.com/users/445507/russell-heilling - def __init__(self, envvar, required=True, default=None, **kwargs): - self._args = EnvData.load() - default = 
self._args[envvar] - required = False - super(EnvDefault, self).__init__( - default=default, required=required, **kwargs - ) - - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, values) - - -class Arguments: - def __init__(self): - self.ap = argparse.ArgumentParser() - models_data = Models.define_models(random_state=0) - self.parameters = { - "best": [ - ("-b", "--best"), - { - "type": str, - "required": False, - "help": "best results of models", - }, - ], - "color": [], - "compare": [ - ("-c", "--compare"), - { - "type": bool, - "required": False, - "help": "Compare accuracy with best results", - }, - ], - "dataset": [], - "excel": [ - ("-x", "--excel"), - { - "type": bool, - "required": False, - "default": False, - "help": "Generate Excel File", - }, - ], - "file": [ - ("-f", "--file"), - {"type": str, "required": False, "help": "Result file"}, - ], - "grid": [ - ("-g", "--grid"), - { - "type": str, - "required": False, - "help": "grid results of model", - }, - ], - "grid_paramfile": [], - "hidden": [], - "hyperparameters": [], - "key": [], - "lose": [], - "model": [ - ("-m", "--model"), - { - "type": str, - "required": True, - "choices": list(models_data), - "action": EnvDefault, - "envvar": "model", - "help": f"model name", - }, - ], - "model1": [], - "model2": [], - "nan": [], - "number": [], - "n_folds": [], - "paramfile": [], - "platform": [], - "quiet": [], - "report": [], - "score": [ - ("-s", "--score"), - { - "action": EnvDefault, - "envvar": "score", - "type": str, - "required": True, - "choices": ALL_METRICS, - }, - ], - "sql": [ - ("-q", "--sql"), - {"type": bool, "required": False, "help": "Generate SQL File"}, - ], - "stratified": [], - "tex_output": [ - ("-t", "--tex-output"), - { - "type": bool, - "required": False, - "default": False, - "help": "Generate Tex file with the table", - }, - ], - "title": [], - "win": [], - } - - def xset(self, *arg_name, **kwargs): - names, default = self.parameters[arg_name[0]] - self.ap.add_argument( - *names, - **{**default, **kwargs}, - ) - return self - - def parse(self): - return self.ap.parse_args() diff --git a/benchmark/__init__.py b/benchmark/__init__.py index f9034e9..6d699da 100644 --- a/benchmark/__init__.py +++ b/benchmark/__init__.py @@ -1,6 +1,6 @@ from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer from .Results import Report, Summary -from .Models import EnvDefault +from .Arguments import EnvDefault __author__ = "Ricardo Montañana Gómez" __copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez" diff --git a/benchmark/scripts/__init__.py b/benchmark/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/scripts/be_benchmark b/benchmark/scripts/be_benchmark deleted file mode 100755 index 14822a0..0000000 --- a/benchmark/scripts/be_benchmark +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -from benchmark.Results import Benchmark -from benchmark.Utils import Files -from benchmark.Models import Arguments - - -arguments = Arguments() -arguments.xset("score").xset("excel").xset("tex_output") -ar = arguments.parse() -benchmark = Benchmark(score=ar.score, visualize=True) -benchmark.compile_results() -benchmark.save_results() -benchmark.report(ar.tex_output) -benchmark.exreport() -if ar.excel: - benchmark.excel() - Files.open(benchmark.get_excel_file_name()) -if ar.tex_output: - print(f"File {benchmark.get_tex_file()} generated") diff --git a/benchmark/scripts/be_benchmark.py b/benchmark/scripts/be_benchmark.py new 
file mode 100755 index 0000000..a409583 --- /dev/null +++ b/benchmark/scripts/be_benchmark.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +from benchmark.Results import Benchmark +from benchmark.Utils import Files +from benchmark.Arguments import Arguments + + +def main(): + arguments = Arguments() + arguments.xset("score").xset("excel").xset("tex_output") + ar = arguments.parse() + benchmark = Benchmark(score=ar.score, visualize=True) + benchmark.compile_results() + benchmark.save_results() + benchmark.report(ar.tex_output) + benchmark.exreport() + if ar.excel: + benchmark.excel() + Files.open(benchmark.get_excel_file_name()) + if ar.tex_output: + print(f"File {benchmark.get_tex_file()} generated") diff --git a/benchmark/scripts/be_best b/benchmark/scripts/be_best deleted file mode 100755 index 283f182..0000000 --- a/benchmark/scripts/be_best +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python -import json -from benchmark.Results import Summary -from benchmark.Utils import ALL_METRICS, Arguments - - -arguments = Arguments() -metrics = list(ALL_METRICS) -metrics.append("all") -arguments.xset("score", choices=metrics) -args = arguments.parse() - - -metrics = ALL_METRICS if args.score == "all" else [args.score] - -summary = Summary() -summary.acquire() - -nl = 50 -num = 100 -for metric in metrics: - title = f"BEST RESULTS of {metric} for datasets" - best = summary.best_results_datasets(score=metric) - for key, item in best.items(): - print(f"{key:30s} {item[2]:{nl}s}") - print("-" * num) - print(f"{item[0]:30.7f} {json.dumps(item[1]):{nl}s}") - print("-" * num) - print(f"{item[3]:{nl+num}s}") - print("*" * num) diff --git a/benchmark/scripts/be_best.py b/benchmark/scripts/be_best.py new file mode 100755 index 0000000..feff794 --- /dev/null +++ b/benchmark/scripts/be_best.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +import json +from benchmark.Results import Summary +from benchmark.Arguments import ALL_METRICS, Arguments + + +def main(): + arguments = Arguments() + metrics = list(ALL_METRICS) + metrics.append("all") + arguments.xset("score", choices=metrics) + args = arguments.parse() + metrics = ALL_METRICS if args.score == "all" else [args.score] + summary = Summary() + summary.acquire() + nl = 50 + num = 100 + for metric in metrics: + best = summary.best_results_datasets(score=metric) + for key, item in best.items(): + print(f"{key:30s} {item[2]:{nl}s}") + print("-" * num) + print(f"{item[0]:30.7f} {json.dumps(item[1]):{nl}s}") + print("-" * num) + print(f"{item[3]:{nl+num}s}") + print("*" * num) diff --git a/benchmark/scripts/be_build_best b/benchmark/scripts/be_build_best deleted file mode 100755 index 6faa3dd..0000000 --- a/benchmark/scripts/be_build_best +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python -import argparse -from benchmark.Results import ReportBest -from benchmark.Experiments import Datasets, BestResults -from benchmark.Utils import ALL_METRICS, EnvDefault - -"""Build a json file with the best results of a model and its hyperparameters -""" - - -def parse_arguments(): - ap = argparse.ArgumentParser() - ap.add_argument( - "-s", - "--score", - action=EnvDefault, - envvar="score", - type=str, - required=True, - choices=ALL_METRICS, - help="score name {accuracy, f1_macro, ...}", - ) - ap.add_argument( - "-m", - "--model", - action=EnvDefault, - envvar="model", - type=str, - required=True, - help="model name.", - ) - ap.add_argument( - "-r", - "--report", - type=bool, - required=False, - help="Generate Report", - ) - args = ap.parse_args() - return (args.score, args.model, 
args.report) - - -(score, model, report) = parse_arguments() -datasets = Datasets() -best = BestResults(score, model, datasets) -best.build() -if report: - report = ReportBest(score, model, best=True, grid=False) - report.report() diff --git a/benchmark/scripts/be_build_best.py b/benchmark/scripts/be_build_best.py new file mode 100755 index 0000000..cb6ee62 --- /dev/null +++ b/benchmark/scripts/be_build_best.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +from benchmark.Results import ReportBest +from benchmark.Experiments import Datasets, BestResults +from benchmark.Arguments import Arguments + +"""Build a json file with the best results of a model and its hyperparameters +""" + + +def main(): + arguments = Arguments() + arguments.xset("score").xset("report").xset("model") + args = arguments.parse() + datasets = Datasets() + best = BestResults(args.score, args.model, datasets) + best.build() + if args.report: + report = ReportBest(args.score, args.model, best=True, grid=False) + report.report() diff --git a/benchmark/scripts/be_build_grid b/benchmark/scripts/be_build_grid deleted file mode 100755 index 3d44c47..0000000 --- a/benchmark/scripts/be_build_grid +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -import os -import json -from benchmark.Utils import Files, Folders - -data = [ - '{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}', - '{"C": 7, "gamma": 0.14, "kernel": "rbf"}', - '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 0.95, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"kernel": "rbf"}', - '{"kernel": "rbf"}', - '{"C": 1.05, "gamma": "auto","kernel": "rbf"}', - '{"splitter": "random", "max_features": "auto"}', - '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", ' - '"multiclass_strategy": "ovr"}', - '{"kernel": "rbf", "C": 0.05}', - '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 7, "gamma": 0.1, "kernel": "rbf"}', - '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 7, "gamma": 0.1, "kernel": "rbf"}', - '{"C": 0.25, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 0.08, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 0.001, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}', - '{"kernel": "rbf"}', - '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}', - '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": ' - '"ovr"}', - '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C":57, "kernel": "rbf"}', - '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": "ovr"}', - '{"C": 5, "kernel": "rbf", "gamma": "auto"}', - '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", ' - '"multiclass_strategy": "ovr"}', - '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": "ovr"}', - '{"kernel": "rbf", "gamma": 0.001}', - '{"C": 1e4, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 7, "kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 2.83, "kernel": "rbf", "gamma": "auto"}', - '{"C": 0.2, 
"gamma": 0.1, "kernel": "poly", "multiclass_strategy": "ovr"}', - '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', - '{"C": 2, "gamma": "auto", "kernel": "rbf"}', - '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', -] - -results = {} -output = [] -hyper = ["C", "gamma", "kernel", "multiclass_strategy"] -kernels = ["linear", "liblinear", "rbf", "poly"] - -# initialize results -for kernel in kernels: - results[kernel] = {} - for item in hyper: - results[kernel][item] = [] -# load data -for sample in data: - line = json.loads(sample) - if "kernel" not in line: - line["kernel"] = "linear" - kernel = line["kernel"] - for item in hyper: - if item in line and line[item] not in results[kernel][item]: - results[kernel][item].append(line[item]) - -# Add default values and remove inconsistent values -results["linear"]["multiclass_strategy"] = ["ovo"] -del results["linear"]["gamma"] -del results["liblinear"]["gamma"] -results["rbf"]["gamma"].append("scale") -results["poly"]["gamma"].append("scale") -results["poly"]["multiclass_strategy"].append("ovo") -for kernel in kernels: - results[kernel]["C"].append(1.0) - -for item in results: - results_tmp = {"n_jobs": [-1], "n_estimators": [100]} - for key, value in results[item].items(): - new_key = f"base_estimator__{key}" - try: - results_tmp[new_key] = sorted(value) - except TypeError: - t1 = sorted( - [ - x - for x in value - if isinstance(x, int) or isinstance(x, float) - ] - ) - t2 = sorted([x for x in value if isinstance(x, str)]) - results_tmp[new_key] = t1 + t2 - output.append(results_tmp) - -# save results -file_name = Files.grid_input("accuracy", "ODTE") -file_output = os.path.join(Folders.results, file_name) -with open(file_output, "w") as f: - json.dump(output, f, indent=4) -print(f"Grid values saved to {file_output}") diff --git a/benchmark/scripts/be_build_grid.py b/benchmark/scripts/be_build_grid.py new file mode 100755 index 0000000..40a9a3d --- /dev/null +++ b/benchmark/scripts/be_build_grid.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +import os +import json +from benchmark.Utils import Files, Folders + + +def main(): + data = [ + '{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}', + '{"C": 7, "gamma": 0.14, "kernel": "rbf"}', + '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 0.2, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 0.95, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"kernel": "rbf"}', + '{"kernel": "rbf"}', + '{"C": 1.05, "gamma": "auto","kernel": "rbf"}', + '{"splitter": "random", "max_features": "auto"}', + '{"C": 0.05, "max_features": "auto", "kernel": "liblinear", ' + '"multiclass_strategy": "ovr"}', + '{"kernel": "rbf", "C": 0.05}', + '{"C": 0.05, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 7, "gamma": 0.1, "kernel": "rbf"}', + '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 7, "gamma": 0.1, "kernel": "rbf"}', + '{"C": 0.25, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 0.08, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 0.001, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 2.8, "kernel": "rbf", "gamma": "auto"}', + '{"kernel": "rbf"}', + '{"C": 0.05, "gamma": 0.1, "kernel": "poly"}', + '{"C": 8.25, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": ' + '"ovr"}', + '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 
1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C":57, "kernel": "rbf"}', + '{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": ' + '"ovr"}', + '{"C": 5, "kernel": "rbf", "gamma": "auto"}', + '{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", ' + '"multiclass_strategy": "ovr"}', + '{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": ' + '"ovr"}', + '{"kernel": "rbf", "gamma": 0.001}', + '{"C": 1e4, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 7, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 2.83, "kernel": "rbf", "gamma": "auto"}', + '{"C": 0.2, "gamma": 0.1, "kernel": "poly", "multiclass_strategy": ' + '"ovr"}', + '{"kernel": "liblinear", "multiclass_strategy": "ovr"}', + '{"C": 2, "gamma": "auto", "kernel": "rbf"}', + '{"C": 1.75, "kernel": "liblinear", "multiclass_strategy": "ovr"}', + ] + + results = {} + output = [] + hyper = ["C", "gamma", "kernel", "multiclass_strategy"] + kernels = ["linear", "liblinear", "rbf", "poly"] + + # initialize results + for kernel in kernels: + results[kernel] = {} + for item in hyper: + results[kernel][item] = [] + # load data + for sample in data: + line = json.loads(sample) + if "kernel" not in line: + line["kernel"] = "linear" + kernel = line["kernel"] + for item in hyper: + if item in line and line[item] not in results[kernel][item]: + results[kernel][item].append(line[item]) + + # Add default values and remove inconsistent values + results["linear"]["multiclass_strategy"] = ["ovo"] + del results["linear"]["gamma"] + del results["liblinear"]["gamma"] + results["rbf"]["gamma"].append("scale") + results["poly"]["gamma"].append("scale") + results["poly"]["multiclass_strategy"].append("ovo") + for kernel in kernels: + results[kernel]["C"].append(1.0) + + for item in results: + results_tmp = {"n_jobs": [-1], "n_estimators": [100]} + for key, value in results[item].items(): + new_key = f"base_estimator__{key}" + try: + results_tmp[new_key] = sorted(value) + except TypeError: + t1 = sorted( + [ + x + for x in value + if isinstance(x, int) or isinstance(x, float) + ] + ) + t2 = sorted([x for x in value if isinstance(x, str)]) + results_tmp[new_key] = t1 + t2 + output.append(results_tmp) + + # save results + file_name = Files.grid_input("accuracy", "ODTE") + file_output = os.path.join(Folders.results, file_name) + with open(file_output, "w") as f: + json.dump(output, f, indent=4) + print(f"Grid values saved to {file_output}") diff --git a/benchmark/scripts/be_grid b/benchmark/scripts/be_grid deleted file mode 100755 index 4e5686b..0000000 --- a/benchmark/scripts/be_grid +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python -import argparse -from benchmark.Experiments import GridSearch, Datasets -from benchmark.Utils import EnvDefault, ALL_METRICS - -"""Do experiment and build result file, optionally print report with results -""" - - -def parse_arguments(): - ap = argparse.ArgumentParser() - ap.add_argument( - "-s", - "--score", - action=EnvDefault, - envvar="score", - type=str, - required=True, - choices=ALL_METRICS, - help="score name {accuracy, f1_macro, ...}", - ) - ap.add_argument( - "-P", - "--platform", - action=EnvDefault, - envvar="platform", - type=str, - required=True, - help="Platform where the test is run", - ) - ap.add_argument( - "-m", - "--model", - type=str, - 
required=True, -        help="model name", -    ) -    ap.add_argument( -        "-n", -        "--n_folds", -        action=EnvDefault, -        envvar="n_folds", -        type=int, -        required=True, -        help="number of folds", -    ) -    ap.add_argument( -        "-q", -        "--quiet", -        type=bool, -        default=False, -        required=False, -        help="Wether to show progress bar or not", -    ) -    ap.add_argument( -        "-t", -        "--stratified", -        action=EnvDefault, -        envvar="stratified", -        type=str, -        required=True, -        help="Stratified", -    ) -    ap.add_argument( -        "-d", -        "--dataset", -        type=str, -        required=True, -        default=None, -        help="Gridsearch on this dataset", -    ) -    args = ap.parse_args() -    return ( -        args.stratified, -        args.score, -        args.model, -        args.n_folds, -        args.platform, -        args.quiet, -        args.dataset, -    ) - - -( -    stratified, -    score, -    model, -    folds, -    platform, -    quiet, -    dataset, -) = parse_arguments() -job = GridSearch( -    score_name=score, -    model_name=model, -    stratified=stratified, -    datasets=Datasets(dataset_name=dataset), -    progress_bar=not quiet, -    platform=platform, -    folds=folds, -) -job.do_gridsearch() diff --git a/benchmark/scripts/be_grid.py b/benchmark/scripts/be_grid.py new file mode 100755 index 0000000..b9d63e5 --- /dev/null +++ b/benchmark/scripts/be_grid.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +from benchmark.Experiments import GridSearch, Datasets +from benchmark.Arguments import Arguments + +"""Do experiment and build result file, optionally print report with results +""" + + +def main(): +    arguments = Arguments() +    arguments.xset("score").xset("platform").xset("model").xset("n_folds") +    arguments.xset("quiet").xset("stratified").xset("dataset") +    args = arguments.parse() +    job = GridSearch( +        score_name=args.score, +        model_name=args.model, +        stratified=args.stratified, +        datasets=Datasets(dataset_name=args.dataset), +        progress_bar=not args.quiet, +        platform=args.platform, +        folds=args.n_folds, +    ) +    job.do_gridsearch() diff --git a/benchmark/scripts/be_list b/benchmark/scripts/be_list deleted file mode 100755 index 400576d..0000000 --- a/benchmark/scripts/be_list +++ /dev/null @@ -1,103 +0,0 @@ -#!
/usr/bin/env python -import os -import argparse -from benchmark.Experiments import Models -from benchmark.Results import Summary -from benchmark.Utils import ALL_METRICS, Folders - -"""List experiments of a model -""" - - -def parse_arguments(): - ap = argparse.ArgumentParser() - ap.add_argument( - "--nan", - type=bool, - required=False, - help="Move nan results to hidden folder", - ) - ap.add_argument( - "-s", - "--score", - type=str, - required=False, - choices=ALL_METRICS, - help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}", - ) - models_data = Models.define_models(0) - models = "{" + ", ".join(models_data) + "}" - ap.add_argument( - "-m", - "--model", - type=str, - required=False, - choices=list(models_data), - help=f"model name: {models}", - ) - ap.add_argument( - "-k", - "--key", - type=str, - required=False, - default="date", - help="key to sort results", - ) - ap.add_argument( - "--hidden", - type=str, - required=False, - default=False, - help="Show hidden results", - ) - ap.add_argument( - "-n", - "--number", - type=int, - required=False, - default=0, - help="number of results to show, 0 to any", - ) - args = ap.parse_args() - - return ( - args.nan, - args.score, - args.model, - args.key, - args.number, - args.hidden, - ) - - -if __name__ == "__main__": - (nan, score, model, key, number, hidden) = parse_arguments() - data = Summary(hidden=hidden) - data.acquire() - data.list_results(score=score, model=model, sort_key=key, number=number) - if nan: - results_nan = [] - results = data.get_results_criteria( - score=score, - model=model, - input_data=None, - sort_key=key, - number=number, - ) - for result in results: - if result["metric"] != result["metric"]: - results_nan.append(result) - if results_nan != []: - print( - "\n" - + "*" * 30 - + " Results with nan moved to hidden " - + "*" * 30 - ) - data.list_results(input_data=results_nan) - for result in results_nan: - name = result["file"] - os.rename( - os.path.join(Folders.results, name), - os.path.join(Folders.hidden_results, name), - ) diff --git a/benchmark/scripts/be_list.py b/benchmark/scripts/be_list.py new file mode 100755 index 0000000..2382288 --- /dev/null +++ b/benchmark/scripts/be_list.py @@ -0,0 +1,49 @@ +#! 
/usr/bin/env python +import os +from benchmark.Results import Summary +from benchmark.Utils import Folders +from benchmark.Arguments import Arguments + +"""List experiments of a model +""" + + +def main(): + arguments = Arguments() + arguments.xset("number").xset("model", required=False).xset("score") + arguments.xset("hidden").xset("nan").xset("key") + args = arguments.parse() + data = Summary(hidden=args.hidden) + data.acquire() + data.list_results( + score=args.score, + model=args.model, + sort_key=args.key, + number=args.number, + ) + if args.nan: + results_nan = [] + results = data.get_results_criteria( + score=args.score, + model=args.model, + input_data=None, + sort_key=args.key, + number=args.number, + ) + for result in results: + if result["metric"] != result["metric"]: + results_nan.append(result) + if results_nan != []: + print( + "\n" + + "*" * 30 + + " Results with nan moved to hidden " + + "*" * 30 + ) + data.list_results(input_data=results_nan) + for result in results_nan: + name = result["file"] + os.rename( + os.path.join(Folders.results, name), + os.path.join(Folders.hidden_results, name), + ) diff --git a/benchmark/scripts/be_main b/benchmark/scripts/be_main deleted file mode 100755 index 1a57372..0000000 --- a/benchmark/scripts/be_main +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python -import os -import argparse -from benchmark.Experiments import Experiment, Datasets, Models -from benchmark.Results import Report -from benchmark.Utils import EnvDefault, ALL_METRICS - -"""Do experiment and build result file, optionally print report with results -""" - - -def parse_arguments(): - ap = argparse.ArgumentParser() - ap.add_argument( - "-s", - "--score", - action=EnvDefault, - envvar="score", - type=str, - choices=ALL_METRICS, - required=True, - help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}", - ) - ap.add_argument( - "-P", - "--platform", - action=EnvDefault, - envvar="platform", - type=str, - required=True, - help="Platform where the test is run", - ) - models_data = Models.define_models(0) - models = "{" + ", ".join(models_data) + "}" - ap.add_argument( - "-m", - "--model", - type=str, - required=True, - choices=list(models_data), - help=f"model name: {models}", - ) - ap.add_argument( - "-n", - "--n_folds", - action=EnvDefault, - envvar="n_folds", - type=int, - required=True, - help="number of folds", - ) - ap.add_argument( - "-p", "--hyperparameters", type=str, required=False, default="{}" - ) - ap.add_argument( - "-f", - "--paramfile", - type=bool, - required=False, - default=False, - help="Use best hyperparams file?", - ) - ap.add_argument( - "-g", - "--grid_paramfile", - type=bool, - required=False, - default=False, - help="Use grid searched hyperparams file?", - ) - ap.add_argument( - "--title", type=str, required=True, help="experiment title" - ) - ap.add_argument( - "-q", - "--quiet", - type=bool, - default=False, - required=False, - help="Wether to show progress bar or not", - ) - ap.add_argument( - "-r", - "--report", - type=bool, - default=False, - required=False, - help="Report results", - ) - ap.add_argument( - "-t", - "--stratified", - action=EnvDefault, - envvar="stratified", - type=str, - required=True, - help="Stratified", - ) - ap.add_argument( - "-d", - "--dataset", - type=str, - required=False, - default=None, - help="Experiment with only this dataset", - ) - args = ap.parse_args() - return ( - args.stratified, - args.score, - args.model, - args.n_folds, - args.platform, - args.quiet, - args.hyperparameters, - args.paramfile, - 
args.grid_paramfile, -        args.report, -        args.title, -        args.dataset, -    ) - - -if __name__ == "__main__": -    ( -        stratified, -        score, -        model, -        folds, -        platform, -        quiet, -        hyperparameters, -        paramfile, -        grid_paramfile, -        report, -        experiment_title, -        dataset, -    ) = parse_arguments() -    report = report or dataset is not None -    if grid_paramfile: -        paramfile = False -    job = Experiment( -        score_name=score, -        model_name=model, -        stratified=stratified, -        datasets=Datasets(dataset_name=dataset), -        hyperparams_dict=hyperparameters, -        hyperparams_file=paramfile, -        grid_paramfile=grid_paramfile, -        progress_bar=not quiet, -        platform=platform, -        title=experiment_title, -        folds=folds, -    ) -    job.do_experiment() -    if report: -        result_file = job.get_output_file() -        report = Report(result_file) -        report.report() - -        if dataset is not None: -            print(f"Partial result file removed: {result_file}") -            os.remove(result_file) -    else: -        print(f"Results in {job.get_output_file()}") diff --git a/benchmark/scripts/be_main.py b/benchmark/scripts/be_main.py new file mode 100755 index 0000000..95cb060 --- /dev/null +++ b/benchmark/scripts/be_main.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +import os +from benchmark.Experiments import Experiment, Datasets +from benchmark.Results import Report +from benchmark.Arguments import Arguments + +"""Do experiment and build result file, optionally print report with results +""" + + +def main(): +    arguments = Arguments() +    arguments.xset("stratified").xset("score").xset("model").xset("dataset") +    arguments.xset("n_folds").xset("platform").xset("quiet").xset("title") +    arguments.xset("hyperparameters").xset("paramfile").xset("report") +    arguments.xset("grid_paramfile") +    args = arguments.parse() +    report = args.report or args.dataset is not None +    if args.grid_paramfile: +        args.paramfile = False +    job = Experiment( +        score_name=args.score, +        model_name=args.model, +        stratified=args.stratified, +        datasets=Datasets(dataset_name=args.dataset), +        hyperparams_dict=args.hyperparameters, +        hyperparams_file=args.paramfile, +        grid_paramfile=args.grid_paramfile, +        progress_bar=not args.quiet, +        platform=args.platform, +        title=args.title, +        folds=args.n_folds, +    ) +    job.do_experiment() +    if report: +        result_file = job.get_output_file() +        report = Report(result_file) +        report.report() + +        if args.dataset is not None: +            print(f"Partial result file removed: {result_file}") +            os.remove(result_file) +    else: +        print(f"Results in {job.get_output_file()}") diff --git a/benchmark/scripts/be_pair_check b/benchmark/scripts/be_pair_check deleted file mode 100755 index eef879d..0000000 --- a/benchmark/scripts/be_pair_check +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -import argparse -from benchmark.Results import PairCheck -from benchmark.Utils import ALL_METRICS, EnvDefault - -"""Check best results of two models giving scores and win-tie-loose results -""" - - -def parse_arguments(): -    ap = argparse.ArgumentParser() -    ap.add_argument( -        "-s", -        "--score", -        action=EnvDefault, -        envvar="score", -        type=str, -        required=True, -        choices=ALL_METRICS, -        help="score name {accuracy, f1_macro, ...}", -    ) -    ap.add_argument( -        "-m1", -        "--model1", -        type=str, -        required=True, -        help="model 1 name", -    ) -    ap.add_argument( -        "-m2", -        "--model2", -        type=str, -        required=True, -        help="model 2 name", -    ) -    ap.add_argument( -        "-w", -        "--win", -        type=bool, -        default=False, -        required=False, -        help="show win results", -    ) -    ap.add_argument( -        "-l", -        "--lose", -        type=bool, -        default=False, -        required=False,
-        help="show lose results", -    ) -    args = ap.parse_args() -    return ( -        args.score, -        args.model1, -        args.model2, -        args.win, -        args.lose, -    ) - - -if __name__ == "__main__": -    ( -        score, -        model1, -        model2, -        win_results, -        lose_results, -    ) = parse_arguments() -    pair_check = PairCheck(score, model1, model2, win_results, lose_results) -    pair_check.compute() -    pair_check.report() diff --git a/benchmark/scripts/be_pair_check.py b/benchmark/scripts/be_pair_check.py new file mode 100755 index 0000000..39d230a --- /dev/null +++ b/benchmark/scripts/be_pair_check.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +from benchmark.Results import PairCheck +from benchmark.Arguments import Arguments + +"""Check best results of two models giving scores and win-tie-lose results +""" + + +def main(): +    arguments = Arguments() +    arguments.xset("score").xset("win").xset("model1").xset("model2") +    arguments.xset("lose") +    args = arguments.parse() +    pair_check = PairCheck( +        args.score, +        args.model1, +        args.model2, +        args.win, +        args.lose, +    ) +    pair_check.compute() +    pair_check.report() diff --git a/benchmark/scripts/be_print_strees b/benchmark/scripts/be_print_strees.py similarity index 77% rename from benchmark/scripts/be_print_strees rename to benchmark/scripts/be_print_strees.py index 18d5685..66d6e13 100755 --- a/benchmark/scripts/be_print_strees +++ b/benchmark/scripts/be_print_strees.py @@ -1,42 +1,12 @@ #!/usr/bin/env python import os import subprocess -import argparse import json from stree import Stree from graphviz import Source from benchmark.Experiments import Datasets from benchmark.Utils import Files, Folders - - -def parse_arguments(): -    ap = argparse.ArgumentParser() -    ap.add_argument( -        "-c", -        "--color", -        type=bool, -        required=False, -        default=False, -        help="use colors for the tree", -    ) -    ap.add_argument( -        "-d", -        "--dataset", -        type=str, -        required=False, -        default="all", -        help="dataset to print or all", -    ) -    ap.add_argument( -        "-q", -        "--quiet", -        type=bool, -        required=False, -        default=False, -        help="don't print generated tree(s)", -    ) -    args = ap.parse_args() -    return (args.color, args.dataset, args.quiet) +from benchmark.Arguments import Arguments def compute_stree(X, y, random_state): @@ -112,13 +82,15 @@ def print_stree(clf, dataset, X, y, color, quiet): subprocess.run([cmd_open, f"{file_name}.png"]) -if __name__ == "__main__": -    (color, dataset_chosen, quiet) = parse_arguments() +def main(): +    arguments = Arguments() +    arguments.xset("color").xset("dataset", default="all").xset("quiet") +    args = arguments.parse() hyperparameters = load_hyperparams("accuracy", "ODTE") random_state = 57 dt = Datasets() for dataset in dt: -        if dataset == dataset_chosen or dataset_chosen == "all": +        if dataset == args.dataset or args.dataset == "all": X, y = dt.load(dataset) clf = Stree(random_state=random_state) hyperparams_dataset = hyperparam_filter( @@ -126,4 +98,4 @@ if __name__ == "__main__": ) clf.set_params(**hyperparams_dataset) clf.fit(X, y) -        print_stree(clf, dataset, X, y, color, quiet) +        print_stree(clf, dataset, X, y, args.color, args.quiet) diff --git a/benchmark/scripts/be_repara b/benchmark/scripts/be_repara deleted file mode 100755 index cd7b70d..0000000 --- a/benchmark/scripts/be_repara +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -import os -import json -from benchmark.Experiments import Files, Folders - - -versions = dict(SVC="-", STree="1.2.3", ODTE="0.3.2") - -results = Files().get_all_results(hidden=False) -for result in results: -    print(result) -    file_name = 
os.path.join(Folders.results, result) - with open(file_name) as f: - data = json.load(f) - if "title" not in data: - print(f"Repairing title in {result}") - data["title"] = "default" - if "version" not in data: - print(f"Repairing version in {result}") - model = data["model"] - data["version"] = versions[model] if model in versions else "-" - with open(file_name, "w") as f: - json.dump(data, f, indent=4) diff --git a/benchmark/scripts/be_repara.py b/benchmark/scripts/be_repara.py new file mode 100755 index 0000000..3fb185c --- /dev/null +++ b/benchmark/scripts/be_repara.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +import os +import json +from benchmark.Experiments import Files, Folders + + +def main(): + versions = dict(SVC="-", STree="1.2.3", ODTE="0.3.2") + results = Files().get_all_results(hidden=False) + for result in results: + print(result) + file_name = os.path.join(Folders.results, result) + with open(file_name) as f: + data = json.load(f) + if "title" not in data: + print(f"Repairing title in {result}") + data["title"] = "default" + if "version" not in data: + print(f"Repairing version in {result}") + model = data["model"] + data["version"] = versions[model] if model in versions else "-" + with open(file_name, "w") as f: + json.dump(data, f, indent=4) diff --git a/benchmark/scripts/be_report b/benchmark/scripts/be_report.py similarity index 96% rename from benchmark/scripts/be_report rename to benchmark/scripts/be_report.py index 6dffdcb..29ee983 100755 --- a/benchmark/scripts/be_report +++ b/benchmark/scripts/be_report.py @@ -6,7 +6,7 @@ from benchmark.Utils import ( Files, TextColor, ) -from benchmark.Models import Arguments +from benchmark.Arguments import Arguments """Build report on screen of a result file, optionally generate excel and sql @@ -41,7 +41,7 @@ def default_report(): ) -if __name__ == "__main__": +def main(): arguments = Arguments() arguments.xset("file").xset("excel").xset("sql").xset("compare") arguments.xset("best").xset("grid").xset("model", required=False).xset( diff --git a/benchmark/scripts/be_summary b/benchmark/scripts/be_summary deleted file mode 100755 index 8b88640..0000000 --- a/benchmark/scripts/be_summary +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -import argparse -from benchmark.Results import Summary -from benchmark.Utils import EnvDefault, ALL_METRICS - - -def parse_arguments(): - ap = argparse.ArgumentParser() - ap.add_argument( - "-m", - "--model", - type=str, - action=EnvDefault, - envvar="model", - required=True, - help="model name", - ) - ap.add_argument( - "-s", - "--score", - type=str, - action=EnvDefault, - envvar="score", - required=True, - choices=ALL_METRICS, - help="score name {accuracy, f1_micro, f1_macro, all}", - ) - args = ap.parse_args() - return ( - args.score, - args.model, - ) - - -if __name__ == "__main__": - ( - score, - model, - ) = parse_arguments() - all_metrics = ["accuracy", "f1-macro", "f1-micro"] - metrics = all_metrics if score == "all" else [score] - summary = Summary() - summary.acquire() - for metric in metrics: - title = f"BEST RESULT of {metric} for {model}" - best = summary.best_result( - criterion="model", value=model, score=metric - ) - summary.show_result(data=best, title=title) - summary.show_result( - summary.best_result(score=metric), title=f"BEST RESULT of {metric}" - ) - summary.show_top(score=metric, n=10) diff --git a/benchmark/scripts/be_summary.py b/benchmark/scripts/be_summary.py new file mode 100755 index 0000000..13a714c --- /dev/null +++ b/benchmark/scripts/be_summary.py @@ -0,0 +1,24 
@@ +#!/usr/bin/env python +from benchmark.Results import Summary +from benchmark.Arguments import ALL_METRICS, Arguments + + +def main(): + arguments = Arguments() + metrics = list(ALL_METRICS) + metrics.append("all") + arguments.xset("score", choices=metrics).xset("model", required=False) + args = arguments.parse() + metrics = ALL_METRICS if args.score == "all" else [args.score] + summary = Summary() + summary.acquire() + for metric in metrics: + title = f"BEST RESULT of {metric} for {args.model}" + best = summary.best_result( + criterion="model", value=args.model, score=metric + ) + summary.show_result(data=best, title=title) + summary.show_result( + summary.best_result(score=metric), title=f"BEST RESULT of {metric}" + ) + summary.show_top(score=metric, n=10) diff --git a/benchmark/scripts/be_td b/benchmark/scripts/be_td deleted file mode 100755 index 254b86f..0000000 --- a/benchmark/scripts/be_td +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -import sys -import time -from benchmark.Experiments import Datasets -from mufs import MUFS - -mufs_i = MUFS() -mufs_c = MUFS() -mufs_f = MUFS() -datasets = Datasets() -iwss_t = iwss_tl = cfs_t = cfs_tl = fcbf_t = fcbf_tl = 0 -for i in datasets: - X, y = datasets.load(i) - now = time.time() - mufs_i.iwss(X, y, float(sys.argv[1])) - iwss = time.time() - now - iwss_r = len(mufs_i.get_results()) - now = time.time() - mufs_c.cfs(X, y) - cfs = time.time() - now - cfs_r = len(mufs_c.get_results()) - now = time.time() - mufs_f.fcbf(X, y, 1e-5) - fcbf = time.time() - now - fcbf_r = len(mufs_f.get_results()) - print( - f"{i:30s} {iwss:.4f}({iwss_r:2d}) {cfs:.4f}({cfs_r:2d}) {fcbf:.4f}" - f"({fcbf_r:2d})" - ) - iwss_t += iwss - iwss_tl += iwss_r - cfs_t += cfs - cfs_tl += cfs_r - fcbf_t += fcbf - fcbf_tl += fcbf_r -num = len(list(datasets)) -iwss_t /= num -iwss_tl /= num -cfs_t /= num -cfs_tl /= num -fcbf_t /= num -fcbf_tl /= num -print( - f"{'Average ..: ':30s} {iwss_t:.4f}({iwss_tl:.2f}) {cfs_t:.4f}" - f"({cfs_tl:.2f}) {fcbf_t:.4f}({fcbf_tl:.2f})" -) diff --git a/benchmark/scripts/be_td.py b/benchmark/scripts/be_td.py new file mode 100755 index 0000000..54f3abc --- /dev/null +++ b/benchmark/scripts/be_td.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +import sys +import time +from benchmark.Experiments import Datasets +from mufs import MUFS + + +def main(): + mufs_i = MUFS() + mufs_c = MUFS() + mufs_f = MUFS() + datasets = Datasets() + iwss_t = iwss_tl = cfs_t = cfs_tl = fcbf_t = fcbf_tl = 0 + for i in datasets: + X, y = datasets.load(i) + now = time.time() + mufs_i.iwss(X, y, float(sys.argv[1])) + iwss = time.time() - now + iwss_r = len(mufs_i.get_results()) + now = time.time() + mufs_c.cfs(X, y) + cfs = time.time() - now + cfs_r = len(mufs_c.get_results()) + now = time.time() + mufs_f.fcbf(X, y, 1e-5) + fcbf = time.time() - now + fcbf_r = len(mufs_f.get_results()) + print( + f"{i:30s} {iwss:.4f}({iwss_r:2d}) {cfs:.4f}({cfs_r:2d}) {fcbf:.4f}" + f"({fcbf_r:2d})" + ) + iwss_t += iwss + iwss_tl += iwss_r + cfs_t += cfs + cfs_tl += cfs_r + fcbf_t += fcbf + fcbf_tl += fcbf_r + num = len(list(datasets)) + iwss_t /= num + iwss_tl /= num + cfs_t /= num + cfs_tl /= num + fcbf_t /= num + fcbf_tl /= num + print( + f"{'Average ..: ':30s} {iwss_t:.4f}({iwss_tl:.2f}) {cfs_t:.4f}" + f"({cfs_tl:.2f}) {fcbf_t:.4f}({fcbf_tl:.2f})" + ) diff --git a/benchmark/tests/Benchmark_test.py b/benchmark/tests/Benchmark_test.py index 7636521..adc9228 100644 --- a/benchmark/tests/Benchmark_test.py +++ b/benchmark/tests/Benchmark_test.py @@ -1,5 +1,4 @@ import os -import shutil from 
io import StringIO from unittest.mock import patch from openpyxl import load_workbook @@ -22,14 +21,8 @@ class BenchmarkTest(TestBase): "Rplots.pdf", benchmark.get_tex_file(), ] - for file_name in files: - if os.path.exists(file_name): - os.remove(file_name) - file_name = os.path.join(Folders.exreport, file_name) - if os.path.exists(file_name): - os.remove(file_name) - if os.path.exists(Folders.report): - shutil.rmtree(Folders.report) + self.remove_files(files, Folders.exreport) + self.remove_files(files, ".") return super().tearDown() def test_csv(self): diff --git a/benchmark/tests/Excel_test.py b/benchmark/tests/Excel_test.py index 1b092b4..1521e47 100644 --- a/benchmark/tests/Excel_test.py +++ b/benchmark/tests/Excel_test.py @@ -13,10 +13,7 @@ class ExcelTest(TestBase): "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.xlsx", "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.xlsx", ] - for file_name in files: - file_name = os.path.join(Folders.results, file_name) - if os.path.exists(file_name): - os.remove(file_name) + self.remove_files(files, Folders.results) return super().tearDown() def test_report_excel_compared(self): diff --git a/benchmark/tests/Experiment_test.py b/benchmark/tests/Experiment_test.py index 610c1ca..53df765 100644 --- a/benchmark/tests/Experiment_test.py +++ b/benchmark/tests/Experiment_test.py @@ -1,4 +1,3 @@ -import os import json from .TestBase import TestBase from ..Experiments import Experiment, Datasets @@ -25,8 +24,12 @@ class ExperimentTest(TestBase): return Experiment(**params) def tearDown(self) -> None: - if os.path.exists(self.exp.get_output_file()): - os.remove(self.exp.get_output_file()) + self.remove_files( + [ + self.exp.get_output_file(), + ], + ".", + ) return super().tearDown() def test_build_hyperparams_file(self): diff --git a/benchmark/tests/GridSearch_test.py b/benchmark/tests/GridSearch_test.py index ec1f7c7..6016bbf 100644 --- a/benchmark/tests/GridSearch_test.py +++ b/benchmark/tests/GridSearch_test.py @@ -1,4 +1,3 @@ -import os import json from .TestBase import TestBase from ..Experiments import GridSearch, Datasets @@ -32,8 +31,12 @@ class GridSearchTest(TestBase): def test_out_file_not_exits(self): file_name = self.grid.get_output_file() - if os.path.exists(file_name): - os.remove(file_name) + self.remove_files( + [ + file_name, + ], + ".", + ) _ = self.build_exp() # check the output file is initialized with open(file_name) as f: diff --git a/benchmark/tests/SQL_test.py b/benchmark/tests/SQL_test.py index 7acf9bf..da4a6cf 100644 --- a/benchmark/tests/SQL_test.py +++ b/benchmark/tests/SQL_test.py @@ -9,10 +9,7 @@ class SQLTest(TestBase): files = [ "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql", ] - for file_name in files: - file_name = os.path.join(Folders.results, file_name) - if os.path.exists(file_name): - os.remove(file_name) + self.remove_files(files, Folders.results) return super().tearDown() def test_report_SQL(self): diff --git a/benchmark/tests/TestBase.py b/benchmark/tests/TestBase.py index a30cba5..13778b9 100644 --- a/benchmark/tests/TestBase.py +++ b/benchmark/tests/TestBase.py @@ -10,6 +10,12 @@ class TestBase(unittest.TestCase): self.output = "sys.stdout" super().__init__(*args, **kwargs) + def remove_files(self, files, folder): + for file_name in files: + file_name = os.path.join(folder, file_name) + if os.path.exists(file_name): + os.remove(file_name) + def generate_excel_sheet(self, sheet, file_name): with open(os.path.join(self.test_files, file_name), "w") as f: for row in range(1, sheet.max_row + 1): 
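A note on the test clean-up above: the repeated tearDown loops in Benchmark_test, Excel_test, Experiment_test, GridSearch_test and SQL_test now delegate to the new TestBase.remove_files helper, which joins each name with the given folder and deletes the file only if it exists. A standalone sketch of the same logic, with a hypothetical file name, for reference:

    import os

    def remove_files(files, folder):
        # mirror of TestBase.remove_files: silently skip files that do not exist
        for file_name in files:
            file_name = os.path.join(folder, file_name)
            if os.path.exists(file_name):
                os.remove(file_name)

    remove_files(["results_demo.xlsx"], ".")  # hypothetical name; no error if absent
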
diff --git a/benchmark/tests/Util_test.py b/benchmark/tests/Util_test.py index e26a101..7f34f10 100644 --- a/benchmark/tests/Util_test.py +++ b/benchmark/tests/Util_test.py @@ -2,7 +2,8 @@ import os import sys import argparse from .TestBase import TestBase -from ..Utils import Folders, Files, Symbols, TextColor, EnvData, EnvDefault +from ..Utils import Folders, Files, Symbols, TextColor +from ..Arguments import EnvData, EnvDefault class UtilTest(TestBase): @@ -266,8 +267,3 @@ class UtilTest(TestBase): self.assertEqual(TextColor.ENDC, "\033[0m") self.assertEqual(TextColor.BOLD, "\033[1m") self.assertEqual(TextColor.UNDERLINE, "\033[4m") - - def test_Arguments(self): - arguments = Arguments() - arg_list = ["score", "excel", "tex_output"] - arguments.set_arguments(arg_list) diff --git a/benchmark/tests/__init__.py b/benchmark/tests/__init__.py index 0155059..689ba62 100644 --- a/benchmark/tests/__init__.py +++ b/benchmark/tests/__init__.py @@ -24,4 +24,5 @@ all = [ "BenchmarkTest", "SummaryTest", "PairCheckTest", + "be_list", ] diff --git a/setup.py b/setup.py index 4ecc37b..42b453f 100644 --- a/setup.py +++ b/setup.py @@ -59,5 +59,19 @@ setuptools.setup( "tqdm", ], zip_safe=False, - scripts=import_scripts(), + entry_points={ + "console_scripts": [ + "be_list=benchmark.scripts.be_list:main", + "be_report=benchmark.scripts.be_report:main", + "be_main=benchmark.scripts.be_main:main", + "be_benchmark=benchmark.scripts.be_benchmark:main", + "be_best=benchmark.scripts.be_best:main", + "be_build_best=benchmark.scripts.be_build_best:main", + "be_grid=benchmark.scripts.be_grid:main", + "be_pair_check=benchmark.scripts.be_pair_check:main", + "be_print_strees=benchmark.scripts.be_print_strees:main", + "be_repara=benchmark.scripts.be_repara:main", + "be_summary=benchmark.scripts.be_summary:main", + ], + }, )
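

A note on the relocated EnvData.load (now in benchmark/Arguments.py together with EnvDefault and Arguments): it reads Files.dot_env line by line, skips blank and comment lines, and splits the rest on "=", so a line with more than one "=" would raise ValueError during unpacking. A standalone sketch of the same parsing, with a hypothetical path, assuming nothing beyond the standard library:

    def load_env(path=".env"):  # hypothetical helper mirroring EnvData.load
        args = {}
        with open(path) as f:
            for line in f.read().splitlines():
                if line == "" or line.startswith("#"):
                    continue
                key, value = line.split("=")  # assumes exactly one "=" per line
                args[key] = value
        return args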
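The EnvDefault action pre-loads those .env values, uses them as the argument default and drops the required flag, so environment-driven options can be omitted on the command line. A self-contained sketch of the pattern using a plain dict in place of the .env file (class and variable names here are hypothetical):

    import argparse

    ENV = {"score": "accuracy"}  # stand-in for what EnvData.load() would return


    class DictDefault(argparse.Action):
        # take the default from ENV and make the option optional,
        # mirroring the EnvDefault action defined in benchmark/Arguments.py
        def __init__(self, envvar, required=True, default=None, **kwargs):
            default = ENV[envvar]
            super().__init__(default=default, required=False, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            setattr(namespace, self.dest, values)


    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--score", action=DictDefault, envvar="score", type=str)
    print(parser.parse_args([]).score)  # -> accuracy, taken from ENV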
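Arguments keeps a single registry with every option's flags, type, default, help text and, where applicable, the EnvDefault action; each script builds its parser by chaining xset calls, and keyword arguments passed to xset override the registry entry through the {**default, **kwargs} merge (be_list.py, for instance, passes required=False for --model). A usage sketch, assuming the package is installed and a .env file with the expected keys is present:

    from benchmark.Arguments import Arguments

    arguments = Arguments()
    # per-call kwargs win over the registry defaults
    arguments.xset("score").xset("model", required=False).xset("number")
    args = arguments.parse()
    print(args.score, args.model, args.number)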
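On the packaging side, setup.py stops installing the raw script files and declares console_scripts entry points instead, so each benchmark.scripts.be_*.py module only needs to expose a main() callable; the be_* commands are generated at install time (be_grid resolves to benchmark.scripts.be_grid:main, and so on). The new modules keep their shebang but never call main() at import time, so running them directly with python would do nothing; if direct execution is also wanted, the usual guard could be appended to each module — shown here as a suggestion, not something this patch contains:

    if __name__ == "__main__":  # optional addition, not part of the patch
        main()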
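In be_build_grid.py the collected hyperparameter values mix numbers with strings such as "auto" and "scale", and Python 3 refuses to order those together, so the except TypeError branch sorts the numeric and the string values separately and concatenates them. The same idea as a small standalone helper (name hypothetical):

    def sort_mixed(values):
        # numbers first, then strings, since Python 3 cannot compare the two
        try:
            return sorted(values)
        except TypeError:
            nums = sorted(v for v in values if isinstance(v, (int, float)))
            strs = sorted(v for v in values if isinstance(v, str))
            return nums + strs


    print(sort_mixed([0.1, "scale", 10.0, "auto", 1.0]))
    # -> [0.1, 1.0, 10.0, 'auto', 'scale']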
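be_list.py keeps the result["metric"] != result["metric"] test when deciding which results to move to the hidden folder; that is the usual NaN self-inequality check, since NaN is the only float that compares unequal to itself, and is equivalent to math.isnan for float values:

    import math

    metric = float("nan")
    print(metric != metric)    # True: NaN never equals itself
    print(math.isnan(metric))  # True: the explicit alternative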