From 50cbc959193e02cc7b51f8e7124b03ae2437bc75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Mon, 27 Sep 2021 16:32:17 +0200
Subject: [PATCH] Add score hyperparameter

---
 src/Experiments.py | 37 +++++++++++++------
 src/Results.py     | 88 ++++++++++++++++++++++++++--------------------
 src/Utils.py       | 25 ++++++++-----
 src/benchmark.py   | 13 +++++--
 src/build_best.py  | 15 +++++---
 src/main.py        | 10 ++++++
 test.sh            | 14 ++++++++
 7 files changed, 137 insertions(+), 65 deletions(-)
 create mode 100755 test.sh

diff --git a/src/Experiments.py b/src/Experiments.py
index cdd52c4..0f4a478 100644
--- a/src/Experiments.py
+++ b/src/Experiments.py
@@ -46,13 +46,16 @@ class Datasets:
 
 
 class BestResults:
-    def __init__(self, model, datasets):
+    def __init__(self, score, model, datasets):
+        self.score_name = score
         self.datasets = datasets
         self.model = model
         self.data = {}
 
     def _get_file_name(self):
-        return os.path.join(Folders.results, Files.best_results(self.model))
+        return os.path.join(
+            Folders.results, Files.best_results(self.score_name, self.model)
+        )
 
     def load(self, dictionary):
         self.file_name = self._get_file_name()
@@ -75,7 +78,7 @@ class BestResults:
         for record in data["results"]:
             dataset = record["dataset"]
             if dataset in results:
-                if record["accuracy"] > results[dataset]["accuracy"]:
+                if record["score"] > results[dataset]["score"]:
                     record["file_name"] = file_name
                     results[dataset] = record
             else:
@@ -84,7 +87,9 @@
 
     def build(self):
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes(self.model)
+        init_suffix, end_suffix = Files.results_suffixes(
+            score=self.score_name, model=self.model
+        )
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -98,7 +103,7 @@
         datasets = Datasets()
         for name in tqdm(list(datasets), desc="datasets"):
             output[name] = (
-                results[name]["accuracy"],
+                results[name]["score"],
                 results[name]["hyperparameters"],
                 results[name]["file_name"],
             )
@@ -110,6 +115,7 @@
 class Experiment:
     def __init__(
         self,
+        score_name,
         model_name,
         datasets,
         hyperparams_dict,
@@ -123,13 +129,18 @@ class Experiment:
         self.date = today.strftime("%Y-%m-%d")
         self.output_file = os.path.join(
             Folders.results,
-            Files.results(model_name, platform, self.date, self.time),
+            Files.results(
+                score_name, model_name, platform, self.date, self.time
+            ),
         )
+        self.score_name = score_name
         self.model_name = model_name
         self.model = Models.get_model(model_name)
         self.datasets = datasets
         dictionary = json.loads(hyperparams_dict)
-        hyper = BestResults(model=model_name, datasets=datasets)
+        hyper = BestResults(
+            score=score_name, model=model_name, datasets=datasets
+        )
         if hyperparams_file:
             self.hyperparameters_dict = hyper.load(
                 dictionary=dictionary,
@@ -181,7 +192,12 @@
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore")
                 res = cross_validate(
-                    clf, X, y, cv=kfold, return_estimator=True
+                    clf,
+                    X,
+                    y,
+                    cv=kfold,
+                    return_estimator=True,
+                    scoring=self.score_name,
                 )
             self.scores.append(res["test_score"])
             self.times.append(res["fit_time"])
@@ -203,14 +219,15 @@
         record["nodes"] = np.mean(self.nodes)
         record["leaves"] = np.mean(self.leaves)
         record["depth"] = np.mean(self.depths)
-        record["accuracy"] = np.mean(self.scores)
-        record["accuracy_std"] = np.std(self.scores)
+        record["score"] = np.mean(self.scores)
+        record["score_std"] = np.std(self.scores)
         record["time"] = np.mean(self.times)
         record["time_std"] = np.std(self.times)
         self.results.append(record)
 
     def _output_results(self):
         output = {}
+        output["score_name"] = self.score_name
         output["model"] = self.model_name
         output["folds"] = self.folds
         output["date"] = self.date
diff --git a/src/Results.py b/src/Results.py
index da5aba9..35d8bd5 100644
--- a/src/Results.py
+++ b/src/Results.py
@@ -20,7 +20,7 @@ class BaseReport(abc.ABC):
         self.lines = self.data if best_file else self.data["results"]
 
     def _get_accuracy(self, item):
-        return self.data[item][0] if self.best_acc_file else item["accuracy"]
+        return self.data[item][0] if self.best_acc_file else item["score"]
 
     def report(self):
         self.header()
@@ -30,8 +30,8 @@
             accuracy_total += self._get_accuracy(result)
         self.footer(accuracy_total)
 
-    def _load_best_results(self, model):
-        best = BestResults(model, Datasets())
+    def _load_best_results(self, score, model):
+        best = BestResults(score, model, Datasets())
         self.best_results = best.load({})
 
     def _compute_status(self, dataset, accuracy):
@@ -79,7 +79,7 @@ class Report(BaseReport):
         "Nodes",
         "Leaves",
         "Depth",
-        "Accuracy",
+        "Score",
         "Time",
         "Hyperparameters",
     ]
@@ -113,13 +113,11 @@ class Report(BaseReport):
         print(f"{result['depth']:{hl[i]}.2f} ", end="")
         i += 1
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
         else:
            status = " "
         print(
-            f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
+            f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
             end="",
         )
         i += 1
@@ -132,7 +130,9 @@ class Report(BaseReport):
 
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.header_line("*")
         self.header_line(
@@ -144,6 +144,7 @@
             f" Execution took {self.data['duration']:7.2f} seconds on an "
             f"{self.data['platform']}"
         )
+        self.header_line(f" Score is {self.data['score_name']}")
         self.header_line("*")
         print("")
         line_col = ""
@@ -170,15 +171,18 @@
 class ReportBest(BaseReport):
     header_lengths = [30, 8, 50, 35]
     header_cols = [
         "Dataset",
-        "Accuracy",
+        "Score",
         "File",
         "Hyperparameters",
     ]
 
-    def __init__(self, model):
-        file_name = os.path.join(Folders.results, Files.best_results(model))
+    def __init__(self, score, model):
+        file_name = os.path.join(
+            Folders.results, Files.best_results(score, model)
+        )
         super().__init__(file_name, best_file=True)
         self.compare = False
+        self.score_name = score
         self.model = model
 
     def header_line(self, text):
@@ -204,7 +208,8 @@
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model} in any platform"
+            f" Report Best {self.score_name} Scores with {self.model} in any "
+            "platform"
         )
         self.header_line("*")
         print("")
@@ -222,14 +227,14 @@
                 f" {key} {self._status_meaning(key)} .....: {value:2d}"
             )
         self.header_line(
-            f" Accuracy compared to stree_default (liblinear-ovr) .: "
+            f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
             f"{accuracy/40.282203:7.4f}"
         )
         self.header_line("*")
 
 
 class Excel(BaseReport):
-    row = 4
+    row = 5
 
     def __init__(self, file_name, compare=False):
         super().__init__(file_name)
@@ -240,7 +245,9 @@
 
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.excel_file_name = self.file_name.replace(".json", ".xlsx")
         self.book = xlsxwriter.Workbook(self.excel_file_name)
@@ -266,6 +273,9 @@
         self.sheet.write(
             1, 5, f"Random seeds: {self.data['seeds']}", subheader
         )
+        self.sheet.write(
+            2, 0, f" Score is {self.data['score_name']}", subheader
+        )
         header_cols = [
             ("Dataset", 30),
             ("Samples", 10),
@@ -274,8 +284,8 @@
             ("Nodes", 7),
             ("Leaves", 7),
             ("Depth", 7),
-            ("Accuracy", 10),
-            ("Acc. Std.", 10),
+            ("Score", 10),
+            ("Score Std.", 10),
             ("Time", 10),
             ("Time Std.", 10),
             ("Parameters", 50),
@@ -285,7 +295,7 @@
 
         bold = self.book.add_format({"bold": True, "font_size": 14})
         i = 0
         for item, length in header_cols:
-            self.sheet.write(3, i, item, bold)
+            self.sheet.write(4, i, item, bold)
             self.sheet.set_column(i, i, length)
             i += 1
@@ -306,16 +316,14 @@
         self.sheet.write(self.row, col + 4, result["nodes"], normal)
         self.sheet.write(self.row, col + 5, result["leaves"], normal)
         self.sheet.write(self.row, col + 6, result["depth"], normal)
-        self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
+        self.sheet.write(self.row, col + 7, result["score"], decimal)
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
             self.sheet.write(self.row, col + 8, status, normal)
             col = 9
         else:
             col = 8
-        self.sheet.write(self.row, col, result["accuracy_std"], decimal)
+        self.sheet.write(self.row, col, result["score_std"], decimal)
         self.sheet.write(self.row, col + 1, result["time"], decimal)
         self.sheet.write(self.row, col + 2, result["time_std"], decimal)
         self.sheet.write(
@@ -355,8 +363,9 @@
             "date",
             "time",
             "type",
-            "accuracy",
-            "accuracy_std",
+            "score_name",
+            "score",
+            "score_std",
             "dataset",
             "classifier",
             "norm",
@@ -382,8 +391,9 @@
                 self.data["date"],
                 self.data["time"],
                 "crossval",
-                result["accuracy"],
-                result["accuracy_std"],
+                self.data["score_name"],
+                result["score"],
+                result["score_std"],
                 result["dataset"],
                 self.data["model"],
                 0,
@@ -406,8 +416,8 @@
 
 class Benchmark:
     @staticmethod
-    def get_result_file_name():
-        return os.path.join(Folders.results, Files.exreport)
+    def get_result_file_name(score):
+        return os.path.join(Folders.results, Files.exreport(score))
 
     @staticmethod
     def _process_dataset(results, data):
@@ -415,23 +425,23 @@
         for record in data["results"]:
             dataset = record["dataset"]
             if (model, dataset) in results:
-                if record["accuracy"] > results[model, dataset][0]:
+                if record["score"] > results[model, dataset][0]:
                     results[model, dataset] = (
-                        record["accuracy"],
-                        record["accuracy_std"],
+                        record["score"],
+                        record["score_std"],
                     )
             else:
                 results[model, dataset] = (
-                    record["accuracy"],
-                    record["accuracy_std"],
+                    record["score"],
+                    record["score_std"],
                 )
 
     @staticmethod
-    def compile_results():
+    def compile_results(score):
         # build Files.exreport
-        result_file_name = Benchmark.get_result_file_name()
+        result_file_name = Benchmark.get_result_file_name(score)
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes("")
+        init_suffix, end_suffix = Files.results_suffixes(score=score)
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -557,7 +567,7 @@
 
         row += 1
         column = 1
         for _ in range(len(results)):
-            sheet.write(row, column, "Accuracy", merge_format)
+            sheet.write(row, column, "Score", merge_format)
             sheet.write(row, column + 1, "Stdev", merge_format)
             column += 2
diff --git a/src/Utils.py b/src/Utils.py
index 50f5d78..9b6ba96 100644
--- a/src/Utils.py
+++ b/src/Utils.py
@@ -12,7 +12,7 @@ class Folders:
 
 
 class Files:
     index = "all.txt"
-    exreport = "exreport.csv"
+    exreport_output = "exreport.txt"
     exreport_err = "exreport_err.txt"
     exreport_excel = "exreport.xlsx"
@@ -22,19 +22,26 @@
     benchmark_r = "benchmark.r"
 
     @staticmethod
-    def best_results(model):
-        return f"best_results_{model}.json"
+    def exreport(score):
+        return f"exreport_{score}.csv"
 
     @staticmethod
-    def results(model, platform, date, time):
-        return f"results_{model}_{platform}_{date}_{time}.json"
+    def best_results(score, model):
+        return f"best_results_{score}_{model}.json"
 
     @staticmethod
-    def results_suffixes(model):
-        if model == "":
-            return "results_", ".json"
+    def results(score, model, platform, date, time):
+        return f"results_{score}_{model}_{platform}_{date}_{time}.json"
+
+    @staticmethod
+    def results_suffixes(score="", model=""):
+        suffix = ".json"
+        if model == "" and score == "":
+            return "results_", suffix
+        elif model == "":
+            return f"results_{score}_", suffix
         else:
-            return f"results_{model}_", ".json"
+            return f"results_{score}_{model}_", suffix
 
     @staticmethod
     def dataset(name):
diff --git a/src/benchmark.py b/src/benchmark.py
index 36978b2..c369dca 100644
--- a/src/benchmark.py
+++ b/src/benchmark.py
@@ -5,6 +5,13 @@ import argparse
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-x",
         "--excel",
@@ -13,12 +20,12 @@
         help="Generate Excel File",
     )
     args = ap.parse_args()
-    return args.excel
+    return (args.score, args.excel)
 
 
-excel = parse_arguments()
+(score, excel) = parse_arguments()
 benchmark = Benchmark()
-benchmark.compile_results()
+benchmark.compile_results(score)
 benchmark.report()
 benchmark.exreport()
 if excel:
diff --git a/src/build_best.py b/src/build_best.py
index 3c64725..d3be595 100644
--- a/src/build_best.py
+++ b/src/build_best.py
@@ -8,6 +8,13 @@ from Experiments import Datasets, BestResults
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=True,
         help="model name",
     )
@@ -24,13 +31,13 @@
     ap.add_argument(
         "-r",
         "--report",
         type=bool,
         required=False,
         help="Generate Report",
     )
     args = ap.parse_args()
-    return (args.model, args.report)
+    return (args.score, args.model, args.report)
 
 
-(model, report) = parse_arguments()
+(score, model, report) = parse_arguments()
 datasets = Datasets()
-best = BestResults(model, datasets)
+best = BestResults(score, model, datasets)
 best.build()
 if report:
-    report = ReportBest(model)
+    report = ReportBest(score, model)
     report.report()
diff --git a/src/main.py b/src/main.py
index 5d02efd..156b7c8 100644
--- a/src/main.py
+++ b/src/main.py
@@ -8,6 +8,13 @@ from Results import Report
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-P",
         "--platform",
@@ -55,6 +62,7 @@
     )
     args = ap.parse_args()
     return (
+        args.score,
         args.model,
         args.n_folds,
         args.platform,
@@ -66,6 +74,7 @@
 (
+    score,
     model,
     folds,
     platform,
     stratified,
     hyperparameters,
     paramfile,
     report,
 ) = parse_arguments()
 job = Experiment(
+    score_name=score,
     model_name=model,
     datasets=Datasets(),
     hyperparams_dict=hyperparameters,
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..00ab73c
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/main.py -s $a -P iMac27 -m $i -r 1
+    done
+done
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/build_best.py -s $a -m $i -r 1
+    done
+done
+for a in accuracy f1_macro; do
+    python src/benchmark.py -s $a
+done
\ No newline at end of file
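
A quick usage sketch for the new required -s/--score flag (the platform and
model labels are simply the ones test.sh uses; any available model name and
platform string work the same way):

    python src/main.py -s f1_macro -P iMac27 -m STree -r 1
    python src/build_best.py -s f1_macro -m STree -r 1
    python src/benchmark.py -s f1_macro

Result files are then keyed by the score name, following the patterns in
Utils.py: results_<score>_<model>_<platform>_<date>_<time>.json,
best_results_<score>_<model>.json and exreport_<score>.csv.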