From b24a508d1ccad3c8003494ee4afb8607d4acc079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 25 Oct 2022 15:00:37 +0200 Subject: [PATCH] Add consistent comparative results to reports --- benchmark/Results.py | 71 +++++++++++++++---- benchmark/Utils.py | 1 - .../test_files/be_build_best_report.test | 2 +- benchmark/tests/test_files/be_main_best.test | 2 +- .../tests/test_files/be_main_complete.test | 2 +- .../tests/test_files/be_main_dataset.test | 2 +- benchmark/tests/test_files/be_main_grid.test | 2 +- benchmark/tests/test_files/excel.test | 2 +- .../tests/test_files/excel_add_ODTE.test | 2 +- .../tests/test_files/excel_add_STree.test | 2 +- .../tests/test_files/excel_compared.test | 2 +- .../tests/test_files/exreport_excel_ODTE.test | 2 +- .../exreport_excel_RandomForest.test | 2 +- .../test_files/exreport_excel_STree.test | 2 +- benchmark/tests/test_files/report.test | 2 +- benchmark/tests/test_files/report_best.test | 2 +- .../tests/test_files/report_compared.test | 2 +- benchmark/tests/test_files/report_grid.test | 2 +- 18 files changed, 75 insertions(+), 29 deletions(-) diff --git a/benchmark/Results.py b/benchmark/Results.py index 9c90dc8..bf0c8e6 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -9,16 +9,37 @@ import xlsxwriter import numpy as np from .Experiments import BestResults from .Datasets import Datasets +from .Arguments import EnvData, ALL_METRICS from .Utils import ( Folders, Files, Symbols, - BEST_ACCURACY_STREE, TextColor, NO_RESULTS, ) +class BestResultsEver: + def __init__(self): + self.data = {} + for i in ["Tanveer", "Surcov", "Arff"]: + self.data[i] = {} + for metric in ALL_METRICS: + self.data[i][metric.replace("-", "_")] = ["self", 1.0] + self.data[i][metric] = ["self", 1.0] + self.data["Tanveer"]["accuracy"] = [ + "STree_default (liblinear-ovr)", + 40.282203, + ] + self.data["Arff"]["accuracy"] = [ + "STree_default (linear-ovo)", + 21.9765, + ] + + def get_name_value(self, key, score): + return self.data[key][score] + + class BaseReport(abc.ABC): def __init__(self, file_name, best_file=False): self.file_name = file_name @@ -30,7 +51,20 @@ class BaseReport(abc.ABC): with open(self.file_name) as f: self.data = json.load(f) self.best_acc_file = best_file - self.lines = self.data if best_file else self.data["results"] + if best_file: + self.lines = self.data + else: + self.lines = self.data["results"] + self.score_name = self.data["score_name"] + self.__compute_best_results_ever() + + def __compute_best_results_ever(self): + args = EnvData.load() + key = args["source_data"] + best = BestResultsEver() + self.best_score_name, self.best_score_value = best.get_name_value( + key, self.score_name + ) def _get_accuracy(self, item): return self.data[item][0] if self.best_acc_file else item["score"] @@ -69,6 +103,12 @@ class BaseReport(abc.ABC): } return meaning[status] + def _get_best_accuracy(self): + return self.best_score_value + + def _get_message_best_accuracy(self): + return f"{self.score_name} compared to {self.best_score_name} .:" + @abc.abstractmethod def header(self) -> None: pass @@ -188,8 +228,8 @@ class Report(BaseReport): f" {key} {self._status_meaning(key)} .....: {value:2d}" ) self.header_line( - f" Accuracy compared to stree_default (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f" {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") @@ -209,12 +249,12 @@ class ReportBest(BaseReport): if best else Files.grid_output(score, model) ) + file_name = os.path.join(Folders.results, name) self.best = best self.grid = grid - file_name = os.path.join(Folders.results, name) - super().__init__(file_name, best_file=True) self.score_name = score self.model = model + super().__init__(file_name, best_file=True) def header_line(self, text: str) -> None: length = sum(self.header_lengths) + len(self.header_lengths) - 3 @@ -254,8 +294,8 @@ class ReportBest(BaseReport): def footer(self, accuracy): self.header_line("*") self.header_line( - f" Scores compared to stree_default accuracy (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f" {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") @@ -509,8 +549,8 @@ class Excel(BaseReport): self.sheet.write(self.row, 3, self._status_meaning(key), bold) self.row += 1 message = ( - f"** Accuracy compared to stree_default (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f"** {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) bold = self.book.add_format({"bold": True, "font_size": 14}) # set width of the hyperparams column with the maximum width @@ -634,6 +674,13 @@ class Benchmark: self._report = {} self._datasets = set() self.visualize = visualize + self.__compute_best_results_ever() + + def __compute_best_results_ever(self): + args = EnvData.load() + key = args["source_data"] + best = BestResultsEver() + _, self.best_score_value = best.get_name_value(key, self._score) def get_result_file_name(self): return os.path.join(Folders.exreport, Files.exreport(self._score)) @@ -971,7 +1018,7 @@ class Benchmark: sheet.write_formula( row, col + 1, - f"=sum({range_metric})/{BEST_ACCURACY_STREE}", + f"=sum({range_metric})/{self.best_score_value}", decimal_total, ) range_rank = ( @@ -1063,7 +1110,7 @@ class StubReport(BaseReport): def footer(self, accuracy: float) -> None: self.accuracy = accuracy - self.score = accuracy / BEST_ACCURACY_STREE + self.score = accuracy / self._get_best_accuracy() class Summary: diff --git a/benchmark/Utils.py b/benchmark/Utils.py index 176352a..d470959 100644 --- a/benchmark/Utils.py +++ b/benchmark/Utils.py @@ -1,7 +1,6 @@ import os import subprocess -BEST_ACCURACY_STREE = 40.282203 NO_RESULTS = "** No results found **" NO_ENV = "File .env not found" diff --git a/benchmark/tests/test_files/be_build_best_report.test b/benchmark/tests/test_files/be_build_best_report.test index 603f363..ea19b87 100644 --- a/benchmark/tests/test_files/be_build_best_report.test +++ b/benchmark/tests/test_files/be_build_best_report.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.963520 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1} balloons 0.785000 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0434 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0434 * ****************************************************************************************************************************************************************** diff --git a/benchmark/tests/test_files/be_main_best.test b/benchmark/tests/test_files/be_main_best.test index f90f45a..8b21255 100644 --- a/benchmark/tests/test_files/be_main_best.test +++ b/benchmark/tests/test_files/be_main_best.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json diff --git a/benchmark/tests/test_files/be_main_complete.test b/benchmark/tests/test_files/be_main_complete.test index d70eb26..793d267 100644 --- a/benchmark/tests/test_files/be_main_complete.test +++ b/benchmark/tests/test_files/be_main_complete.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json diff --git a/benchmark/tests/test_files/be_main_dataset.test b/benchmark/tests/test_files/be_main_dataset.test index 10d3eea..abfcc76 100644 --- a/benchmark/tests/test_files/be_main_dataset.test +++ b/benchmark/tests/test_files/be_main_dataset.test @@ -10,6 +10,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score ============================== ====== ===== === ======= ======= ======= =============== ================ =============== balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 * ************************************************************************************************************************ Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json diff --git a/benchmark/tests/test_files/be_main_grid.test b/benchmark/tests/test_files/be_main_grid.test index 10e1373..a4bec6e 100644 --- a/benchmark/tests/test_files/be_main_grid.test +++ b/benchmark/tests/test_files/be_main_grid.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json diff --git a/benchmark/tests/test_files/excel.test b/benchmark/tests/test_files/excel.test index 8cf2ef2..373c803 100644 --- a/benchmark/tests/test_files/excel.test +++ b/benchmark/tests/test_files/excel.test @@ -45,4 +45,4 @@ 8;10;"0.0008541679382324218" 8;11;"3.629469326417878e-05" 8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_ODTE.test b/benchmark/tests/test_files/excel_add_ODTE.test index f97bd71..34f226f 100644 --- a/benchmark/tests/test_files/excel_add_ODTE.test +++ b/benchmark/tests/test_files/excel_add_ODTE.test @@ -45,4 +45,4 @@ 8;10;"0.1156062078475952" 8;11;"0.0127842418285999" 8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_STree.test b/benchmark/tests/test_files/excel_add_STree.test index 36937eb..3a864e4 100644 --- a/benchmark/tests/test_files/excel_add_STree.test +++ b/benchmark/tests/test_files/excel_add_STree.test @@ -43,4 +43,4 @@ 8;10;"0.02120100021362305" 8;11;"0.003526023309468471" 8;12;"{'splitter': 'best', 'max_features': 'auto'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0416" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_compared.test b/benchmark/tests/test_files/excel_compared.test index eb7239d..16b415a 100644 --- a/benchmark/tests/test_files/excel_compared.test +++ b/benchmark/tests/test_files/excel_compared.test @@ -49,4 +49,4 @@ 11;2;"✔" 11;3;1 11;4;"Equal to best" -13;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file +13;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/exreport_excel_ODTE.test b/benchmark/tests/test_files/exreport_excel_ODTE.test index 23f3dc7..46188f4 100644 --- a/benchmark/tests/test_files/exreport_excel_ODTE.test +++ b/benchmark/tests/test_files/exreport_excel_ODTE.test @@ -45,4 +45,4 @@ 8;10;"0.1156062078475952" 8;11;"0.0127842418285999" 8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434" diff --git a/benchmark/tests/test_files/exreport_excel_RandomForest.test b/benchmark/tests/test_files/exreport_excel_RandomForest.test index 424c0c7..7e7a395 100644 --- a/benchmark/tests/test_files/exreport_excel_RandomForest.test +++ b/benchmark/tests/test_files/exreport_excel_RandomForest.test @@ -45,4 +45,4 @@ 8;10;"0.07016648769378662" 8;11;"0.002460508923990468" 8;12;"{}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0363" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0363" diff --git a/benchmark/tests/test_files/exreport_excel_STree.test b/benchmark/tests/test_files/exreport_excel_STree.test index 07f44c0..18b7aa4 100644 --- a/benchmark/tests/test_files/exreport_excel_STree.test +++ b/benchmark/tests/test_files/exreport_excel_STree.test @@ -45,4 +45,4 @@ 8;10;"0.0008541679382324218" 8;11;"3.629469326417878e-05" 8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" diff --git a/benchmark/tests/test_files/report.test b/benchmark/tests/test_files/report.test index 1dcac2e..94498b7 100644 --- a/benchmark/tests/test_files/report.test +++ b/benchmark/tests/test_files/report.test @@ -11,5 +11,5 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * ************************************************************************************************************************ diff --git a/benchmark/tests/test_files/report_best.test b/benchmark/tests/test_files/report_best.test index 735ed01..03ffc30 100644 --- a/benchmark/tests/test_files/report_best.test +++ b/benchmark/tests/test_files/report_best.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'} balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0457 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0457 * ****************************************************************************************************************************************************************** diff --git a/benchmark/tests/test_files/report_compared.test b/benchmark/tests/test_files/report_compared.test index 000f4dd..46c6f6c 100644 --- a/benchmark/tests/test_files/report_compared.test +++ b/benchmark/tests/test_files/report_compared.test @@ -12,5 +12,5 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ * ✔ Equal to best .....: 1 * -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * ************************************************************************************************************************ diff --git a/benchmark/tests/test_files/report_grid.test b/benchmark/tests/test_files/report_grid.test index 7aa394f..4ad130f 100644 --- a/benchmark/tests/test_files/report_grid.test +++ b/benchmark/tests/test_files/report_grid.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.919995 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} balloons 0.625000 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0384 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0384 * ******************************************************************************************************************************************************************