Complete tests of Benchmark

This commit is contained in:
2022-04-25 19:21:26 +02:00
parent a17166ed31
commit 629f413293
14 changed files with 365 additions and 36 deletions

View File

@@ -589,12 +589,13 @@ class SQL(BaseReport):
class Benchmark: class Benchmark:
def __init__(self, score): def __init__(self, score, visualize=True):
self._score = score self._score = score
self._results = [] self._results = []
self._models = [] self._models = []
self._report = {} self._report = {}
self._datasets = set() self._datasets = set()
self.visualize = visualize
def get_result_file_name(self): def get_result_file_name(self):
return os.path.join(Folders.exreport, Files.exreport(self._score)) return os.path.join(Folders.exreport, Files.exreport(self._score))
@@ -650,13 +651,10 @@ class Benchmark:
print(line) print(line)
# Remove previous results # Remove previous results
try: if os.path.exists(Folders.report):
shutil.rmtree(Folders.report) shutil.rmtree(Folders.report)
if os.path.exists(Files.exreport_pdf):
os.remove(Files.exreport_pdf) os.remove(Files.exreport_pdf)
except FileNotFoundError:
pass
except OSError as os_error:
print("Error: %s : %s" % (Folders.report, os_error.strerror))
# Compute Friedman & Holm Tests # Compute Friedman & Holm Tests
fout = open( fout = open(
os.path.join(Folders.exreport, Files.exreport_output(self._score)), os.path.join(Folders.exreport, Files.exreport_output(self._score)),
@@ -666,16 +664,13 @@ class Benchmark:
os.path.join(Folders.exreport, Files.exreport_err(self._score)), os.path.join(Folders.exreport, Files.exreport_err(self._score)),
"w", "w",
) )
print(
"*********************",
os.path.join(Folders.src(), Files.benchmark_r),
)
result = subprocess.run( result = subprocess.run(
[ [
"Rscript", "Rscript",
os.path.join(Folders.src(), Files.benchmark_r), os.path.join(Folders.src(), Files.benchmark_r),
self._score, self._score,
os.path.join(Folders.exreport, f"exreport_{self._score}"), os.path.join(Folders.exreport, f"exreport_{self._score}"),
"1" if self.visualize else "0",
], ],
stdout=fout, stdout=fout,
stderr=ferr, stderr=ferr,
@@ -714,7 +709,6 @@ class Benchmark:
print("") print("")
if tex_output: if tex_output:
self.print_tex_line(num, dataset, scores) self.print_tex_line(num, dataset, scores)
if tex_output: if tex_output:
self.print_tex_footer() self.print_tex_footer()
# Summary of result files used # Summary of result files used
@@ -984,7 +978,7 @@ class Benchmark:
def exreport_output(): def exreport_output():
file_name = os.path.join( file_name = os.path.join(
Folders.results, Files.exreport_output(self._score) Folders.exreport, Files.exreport_output(self._score)
) )
sheet = book.add_worksheet("Exreport") sheet = book.add_worksheet("Exreport")
normal = book.add_format( normal = book.add_format(

View File

@@ -1,9 +1,15 @@
library(glue) library(glue)
args = commandArgs(trailingOnly=TRUE) args = commandArgs(trailingOnly=TRUE)
if (length(args)!=2) { if (length(args)!=3) {
stop("Only two arguments must be supplied (score & input_file).n", call.=FALSE) stop("Three arguments must be supplied (score & input_file & visualize).n", call.=FALSE)
} }
csv_file <- glue("{args[2]}.csv") csv_file <- glue("{args[2]}.csv")
visualize_c <- args[3]
if (visualize_c == "1") {
visualize <- T
} else {
visualize <- F
}
destination <- "exreport/" destination <- "exreport/"
results <- read.csv(csv_file) results <- read.csv(csv_file)
library(exreport) library(exreport)
@@ -37,4 +43,6 @@ report <- exreportAdd(report, table2)
# Now that we have finished adding elements to the report it is time to render it. We want to generate an HTML report, so we call the appropriate function, by default it renders and opens the report in your browser using a temporary file, but you can optionally specify a folder in which the report will be saved for future use. # Now that we have finished adding elements to the report it is time to render it. We want to generate an HTML report, so we call the appropriate function, by default it renders and opens the report in your browser using a temporary file, but you can optionally specify a folder in which the report will be saved for future use.
# Render the report: # Render the report:
exreportRender(report, destination=destination, target = "html", visualize = T) if (visualize) {
exreportRender(report, destination=destination, target = "html", visualize = T)
}

View File

@@ -34,7 +34,7 @@ def parse_arguments():
(score, excel, tex_output) = parse_arguments() (score, excel, tex_output) = parse_arguments()
benchmark = Benchmark(score) benchmark = Benchmark(score=score, visualize=True)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
benchmark.report(tex_output) benchmark.report(tex_output)

View File

@@ -3,8 +3,10 @@ import unittest
import shutil import shutil
from io import StringIO from io import StringIO
from unittest.mock import patch from unittest.mock import patch
from ..Utils import Folders from openpyxl import load_workbook
from ..Utils import Folders, Files
from ..Results import Benchmark from ..Results import Benchmark
from .Excel_test import ExcelTest
class BenchmarkTest(unittest.TestCase): class BenchmarkTest(unittest.TestCase):
@@ -13,23 +15,30 @@ class BenchmarkTest(unittest.TestCase):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def tearDown(self) -> None: def tearDown(self) -> None:
benchmark = Benchmark("accuracy", visualize=False)
files = [ files = [
# "exreport_accuracy.csv", "exreport_accuracy.csv",
"exreport_accuracy.txt", "exreport_accuracy.txt",
"exreport_accuracy.xlsx",
"exreport_err_accuracy.txt", "exreport_err_accuracy.txt",
"exreport_err_unknown.txt",
"exreport_unknown.csv",
"exreport_unknown.txt",
"Rplots.pdf",
benchmark.get_tex_file(),
] ]
for file_name in files: for file_name in files:
if os.path.exists(file_name):
os.remove(file_name)
file_name = os.path.join(Folders.exreport, file_name) file_name = os.path.join(Folders.exreport, file_name)
if os.path.exists(file_name): if os.path.exists(file_name):
os.remove(file_name) os.remove(file_name)
if os.path.exists(Folders.report): if os.path.exists(Folders.report):
shutil.rmtree(Folders.report) shutil.rmtree(Folders.report)
if os.path.exists("Rplots.pdf"):
os.remove("Rplots.pdf")
return super().tearDown() return super().tearDown()
def test_csv(self): def test_csv(self):
benchmark = Benchmark("accuracy") benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with open(benchmark.get_result_file_name()) as f: with open(benchmark.get_result_file_name()) as f:
@@ -38,8 +47,18 @@ class BenchmarkTest(unittest.TestCase):
expected = f_exp.readlines() expected = f_exp.readlines()
self.assertEqual(computed, expected) self.assertEqual(computed, expected)
def test_exreport_report(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.report(tex_output=False)
with open(os.path.join("test_files", "exreport_report.test")) as f:
expected = f.read()
self.assertEqual(fake_out.getvalue(), expected)
def test_exreport(self): def test_exreport(self):
benchmark = Benchmark("accuracy") benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out: with patch("sys.stdout", new=StringIO()) as fake_out:
@@ -51,3 +70,61 @@ class BenchmarkTest(unittest.TestCase):
computed_t.pop(0) computed_t.pop(0)
for computed, expected in zip(computed_t, expected_t.split("\n")): for computed, expected in zip(computed_t, expected_t.split("\n")):
self.assertEqual(computed, expected) self.assertEqual(computed, expected)
def test_exreport_remove_previous(self):
os.makedirs(Folders.report)
with open(os.path.join(Files.exreport_pdf), "w") as f:
print("x", file=f)
self.assertTrue(os.path.exists(Files.exreport_pdf))
self.assertTrue(os.path.exists(Folders.report))
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
benchmark.exreport()
self.assertFalse(os.path.exists(Files.exreport_pdf))
self.assertFalse(os.path.exists(Folders.report))
def test_exreport_error(self):
benchmark = Benchmark("unknown", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.exreport()
computed = fake_out.getvalue()
with open(os.path.join("test_files", "exreport_error.test")) as f:
expected = f.read()
self.assertEqual(computed, expected)
def test_tex_output(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.report(tex_output=True)
with open(os.path.join("test_files", "exreport_report.test")) as f:
expected = f.read()
self.assertEqual(fake_out.getvalue(), expected)
self.assertTrue(os.path.exists(benchmark.get_tex_file()))
with open(benchmark.get_tex_file()) as f:
computed = f.read()
with open(os.path.join("test_files", "exreport_tex.test")) as f:
expected = f.read()
self.assertEqual(computed, expected)
def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.exreport()
benchmark.excel()
file_name = benchmark.get_excel_file_name()
book = load_workbook(file_name)
for sheet_name in book.sheetnames:
sheet = book[sheet_name]
ExcelTest.check_excel_sheet(
self, sheet, f"exreport_excel_{sheet_name}.test"
)
# ExcelTest.generate_excel_sheet(
# self, sheet, f"exreport_excel_{sheet_name}.test"
# )

View File

@@ -24,8 +24,18 @@ class ExcelTest(unittest.TestCase):
os.remove(file_name) os.remove(file_name)
return super().tearDown() return super().tearDown()
def check_excel_sheet(self, sheet, file_name): @staticmethod
with open(file_name, "r") as f: def generate_excel_sheet(test, sheet, file_name):
with open(os.path.join("test_files", file_name), "w") as f:
for row in range(1, sheet.max_row + 1):
for col in range(1, sheet.max_column + 1):
value = sheet.cell(row=row, column=col).value
if value is not None:
print(f'{row};{col};"{value}"', file=f)
@staticmethod
def check_excel_sheet(test, sheet, file_name):
with open(os.path.join("test_files", file_name), "r") as f:
expected = csv.reader(f, delimiter=";") expected = csv.reader(f, delimiter=";")
for row, col, value in expected: for row, col, value in expected:
if value.isdigit(): if value.isdigit():
@@ -35,7 +45,7 @@ class ExcelTest(unittest.TestCase):
value = float(value) value = float(value)
except ValueError: except ValueError:
pass pass
self.assertEqual(sheet.cell(int(row), int(col)).value, value) test.assertEqual(sheet.cell(int(row), int(col)).value, value)
def test_report_excel_compared(self): def test_report_excel_compared(self):
file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
@@ -44,9 +54,7 @@ class ExcelTest(unittest.TestCase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_compared.test")
sheet, os.path.join("test_files", "excel_compared.test")
)
def test_report_excel(self): def test_report_excel(self):
file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
@@ -55,7 +63,7 @@ class ExcelTest(unittest.TestCase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet(sheet, os.path.join("test_files", "excel.test")) self.check_excel_sheet(self, sheet, "excel.test")
def test_Excel_Add_sheet(self): def test_Excel_Add_sheet(self):
file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"
@@ -71,10 +79,6 @@ class ExcelTest(unittest.TestCase):
book.close() book.close()
book = load_workbook(os.path.join(Folders.results, excel_file_name)) book = load_workbook(os.path.join(Folders.results, excel_file_name))
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_add_STree.test")
sheet, os.path.join("test_files", "excel_add_STree.test")
)
sheet = book["ODTE"] sheet = book["ODTE"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_add_ODTE.test")
sheet, os.path.join("test_files", "excel_add_ODTE.test")
)

View File

@@ -1,4 +1,3 @@
****************************************************************************************************
Benchmark Ok Benchmark Ok
**************************************************************************************************** ****************************************************************************************************
--------------------------------------------------------------------- ---------------------------------------------------------------------

View File

@@ -0,0 +1,7 @@
****************************************************************************************************
Error computing benchmark
****************************************************************************************************
Error in dim(ordered) <- ns :
dims [producto 1] no coincide con la longitud del objeto [0]
Calls: testMultipleControl -> .doFriedmanTest -> <Anonymous> -> cast
Ejecución interrumpida

View File

@@ -0,0 +1,54 @@
1;1;"Benchmark of Models"
1;2;"Score is accuracy"
5;1;"Dataset"
5;2;"ODTE"
5;5;"RandomForest"
5;8;"STree"
6;2;"Score"
6;3;"Stdev"
6;4;"Rank"
6;5;"Score"
6;6;"Stdev"
6;7;"Rank"
6;8;"Score"
6;9;"Stdev"
6;10;"Rank"
7;1;"balance-scale "
7;2;"0.96352"
7;3;"0.02494974148162661"
7;4;"=rank(B7,(B7,E7,H7))"
7;5;"0.83616"
7;6;"0.02649630917694009"
7;7;"=rank(E7,(B7,E7,H7))"
7;8;"0.97056"
7;9;"0.0150468069702512"
7;10;"=rank(H7,(B7,E7,H7))"
8;1;"balloons "
8;2;"0.785"
8;3;"0.2461311755051675"
8;4;"=rank(B8,(B8,E8,H8))"
8;5;"0.625"
8;6;"0.249582985531199"
8;7;"=rank(E8,(B8,E8,H8))"
8;8;"0.86"
8;9;"0.2850146195080759"
8;10;"=rank(H8,(B8,E8,H8))"
9;1;"Total"
9;2;"=sum(B7:B8)/40.282203"
9;4;"=average(D7:D8)"
9;5;"=sum(E7:E8)/40.282203"
9;7;"=average(G7:G8)"
9;8;"=sum(H7:H8)/40.282203"
9;10;"=average(J7:J8)"
12;1;"Model"
12;2;"File"
12;7;"Score"
14;1;"ODTE"
14;2;"results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json"
14;7;"0.04340676203831255"
15;1;"RandomForest"
15;2;"results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json"
15;7;"0.03627309062515771"
16;1;"STree"
16;2;"results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
16;7;"0.04544339345094904"

View File

@@ -0,0 +1,17 @@
1;1;"---------------------------------------------------------------------"
2;1;"Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01"
3;1;"Chi squared with 2 degrees of freedom statistic: 4.0000"
4;1;"Test accepted: p-value: 1.3534e-01 >= 0.0500"
5;1;"---------------------------------------------------------------------"
6;1;"Control post hoc test for output accuracy"
7;1;"Adjust method: Holm"
9;1;"Control method: STree"
10;1;"p-values:"
11;1;" ODTE 0.3173"
12;1;" RandomForest 0.0910"
13;1;"---------------------------------------------------------------------"
14;1;"$testMultiple"
15;1;" classifier pvalue rank win tie loss"
16;1;"STree STree NA 1 NA NA NA"
17;1;"ODTE ODTE 0.31731051 2 2 0 0"
18;1;"RandomForest RandomForest 0.09100053 3 2 0 0"

View File

@@ -0,0 +1,48 @@
1;1;" Report ODTE ver. 0.3.2 with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
2;1;" Gridsearched hyperparams v022.1b random_init"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;"22,591.47 s"
3;7;" "
3;8;"Platform"
3;9;"Galgo"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 6.28 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"7.361199999999999"
7;6;"4.180599999999999"
7;7;"3.536"
7;8;"0.96352"
7;9;"0.02494974148162661"
7;10;"0.3166321754455567"
7;11;"0.1991881389525559"
7;12;"{'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"2.9952"
8;6;"1.9976"
8;7;"1.9976"
8;8;"0.785"
8;9;"0.2461311755051675"
8;10;"0.1156062078475952"
8;11;"0.0127842418285999"
8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434"

View File

@@ -0,0 +1,48 @@
1;1;" Report RandomForest ver. - with 5 Folds cross validation and 10 random seeds. 2022-01-14 12:39:30"
2;1;" Test default paramters with RandomForest"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;" 272.74 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.08 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"196.9144"
7;6;"98.42"
7;7;"10.6814"
7;8;"0.83616"
7;9;"0.02649630917694009"
7;10;"0.08222018241882324"
7;11;"0.001302632681512063"
7;12;"{}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"9.110800000000001"
8;6;"4.58"
8;7;"3.0982"
8;8;"0.625"
8;9;"0.249582985531199"
8;10;"0.07016648769378662"
8;11;"0.002460508923990468"
8;12;"{}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0363"

View File

@@ -0,0 +1,48 @@
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
2;1;" With gridsearched hyperparameters"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;" 624.25 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.17 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"7"
7;6;"4"
7;7;"3"
7;8;"0.97056"
7;9;"0.0150468069702512"
7;10;"0.01404867172241211"
7;11;"0.002026269126958884"
7;12;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"3"
8;6;"2"
8;7;"2"
8;8;"0.86"
8;9;"0.2850146195080759"
8;10;"0.0008541679382324218"
8;11;"3.629469326417878e-05"
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454"

View File

@@ -0,0 +1,10 @@
Dataset ODTE RandomForest STree
============================== ============= ============= =============
balance-scale 0.96352±0.025 0.83616±0.026 0.97056±0.015
balloons 0.78500±0.246 0.62500±0.250 0.86000±0.285
Model File Name Score
============================== =========================================================================== ========
ODTE results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341
RandomForest results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627
STree results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544

View File

@@ -0,0 +1,15 @@
\begin{sidewaystable}[ht]
\centering
\renewcommand{\arraystretch}{1.2}
\renewcommand{\tabcolsep}{0.07cm}
\caption{Accuracy results (mean ± std) for all the algorithms and datasets}
\label{table:datasets}
\resizebox{0.95\textwidth}{!}{
\begin {tabular} {{rlrrrccc}}\hline
\# & Dataset & \#S & \#F & \#L & ODTE & RandomForest & STree\\
\hline
1 & balance-scale & 625 & 4 & 3 & 0.9635±0.025 & 0.8362±0.026 & \bfseries 0.9706±0.015 \\
2 & balloons & 16 & 4 & 2 & 0.7850±0.246 & 0.6250±0.250 & \bfseries 0.8600±0.285 \\
\hline
\end{tabular}}
\end{sidewaystable}