Complete tests of Benchmark

This commit is contained in:
2022-04-25 19:21:26 +02:00
parent a17166ed31
commit 629f413293
14 changed files with 365 additions and 36 deletions

View File

@@ -589,12 +589,13 @@ class SQL(BaseReport):
class Benchmark: class Benchmark:
def __init__(self, score): def __init__(self, score, visualize=True):
self._score = score self._score = score
self._results = [] self._results = []
self._models = [] self._models = []
self._report = {} self._report = {}
self._datasets = set() self._datasets = set()
self.visualize = visualize
def get_result_file_name(self): def get_result_file_name(self):
return os.path.join(Folders.exreport, Files.exreport(self._score)) return os.path.join(Folders.exreport, Files.exreport(self._score))
@@ -650,13 +651,10 @@ class Benchmark:
print(line) print(line)
# Remove previous results # Remove previous results
try: if os.path.exists(Folders.report):
shutil.rmtree(Folders.report) shutil.rmtree(Folders.report)
if os.path.exists(Files.exreport_pdf):
os.remove(Files.exreport_pdf) os.remove(Files.exreport_pdf)
except FileNotFoundError:
pass
except OSError as os_error:
print("Error: %s : %s" % (Folders.report, os_error.strerror))
# Compute Friedman & Holm Tests # Compute Friedman & Holm Tests
fout = open( fout = open(
os.path.join(Folders.exreport, Files.exreport_output(self._score)), os.path.join(Folders.exreport, Files.exreport_output(self._score)),
@@ -666,16 +664,13 @@ class Benchmark:
os.path.join(Folders.exreport, Files.exreport_err(self._score)), os.path.join(Folders.exreport, Files.exreport_err(self._score)),
"w", "w",
) )
print(
"*********************",
os.path.join(Folders.src(), Files.benchmark_r),
)
result = subprocess.run( result = subprocess.run(
[ [
"Rscript", "Rscript",
os.path.join(Folders.src(), Files.benchmark_r), os.path.join(Folders.src(), Files.benchmark_r),
self._score, self._score,
os.path.join(Folders.exreport, f"exreport_{self._score}"), os.path.join(Folders.exreport, f"exreport_{self._score}"),
"1" if self.visualize else "0",
], ],
stdout=fout, stdout=fout,
stderr=ferr, stderr=ferr,
@@ -714,7 +709,6 @@ class Benchmark:
print("") print("")
if tex_output: if tex_output:
self.print_tex_line(num, dataset, scores) self.print_tex_line(num, dataset, scores)
if tex_output: if tex_output:
self.print_tex_footer() self.print_tex_footer()
# Summary of result files used # Summary of result files used
@@ -984,7 +978,7 @@ class Benchmark:
def exreport_output(): def exreport_output():
file_name = os.path.join( file_name = os.path.join(
Folders.results, Files.exreport_output(self._score) Folders.exreport, Files.exreport_output(self._score)
) )
sheet = book.add_worksheet("Exreport") sheet = book.add_worksheet("Exreport")
normal = book.add_format( normal = book.add_format(

View File

@@ -1,9 +1,15 @@
library(glue) library(glue)
args = commandArgs(trailingOnly=TRUE) args = commandArgs(trailingOnly=TRUE)
if (length(args)!=2) { if (length(args)!=3) {
stop("Only two arguments must be supplied (score & input_file).n", call.=FALSE) stop("Three arguments must be supplied (score & input_file & visualize).n", call.=FALSE)
} }
csv_file <- glue("{args[2]}.csv") csv_file <- glue("{args[2]}.csv")
visualize_c <- args[3]
if (visualize_c == "1") {
visualize <- T
} else {
visualize <- F
}
destination <- "exreport/" destination <- "exreport/"
results <- read.csv(csv_file) results <- read.csv(csv_file)
library(exreport) library(exreport)
@@ -37,4 +43,6 @@ report <- exreportAdd(report, table2)
# Now that we have finished adding elements to the report it is time to render it. We want to generate an HTML report, so we call the appropriate function, by default it renders and opens the report in your browser using a temporary file, but you can optionally specify a folder in which the report will be saved for future use. # Now that we have finished adding elements to the report it is time to render it. We want to generate an HTML report, so we call the appropriate function, by default it renders and opens the report in your browser using a temporary file, but you can optionally specify a folder in which the report will be saved for future use.
# Render the report: # Render the report:
exreportRender(report, destination=destination, target = "html", visualize = T) if (visualize) {
exreportRender(report, destination=destination, target = "html", visualize = T)
}

View File

@@ -34,7 +34,7 @@ def parse_arguments():
(score, excel, tex_output) = parse_arguments() (score, excel, tex_output) = parse_arguments()
benchmark = Benchmark(score) benchmark = Benchmark(score=score, visualize=True)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
benchmark.report(tex_output) benchmark.report(tex_output)

View File

@@ -3,8 +3,10 @@ import unittest
import shutil import shutil
from io import StringIO from io import StringIO
from unittest.mock import patch from unittest.mock import patch
from ..Utils import Folders from openpyxl import load_workbook
from ..Utils import Folders, Files
from ..Results import Benchmark from ..Results import Benchmark
from .Excel_test import ExcelTest
class BenchmarkTest(unittest.TestCase): class BenchmarkTest(unittest.TestCase):
@@ -13,23 +15,30 @@ class BenchmarkTest(unittest.TestCase):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def tearDown(self) -> None: def tearDown(self) -> None:
benchmark = Benchmark("accuracy", visualize=False)
files = [ files = [
# "exreport_accuracy.csv", "exreport_accuracy.csv",
"exreport_accuracy.txt", "exreport_accuracy.txt",
"exreport_accuracy.xlsx",
"exreport_err_accuracy.txt", "exreport_err_accuracy.txt",
"exreport_err_unknown.txt",
"exreport_unknown.csv",
"exreport_unknown.txt",
"Rplots.pdf",
benchmark.get_tex_file(),
] ]
for file_name in files: for file_name in files:
if os.path.exists(file_name):
os.remove(file_name)
file_name = os.path.join(Folders.exreport, file_name) file_name = os.path.join(Folders.exreport, file_name)
if os.path.exists(file_name): if os.path.exists(file_name):
os.remove(file_name) os.remove(file_name)
if os.path.exists(Folders.report): if os.path.exists(Folders.report):
shutil.rmtree(Folders.report) shutil.rmtree(Folders.report)
if os.path.exists("Rplots.pdf"):
os.remove("Rplots.pdf")
return super().tearDown() return super().tearDown()
def test_csv(self): def test_csv(self):
benchmark = Benchmark("accuracy") benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with open(benchmark.get_result_file_name()) as f: with open(benchmark.get_result_file_name()) as f:
@@ -38,8 +47,18 @@ class BenchmarkTest(unittest.TestCase):
expected = f_exp.readlines() expected = f_exp.readlines()
self.assertEqual(computed, expected) self.assertEqual(computed, expected)
def test_exreport_report(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.report(tex_output=False)
with open(os.path.join("test_files", "exreport_report.test")) as f:
expected = f.read()
self.assertEqual(fake_out.getvalue(), expected)
def test_exreport(self): def test_exreport(self):
benchmark = Benchmark("accuracy") benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out: with patch("sys.stdout", new=StringIO()) as fake_out:
@@ -51,3 +70,61 @@ class BenchmarkTest(unittest.TestCase):
computed_t.pop(0) computed_t.pop(0)
for computed, expected in zip(computed_t, expected_t.split("\n")): for computed, expected in zip(computed_t, expected_t.split("\n")):
self.assertEqual(computed, expected) self.assertEqual(computed, expected)
def test_exreport_remove_previous(self):
os.makedirs(Folders.report)
with open(os.path.join(Files.exreport_pdf), "w") as f:
print("x", file=f)
self.assertTrue(os.path.exists(Files.exreport_pdf))
self.assertTrue(os.path.exists(Folders.report))
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
benchmark.exreport()
self.assertFalse(os.path.exists(Files.exreport_pdf))
self.assertFalse(os.path.exists(Folders.report))
def test_exreport_error(self):
benchmark = Benchmark("unknown", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.exreport()
computed = fake_out.getvalue()
with open(os.path.join("test_files", "exreport_error.test")) as f:
expected = f.read()
self.assertEqual(computed, expected)
def test_tex_output(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.report(tex_output=True)
with open(os.path.join("test_files", "exreport_report.test")) as f:
expected = f.read()
self.assertEqual(fake_out.getvalue(), expected)
self.assertTrue(os.path.exists(benchmark.get_tex_file()))
with open(benchmark.get_tex_file()) as f:
computed = f.read()
with open(os.path.join("test_files", "exreport_tex.test")) as f:
expected = f.read()
self.assertEqual(computed, expected)
def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
benchmark.save_results()
with patch("sys.stdout", new=StringIO()) as fake_out:
benchmark.exreport()
benchmark.excel()
file_name = benchmark.get_excel_file_name()
book = load_workbook(file_name)
for sheet_name in book.sheetnames:
sheet = book[sheet_name]
ExcelTest.check_excel_sheet(
self, sheet, f"exreport_excel_{sheet_name}.test"
)
# ExcelTest.generate_excel_sheet(
# self, sheet, f"exreport_excel_{sheet_name}.test"
# )

View File

@@ -24,8 +24,18 @@ class ExcelTest(unittest.TestCase):
os.remove(file_name) os.remove(file_name)
return super().tearDown() return super().tearDown()
def check_excel_sheet(self, sheet, file_name): @staticmethod
with open(file_name, "r") as f: def generate_excel_sheet(test, sheet, file_name):
with open(os.path.join("test_files", file_name), "w") as f:
for row in range(1, sheet.max_row + 1):
for col in range(1, sheet.max_column + 1):
value = sheet.cell(row=row, column=col).value
if value is not None:
print(f'{row};{col};"{value}"', file=f)
@staticmethod
def check_excel_sheet(test, sheet, file_name):
with open(os.path.join("test_files", file_name), "r") as f:
expected = csv.reader(f, delimiter=";") expected = csv.reader(f, delimiter=";")
for row, col, value in expected: for row, col, value in expected:
if value.isdigit(): if value.isdigit():
@@ -35,7 +45,7 @@ class ExcelTest(unittest.TestCase):
value = float(value) value = float(value)
except ValueError: except ValueError:
pass pass
self.assertEqual(sheet.cell(int(row), int(col)).value, value) test.assertEqual(sheet.cell(int(row), int(col)).value, value)
def test_report_excel_compared(self): def test_report_excel_compared(self):
file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
@@ -44,9 +54,7 @@ class ExcelTest(unittest.TestCase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_compared.test")
sheet, os.path.join("test_files", "excel_compared.test")
)
def test_report_excel(self): def test_report_excel(self):
file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
@@ -55,7 +63,7 @@ class ExcelTest(unittest.TestCase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet(sheet, os.path.join("test_files", "excel.test")) self.check_excel_sheet(self, sheet, "excel.test")
def test_Excel_Add_sheet(self): def test_Excel_Add_sheet(self):
file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"
@@ -71,10 +79,6 @@ class ExcelTest(unittest.TestCase):
book.close() book.close()
book = load_workbook(os.path.join(Folders.results, excel_file_name)) book = load_workbook(os.path.join(Folders.results, excel_file_name))
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_add_STree.test")
sheet, os.path.join("test_files", "excel_add_STree.test")
)
sheet = book["ODTE"] sheet = book["ODTE"]
self.check_excel_sheet( self.check_excel_sheet(self, sheet, "excel_add_ODTE.test")
sheet, os.path.join("test_files", "excel_add_ODTE.test")
)

View File

@@ -1,4 +1,3 @@
****************************************************************************************************
Benchmark Ok Benchmark Ok
**************************************************************************************************** ****************************************************************************************************
--------------------------------------------------------------------- ---------------------------------------------------------------------

View File

@@ -0,0 +1,7 @@
****************************************************************************************************
Error computing benchmark
****************************************************************************************************
Error in dim(ordered) <- ns :
dims [producto 1] no coincide con la longitud del objeto [0]
Calls: testMultipleControl -> .doFriedmanTest -> <Anonymous> -> cast
Ejecución interrumpida

View File

@@ -0,0 +1,54 @@
1;1;"Benchmark of Models"
1;2;"Score is accuracy"
5;1;"Dataset"
5;2;"ODTE"
5;5;"RandomForest"
5;8;"STree"
6;2;"Score"
6;3;"Stdev"
6;4;"Rank"
6;5;"Score"
6;6;"Stdev"
6;7;"Rank"
6;8;"Score"
6;9;"Stdev"
6;10;"Rank"
7;1;"balance-scale "
7;2;"0.96352"
7;3;"0.02494974148162661"
7;4;"=rank(B7,(B7,E7,H7))"
7;5;"0.83616"
7;6;"0.02649630917694009"
7;7;"=rank(E7,(B7,E7,H7))"
7;8;"0.97056"
7;9;"0.0150468069702512"
7;10;"=rank(H7,(B7,E7,H7))"
8;1;"balloons "
8;2;"0.785"
8;3;"0.2461311755051675"
8;4;"=rank(B8,(B8,E8,H8))"
8;5;"0.625"
8;6;"0.249582985531199"
8;7;"=rank(E8,(B8,E8,H8))"
8;8;"0.86"
8;9;"0.2850146195080759"
8;10;"=rank(H8,(B8,E8,H8))"
9;1;"Total"
9;2;"=sum(B7:B8)/40.282203"
9;4;"=average(D7:D8)"
9;5;"=sum(E7:E8)/40.282203"
9;7;"=average(G7:G8)"
9;8;"=sum(H7:H8)/40.282203"
9;10;"=average(J7:J8)"
12;1;"Model"
12;2;"File"
12;7;"Score"
14;1;"ODTE"
14;2;"results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json"
14;7;"0.04340676203831255"
15;1;"RandomForest"
15;2;"results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json"
15;7;"0.03627309062515771"
16;1;"STree"
16;2;"results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
16;7;"0.04544339345094904"

View File

@@ -0,0 +1,17 @@
1;1;"---------------------------------------------------------------------"
2;1;"Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01"
3;1;"Chi squared with 2 degrees of freedom statistic: 4.0000"
4;1;"Test accepted: p-value: 1.3534e-01 >= 0.0500"
5;1;"---------------------------------------------------------------------"
6;1;"Control post hoc test for output accuracy"
7;1;"Adjust method: Holm"
9;1;"Control method: STree"
10;1;"p-values:"
11;1;" ODTE 0.3173"
12;1;" RandomForest 0.0910"
13;1;"---------------------------------------------------------------------"
14;1;"$testMultiple"
15;1;" classifier pvalue rank win tie loss"
16;1;"STree STree NA 1 NA NA NA"
17;1;"ODTE ODTE 0.31731051 2 2 0 0"
18;1;"RandomForest RandomForest 0.09100053 3 2 0 0"

View File

@@ -0,0 +1,48 @@
1;1;" Report ODTE ver. 0.3.2 with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
2;1;" Gridsearched hyperparams v022.1b random_init"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;"22,591.47 s"
3;7;" "
3;8;"Platform"
3;9;"Galgo"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 6.28 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"7.361199999999999"
7;6;"4.180599999999999"
7;7;"3.536"
7;8;"0.96352"
7;9;"0.02494974148162661"
7;10;"0.3166321754455567"
7;11;"0.1991881389525559"
7;12;"{'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"2.9952"
8;6;"1.9976"
8;7;"1.9976"
8;8;"0.785"
8;9;"0.2461311755051675"
8;10;"0.1156062078475952"
8;11;"0.0127842418285999"
8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434"

View File

@@ -0,0 +1,48 @@
1;1;" Report RandomForest ver. - with 5 Folds cross validation and 10 random seeds. 2022-01-14 12:39:30"
2;1;" Test default paramters with RandomForest"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;" 272.74 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.08 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"196.9144"
7;6;"98.42"
7;7;"10.6814"
7;8;"0.83616"
7;9;"0.02649630917694009"
7;10;"0.08222018241882324"
7;11;"0.001302632681512063"
7;12;"{}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"9.110800000000001"
8;6;"4.58"
8;7;"3.0982"
8;8;"0.625"
8;9;"0.249582985531199"
8;10;"0.07016648769378662"
8;11;"0.002460508923990468"
8;12;"{}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0363"

View File

@@ -0,0 +1,48 @@
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
2;1;" With gridsearched hyperparameters"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;" 624.25 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.17 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"7"
7;6;"4"
7;7;"3"
7;8;"0.97056"
7;9;"0.0150468069702512"
7;10;"0.01404867172241211"
7;11;"0.002026269126958884"
7;12;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"3"
8;6;"2"
8;7;"2"
8;8;"0.86"
8;9;"0.2850146195080759"
8;10;"0.0008541679382324218"
8;11;"3.629469326417878e-05"
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454"

View File

@@ -0,0 +1,10 @@
Dataset ODTE RandomForest STree
============================== ============= ============= =============
balance-scale 0.96352±0.025 0.83616±0.026 0.97056±0.015
balloons 0.78500±0.246 0.62500±0.250 0.86000±0.285
Model File Name Score
============================== =========================================================================== ========
ODTE results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341
RandomForest results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627
STree results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544

View File

@@ -0,0 +1,15 @@
\begin{sidewaystable}[ht]
\centering
\renewcommand{\arraystretch}{1.2}
\renewcommand{\tabcolsep}{0.07cm}
\caption{Accuracy results (mean ± std) for all the algorithms and datasets}
\label{table:datasets}
\resizebox{0.95\textwidth}{!}{
\begin {tabular} {{rlrrrccc}}\hline
\# & Dataset & \#S & \#F & \#L & ODTE & RandomForest & STree\\
\hline
1 & balance-scale & 625 & 4 & 3 & 0.9635±0.025 & 0.8362±0.026 & \bfseries 0.9706±0.015 \\
2 & balloons & 16 & 4 & 2 & 0.7850±0.246 & 0.6250±0.250 & \bfseries 0.8600±0.285 \\
\hline
\end{tabular}}
\end{sidewaystable}