Complete be_benchmark tests

2022-05-08 18:14:55 +02:00
parent bb4769de43
commit e58901a307
7 changed files with 123 additions and 23 deletions

View File

@@ -1,3 +1,4 @@
+from multiprocessing.sharedctypes import Value
 import os
 from operator import itemgetter
 import math
@@ -641,6 +642,8 @@ class Benchmark:
         summary = Summary()
         summary.acquire(given_score=self._score)
         self._models = summary.get_models()
+        if self._models == []:
+            raise ValueError(NO_RESULTS)
         for model in self._models:
             best = summary.best_result(
                 criterion="model", value=model, score=self._score
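Review note: the added "from multiprocessing.sharedctypes import Value" looks like a stray IDE auto-import picked up while typing ValueError (a builtin); nothing in this change uses Value. The substance is the guard, which turns an empty result set into an explicit error before any report work starts. A minimal, self-contained sketch of that behaviour, with an assumed wording for NO_RESULTS (the real constant lives in Utils and is not visible in this diff):

    # Sketch only: assumed wording, the real NO_RESULTS constant is in Utils.
    NO_RESULTS = "No results found"


    def compile_results(models: list) -> list:
        # Stand-in for Benchmark.compile_results: fail fast when the
        # summary returned no models, instead of letting the R exreport
        # stage crash on an empty CSV later.
        if not models:  # same effect as models == [] for a list
            raise ValueError(NO_RESULTS)
        return models


    try:
        compile_results([])
    except ValueError as error:
        print(error)  # -> No results found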

View File

@@ -19,6 +19,6 @@ def main(args_test=None):
     benchmark.exreport()
     if args.excel:
         benchmark.excel()
-        Files.open(benchmark.get_excel_file_name())
+        Files.open(benchmark.get_excel_file_name(), test=args.quiet)
     if args.tex_output:
         print(f"File {benchmark.get_tex_file()} generated")

View File

@@ -3,19 +3,19 @@ from io import StringIO
 from unittest.mock import patch
 from openpyxl import load_workbook
 from .TestBase import TestBase
-from ..Utils import Folders, Files
+from ..Utils import Folders, Files, NO_RESULTS
 from ..Results import Benchmark


 class BenchmarkTest(TestBase):
-    def tearDown(self):
+    def tearDown(self) -> None:
         benchmark = Benchmark("accuracy", visualize=False)
         files = []
         for score in ["accuracy", "unknown"]:
             files.append(Files.exreport(score))
             files.append(Files.exreport_output(score))
             files.append(Files.exreport_err(score))
-            files.append(Files.exreport_excel("accuracy"))
+            files.append(Files.exreport_excel(score))
         files.append(Files.exreport_pdf)
         files.append(Files.tex_output("accuracy"))
         self.remove_files(files, Folders.exreport)
@@ -65,20 +65,20 @@ class BenchmarkTest(TestBase):
         self.assertFalse(os.path.exists(Folders.report))

     def test_exreport_error(self):
-        benchmark = Benchmark("unknown", visualize=False)
+        benchmark = Benchmark("accuracy", visualize=False)
         benchmark.compile_results()
         benchmark.save_results()
+        # Make Rscript exreport fail
+        benchmark._score = "unknown"
         with patch(self.output, new=StringIO()) as stdout:
             benchmark.exreport()
         self.check_output_file(stdout, "exreport_error")

     def test_exreport_no_data(self):
         benchmark = Benchmark("f1-weighted", visualize=False)
-        benchmark.compile_results()
-        benchmark.save_results()
-        with patch(self.output, new=StringIO()) as stdout:
-            benchmark.exreport()
-        self.check_output_file(stdout, "exreport_error")
+        with self.assertRaises(ValueError) as msg:
+            benchmark.compile_results()
+        self.assertEqual(str(msg.exception), NO_RESULTS)

     def test_tex_output(self):
         benchmark = Benchmark("accuracy", visualize=False)
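Both rewritten tests keep the same capture idiom: patch(self.output, new=StringIO()) swaps stdout for an in-memory buffer so the printed report can be diffed against a fixture. A self-contained sketch, with "sys.stdout" standing in for whatever dotted path TestBase stores in self.output:

    from io import StringIO
    from unittest.mock import patch

    # "sys.stdout" is an assumption; self.output holds the real target path.
    with patch("sys.stdout", new=StringIO()) as stdout:
        print("captured line")
    assert stdout.getvalue() == "captured line\n"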

View File

@@ -1,36 +1,66 @@
 import os
-import json
-from ...Utils import Folders, Files
+from openpyxl import load_workbook
+from ...Utils import NO_RESULTS, Folders, Files
 from ..TestBase import TestBase


 class BeBenchmarkTest(TestBase):
     def setUp(self):
         self.prepare_scripts_env()
+        self.score = "accuracy"

     def tearDown(self) -> None:
         files = []
-        for score in ["accuracy", "unknown"]:
+        for score in [self.score, "unknown"]:
             files.append(Files.exreport(score))
             files.append(Files.exreport_output(score))
             files.append(Files.exreport_err(score))
-        files.append(Files.exreport_excel("accuracy"))
+            files.append(Files.exreport_excel(self.score))
         files.append(Files.exreport_pdf)
-        files.append(Files.tex_output("accuracy"))
+        files.append(Files.tex_output(self.score))
         self.remove_files(files, Folders.exreport)
         self.remove_files(files, ".")
         return super().tearDown()

-    def test_be_benchmark(self):
+    def test_be_benchmark_complete(self):
         stdout, stderr = self.execute_script(
-            "be_benchmark", ["-s", "accuracy", "-q", "1", "-t", "1", "-x", "1"]
+            "be_benchmark", ["-s", self.score, "-q", "1", "-t", "1", "-x", "1"]
         )
         self.assertEqual(stderr.getvalue(), "")
         # Check output
-        self.check_output_file(stdout, "exreport_report")
+        self.check_output_file(stdout, "be_benchmark_complete")
         # Check csv file
-        file_name = os.path.join(Folders.exreport, Files.exreport("accuracy"))
+        file_name = os.path.join(Folders.exreport, Files.exreport(self.score))
         self.check_file_file(file_name, "exreport_csv")
         # Check tex file
+        file_name = os.path.join(
+            Folders.exreport, Files.tex_output(self.score)
+        )
+        self.assertTrue(os.path.exists(file_name))
+        self.check_file_file(file_name, "exreport_tex")
         # Check excel file
+        file_name = os.path.join(
+            Folders.exreport, Files.exreport_excel(self.score)
+        )
+        book = load_workbook(file_name)
+        for sheet_name in book.sheetnames:
+            sheet = book[sheet_name]
+            self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
+
+    def test_be_benchmark_single(self):
+        stdout, stderr = self.execute_script(
+            "be_benchmark", ["-s", self.score, "-q", "1"]
+        )
+        self.assertEqual(stderr.getvalue(), "")
+        # Check output
+        self.check_output_file(stdout, "be_benchmark")
+        # Check csv file
+        file_name = os.path.join(Folders.exreport, Files.exreport(self.score))
+        self.check_file_file(file_name, "exreport_csv")
+
+    def test_be_benchmark_no_data(self):
+        stdout, stderr = self.execute_script(
+            "be_benchmark", ["-s", "f1-weighted"]
+        )
+        self.assertEqual(stderr.getvalue(), "")
+        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")
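The three tests drive the script exactly as a user would:

    be_benchmark -s accuracy -q 1 -t 1 -x 1    # full run: report, tex and excel
    be_benchmark -s accuracy -q 1              # report only
    be_benchmark -s f1-weighted                # no results: prints NO_RESULTS

Judging from the main() hunk above, -q, -t and -x presumably map to args.quiet, args.tex_output and args.excel; only -s and the observable outputs are confirmed by the tests themselves. test_be_benchmark_no_data also pins down how the new guard surfaces end to end: main() evidently catches the ValueError raised by compile_results and prints its message, since the test requires NO_RESULTS plus a newline on stdout and an empty stderr.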

View File

@@ -0,0 +1,32 @@
+Dataset                        ODTE          RandomForest  STree
+============================== ============= ============= =============
+balance-scale                  0.96352±0.025 0.83616±0.026 0.97056±0.015
+balloons                       0.78500±0.246 0.62500±0.250 0.86000±0.285
+Model                          File Name                                                                   Score
+============================== =========================================================================== ========
+ODTE                           results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json              0.04341
+RandomForest                   results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json     0.03627
+STree                          results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json            0.04544
+****************************************************************************************************
+Benchmark Ok
+****************************************************************************************************
+---------------------------------------------------------------------
+Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01
+Chi squared with 2 degrees of freedom statistic: 4.0000
+Test accepted: p-value: 1.3534e-01 >= 0.0500
+---------------------------------------------------------------------
+Control post hoc test for output accuracy
+Adjust method: Holm
+Control method: STree
+p-values:
+ODTE         0.3173
+RandomForest 0.0910
+---------------------------------------------------------------------
+$testMultiple
+             classifier     pvalue rank win tie loss
+STree             STree         NA    1  NA  NA   NA
+ODTE               ODTE 0.31731051    2   2   0    0
+RandomForest RandomForest 0.09100053    3   2   0    0
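The Friedman figures in this fixture can be sanity-checked directly from the two dataset rows: STree ranks first on both datasets, ODTE second, RandomForest third, and with n = 2 datasets and k = 3 models the statistic comes out at exactly 4.0. Since the p-value 1.3534e-01 is >= 0.05, the null hypothesis (no difference between the classifiers) is not rejected, which is why the report prints "Test accepted". A standalone check using only the fixture data (exreport computes this in R; this is just the arithmetic):

    import math

    # Scores copied from the fixture rows above.
    scores = {
        "ODTE": [0.96352, 0.78500],
        "RandomForest": [0.83616, 0.62500],
        "STree": [0.97056, 0.86000],
    }
    models = list(scores)
    n, k = 2, len(models)  # n datasets (blocks), k models (treatments)

    # Rank the models within each dataset, rank 1 = best (no ties here).
    rank_sums = dict.fromkeys(models, 0)
    for i in range(n):
        ordered = sorted(models, key=lambda name: -scores[name][i])
        for rank, name in enumerate(ordered, start=1):
            rank_sums[name] += rank

    # Friedman statistic with k - 1 = 2 degrees of freedom:
    #   chi2 = 12 / (n k (k + 1)) * sum(R_j^2) - 3 n (k + 1)
    chi2 = 12 / (n * k * (k + 1)) * sum(
        r * r for r in rank_sums.values()
    ) - 3 * n * (k + 1)
    # For df = 2 the chi-square survival function is exactly e^(-x/2).
    p_value = math.exp(-chi2 / 2)

    print(f"{chi2:.4f} {p_value:.4e}")  # -> 4.0000 1.3534e-01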

View File

@@ -0,0 +1,33 @@
+Dataset                        ODTE          RandomForest  STree
+============================== ============= ============= =============
+balance-scale                  0.96352±0.025 0.83616±0.026 0.97056±0.015
+balloons                       0.78500±0.246 0.62500±0.250 0.86000±0.285
+Model                          File Name                                                                   Score
+============================== =========================================================================== ========
+ODTE                           results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json              0.04341
+RandomForest                   results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json     0.03627
+STree                          results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json            0.04544
+****************************************************************************************************
+Benchmark Ok
+****************************************************************************************************
+---------------------------------------------------------------------
+Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01
+Chi squared with 2 degrees of freedom statistic: 4.0000
+Test accepted: p-value: 1.3534e-01 >= 0.0500
+---------------------------------------------------------------------
+Control post hoc test for output accuracy
+Adjust method: Holm
+Control method: STree
+p-values:
+ODTE         0.3173
+RandomForest 0.0910
+---------------------------------------------------------------------
+$testMultiple
+             classifier     pvalue rank win tie loss
+STree             STree         NA    1  NA  NA   NA
+ODTE               ODTE 0.31731051    2   2   0    0
+RandomForest RandomForest 0.09100053    3   2   0    0
+File exreport/exreport_accuracy.tex generated

View File

@@ -1,7 +1,9 @@
 ****************************************************************************************************
 Error computing benchmark
 ****************************************************************************************************
-Error in dim(ordered) <- ns :
-  dims [producto 1] no coincide con la longitud del objeto [0]
-Calls: testMultipleControl -> .doFriedmanTest -> <Anonymous> -> cast
+Error in file(file, "rt") : no se puede abrir la conexión
+Calls: read.csv -> read.table -> file
+Además: Warning message:
+In file(file, "rt") :
+  no fue posible abrir el archivo 'exreport/exreport_unknown.csv': No such file or directory
 Ejecución interrumpida
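For readers without a Spanish R locale, the new fixture's messages read: "cannot open the connection", "In addition: Warning message:", "could not open file 'exreport/exreport_unknown.csv': No such file or directory", and "Execution halted". That is precisely the failure test_exreport_error now provokes: results are compiled and saved under accuracy, then benchmark._score is flipped to unknown, so the R exreport stage tries to read a CSV that was never written. The old fixture captured a different failure, a dim(ordered) error inside testMultipleControl on an empty result set ("dims [product 1] do not match the length of object [0]"), which is now unreachable because compile_results raises ValueError(NO_RESULTS) first.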