From 6aec5b2a9726304c635f3efd33ae7731eb3000a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Sun, 13 Nov 2022 17:44:45 +0100 Subject: [PATCH] Add tests to excel in report datasets --- benchmark/Results.py | 21 ++++++++++------ benchmark/Utils.py | 1 + benchmark/tests/.env | 1 + benchmark/tests/.gitignore | 1 + benchmark/tests/Benchmark_test.py | 12 ++++++--- benchmark/tests/Util_test.py | 1 + benchmark/tests/scripts/Be_Report_test.py | 11 +++++++- .../test_files/exreport_excel_Datasets.test | 25 +++++++++++++++++++ .../tests/test_files/report_datasets.test | 6 +++-- 9 files changed, 66 insertions(+), 13 deletions(-) create mode 100644 benchmark/tests/.gitignore create mode 100644 benchmark/tests/test_files/exreport_excel_Datasets.test diff --git a/benchmark/Results.py b/benchmark/Results.py index 011b3a0..e53a0f6 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -580,7 +580,7 @@ class ReportDatasets: color2 = "#FDE9D9" color3 = "#B1A0C7" - def __init__(self, excel, book=None): + def __init__(self, excel=False, book=None): self.excel = excel self.env = EnvData().load() self.close = False @@ -589,7 +589,7 @@ class ReportDatasets: if excel: self.max_length = 0 if book is None: - self.excel_file_name = "ReportDatasets.xlsx" + self.excel_file_name = Files.datasets_report_excel self.book = xlsxwriter.Workbook( self.excel_file_name, {"nan_inf_to_errors": True} ) @@ -728,6 +728,7 @@ class ReportDatasets: self.sheet.write( 4, 4, f"{self.env['seeds']}", merge_format_subheader_left ) + self.update_max_length(len(self.env["seeds"]) + 1) header_cols = [ ("Dataset", 30), ("Samples", 10), @@ -775,10 +776,13 @@ class ReportDatasets: self.sheet.write(self.row, col + 2, result.features, integer) self.sheet.write(self.row, col + 3, result.classes, normal) self.sheet.write(self.row, col + 4, result.balance, normal) - if len(result.balance) > self.max_length: - self.max_length = len(result.balance) + self.update_max_length(len(result.balance)) self.row += 1 + def update_max_length(self, value): + if value > self.max_length: + self.max_length = value + def report(self): data_sets = Datasets() color_line = TextColor.LINE1 @@ -789,7 +793,7 @@ class ReportDatasets: print(self.header_text) print("") print(f"{'Dataset':30s} Sampl. Feat. Cls Balance") - print("=" * 30 + " ===== ====== === " + "=" * 60) + print("=" * 30 + " ====== ===== === " + "=" * 60) for dataset in data_sets: attributes = data_sets.get_attributes(dataset) attributes.dataset = dataset @@ -1255,8 +1259,7 @@ class Benchmark: sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format) sheet.merge_range(row, 1, row + 1, 5, "File", merge_format) sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format) - sheet.freeze_panes(6, 1) - sheet.hide_gridlines(2) + row += 1 d_name = next(iter(self._datasets)) for model in self._models: file_name = self._report[model][d_name]["file_name"] @@ -1280,7 +1283,10 @@ class Benchmark: ) k = Excel(file_name=file_name, book=book) k.report() + sheet.freeze_panes(6, 1) + sheet.hide_gridlines(2) + def add_datasets_sheet(): # Add datasets sheet re = ReportDatasets(excel=True, book=book) re.report() @@ -1311,6 +1317,7 @@ class Benchmark: footer() models_files() exreport_output() + add_datasets_sheet() book.close() diff --git a/benchmark/Utils.py b/benchmark/Utils.py index b6b5797..9663086 100644 --- a/benchmark/Utils.py +++ b/benchmark/Utils.py @@ -27,6 +27,7 @@ class Files: exreport_pdf = "Rplots.pdf" benchmark_r = "benchmark.r" dot_env = ".env" + datasets_report_excel = "ReportDatasets.xlsx" @staticmethod def exreport_output(score): diff --git a/benchmark/tests/.env b/benchmark/tests/.env index 31a99ab..9641efa 100644 --- a/benchmark/tests/.env +++ b/benchmark/tests/.env @@ -6,3 +6,4 @@ stratified=0 # Source of data Tanveer/Surcov source_data=Tanveer seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] +discretize=0 diff --git a/benchmark/tests/.gitignore b/benchmark/tests/.gitignore new file mode 100644 index 0000000..c56bbf8 --- /dev/null +++ b/benchmark/tests/.gitignore @@ -0,0 +1 @@ +ReportDatasets.xlsx diff --git a/benchmark/tests/Benchmark_test.py b/benchmark/tests/Benchmark_test.py index 71eea04..0b4abc2 100644 --- a/benchmark/tests/Benchmark_test.py +++ b/benchmark/tests/Benchmark_test.py @@ -89,6 +89,15 @@ class BenchmarkTest(TestBase): self.assertTrue(os.path.exists(benchmark.get_tex_file())) self.check_file_file(benchmark.get_tex_file(), "exreport_tex") + @staticmethod + def generate_excel_sheet(test, sheet, file_name): + with open(os.path.join("test_files", file_name), "w") as f: + for row in range(1, sheet.max_row + 1): + for col in range(1, sheet.max_column + 1): + value = sheet.cell(row=row, column=col).value + if value is not None: + print(f'{row};{col};"{value}"', file=f) + def test_excel_output(self): benchmark = Benchmark("accuracy", visualize=False) benchmark.compile_results() @@ -101,6 +110,3 @@ class BenchmarkTest(TestBase): for sheet_name in book.sheetnames: sheet = book[sheet_name] self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}") - # ExcelTest.generate_excel_sheet( - # self, sheet, f"exreport_excel_{sheet_name}" - # ) diff --git a/benchmark/tests/Util_test.py b/benchmark/tests/Util_test.py index 1020a5e..8ca7b33 100644 --- a/benchmark/tests/Util_test.py +++ b/benchmark/tests/Util_test.py @@ -179,6 +179,7 @@ class UtilTest(TestBase): "stratified": "0", "source_data": "Tanveer", "seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]", + "discretize": "0", } computed = EnvData().load() self.assertDictEqual(computed, expected) diff --git a/benchmark/tests/scripts/Be_Report_test.py b/benchmark/tests/scripts/Be_Report_test.py index 14a51f8..073d4e6 100644 --- a/benchmark/tests/scripts/Be_Report_test.py +++ b/benchmark/tests/scripts/Be_Report_test.py @@ -1,6 +1,6 @@ import os from openpyxl import load_workbook -from ...Utils import Folders +from ...Utils import Folders, Files from ..TestBase import TestBase @@ -43,6 +43,15 @@ class BeReportTest(TestBase): self.assertEqual(stderr.getvalue(), "") self.check_output_file(stdout, "report_datasets") + def test_be_report_datasets_excel(self): + stdout, stderr = self.execute_script("be_report", ["-x", "1"]) + self.assertEqual(stderr.getvalue(), "") + self.check_output_file(stdout, "report_datasets") + file_name = os.path.join(os.getcwd(), Files.datasets_report_excel) + book = load_workbook(file_name) + sheet = book["Datasets"] + self.check_excel_sheet(sheet, "exreport_excel_datasets") + def test_be_report_best(self): stdout, stderr = self.execute_script( "be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"] diff --git a/benchmark/tests/test_files/exreport_excel_Datasets.test b/benchmark/tests/test_files/exreport_excel_Datasets.test new file mode 100644 index 0000000..5c2f35a --- /dev/null +++ b/benchmark/tests/test_files/exreport_excel_Datasets.test @@ -0,0 +1,25 @@ +1;1;"Datasets used in benchmark ver. 0.2.0" +2;1;" Default score accuracy" +2;2;"Cross validation" +2;5;"5 Folds" +3;2;"Stratified" +3;5;"False" +4;2;"Discretized" +4;5;"False" +5;2;"Seeds" +5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]" +6;1;"Dataset" +6;2;"Samples" +6;3;"Features" +6;4;"Classes" +6;5;"Balance" +7;1;"balance-scale" +7;2;"625" +7;3;"4" +7;4;"3" +7;5;" 7.84%/ 46.08%/ 46.08%" +8;1;"balloons" +8;2;"16" +8;3;"4" +8;4;"2" +8;5;"56.25%/ 43.75%" diff --git a/benchmark/tests/test_files/report_datasets.test b/benchmark/tests/test_files/report_datasets.test index 8f5b0f6..16c7bd7 100644 --- a/benchmark/tests/test_files/report_datasets.test +++ b/benchmark/tests/test_files/report_datasets.test @@ -1,4 +1,6 @@ -Dataset Sampl. Feat. Cls Balance -============================== ===== ====== === ======================================== +Datasets used in benchmark ver. 0.2.0 + +Dataset Sampl. Feat. Cls Balance +============================== ====== ===== === ============================================================ balance-scale 625 4 3 7.84%/ 46.08%/ 46.08% balloons 16 4 2 56.25%/ 43.75%