Add tests for the Excel output of the datasets report

This commit is contained in:
2022-11-13 17:44:45 +01:00
parent f1b9dc1fef
commit 6aec5b2a97
9 changed files with 66 additions and 13 deletions

View File

@@ -580,7 +580,7 @@ class ReportDatasets:
color2 = "#FDE9D9" color2 = "#FDE9D9"
color3 = "#B1A0C7" color3 = "#B1A0C7"
def __init__(self, excel, book=None): def __init__(self, excel=False, book=None):
self.excel = excel self.excel = excel
self.env = EnvData().load() self.env = EnvData().load()
self.close = False self.close = False
@@ -589,7 +589,7 @@ class ReportDatasets:
if excel: if excel:
self.max_length = 0 self.max_length = 0
if book is None: if book is None:
self.excel_file_name = "ReportDatasets.xlsx" self.excel_file_name = Files.datasets_report_excel
self.book = xlsxwriter.Workbook( self.book = xlsxwriter.Workbook(
self.excel_file_name, {"nan_inf_to_errors": True} self.excel_file_name, {"nan_inf_to_errors": True}
) )
@@ -728,6 +728,7 @@ class ReportDatasets:
self.sheet.write( self.sheet.write(
4, 4, f"{self.env['seeds']}", merge_format_subheader_left 4, 4, f"{self.env['seeds']}", merge_format_subheader_left
) )
self.update_max_length(len(self.env["seeds"]) + 1)
header_cols = [ header_cols = [
("Dataset", 30), ("Dataset", 30),
("Samples", 10), ("Samples", 10),
@@ -775,10 +776,13 @@ class ReportDatasets:
self.sheet.write(self.row, col + 2, result.features, integer) self.sheet.write(self.row, col + 2, result.features, integer)
self.sheet.write(self.row, col + 3, result.classes, normal) self.sheet.write(self.row, col + 3, result.classes, normal)
self.sheet.write(self.row, col + 4, result.balance, normal) self.sheet.write(self.row, col + 4, result.balance, normal)
if len(result.balance) > self.max_length: self.update_max_length(len(result.balance))
self.max_length = len(result.balance)
self.row += 1 self.row += 1
def update_max_length(self, value):
    """Raise ``self.max_length`` to *value* when *value* is larger.

    ``self.max_length`` is left as it is when *value* does not exceed it.
    """
    # max() returns its first argument on ties, so the stored value only
    # changes when *value* is strictly greater.
    self.max_length = max(self.max_length, value)
def report(self): def report(self):
data_sets = Datasets() data_sets = Datasets()
color_line = TextColor.LINE1 color_line = TextColor.LINE1
@@ -789,7 +793,7 @@ class ReportDatasets:
print(self.header_text) print(self.header_text)
print("") print("")
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance") print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
print("=" * 30 + " ===== ====== === " + "=" * 60) print("=" * 30 + " ====== ===== === " + "=" * 60)
for dataset in data_sets: for dataset in data_sets:
attributes = data_sets.get_attributes(dataset) attributes = data_sets.get_attributes(dataset)
attributes.dataset = dataset attributes.dataset = dataset
@@ -1255,8 +1259,7 @@ class Benchmark:
sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format) sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
sheet.merge_range(row, 1, row + 1, 5, "File", merge_format) sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format) sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
sheet.freeze_panes(6, 1) row += 1
sheet.hide_gridlines(2)
d_name = next(iter(self._datasets)) d_name = next(iter(self._datasets))
for model in self._models: for model in self._models:
file_name = self._report[model][d_name]["file_name"] file_name = self._report[model][d_name]["file_name"]
@@ -1280,7 +1283,10 @@ class Benchmark:
) )
k = Excel(file_name=file_name, book=book) k = Excel(file_name=file_name, book=book)
k.report() k.report()
sheet.freeze_panes(6, 1)
sheet.hide_gridlines(2)
def add_datasets_sheet():
# Add datasets sheet # Add datasets sheet
re = ReportDatasets(excel=True, book=book) re = ReportDatasets(excel=True, book=book)
re.report() re.report()
@@ -1311,6 +1317,7 @@ class Benchmark:
footer() footer()
models_files() models_files()
exreport_output() exreport_output()
add_datasets_sheet()
book.close() book.close()

View File

@@ -27,6 +27,7 @@ class Files:
exreport_pdf = "Rplots.pdf" exreport_pdf = "Rplots.pdf"
benchmark_r = "benchmark.r" benchmark_r = "benchmark.r"
dot_env = ".env" dot_env = ".env"
datasets_report_excel = "ReportDatasets.xlsx"
@staticmethod @staticmethod
def exreport_output(score): def exreport_output(score):

View File

@@ -6,3 +6,4 @@ stratified=0
# Source of data Tanveer/Surcov # Source of data Tanveer/Surcov
source_data=Tanveer source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

1
benchmark/tests/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
ReportDatasets.xlsx

View File

@@ -89,6 +89,15 @@ class BenchmarkTest(TestBase):
self.assertTrue(os.path.exists(benchmark.get_tex_file())) self.assertTrue(os.path.exists(benchmark.get_tex_file()))
self.check_file_file(benchmark.get_tex_file(), "exreport_tex") self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
@staticmethod
def generate_excel_sheet(test, sheet, file_name):
    """Dump the non-empty cells of *sheet* into ``test_files/<file_name>``.

    The target path is relative to the current working directory. Each
    cell whose value is not ``None`` produces one line of the form
    ``row;col;"value"``, using 1-based row and column numbers and
    scanning row by row. The ``test`` argument is accepted but unused.
    """
    target = os.path.join("test_files", file_name)
    with open(target, "w") as out:
        for r in range(1, sheet.max_row + 1):
            for c in range(1, sheet.max_column + 1):
                content = sheet.cell(row=r, column=c).value
                if content is None:
                    # Empty cells are skipped entirely.
                    continue
                out.write(f'{r};{c};"{content}"\n')
def test_excel_output(self): def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
@@ -101,6 +110,3 @@ class BenchmarkTest(TestBase):
for sheet_name in book.sheetnames: for sheet_name in book.sheetnames:
sheet = book[sheet_name] sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}") self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
# ExcelTest.generate_excel_sheet(
# self, sheet, f"exreport_excel_{sheet_name}"
# )

View File

@@ -179,6 +179,7 @@ class UtilTest(TestBase):
"stratified": "0", "stratified": "0",
"source_data": "Tanveer", "source_data": "Tanveer",
"seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]", "seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
"discretize": "0",
} }
computed = EnvData().load() computed = EnvData().load()
self.assertDictEqual(computed, expected) self.assertDictEqual(computed, expected)

View File

@@ -1,6 +1,6 @@
import os import os
from openpyxl import load_workbook from openpyxl import load_workbook
from ...Utils import Folders from ...Utils import Folders, Files
from ..TestBase import TestBase from ..TestBase import TestBase
@@ -43,6 +43,15 @@ class BeReportTest(TestBase):
self.assertEqual(stderr.getvalue(), "") self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "report_datasets") self.check_output_file(stdout, "report_datasets")
def test_be_report_datasets_excel(self):
    """Run ``be_report -x 1`` and check its console and Excel output.

    The script must leave stderr empty. Its stdout is passed to
    ``check_output_file`` with the ``report_datasets`` reference name,
    and the "Datasets" sheet of the workbook found in the current
    working directory under ``Files.datasets_report_excel`` is passed
    to ``check_excel_sheet`` with ``exreport_excel_datasets``.
    """
    out, err = self.execute_script("be_report", ["-x", "1"])
    self.assertEqual(err.getvalue(), "")
    self.check_output_file(out, "report_datasets")
    workbook_path = os.path.join(os.getcwd(), Files.datasets_report_excel)
    datasets_sheet = load_workbook(workbook_path)["Datasets"]
    self.check_excel_sheet(datasets_sheet, "exreport_excel_datasets")
def test_be_report_best(self): def test_be_report_best(self):
stdout, stderr = self.execute_script( stdout, stderr = self.execute_script(
"be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"] "be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"]

View File

@@ -0,0 +1,25 @@
1;1;"Datasets used in benchmark ver. 0.2.0"
2;1;" Default score accuracy"
2;2;"Cross validation"
2;5;"5 Folds"
3;2;"Stratified"
3;5;"False"
4;2;"Discretized"
4;5;"False"
5;2;"Seeds"
5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Balance"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;" 7.84%/ 46.08%/ 46.08%"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"56.25%/ 43.75%"

View File

@@ -1,4 +1,6 @@
Dataset Sampl. Feat. Cls Balance Datasets used in benchmark ver. 0.2.0
============================== ===== ====== === ========================================
Dataset Sampl. Feat. Cls Balance
============================== ====== ===== === ============================================================
balance-scale 625 4 3 7.84%/ 46.08%/ 46.08% balance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
balloons 16 4 2 56.25%/ 43.75% balloons 16 4 2 56.25%/ 43.75%