Mirror of https://github.com/Doctorado-ML/benchmark.git
Add tests for the Excel output of the datasets report
@@ -580,7 +580,7 @@ class ReportDatasets:
     color2 = "#FDE9D9"
     color3 = "#B1A0C7"
 
-    def __init__(self, excel, book=None):
+    def __init__(self, excel=False, book=None):
         self.excel = excel
         self.env = EnvData().load()
         self.close = False
@@ -589,7 +589,7 @@ class ReportDatasets:
         if excel:
             self.max_length = 0
             if book is None:
-                self.excel_file_name = "ReportDatasets.xlsx"
+                self.excel_file_name = Files.datasets_report_excel
                 self.book = xlsxwriter.Workbook(
                     self.excel_file_name, {"nan_inf_to_errors": True}
                 )
@@ -728,6 +728,7 @@ class ReportDatasets:
         self.sheet.write(
             4, 4, f"{self.env['seeds']}", merge_format_subheader_left
         )
+        self.update_max_length(len(self.env["seeds"]) + 1)
         header_cols = [
             ("Dataset", 30),
             ("Samples", 10),
@@ -775,10 +776,13 @@ class ReportDatasets:
         self.sheet.write(self.row, col + 2, result.features, integer)
         self.sheet.write(self.row, col + 3, result.classes, normal)
         self.sheet.write(self.row, col + 4, result.balance, normal)
-        if len(result.balance) > self.max_length:
-            self.max_length = len(result.balance)
+        self.update_max_length(len(result.balance))
         self.row += 1
 
+    def update_max_length(self, value):
+        if value > self.max_length:
+            self.max_length = value
+
     def report(self):
         data_sets = Datasets()
         color_line = TextColor.LINE1
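
The new update_max_length helper centralizes the column-width bookkeeping that was previously done inline when the Balance cell was written, so any write site can record the widest text seen. Below is a minimal, self-contained sketch of that pattern; the class, file name, and the final set_column call are assumptions for illustration (the diff does not show how max_length is ultimately consumed).

    import xlsxwriter

    class SheetWidthTracker:
        """Illustrative stand-in for the width tracking in ReportDatasets."""

        def __init__(self, file_name="demo.xlsx"):
            self.max_length = 0
            self.book = xlsxwriter.Workbook(file_name, {"nan_inf_to_errors": True})
            self.sheet = self.book.add_worksheet("Datasets")
            self.row = 0

        def update_max_length(self, value):
            # Remember the widest text written so far
            if value > self.max_length:
                self.max_length = value

        def write_balance(self, balance):
            self.sheet.write(self.row, 4, balance)
            self.update_max_length(len(balance))
            self.row += 1

        def close(self):
            # Assumed use of max_length: widen column E to fit the longest entry
            self.sheet.set_column(4, 4, self.max_length)
            self.book.close()

    tracker = SheetWidthTracker()
    tracker.write_balance(" 7.84%/ 46.08%/ 46.08%")
    tracker.write_balance("56.25%/ 43.75%")
    tracker.close()
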
@@ -789,7 +793,7 @@ class ReportDatasets:
         print(self.header_text)
         print("")
         print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
-        print("=" * 30 + " ===== ====== === " + "=" * 60)
+        print("=" * 30 + " ====== ===== === " + "=" * 60)
         for dataset in data_sets:
             attributes = data_sets.get_attributes(dataset)
             attributes.dataset = dataset
@@ -1255,8 +1259,7 @@ class Benchmark:
             sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
             sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
             sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
-            sheet.freeze_panes(6, 1)
-            sheet.hide_gridlines(2)
+            row += 1
             d_name = next(iter(self._datasets))
             for model in self._models:
                 file_name = self._report[model][d_name]["file_name"]
@@ -1280,7 +1283,10 @@ class Benchmark:
                 )
                 k = Excel(file_name=file_name, book=book)
                 k.report()
+            sheet.freeze_panes(6, 1)
+            sheet.hide_gridlines(2)
 
+        def add_datasets_sheet():
             # Add datasets sheet
             re = ReportDatasets(excel=True, book=book)
             re.report()
@@ -1311,6 +1317,7 @@ class Benchmark:
         footer()
         models_files()
         exreport_output()
+        add_datasets_sheet()
         book.close()

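With this change the datasets sheet is produced by its own nested helper, add_datasets_sheet(), which reuses the already-open workbook (book=book) instead of letting ReportDatasets create a file of its own, and freeze_panes/hide_gridlines are applied where the models sheet is finished. A rough sketch of that pattern, with hypothetical names wherever the diff does not show the real ones:

    import xlsxwriter

    def build_benchmark_book(file_name="demo_benchmark.xlsx"):
        # One workbook shared by every sheet builder, closed exactly once
        book = xlsxwriter.Workbook(file_name, {"nan_inf_to_errors": True})

        def models_files():
            sheet = book.add_worksheet("Models")
            sheet.write(0, 0, "Model")
            # Cosmetic settings applied once the sheet is fully written
            sheet.freeze_panes(6, 1)
            sheet.hide_gridlines(2)

        def add_datasets_sheet():
            # In the commit this calls ReportDatasets(excel=True, book=book).report()
            sheet = book.add_worksheet("Datasets")
            sheet.write(0, 0, "Dataset")

        models_files()
        add_datasets_sheet()
        book.close()

    build_benchmark_book()
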
@@ -27,6 +27,7 @@ class Files:
     exreport_pdf = "Rplots.pdf"
     benchmark_r = "benchmark.r"
     dot_env = ".env"
+    datasets_report_excel = "ReportDatasets.xlsx"
 
     @staticmethod
     def exreport_output(score):
@@ -6,3 +6,4 @@ stratified=0
 # Source of data Tanveer/Surcov
 source_data=Tanveer
 seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
+discretize=0
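
The test .env file gains a discretize=0 entry, and the UtilTest expectation further below confirms that EnvData().load() exposes it as the string "0". A minimal sketch of the kind of parsing involved, assuming a plain key=value format with # comments; the real EnvData implementation is not shown in this diff and may differ.

    def load_env(path=".env"):
        # Parse key=value lines, skipping blanks and comments; values stay strings
        env = {}
        with open(path) as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                key, _, value = line.partition("=")
                env[key.strip()] = value.strip()
        return env

    # e.g. load_env() -> {..., "seeds": "[57, 31, 1714, ...]", "discretize": "0"}
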
benchmark/tests/.gitignore (new file)
@@ -0,0 +1 @@
+ReportDatasets.xlsx

@@ -89,6 +89,15 @@ class BenchmarkTest(TestBase):
         self.assertTrue(os.path.exists(benchmark.get_tex_file()))
         self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
 
+    @staticmethod
+    def generate_excel_sheet(test, sheet, file_name):
+        with open(os.path.join("test_files", file_name), "w") as f:
+            for row in range(1, sheet.max_row + 1):
+                for col in range(1, sheet.max_column + 1):
+                    value = sheet.cell(row=row, column=col).value
+                    if value is not None:
+                        print(f'{row};{col};"{value}"', file=f)
+
     def test_excel_output(self):
         benchmark = Benchmark("accuracy", visualize=False)
         benchmark.compile_results()
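
generate_excel_sheet dumps every non-empty cell as a row;col;"value" line, which is the format of the exreport_excel_*.test fixtures added below. A hypothetical counterpart that reads such a fixture back and compares it against an openpyxl sheet, roughly what the existing check_excel_sheet helper (whose code is not shown in this diff) needs to do:

    import os

    def read_expected_cells(file_name):
        # Parse lines of the form: row;col;"value"
        cells = {}
        with open(os.path.join("test_files", file_name)) as handle:
            for line in handle:
                row, col, value = line.rstrip("\n").split(";", 2)
                cells[(int(row), int(col))] = value.strip('"')
        return cells

    def sheet_matches(sheet, file_name):
        expected = read_expected_cells(file_name)
        return all(
            str(sheet.cell(row=row, column=col).value) == value
            for (row, col), value in expected.items()
        )
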
@@ -101,6 +110,3 @@ class BenchmarkTest(TestBase):
         for sheet_name in book.sheetnames:
             sheet = book[sheet_name]
             self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
-            # ExcelTest.generate_excel_sheet(
-            #     self, sheet, f"exreport_excel_{sheet_name}"
-            # )
@@ -179,6 +179,7 @@ class UtilTest(TestBase):
             "stratified": "0",
             "source_data": "Tanveer",
             "seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
+            "discretize": "0",
         }
         computed = EnvData().load()
         self.assertDictEqual(computed, expected)

@@ -1,6 +1,6 @@
 import os
 from openpyxl import load_workbook
-from ...Utils import Folders
+from ...Utils import Folders, Files
 from ..TestBase import TestBase

@@ -43,6 +43,15 @@ class BeReportTest(TestBase):
         self.assertEqual(stderr.getvalue(), "")
         self.check_output_file(stdout, "report_datasets")
 
+    def test_be_report_datasets_excel(self):
+        stdout, stderr = self.execute_script("be_report", ["-x", "1"])
+        self.assertEqual(stderr.getvalue(), "")
+        self.check_output_file(stdout, "report_datasets")
+        file_name = os.path.join(os.getcwd(), Files.datasets_report_excel)
+        book = load_workbook(file_name)
+        sheet = book["Datasets"]
+        self.check_excel_sheet(sheet, "exreport_excel_datasets")
+
     def test_be_report_best(self):
         stdout, stderr = self.execute_script(
             "be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"]

benchmark/tests/test_files/exreport_excel_Datasets.test (new file)
@@ -0,0 +1,25 @@
+1;1;"Datasets used in benchmark ver. 0.2.0"
+2;1;" Default score accuracy"
+2;2;"Cross validation"
+2;5;"5 Folds"
+3;2;"Stratified"
+3;5;"False"
+4;2;"Discretized"
+4;5;"False"
+5;2;"Seeds"
+5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
+6;1;"Dataset"
+6;2;"Samples"
+6;3;"Features"
+6;4;"Classes"
+6;5;"Balance"
+7;1;"balance-scale"
+7;2;"625"
+7;3;"4"
+7;4;"3"
+7;5;" 7.84%/ 46.08%/ 46.08%"
+8;1;"balloons"
+8;2;"16"
+8;3;"4"
+8;4;"2"
+8;5;"56.25%/ 43.75%"

@@ -1,4 +1,6 @@
-[94mDataset Sampl. Feat. Cls Balance
-============================== ===== ====== === ========================================
+[94mDatasets used in benchmark ver. 0.2.0
+
+Dataset Sampl. Feat. Cls Balance
+============================== ====== ===== === ============================================================
 [96mbalance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
 [94mballoons 16 4 2 56.25%/ 43.75%