mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-15 23:45:54 +00:00
Add excel to report dataset
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.io import arff
|
||||
from .Utils import Files
|
||||
from .Arguments import EnvData
|
||||
@@ -40,9 +41,6 @@ class DatasetsArff:
|
||||
|
||||
|
||||
class DatasetsTanveer:
|
||||
def __init__(self, discretized):
|
||||
self.discretized = discretized
|
||||
|
||||
@staticmethod
|
||||
def dataset_names(name):
|
||||
return f"{name}_R.dat"
|
||||
@@ -127,6 +125,24 @@ class Datasets:
|
||||
self.data_sets = result
|
||||
self.class_names = class_names
|
||||
|
||||
def get_attributes(self, name):
    """Summarize the size and class balance of the dataset called *name*.

    The data is obtained through ``self.load_continuous(name)``, which is
    expected to return the pair ``(X, y)``.

    Returns a plain attribute container with these fields:

    * ``samples``  -- ``X.shape[0]``
    * ``features`` -- ``X.shape[1]``
    * ``classes``  -- number of distinct labels found in ``y``
    * ``balance``  -- percentage of samples per label, in the sorted label
      order produced by ``np.unique``, e.g. ``"75.00%/ 25.00%"``
    """

    class Attributes:
        """Bare namespace; callers may attach further fields to it."""

    X, y = self.load_continuous(name)
    values, counts = np.unique(y, return_counts=True)
    # The total is the same for every label, so compute it only once.
    total = counts.sum()
    attr = Attributes()
    attr.balance = "/ ".join(
        f"{count / total * 100:5.2f}%" for count in counts
    )
    # np.unique already returned the distinct labels; no second pass needed.
    attr.classes = len(values)
    attr.samples = X.shape[0]
    attr.features = X.shape[1]
    return attr
|
||||
|
||||
def get_features(self):
    """Return the ``features`` attribute of the underlying dataset object."""
    source = self.dataset
    return source.features
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import sys
|
||||
from operator import itemgetter
|
||||
import math
|
||||
import json
|
||||
@@ -17,6 +18,7 @@ from .Utils import (
|
||||
TextColor,
|
||||
NO_RESULTS,
|
||||
)
|
||||
from ._version import __version__
|
||||
|
||||
|
||||
class BestResultsEver:
|
||||
@@ -566,37 +568,247 @@ class Excel(BaseReport):
|
||||
self.sheet.set_row(c, 20)
|
||||
self.sheet.set_row(0, 25)
|
||||
self.sheet.freeze_panes(6, 1)
|
||||
self.sheet.hide_gridlines()
|
||||
self.sheet.hide_gridlines(2)
|
||||
if self.close:
|
||||
self.book.close()
|
||||
|
||||
|
||||
class ReportDatasets:
    """Report the datasets of the benchmark on the console and/or in Excel.

    With ``excel`` false only the console table is printed. With ``excel``
    true a "Datasets" worksheet is written as well: into a new
    ``ReportDatasets.xlsx`` workbook when no ``book`` is given (the workbook
    is then closed by ``footer``), or into the supplied ``book``, in which
    case console output is suppressed and the workbook is left open.
    """

    # first worksheet row for dataset lines; rows 0-5 are used by header()
    row = 6
    # alternate lines colors
    color1 = "#DCE6F1"
    color2 = "#FDE9D9"
    color3 = "#B1A0C7"

    def __init__(self, excel=False, book=None):
        """Load the environment and, if requested, prepare the worksheet.

        excel -- truthy to produce the Excel sheet
        book  -- existing workbook to add the sheet to; None to create one
        """
        self.excel = excel
        self.env = EnvData().load()
        self.close = False
        self.output = True
        self.header_text = f"Datasets used in benchmark ver. {__version__}"
        if excel:
            self.max_length = 0
            if book is None:
                self.excel_file_name = "ReportDatasets.xlsx"
                self.book = xlsxwriter.Workbook(
                    self.excel_file_name, {"nan_inf_to_errors": True}
                )
                self.set_properties(self.get_title())
                # we created the workbook, so we are responsible for closing
                self.close = True
            else:
                self.book = book
                self.output = False
            self.sheet = self.book.add_worksheet("Datasets")

    def set_properties(self, title):
        """Set the document properties of the workbook, using *title*."""
        self.book.set_properties(
            {
                "title": title,
                "subject": "Machine learning results",
                "author": "Ricardo Montañana Gómez",
                "manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
                "company": "UCLM",
                "comments": "Created with Python and XlsxWriter",
            }
        )

    @staticmethod
    def get_python_version():
        """Return the running interpreter version as ``"major.minor"``."""
        return "{}.{}".format(sys.version_info.major, sys.version_info.minor)

    def get_title(self):
        """Return the workbook title built from versions and environment."""
        return (
            f" Benchmark ver. {__version__} - "
            f" Python ver. {self.get_python_version()}"
            f" with {self.env['n_folds']} Folds cross validation "
            f" Discretization: {self.env['discretize']} "
            f"Stratification: {self.env['stratified']}"
        )

    def get_file_name(self):
        """Return the name of the workbook created by this report.

        Only available when the report created its own workbook.
        """
        return self.excel_file_name

    def header(self):
        """Write the title, the environment summary and the column titles."""

        def banner(align, size, color):
            # All header cells share everything but these three properties.
            return self.book.add_format(
                {
                    "border": 1,
                    "bold": 1,
                    "align": align,
                    "valign": "vcenter",
                    "font_size": size,
                    "bg_color": color,
                }
            )

        title_format = banner("center", 18, self.color3)
        score_format = banner("center", 16, self.color1)
        label_format = banner("right", 16, self.color1)
        value_format = banner("left", 16, self.color1)
        self.sheet.merge_range(0, 0, 0, 4, self.header_text, title_format)
        # the score cell spans the four environment rows in column 0
        self.sheet.merge_range(
            1, 0, 4, 0, f" Default score {self.env['score']}", score_format
        )
        settings = [
            ("Cross validation", f"{self.env['n_folds']} Folds"),
            ("Stratified", str(self.env["stratified"] == "1")),
            ("Discretized", str(self.env["discretize"] == "1")),
            ("Seeds", f"{self.env['seeds']}"),
        ]
        for line, (label, value) in enumerate(settings, start=1):
            self.sheet.merge_range(line, 1, line, 3, label, label_format)
            self.sheet.write(line, 4, value, value_format)
        header_cols = [
            ("Dataset", 30),
            ("Samples", 10),
            ("Features", 10),
            ("Classes", 10),
            ("Balance", 50),
        ]
        bold = self.book.add_format(
            {
                "bold": True,
                "font_size": 14,
                "bg_color": self.color3,
                "border": 1,
            }
        )
        for col, (item, length) in enumerate(header_cols):
            self.sheet.write(5, col, item, bold)
            self.sheet.set_column(col, col, length)

    def footer(self):
        """Finish the sheet layout and close the workbook if we own it."""
        # set Balance column width to max length
        self.sheet.set_column(4, 4, self.max_length)
        self.sheet.freeze_panes(6, 1)
        # option 2 hides both screen and printed gridlines in XlsxWriter
        self.sheet.hide_gridlines(2)
        if self.close:
            self.book.close()

    def _line_formats(self):
        """Return the (text, integer) cell formats for the current row.

        Formats are created once per background colour and reused, instead
        of registering two new formats in the workbook for every row.
        """
        color = self.color1 if self.row % 2 == 0 else self.color2
        cache = self.__dict__.setdefault("_format_cache", {})
        if color not in cache:
            base = {"font_size": 14, "border": 1, "bg_color": color}
            cache[color] = (
                self.book.add_format(base),
                self.book.add_format({**base, "num_format": "#,###"}),
            )
        return cache[color]

    def print_line(self, result):
        """Write one dataset line to the sheet and advance to the next row.

        result -- object with dataset, samples, features, classes and
                  balance attributes
        """
        normal, integer = self._line_formats()
        cells = (
            (result.dataset, normal),
            (result.samples, integer),
            (result.features, integer),
            (result.classes, normal),
            (result.balance, normal),
        )
        for col, (value, cell_format) in enumerate(cells):
            self.sheet.write(self.row, col, value, cell_format)
        # remembered so footer() can size the Balance column
        self.max_length = max(self.max_length, len(result.balance))
        self.row += 1

    def report(self):
        """Generate the report for every dataset known to ``Datasets``.

        Each dataset is written to the sheet when ``self.excel`` is set and
        printed to the console when ``self.output`` is set.
        """
        data_sets = Datasets()
        color_line = TextColor.LINE1
        if self.excel:
            self.header()
        if self.output:
            print(color_line, end="")
            print(self.header_text)
            print("")
            print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
            print("=" * 30 + " ===== ====== === " + "=" * 60)
        for dataset in data_sets:
            attributes = data_sets.get_attributes(dataset)
            attributes.dataset = dataset
            if self.excel:
                self.print_line(attributes)
            # alternate the console colour on every line
            color_line = (
                TextColor.LINE2
                if color_line == TextColor.LINE1
                else TextColor.LINE1
            )
            if self.output:
                print(color_line, end="")
                print(
                    f"{dataset:30s} {attributes.samples:6,d} "
                    f"{attributes.features:5,d} {attributes.classes:3d} "
                    f"{attributes.balance:40s}"
                )
        if self.excel:
            self.footer()
|
||||
|
||||
|
||||
class SQL(BaseReport):
|
||||
@@ -1043,7 +1255,8 @@ class Benchmark:
|
||||
sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
|
||||
sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
|
||||
sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
|
||||
row += 1
|
||||
sheet.freeze_panes(6, 1)
|
||||
sheet.hide_gridlines(2)
|
||||
d_name = next(iter(self._datasets))
|
||||
for model in self._models:
|
||||
file_name = self._report[model][d_name]["file_name"]
|
||||
@@ -1067,8 +1280,10 @@ class Benchmark:
|
||||
)
|
||||
k = Excel(file_name=file_name, book=book)
|
||||
k.report()
|
||||
sheet.freeze_panes(6, 1)
|
||||
sheet.hide_gridlines()
|
||||
|
||||
# Add datasets sheet
|
||||
re = ReportDatasets(excel=True, book=book)
|
||||
re.report()
|
||||
|
||||
def exreport_output():
|
||||
file_name = os.path.join(
|
||||
|
@@ -6,10 +6,11 @@ from .Datasets import (
|
||||
)
|
||||
from .Experiments import Experiment
|
||||
from .Results import Report, Summary
|
||||
from ._version import __version__
|
||||
|
||||
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
__license__ = "MIT License"
__author_email__ = "ricardo.montanana@alu.uclm.es"

# __all__ must hold attribute *names*: listing the __version__ variable
# itself would export its value (e.g. "0.7.1") and make
# ``from <package> import *`` fail with AttributeError.
__all__ = ["Experiment", "Datasets", "Report", "Summary", "__version__"]
|
||||
|
1
benchmark/_version
Normal file
1
benchmark/_version
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.7.1"
|
@@ -21,7 +21,11 @@ def main(args_test=None):
|
||||
if args.grid:
|
||||
args.best = None
|
||||
if args.file is None and args.best is None and args.grid is None:
|
||||
ReportDatasets.report()
|
||||
report = ReportDatasets(args.excel)
|
||||
report.report()
|
||||
if args.excel:
|
||||
is_test = args_test is not None
|
||||
Files.open(report.get_file_name(), is_test)
|
||||
else:
|
||||
if args.best is not None or args.grid is not None:
|
||||
report = ReportBest(args.score, args.model, args.best, args.grid)
|
||||
|
Reference in New Issue
Block a user