mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-17 00:15:55 +00:00
Add excel to report dataset
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
from scipy.io import arff
|
from scipy.io import arff
|
||||||
from .Utils import Files
|
from .Utils import Files
|
||||||
from .Arguments import EnvData
|
from .Arguments import EnvData
|
||||||
@@ -40,9 +41,6 @@ class DatasetsArff:
|
|||||||
|
|
||||||
|
|
||||||
class DatasetsTanveer:
|
class DatasetsTanveer:
|
||||||
def __init__(self, discretized):
|
|
||||||
self.discretized = discretized
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dataset_names(name):
|
def dataset_names(name):
|
||||||
return f"{name}_R.dat"
|
return f"{name}_R.dat"
|
||||||
@@ -127,6 +125,24 @@ class Datasets:
|
|||||||
self.data_sets = result
|
self.data_sets = result
|
||||||
self.class_names = class_names
|
self.class_names = class_names
|
||||||
|
|
||||||
|
def get_attributes(self, name):
|
||||||
|
class Attributes:
|
||||||
|
pass
|
||||||
|
|
||||||
|
X, y = self.load_continuous(name)
|
||||||
|
attr = Attributes()
|
||||||
|
values, counts = np.unique(y, return_counts=True)
|
||||||
|
comp = ""
|
||||||
|
sep = ""
|
||||||
|
for count in counts:
|
||||||
|
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
|
||||||
|
sep = "/ "
|
||||||
|
attr.balance = comp
|
||||||
|
attr.classes = len(np.unique(y))
|
||||||
|
attr.samples = X.shape[0]
|
||||||
|
attr.features = X.shape[1]
|
||||||
|
return attr
|
||||||
|
|
||||||
def get_features(self):
|
def get_features(self):
|
||||||
return self.dataset.features
|
return self.dataset.features
|
||||||
|
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import math
|
import math
|
||||||
import json
|
import json
|
||||||
@@ -17,6 +18,7 @@ from .Utils import (
|
|||||||
TextColor,
|
TextColor,
|
||||||
NO_RESULTS,
|
NO_RESULTS,
|
||||||
)
|
)
|
||||||
|
from ._version import __version__
|
||||||
|
|
||||||
|
|
||||||
class BestResultsEver:
|
class BestResultsEver:
|
||||||
@@ -566,37 +568,247 @@ class Excel(BaseReport):
|
|||||||
self.sheet.set_row(c, 20)
|
self.sheet.set_row(c, 20)
|
||||||
self.sheet.set_row(0, 25)
|
self.sheet.set_row(0, 25)
|
||||||
self.sheet.freeze_panes(6, 1)
|
self.sheet.freeze_panes(6, 1)
|
||||||
self.sheet.hide_gridlines()
|
self.sheet.hide_gridlines(2)
|
||||||
if self.close:
|
if self.close:
|
||||||
self.book.close()
|
self.book.close()
|
||||||
|
|
||||||
|
|
||||||
class ReportDatasets:
|
class ReportDatasets:
|
||||||
|
row = 6
|
||||||
|
# alternate lines colors
|
||||||
|
color1 = "#DCE6F1"
|
||||||
|
color2 = "#FDE9D9"
|
||||||
|
color3 = "#B1A0C7"
|
||||||
|
|
||||||
|
def __init__(self, excel, book=None):
|
||||||
|
self.excel = excel
|
||||||
|
self.env = EnvData().load()
|
||||||
|
self.close = False
|
||||||
|
self.output = True
|
||||||
|
self.header_text = f"Datasets used in benchmark ver. {__version__}"
|
||||||
|
if excel:
|
||||||
|
self.max_length = 0
|
||||||
|
if book is None:
|
||||||
|
self.excel_file_name = "ReportDatasets.xlsx"
|
||||||
|
self.book = xlsxwriter.Workbook(
|
||||||
|
self.excel_file_name, {"nan_inf_to_errors": True}
|
||||||
|
)
|
||||||
|
self.set_properties(self.get_title())
|
||||||
|
self.close = True
|
||||||
|
else:
|
||||||
|
self.book = book
|
||||||
|
self.output = False
|
||||||
|
self.sheet = self.book.add_worksheet("Datasets")
|
||||||
|
|
||||||
|
def set_properties(self, title):
|
||||||
|
self.book.set_properties(
|
||||||
|
{
|
||||||
|
"title": title,
|
||||||
|
"subject": "Machine learning results",
|
||||||
|
"author": "Ricardo Montañana Gómez",
|
||||||
|
"manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
|
||||||
|
"company": "UCLM",
|
||||||
|
"comments": "Created with Python and XlsxWriter",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def report():
|
def get_python_version():
|
||||||
|
return "{}.{}".format(sys.version_info.major, sys.version_info.minor)
|
||||||
|
|
||||||
|
def get_title(self):
|
||||||
|
return (
|
||||||
|
f" Benchmark ver. {__version__} - "
|
||||||
|
f" Python ver. {self.get_python_version()}"
|
||||||
|
f" with {self.env['n_folds']} Folds cross validation "
|
||||||
|
f" Discretization: {self.env['discretize']} "
|
||||||
|
f"Stratification: {self.env['stratified']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_file_name(self):
|
||||||
|
return self.excel_file_name
|
||||||
|
|
||||||
|
def header(self):
|
||||||
|
merge_format = self.book.add_format(
|
||||||
|
{
|
||||||
|
"border": 1,
|
||||||
|
"bold": 1,
|
||||||
|
"align": "center",
|
||||||
|
"valign": "vcenter",
|
||||||
|
"font_size": 18,
|
||||||
|
"bg_color": self.color3,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
merge_format_subheader = self.book.add_format(
|
||||||
|
{
|
||||||
|
"border": 1,
|
||||||
|
"bold": 1,
|
||||||
|
"align": "center",
|
||||||
|
"valign": "vcenter",
|
||||||
|
"font_size": 16,
|
||||||
|
"bg_color": self.color1,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
merge_format_subheader_right = self.book.add_format(
|
||||||
|
{
|
||||||
|
"border": 1,
|
||||||
|
"bold": 1,
|
||||||
|
"align": "right",
|
||||||
|
"valign": "vcenter",
|
||||||
|
"font_size": 16,
|
||||||
|
"bg_color": self.color1,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
merge_format_subheader_left = self.book.add_format(
|
||||||
|
{
|
||||||
|
"border": 1,
|
||||||
|
"bold": 1,
|
||||||
|
"align": "left",
|
||||||
|
"valign": "vcenter",
|
||||||
|
"font_size": 16,
|
||||||
|
"bg_color": self.color1,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.sheet.merge_range(0, 0, 0, 4, self.header_text, merge_format)
|
||||||
|
self.sheet.merge_range(
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
0,
|
||||||
|
f" Default score {self.env['score']}",
|
||||||
|
merge_format_subheader,
|
||||||
|
)
|
||||||
|
self.sheet.merge_range(
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
"Cross validation",
|
||||||
|
merge_format_subheader_right,
|
||||||
|
)
|
||||||
|
self.sheet.write(
|
||||||
|
1, 4, f"{self.env['n_folds']} Folds", merge_format_subheader_left
|
||||||
|
)
|
||||||
|
self.sheet.merge_range(
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3,
|
||||||
|
"Stratified",
|
||||||
|
merge_format_subheader_right,
|
||||||
|
)
|
||||||
|
self.sheet.write(
|
||||||
|
2,
|
||||||
|
4,
|
||||||
|
f"{'True' if self.env['stratified']=='1' else 'False'}",
|
||||||
|
merge_format_subheader_left,
|
||||||
|
)
|
||||||
|
self.sheet.merge_range(
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
3,
|
||||||
|
"Discretized",
|
||||||
|
merge_format_subheader_right,
|
||||||
|
)
|
||||||
|
self.sheet.write(
|
||||||
|
3,
|
||||||
|
4,
|
||||||
|
f"{'True' if self.env['discretize']=='1' else 'False'}",
|
||||||
|
merge_format_subheader_left,
|
||||||
|
)
|
||||||
|
self.sheet.merge_range(
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
4,
|
||||||
|
3,
|
||||||
|
"Seeds",
|
||||||
|
merge_format_subheader_right,
|
||||||
|
)
|
||||||
|
self.sheet.write(
|
||||||
|
4, 4, f"{self.env['seeds']}", merge_format_subheader_left
|
||||||
|
)
|
||||||
|
header_cols = [
|
||||||
|
("Dataset", 30),
|
||||||
|
("Samples", 10),
|
||||||
|
("Features", 10),
|
||||||
|
("Classes", 10),
|
||||||
|
("Balance", 50),
|
||||||
|
]
|
||||||
|
bold = self.book.add_format(
|
||||||
|
{
|
||||||
|
"bold": True,
|
||||||
|
"font_size": 14,
|
||||||
|
"bg_color": self.color3,
|
||||||
|
"border": 1,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
i = 0
|
||||||
|
for item, length in header_cols:
|
||||||
|
self.sheet.write(5, i, item, bold)
|
||||||
|
self.sheet.set_column(i, i, length)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
def footer(self):
|
||||||
|
# set Balance column width to max length
|
||||||
|
self.sheet.set_column(4, 4, self.max_length)
|
||||||
|
self.sheet.freeze_panes(6, 1)
|
||||||
|
self.sheet.hide_gridlines(2)
|
||||||
|
if self.close:
|
||||||
|
self.book.close()
|
||||||
|
|
||||||
|
def print_line(self, result):
|
||||||
|
size_n = 14
|
||||||
|
integer = self.book.add_format(
|
||||||
|
{"num_format": "#,###", "font_size": size_n, "border": 1}
|
||||||
|
)
|
||||||
|
normal = self.book.add_format({"font_size": size_n, "border": 1})
|
||||||
|
col = 0
|
||||||
|
if self.row % 2 == 0:
|
||||||
|
normal.set_bg_color(self.color1)
|
||||||
|
integer.set_bg_color(self.color1)
|
||||||
|
else:
|
||||||
|
normal.set_bg_color(self.color2)
|
||||||
|
integer.set_bg_color(self.color2)
|
||||||
|
self.sheet.write(self.row, col, result.dataset, normal)
|
||||||
|
self.sheet.write(self.row, col + 1, result.samples, integer)
|
||||||
|
self.sheet.write(self.row, col + 2, result.features, integer)
|
||||||
|
self.sheet.write(self.row, col + 3, result.classes, normal)
|
||||||
|
self.sheet.write(self.row, col + 4, result.balance, normal)
|
||||||
|
if len(result.balance) > self.max_length:
|
||||||
|
self.max_length = len(result.balance)
|
||||||
|
self.row += 1
|
||||||
|
|
||||||
|
def report(self):
|
||||||
data_sets = Datasets()
|
data_sets = Datasets()
|
||||||
color_line = TextColor.LINE1
|
color_line = TextColor.LINE1
|
||||||
print(color_line, end="")
|
if self.excel:
|
||||||
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
self.header()
|
||||||
print("=" * 30 + " ===== ====== === " + "=" * 40)
|
if self.output:
|
||||||
|
print(color_line, end="")
|
||||||
|
print(self.header_text)
|
||||||
|
print("")
|
||||||
|
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
||||||
|
print("=" * 30 + " ===== ====== === " + "=" * 60)
|
||||||
for dataset in data_sets:
|
for dataset in data_sets:
|
||||||
X, y = data_sets.load(dataset)
|
attributes = data_sets.get_attributes(dataset)
|
||||||
|
attributes.dataset = dataset
|
||||||
|
if self.excel:
|
||||||
|
self.print_line(attributes)
|
||||||
color_line = (
|
color_line = (
|
||||||
TextColor.LINE2
|
TextColor.LINE2
|
||||||
if color_line == TextColor.LINE1
|
if color_line == TextColor.LINE1
|
||||||
else TextColor.LINE1
|
else TextColor.LINE1
|
||||||
)
|
)
|
||||||
values, counts = np.unique(y, return_counts=True)
|
if self.output:
|
||||||
comp = ""
|
print(color_line, end="")
|
||||||
sep = ""
|
print(
|
||||||
for count in counts:
|
f"{dataset:30s} {attributes.samples:6,d} "
|
||||||
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
|
f"{attributes.features:5,d} {attributes.classes:3d} "
|
||||||
sep = "/ "
|
f"{attributes.balance:40s}"
|
||||||
print(color_line, end="")
|
)
|
||||||
print(
|
if self.excel:
|
||||||
f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} "
|
self.footer()
|
||||||
f"{len(np.unique(y)):3d} {comp:40s}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SQL(BaseReport):
|
class SQL(BaseReport):
|
||||||
@@ -1043,7 +1255,8 @@ class Benchmark:
|
|||||||
sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
|
sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
|
||||||
sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
|
sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
|
||||||
sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
|
sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
|
||||||
row += 1
|
sheet.freeze_panes(6, 1)
|
||||||
|
sheet.hide_gridlines(2)
|
||||||
d_name = next(iter(self._datasets))
|
d_name = next(iter(self._datasets))
|
||||||
for model in self._models:
|
for model in self._models:
|
||||||
file_name = self._report[model][d_name]["file_name"]
|
file_name = self._report[model][d_name]["file_name"]
|
||||||
@@ -1067,8 +1280,10 @@ class Benchmark:
|
|||||||
)
|
)
|
||||||
k = Excel(file_name=file_name, book=book)
|
k = Excel(file_name=file_name, book=book)
|
||||||
k.report()
|
k.report()
|
||||||
sheet.freeze_panes(6, 1)
|
|
||||||
sheet.hide_gridlines()
|
# Add datasets sheet
|
||||||
|
re = ReportDatasets(excel=True, book=book)
|
||||||
|
re.report()
|
||||||
|
|
||||||
def exreport_output():
|
def exreport_output():
|
||||||
file_name = os.path.join(
|
file_name = os.path.join(
|
||||||
|
@@ -6,10 +6,11 @@ from .Datasets import (
|
|||||||
)
|
)
|
||||||
from .Experiments import Experiment
|
from .Experiments import Experiment
|
||||||
from .Results import Report, Summary
|
from .Results import Report, Summary
|
||||||
|
from ._version import __version__
|
||||||
|
|
||||||
__author__ = "Ricardo Montañana Gómez"
|
__author__ = "Ricardo Montañana Gómez"
|
||||||
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez"
|
__copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
|
||||||
__license__ = "MIT License"
|
__license__ = "MIT License"
|
||||||
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
||||||
|
|
||||||
__all__ = ["Experiment", "Datasets", "Report", "Summary"]
|
__all__ = ["Experiment", "Datasets", "Report", "Summary", __version__]
|
||||||
|
1
benchmark/_version
Normal file
1
benchmark/_version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
__version__ = "0.7.1"
|
@@ -21,7 +21,11 @@ def main(args_test=None):
|
|||||||
if args.grid:
|
if args.grid:
|
||||||
args.best = None
|
args.best = None
|
||||||
if args.file is None and args.best is None and args.grid is None:
|
if args.file is None and args.best is None and args.grid is None:
|
||||||
ReportDatasets.report()
|
report = ReportDatasets(args.excel)
|
||||||
|
report.report()
|
||||||
|
if args.excel:
|
||||||
|
is_test = args_test is not None
|
||||||
|
Files.open(report.get_file_name(), is_test)
|
||||||
else:
|
else:
|
||||||
if args.best is not None or args.grid is not None:
|
if args.best is not None or args.grid is not None:
|
||||||
report = ReportBest(args.score, args.model, args.best, args.grid)
|
report = ReportBest(args.score, args.model, args.best, args.grid)
|
||||||
|
Reference in New Issue
Block a user