mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-17 08:25:53 +00:00
import os
from operator import itemgetter
import math
import json
import abc
import shutil
import subprocess

import xlsxwriter

from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols, BEST_ACCURACY_STREE, TextColor


class BaseReport(abc.ABC):
    """Abstract base for reports built from a results json file.

    Loads the file (also looking under Folders.results), keeps the parsed
    data and iterates over its result lines through the header, print_line
    and footer hooks implemented by each subclass.
    """

    def __init__(self, file_name, best_file=False):
        self.file_name = file_name
        if not os.path.isfile(file_name):
            if not os.path.isfile(os.path.join(Folders.results, file_name)):
                raise FileNotFoundError(f"{file_name} does not exist!")
            else:
                file_name = os.path.join(Folders.results, file_name)
        with open(file_name) as f:
            self.data = json.load(f)
        self.best_acc_file = best_file
        self.lines = self.data if best_file else self.data["results"]

    def _get_accuracy(self, item):
        return self.data[item][0] if self.best_acc_file else item["score"]

    def report(self):
        self.header()
        accuracy_total = 0.0
        for result in self.lines:
            self.print_line(result)
            accuracy_total += self._get_accuracy(result)
        self.footer(accuracy_total)

    def _load_best_results(self, score, model):
        best = BestResults(score, model, Datasets())
        self.best_results = best.load({})

    def _compute_status(self, dataset, accuracy: float):
        best = self.best_results[dataset][0]
        status = " "
        if accuracy == best:
            status = Symbols.equal_best
        elif accuracy > best:
            status = Symbols.better_best
        if status != " ":
            if status not in self._compare_totals:
                self._compare_totals[status] = 1
            else:
                self._compare_totals[status] += 1
        return status

    @staticmethod
    def _status_meaning(status):
        meaning = {
            Symbols.equal_best: "Equal to best",
            Symbols.better_best: "Better than best",
        }
        return meaning[status]

    @abc.abstractmethod
    def header(self) -> None:
        pass

    @abc.abstractmethod
    def print_line(self, result) -> None:
        pass

    @abc.abstractmethod
    def footer(self, accuracy: float) -> None:
        pass


class Report(BaseReport):
    """Print a formatted text report of an experiment to stdout.

    With compare=True each score is checked against the best known result
    for its dataset and flagged accordingly.
    """

    header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 16, 15]
    header_cols = [
        "Dataset",
        "Samp",
        "Var",
        "Cls",
        "Nodes",
        "Leaves",
        "Depth",
        "Score",
        "Time",
        "Hyperparameters",
    ]

    def __init__(self, file_name: str, compare: bool = False):
        super().__init__(file_name)
        self.nline = 0
        self.compare = compare

    def header_line(self, text: str) -> None:
        print(TextColor.LINE1, end="")
        length = sum(self.header_lengths) + len(self.header_lengths) - 3
        if text == "*":
            print("*" * (length + 2))
        else:
            print(f"*{text:{length}s}*")

    def print_line(self, result) -> None:
        self.nline += 1
        text_color = (
            TextColor.LINE1 if self.nline % 2 == 0 else TextColor.LINE2
        )
        print(text_color, end="")
        hl = self.header_lengths
        i = 0
        print(f"{result['dataset']:{hl[i]}s} ", end="")
        i += 1
        print(f"{result['samples']:{hl[i]},d} ", end="")
        i += 1
        print(f"{result['features']:{hl[i]}d} ", end="")
        i += 1
        print(f"{result['classes']:{hl[i]}d} ", end="")
        i += 1
        print(f"{result['nodes']:{hl[i]}.2f} ", end="")
        i += 1
        print(f"{result['leaves']:{hl[i]}.2f} ", end="")
        i += 1
        print(f"{result['depth']:{hl[i]}.2f} ", end="")
        i += 1
        if self.compare:
            status = self._compute_status(result["dataset"], result["score"])
        else:
            status = " "
        print(
            f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
            end="",
        )
        i += 1
        print(
            f"{result['time']:9.6f}±{result['time_std']:6.4f} ",
            end="",
        )
        i += 1
        print(f"{str(result['hyperparameters']):{hl[i]}s} ")

    def header(self) -> None:
        if self.compare:
            self._load_best_results(
                self.data["score_name"], self.data["model"]
            )
            self._compare_totals = {}
        self.header_line("*")
        self.header_line(
            f" Report {self.data['model']} ver. {self.data['version']}"
            f" with {self.data['folds']} Folds "
            f"cross validation and {len(self.data['seeds'])} random seeds. "
            f"{self.data['date']} {self.data['time']}"
        )
        self.header_line(f" {self.data['title']}")
        self.header_line(
            f" Random seeds: {self.data['seeds']} Stratified: "
            f"{self.data['stratified']}"
        )
        hours = self.data["duration"] / 3600
        self.header_line(
            f" Execution took {self.data['duration']:7.2f} seconds, "
            f" {hours:5.2f} hours, on {self.data['platform']}"
        )
        self.header_line(f" Score is {self.data['score_name']}")
        self.header_line("*")
        print("")
        line_col = ""
        for field, underscore in zip(self.header_cols, self.header_lengths):
            print(f"{field:{underscore}s} ", end="")
            line_col += "=" * underscore + " "
        print(f"\n{line_col}")

    def footer(self, accuracy: float) -> None:
        self.header_line("*")
        if self.compare:
            for key, value in self._compare_totals.items():
                self.header_line(
                    f" {key} {self._status_meaning(key)} .....: {value:2d}"
                )
        self.header_line(
            f" Accuracy compared to stree_default (liblinear-ovr) .: "
            f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
        )
        self.header_line("*")
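
# Usage sketch (illustrative, not part of the original module): given an
# experiment output json stored under Folders.results (the file name below is
# hypothetical), a console report that also flags scores against the stored
# best results could be produced with:
#
#     report = Report("results_accuracy_SomeModel_example.json", compare=True)
#     report.report()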


class ReportBest(BaseReport):
    header_lengths = [30, 8, 76, 45]
    header_cols = [
        "Dataset",
        "Score",
        "File/Message",
        "Hyperparameters",
    ]

    def __init__(self, score, model, best, grid):
        name = (
            Files.best_results(score, model)
            if best
            else Files.grid_output(score, model)
        )
        self.best = best
        self.grid = grid
        file_name = os.path.join(Folders.results, name)
        super().__init__(file_name, best_file=True)
        self.compare = False
        self.score_name = score
        self.model = model

    def header_line(self, text: str) -> None:
        length = sum(self.header_lengths) + len(self.header_lengths) - 3
        if text == "*":
            print("*" * (length + 2))
        else:
            print(f"*{text:{length}s}*")

    def print_line(self, result):
        hl = self.header_lengths
        print(f"{result:{hl[0]}s} ", end="")
        print(
            f"{self.data[result][0]:8.6f} ",
            end="",
        )
        print(
            f"{self.data[result][2]:{hl[2]}s} ",
            end="",
        )
        print(f"{str(self.data[result][1]):{hl[1]}s} ")

    def header(self):
        self.header_line("*")
        kind = "Best" if self.best else "Grid"
        self.header_line(
            f" Report {kind} {self.score_name} Scores with {self.model} "
            "in any platform"
        )
        self.header_line("*")
        print("")
        line_col = ""
        for field, underscore in zip(self.header_cols, self.header_lengths):
            print(f"{field:{underscore}s} ", end="")
            line_col += "=" * underscore + " "
        print(f"\n{line_col}")

    def footer(self, accuracy):
        self.header_line("*")
        if self.compare:
            for key, value in self._compare_totals.items():
                self.header_line(
                    f" {key} {self._status_meaning(key)} .....: {value:2d}"
                )
        self.header_line(
            f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
            f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
        )
        self.header_line("*")


class Excel(BaseReport):
    """Write the report of an experiment to an xlsx workbook.

    Creates its own workbook from the json file name or adds a worksheet
    to a workbook supplied by the caller through the book argument.
    """

    row = 6
    # alternate lines colors
    color1 = "#DCE6F1"
    color2 = "#FDE9D9"
    color3 = "#B1A0C7"

    def __init__(self, file_name, compare=False, book=None):
        super().__init__(file_name)
        self.compare = compare
        if self.compare:
            self._load_best_results(
                self.data["score_name"], self.data["model"]
            )
            self._compare_totals = {}
        if book is None:
            self.excel_file_name = self.file_name.replace(".json", ".xlsx")
            self.book = xlsxwriter.Workbook(self.excel_file_name)
            self.set_book_properties()
            self.close = True
        else:
            self.book = book
            self.close = False
        self.sheet = self.book.add_worksheet(self.data["model"])
        self.max_hyper_width = 0
        self.col_hyperparams = 0

    @staticmethod
    def set_properties(book, title):
        book.set_properties(
            {
                "title": title,
                "subject": "Machine learning results",
                "author": "Ricardo Montañana Gómez",
                "manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
                "company": "UCLM",
                "comments": "Created with Python and XlsxWriter",
            }
        )

    def set_book_properties(self):
        self.set_properties(self.book, self.get_title())

    def get_title(self):
        return (
            f" Report {self.data['model']} ver. {self.data['version']}"
            f" with {self.data['folds']} Folds "
            f"cross validation and {len(self.data['seeds'])} random seeds. "
            f"{self.data['date']} {self.data['time']}"
        )

    def get_file_name(self):
        return self.excel_file_name

    def header(self):
        merge_format = self.book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 18,
                "bg_color": self.color3,
            }
        )
        merge_format_subheader = self.book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 16,
                "bg_color": self.color1,
            }
        )
        merge_format_subheader_left = self.book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "left",
                "valign": "vcenter",
                "font_size": 12,
                "bg_color": self.color1,
            }
        )
        header_text = self.get_title()
        self.sheet.merge_range(0, 0, 0, 11, header_text, merge_format)
        self.sheet.merge_range(
            1, 0, 1, 11, f" {self.data['title']}", merge_format_subheader
        )
        self.sheet.merge_range(
            2,
            0,
            3,
            0,
            f" Score is {self.data['score_name']}",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            2,
            1,
            3,
            3,
            " Execution time",
            merge_format_subheader,
        )
        hours = self.data["duration"] / 3600
        self.sheet.merge_range(
            2,
            4,
            2,
            5,
            f"{self.data['duration']:7,.2f} s",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            3,
            4,
            3,
            5,
            f" {hours:5.2f} h",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            2,
            6,
            3,
            6,
            " ",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            2,
            7,
            3,
            7,
            "Platform",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            2,
            8,
            3,
            8,
            f"{self.data['platform']}",
            merge_format_subheader,
        )
        self.sheet.merge_range(
            2,
            9,
            2,
            11,
            f"Random seeds: {self.data['seeds']}",
            merge_format_subheader_left,
        )
        self.sheet.merge_range(
            3,
            9,
            3,
            11,
            f"Stratified: {self.data['stratified']}",
            merge_format_subheader_left,
        )
        header_cols = [
            ("Dataset", 30),
            ("Samples", 10),
            ("Features", 7),
            ("Classes", 7),
            ("Nodes", 7),
            ("Leaves", 7),
            ("Depth", 7),
            ("Score", 12),
            ("Score Std.", 12),
            ("Time", 12),
            ("Time Std.", 12),
            ("Hyperparameters", 50),
        ]
        if self.compare:
            header_cols.insert(8, ("Stat", 3))
        bold = self.book.add_format(
            {
                "bold": True,
                "font_size": 14,
                "bg_color": self.color3,
                "border": 1,
            }
        )
        i = 0
        for item, length in header_cols:
            self.sheet.write(5, i, item, bold)
            self.sheet.set_column(i, i, length)
            i += 1

    def print_line(self, result):
        size_n = 14
        decimal = self.book.add_format(
            {"num_format": "0.000000", "font_size": size_n, "border": 1}
        )
        integer = self.book.add_format(
            {"num_format": "#,###", "font_size": size_n, "border": 1}
        )
        normal = self.book.add_format({"font_size": size_n, "border": 1})
        col = 0
        if self.row % 2 == 0:
            normal.set_bg_color(self.color1)
            decimal.set_bg_color(self.color1)
            integer.set_bg_color(self.color1)
        else:
            normal.set_bg_color(self.color2)
            decimal.set_bg_color(self.color2)
            integer.set_bg_color(self.color2)
        self.sheet.write(self.row, col, result["dataset"], normal)
        self.sheet.write(self.row, col + 1, result["samples"], integer)
        self.sheet.write(self.row, col + 2, result["features"], normal)
        self.sheet.write(self.row, col + 3, result["classes"], normal)
        self.sheet.write(self.row, col + 4, result["nodes"], normal)
        self.sheet.write(self.row, col + 5, result["leaves"], normal)
        self.sheet.write(self.row, col + 6, result["depth"], normal)
        self.sheet.write(self.row, col + 7, result["score"], decimal)
        if self.compare:
            status = self._compute_status(result["dataset"], result["score"])
            self.sheet.write(self.row, col + 8, status, normal)
            col = 9
        else:
            col = 8
        self.sheet.write(self.row, col, result["score_std"], decimal)
        self.sheet.write(self.row, col + 1, result["time"], decimal)
        self.sheet.write(self.row, col + 2, result["time_std"], decimal)
        self.sheet.write(
            self.row, col + 3, str(result["hyperparameters"]), normal
        )
        self.col_hyperparams = col + 3
        self.max_hyper_width = max(
            self.max_hyper_width, len(str(result["hyperparameters"]))
        )
        self.row += 1

    def footer(self, accuracy):
        if self.compare:
            self.row += 2
            bold = self.book.add_format({"bold": True, "font_size": 16})
            for key, total in self._compare_totals.items():
                self.sheet.write(self.row, 1, key, bold)
                self.sheet.write(self.row, 2, total, bold)
                self.sheet.write(self.row, 3, self._status_meaning(key), bold)
                self.row += 1
        message = (
            f"** Accuracy compared to stree_default (liblinear-ovr) .: "
            f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
        )
        bold = self.book.add_format({"bold": True, "font_size": 14})
        # set width of the hyperparams column with the maximum width
        self.sheet.set_column(
            self.col_hyperparams,
            self.col_hyperparams,
            max(self.max_hyper_width + 1, 23),
        )
        self.sheet.write(self.row + 1, 0, message, bold)
        for c in range(self.row + 2):
            self.sheet.set_row(c, 20)
        self.sheet.set_row(0, 25)
        self.sheet.freeze_panes(6, 1)
        self.sheet.hide_gridlines()
        if self.close:
            self.book.close()
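
# Usage sketch (illustrative): the Excel report can create its own workbook
# from the json file name, or append a worksheet to a workbook owned by the
# caller, as Benchmark.excel() does further below. The file names here are
# hypothetical.
#
#     Excel("results_accuracy_SomeModel_example.json", compare=True).report()
#
#     book = xlsxwriter.Workbook("summary.xlsx")
#     Excel("results_accuracy_SomeModel_example.json", book=book).report()
#     book.close()  # the caller keeps ownership when a book is passed in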


class SQL(BaseReport):
    table_name = "results"

    def header(self):
        file_name = self.file_name.replace(".json", ".sql")
        self.file = open(file_name, "w")

    def print_line(self, result):
        attributes = [
            "date",
            "time",
            "type",
            "title",
            "stratified",
            "score_name",
            "score",
            "score_std",
            "dataset",
            "classifier",
            "version",
            "norm",
            "stand",
            "time_spent",
            "time_spent_std",
            "parameters",
            "nodes",
            "leaves",
            "depth",
            "platform",
            "nfolds",
            "seeds",
        ]
        command_insert = (
            f"replace into {self.table_name} ("
            + ",".join(attributes)
            + ") values("
            + ("'%s'," * len(attributes))[:-1]
            + ");\n"
        )
        values = (
            self.data["date"],
            self.data["time"],
            "crossval",
            self.data["title"],
            "1" if self.data["stratified"] else "0",
            self.data["score_name"],
            result["score"],
            result["score_std"],
            result["dataset"],
            self.data["model"],
            self.data["version"],
            0,
            1,
            result["time"],
            result["time_std"],
            str(result["hyperparameters"]).replace("'", '"'),
            result["nodes"],
            result["leaves"],
            result["depth"],
            self.data["platform"],
            self.data["folds"],
            str(self.data["seeds"]),
        )
        self.file.write(command_insert % values)

    def footer(self, accuracy):
        self.file.close()


class Benchmark:
    """Compare the best result of every model for a given score.

    Collects the best result file per model, writes the csv consumed by
    exreport, runs the R benchmark script (Friedman & Holm tests) and
    builds the text and Excel summaries.
    """

    def __init__(self, score):
        self._score = score
        self._results = []
        self._models = []
        self._report = {}
        self._datasets = set()

    def get_result_file_name(self):
        return os.path.join(Folders.results, Files.exreport(self._score))

    def compile_results(self):
        summary = Summary()
        summary.acquire(given_score=self._score)
        self._models = summary.get_models()
        for model in self._models:
            best = summary.best_result(
                criterion="model", value=model, score=self._score
            )
            file_name = os.path.join(Folders.results, best["file"])
            with open(file_name) as fi:
                experiment = json.load(fi)
            for result in experiment["results"]:
                dataset = result["dataset"]
                record = {
                    "model": model,
                    "dataset": dataset,
                    "score": result["score"],
                    "score_std": result["score_std"],
                    "file_name": file_name,
                }
                self._results.append(record)
                if model not in self._report:
                    self._report[model] = {}
                self._report[model][dataset] = record
                self._datasets.add(dataset)
        self._datasets = sorted(self._datasets)

    def save_results(self):
        # build Files.exreport
        result_file_name = self.get_result_file_name()
        with open(result_file_name, "w") as f:
            f.write(f"classifier, dataset, {self._score}, stdev, file_name\n")
            for record in self._results:
                f.write(
                    f"{record['model']}, {record['dataset']}, "
                    f"{record['score']}, {record['score_std']}, "
                    f"{record['file_name']}\n"
                )

    def exreport(self):
        def end_message(message, file):
            length = 100
            print("*" * length)
            print(message)
            print("*" * length)
            with open(os.path.join(Folders.results, file)) as f:
                data = f.read().splitlines()
                for line in data:
                    print(line)

        # Remove previous results
        try:
            shutil.rmtree(Folders.report)
            os.remove(Files.exreport_pdf)
        except FileNotFoundError:
            pass
        except OSError as os_error:
            print("Error: %s : %s" % (Folders.report, os_error.strerror))
        # Compute Friedman & Holm Tests
        fout = open(
            os.path.join(Folders.results, Files.exreport_output(self._score)),
            "w",
        )
        ferr = open(
            os.path.join(Folders.results, Files.exreport_err(self._score)), "w"
        )
        result = subprocess.run(
            [
                "Rscript",
                os.path.join(Folders.src, Files.benchmark_r),
                self._score,
            ],
            stdout=fout,
            stderr=ferr,
        )
        fout.close()
        ferr.close()
        if result.returncode != 0:
            end_message(
                "Error computing benchmark", Files.exreport_err(self._score)
            )
        else:
            end_message("Benchmark Ok", Files.exreport_output(self._score))
            Files.open(Files.exreport_pdf)

    def report(self):
        print(f"{'Dataset':30s} ", end="")
        lines = "=" * 30 + " "
        for model in self._models:
            print(f"{model:^13s} ", end="")
            lines += "=" * 13 + " "
        print(f"\n{lines}")
        for dataset in self._datasets:
            print(f"{dataset:30s} ", end="")
            for model in self._models:
                result = self._report[model][dataset]
                print(f"{float(result['score']):.5f}±", end="")
                print(f"{float(result['score_std']):.3f} ", end="")
            print("")
        d_name = next(iter(self._datasets))
        print(f"\n{'Model':30s} {'File Name':75s} Score")
        print("=" * 30 + " " + "=" * 75 + " ========")
        for model in self._models:
            file_name = self._report[model][d_name]["file_name"]
            report = StubReport(file_name)
            report.report()
            print(f"{model:^30s} {file_name:75s} {report.score:8.5f}")

    def get_excel_file_name(self):
        return os.path.join(
            Folders.exreport, Files.exreport_excel(self._score)
        )

    def excel(self):
        book = xlsxwriter.Workbook(self.get_excel_file_name())
        Excel.set_properties(book, "Experimentation summary")
        sheet = book.add_worksheet("Benchmark")
        normal = book.add_format({"font_size": 14, "border": 1})
        decimal = book.add_format(
            {"num_format": "0.000000", "font_size": 14, "border": 1}
        )
        decimal_total = book.add_format(
            {
                "num_format": "0.000000",
                "font_size": 14,
                "border": 1,
                "bold": True,
                "bg_color": Excel.color3,
            }
        )
        two_decimal_total = book.add_format(
            {
                "num_format": "0.00",
                "font_size": 14,
                "border": 1,
                "bold": True,
                "bg_color": Excel.color3,
            }
        )
        merge_format_header = book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 14,
                "bg_color": Excel.color1,
            }
        )
        merge_format = book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 14,
                "bg_color": Excel.color3,
            }
        )
        merge_format_normal = book.add_format(
            {
                "border": 1,
                "valign": "vcenter",
                "font_size": 14,
            }
        )
        row = row_init = 4

        def header():
            nonlocal row
            sheet.merge_range(
                0, 0, 1, 0, "Benchmark of Models", merge_format_header
            )
            sheet.merge_range(
                0, 1, 1, 2, f"Score is {self._score}", merge_format_header
            )
            sheet.set_row(1, 20)
            # Set columns width
            sheet.set_column(0, 0, 40)
            for column in range(2 * len(self._results)):
                sheet.set_column(column + 1, column + 1, 15)
            # Set report header
            # Merge 2 rows
            sheet.merge_range(row, 0, row + 1, 0, "Dataset", merge_format)
            column = 1
            for model in self._models:
                # Merge 3 columns
                sheet.merge_range(
                    row, column, row, column + 2, model, merge_format
                )
                column += 3
            row += 1
            column = 1
            for _ in range(len(self._models)):
                sheet.write(row, column, "Score", merge_format)
                sheet.write(row, column + 1, "Stdev", merge_format)
                sheet.write(row, column + 2, "Rank", merge_format)
                column += 3

        def body():
            nonlocal row
            for dataset in self._datasets:
                row += 1
                normal = book.add_format({"font_size": 14, "border": 1})
                decimal = book.add_format(
                    {
                        "num_format": "0.000000",
                        "font_size": 14,
                        "border": 1,
                    }
                )
                if row % 2 == 0:
                    normal.set_bg_color(Excel.color1)
                    decimal.set_bg_color(Excel.color1)
                else:
                    normal.set_bg_color(Excel.color2)
                    decimal.set_bg_color(Excel.color2)
                sheet.write(row, 0, f"{dataset:30s}", normal)
                column = 1
                range_cells = ""
                for col in range(0, len(self._models) * 3, 3):
                    range_cells += chr(ord("B") + col) + str(row + 1) + ","
                range_cells = range_cells[:-1]
                for model in self._models:
                    sheet.write(
                        row,
                        column,
                        float(self._report[model][dataset]["score"]),
                        decimal,
                    )
                    column += 1
                    sheet.write(
                        row,
                        column,
                        float(self._report[model][dataset]["score_std"]),
                        decimal,
                    )
                    column += 1
                    cell_target = chr(ord("B") + column - 3) + str(row + 1)
                    sheet.write_formula(
                        row,
                        column,
                        f"=rank({cell_target},({range_cells}))",
                        normal,
                    )
                    column += 1

        def footer():
            nonlocal row
            for c in range(row_init, row + 2):
                sheet.set_row(c, 20)
            # Write totals
            row += 1
            sheet.write(row, 0, "Total", merge_format)
            for col in range(0, len(self._models) * 3, 3):
                range_metric = (
                    f"{chr(ord('B') + col)}7:{chr(ord('B') + col)}{row}"
                )
                sheet.write_formula(
                    row,
                    col + 1,
                    f"=sum({range_metric})/{BEST_ACCURACY_STREE}",
                    decimal_total,
                )
                range_rank = (
                    f"{chr(ord('B') + col + 2)}7:"
                    f"{chr(ord('B') + col + 2)}{row}"
                )
                sheet.write_formula(
                    row,
                    col + 3,
                    f"=average({range_rank})",
                    two_decimal_total,
                )
            row += 1

        def models_files():
            nonlocal row
            row += 2
            # Set report header
            # Merge 2 rows
            sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
            sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
            sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
            row += 1
            d_name = next(iter(self._datasets))
            for model in self._models:
                file_name = self._report[model][d_name]["file_name"]
                report = StubReport(file_name)
                report.report()
                row += 1
                sheet.write(
                    row,
                    0,
                    model,
                    normal,
                )
                sheet.merge_range(
                    row, 1, row, 5, file_name, merge_format_normal
                )
                sheet.write(
                    row,
                    6,
                    report.score,
                    decimal,
                )
                k = Excel(file_name=file_name, book=book)
                k.report()
            sheet.freeze_panes(6, 1)
            sheet.hide_gridlines()

        def exreport_output():
            file_name = os.path.join(
                Folders.results, Files.exreport_output(self._score)
            )
            sheet = book.add_worksheet("Exreport")
            normal = book.add_format(
                {
                    "font_size": 14,
                    "border": 1,
                    "font_color": "blue",
                    "font_name": "Courier",
                    "bold": True,
                }
            )
            with open(file_name) as f:
                lines = f.read().splitlines()
            row = 0
            for line in lines:
                sheet.write(row, 0, line, normal)
                row += 1

        header()
        body()
        footer()
        models_files()
        exreport_output()
        book.close()
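
# Typical sequence (inferred from the methods above, shown only as a sketch):
# compile_results() gathers the best file per model, save_results() writes the
# csv consumed by the R script, exreport() runs the Friedman & Holm tests, and
# report()/excel() print or export the comparison.
#
#     benchmark = Benchmark("accuracy")
#     benchmark.compile_results()
#     benchmark.save_results()
#     benchmark.exreport()
#     benchmark.report()
#     benchmark.excel()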


class StubReport(BaseReport):
    def __init__(self, file_name):
        super().__init__(file_name=file_name, best_file=False)

    def print_line(self, line) -> None:
        pass

    def header(self) -> None:
        self.title = self.data["title"]
        self.duration = self.data["duration"]

    def footer(self, accuracy: float) -> None:
        self.accuracy = accuracy
        self.score = accuracy / BEST_ACCURACY_STREE


class Summary:
    """Index of every result file found in the results folders."""

    def __init__(self, hidden=False) -> None:
        self.results = Files().get_all_results(hidden=hidden)
        self.data = []
        self.datasets = {}
        self.models = set()
        self.hidden = hidden

    def get_models(self):
        return sorted(self.models)

    def acquire(self, given_score="any") -> None:
        """Get all results"""
        for result in self.results:
            (
                score,
                model,
                platform,
                date,
                time,
                stratified,
            ) = Files().split_file_name(result)
            if given_score in ("any", score):
                self.models.add(model)
                report = StubReport(
                    os.path.join(
                        Folders.hidden_results
                        if self.hidden
                        else Folders.results,
                        result,
                    )
                )
                report.report()
                entry = dict(
                    score=score,
                    model=model,
                    title=report.title,
                    platform=platform,
                    date=date,
                    time=time,
                    stratified=stratified,
                    file=result,
                    metric=report.score,
                    duration=report.duration,
                )
                self.datasets[result] = report.lines
                self.data.append(entry)

    def list_results(
        self,
        score=None,
        model=None,
        input_data=None,
        sort_key="date",
        number=0,
    ) -> None:
        """Print the list of results"""
        data = self.data.copy() if input_data is None else input_data
        if score:
            data = [x for x in data if x["score"] == score]
        if model:
            data = [x for x in data if x["model"] == model]
        keys = (
            itemgetter(sort_key, "time")
            if sort_key == "date"
            else itemgetter(sort_key, "date", "time")
        )
        data = sorted(data, key=keys, reverse=True)
        if number > 0:
            data = data[:number]
        max_file = max(len(x["file"]) for x in data)
        max_title = max(len(x["title"]) for x in data)
        if self.hidden:
            color1 = TextColor.GREEN
            color2 = TextColor.YELLOW
        else:
            color1 = TextColor.LINE1
            color2 = TextColor.LINE2
        print(color1, end="")
        print(
            f"{'Date':10s} {'File':{max_file}s} {'Score':7s} {'Time(h)':7s} "
            f"{'Title':s}"
        )
        print(
            "=" * 10
            + " "
            + "=" * max_file
            + " "
            + "=" * 8
            + " "
            + "=" * 7
            + " "
            + "=" * max_title
        )
        print(
            "\n".join(
                [
                    (color2 if n % 2 == 0 else color1)
                    + f"{x['date']} {x['file']:{max_file}s} "
                    f"{x['metric']:8.5f} "
                    f"{x['duration']/3600:7.3f} "
                    f"{x['title']}"
                    for n, x in enumerate(data)
                ]
            )
        )

    def show_result(self, data: dict, title: str = "") -> None:
        def whites(n: int) -> str:
            return " " * n + "*"

        if data == {}:
            print(f"** {title} has No data **")
            return
        file_name = data["file"]
        metric = data["metric"]
        result = StubReport(os.path.join(Folders.results, file_name))
        length = 81
        print("*" * length)
        if title != "":
            print(f"*{title:^{length - 2}s}*")
            print("*" + "-" * (length - 2) + "*")
        print("*" + whites(length - 2))
        print(f"* {result.data['title']:^{length - 4}} *")
        print("*" + whites(length - 2))
        print(
            f"* Model: {result.data['model']:15s} "
            f"Ver. {result.data['version']:10s} "
            f"Score: {result.data['score_name']:10s} "
            f"Metric: {metric:10.7f}" + whites(length - 78)
        )
        print("*" + whites(length - 2))
        print(
            f"* Date : {result.data['date']:15s} Time: "
            f"{result.data['time']:18s} Time Spent: "
            f"{result.data['duration']:9,.2f} secs." + whites(length - 78)
        )
        seeds = str(result.data["seeds"])
        seeds_len = len(seeds)
        print(
            f"* Seeds: {seeds:{seeds_len}s} Platform: "
            f"{result.data['platform']:17s} " + whites(length - 79)
        )
        print(
            f"* Stratified: {str(result.data['stratified']):15s}"
            + whites(length - 30)
        )
        print(f"* {file_name:60s}" + whites(length - 63))
        print("*" + whites(length - 2))
        print("*" * length)

    def best_results(self, criterion=None, value=None, score="accuracy", n=10):
        # First filter the same score results (accuracy, f1, ...)
        haystack = [x for x in self.data if x["score"] == score]
        haystack = (
            haystack
            if criterion is None or value is None
            else [x for x in haystack if x[criterion] == value]
        )
        return (
            sorted(
                haystack,
                key=lambda x: -1.0 if math.isnan(x["metric"]) else x["metric"],
                reverse=True,
            )[:n]
            if len(haystack) > 0
            else {}
        )

    def best_result(
        self, criterion=None, value=None, score="accuracy"
    ) -> dict:
        return self.best_results(criterion, value, score)[0]

    def best_results_datasets(self, score="accuracy") -> dict:
        """Get the best results for each dataset"""
        dt = Datasets()
        best_results = {}
        for dataset in dt:
            best_results[dataset] = (1, "", "", "")
        haystack = [x for x in self.data if x["score"] == score]
        # Search for the best results for each dataset
        for entry in haystack:
            for dataset in self.datasets[entry["file"]]:
                if dataset["score"] < best_results[dataset["dataset"]][0]:
                    best_results[dataset["dataset"]] = (
                        dataset["score"],
                        dataset["hyperparameters"],
                        entry["file"],
                        entry["title"],
                    )
        return best_results

    def show_top(self, score="accuracy", n=10):
        self.list_results(
            score=score,
            input_data=self.best_results(score=score, n=n),
            sort_key="metric",
        )
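
# Usage sketch (illustrative): Summary scans every result file, so results can
# be listed, inspected or ranked after a single acquire() pass.
#
#     summary = Summary()
#     summary.acquire(given_score="accuracy")
#     summary.list_results(score="accuracy", number=10)
#     summary.show_result(summary.best_result(score="accuracy"))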