# benchmark/src/Results.py
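"""Reporting utilities for experiment results.

Renders result JSON files as console reports, Excel workbooks and SQL
scripts, and builds cross-model benchmark summaries.
"""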
import os
from operator import itemgetter
import math
import json
import abc
import shutil
import subprocess
import xlsxwriter
from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols, BEST_ACCURACY_STREE, TextColor
class BaseReport(abc.ABC):
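    """Base class for result reports.

    report() walks every line of the loaded result file, delegating to
    the header(), print_line() and footer() hooks that subclasses
    implement.
    """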
def __init__(self, file_name, best_file=False):
self.file_name = file_name
if not os.path.isfile(file_name):
if not os.path.isfile(os.path.join(Folders.results, file_name)):
                raise FileNotFoundError(f"{file_name} does not exist!")
else:
file_name = os.path.join(Folders.results, file_name)
with open(file_name) as f:
self.data = json.load(f)
self.best_acc_file = best_file
self.lines = self.data if best_file else self.data["results"]
def _get_accuracy(self, item):
return self.data[item][0] if self.best_acc_file else item["score"]
def report(self):
self.header()
accuracy_total = 0.0
for result in self.lines:
self.print_line(result)
accuracy_total += self._get_accuracy(result)
self.footer(accuracy_total)
def _load_best_results(self, score, model):
best = BestResults(score, model, Datasets())
self.best_results = best.load({})
def _compute_status(self, dataset, accuracy: float):
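        """Return a symbol when accuracy equals or beats the stored best."""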
best = self.best_results[dataset][0]
status = " "
if accuracy == best:
status = Symbols.equal_best
elif accuracy > best:
status = Symbols.better_best
if status != " ":
if status not in self._compare_totals:
self._compare_totals[status] = 1
else:
self._compare_totals[status] += 1
return status
@staticmethod
def _status_meaning(status):
meaning = {
Symbols.equal_best: "Equal to best",
Symbols.better_best: "Better than best",
}
return meaning[status]
@abc.abstractmethod
def header(self) -> None:
pass
@abc.abstractmethod
def print_line(self, result) -> None:
pass
@abc.abstractmethod
def footer(self, accuracy: float) -> None:
pass
class Report(BaseReport):
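    """Console report of an experiment result file, one dataset per line.

    Minimal usage sketch (the file name is hypothetical):

        Report("results_accuracy_model.json", compare=True).report()
    """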
header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 16, 15]
header_cols = [
"Dataset",
"Samp",
"Var",
"Cls",
"Nodes",
"Leaves",
"Depth",
"Score",
"Time",
"Hyperparameters",
]
def __init__(self, file_name: str, compare: bool = False):
super().__init__(file_name)
self.nline = 0
self.compare = compare
def header_line(self, text: str) -> None:
print(TextColor.LINE1, end="")
length = sum(self.header_lengths) + len(self.header_lengths) - 3
if text == "*":
print("*" * (length + 2))
else:
print(f"*{text:{length}s}*")
def print_line(self, result) -> None:
self.nline += 1
text_color = (
TextColor.LINE1 if self.nline % 2 == 0 else TextColor.LINE2
)
print(text_color, end="")
hl = self.header_lengths
i = 0
print(f"{result['dataset']:{hl[i]}s} ", end="")
i += 1
print(f"{result['samples']:{hl[i]},d} ", end="")
i += 1
print(f"{result['features']:{hl[i]}d} ", end="")
i += 1
print(f"{result['classes']:{hl[i]}d} ", end="")
i += 1
print(f"{result['nodes']:{hl[i]}.2f} ", end="")
i += 1
print(f"{result['leaves']:{hl[i]}.2f} ", end="")
i += 1
print(f"{result['depth']:{hl[i]}.2f} ", end="")
i += 1
if self.compare:
status = self._compute_status(result["dataset"], result["score"])
else:
status = " "
print(
f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
end="",
)
i += 1
print(
f"{result['time']:9.6f}±{result['time_std']:6.4f} ",
end="",
)
i += 1
print(f"{str(result['hyperparameters']):{hl[i]}s} ")
def header(self) -> None:
if self.compare:
self._load_best_results(
self.data["score_name"], self.data["model"]
)
self._compare_totals = {}
self.header_line("*")
self.header_line(
f" Report {self.data['model']} ver. {self.data['version']}"
f" with {self.data['folds']} Folds "
f"cross validation and {len(self.data['seeds'])} random seeds. "
f"{self.data['date']} {self.data['time']}"
)
self.header_line(f" {self.data['title']}")
self.header_line(
f" Random seeds: {self.data['seeds']} Stratified: "
f"{self.data['stratified']}"
)
hours = self.data["duration"] / 3600
self.header_line(
f" Execution took {self.data['duration']:7.2f} seconds, "
f" {hours:5.2f} hours, on {self.data['platform']}"
)
self.header_line(f" Score is {self.data['score_name']}")
self.header_line("*")
print("")
line_col = ""
for field, underscore in zip(self.header_cols, self.header_lengths):
print(f"{field:{underscore}s} ", end="")
line_col += "=" * underscore + " "
print(f"\n{line_col}")
def footer(self, accuracy: float) -> None:
self.header_line("*")
if self.compare:
for key, value in self._compare_totals.items():
self.header_line(
f" {key} {self._status_meaning(key)} .....: {value:2d}"
)
self.header_line(
f" Accuracy compared to stree_default (liblinear-ovr) .: "
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
)
self.header_line("*")
class ReportBest(BaseReport):
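    """Console report of the best results (or grid search output) per dataset."""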
header_lengths = [30, 8, 76, 45]
header_cols = [
"Dataset",
"Score",
"File/Message",
"Hyperparameters",
]
def __init__(self, score, model, best, grid):
name = (
Files.best_results(score, model)
if best
else Files.grid_output(score, model)
)
self.best = best
self.grid = grid
file_name = os.path.join(Folders.results, name)
super().__init__(file_name, best_file=True)
self.compare = False
self.score_name = score
self.model = model
def header_line(self, text: str) -> None:
length = sum(self.header_lengths) + len(self.header_lengths) - 3
if text == "*":
print("*" * (length + 2))
else:
print(f"*{text:{length}s}*")
def print_line(self, result):
hl = self.header_lengths
print(f"{result:{hl[0]}s} ", end="")
print(
f"{self.data[result][0]:8.6f} ",
end="",
)
print(
f"{self.data[result][2]:{hl[2]}s} ",
end="",
)
print(f"{str(self.data[result][1]):{hl[1]}s} ")
def header(self):
self.header_line("*")
kind = "Best" if self.best else "Grid"
self.header_line(
f" Report {kind} {self.score_name} Scores with {self.model} "
"in any platform"
)
self.header_line("*")
print("")
line_col = ""
for field, underscore in zip(self.header_cols, self.header_lengths):
print(f"{field:{underscore}s} ", end="")
line_col += "=" * underscore + " "
print(f"\n{line_col}")
def footer(self, accuracy):
self.header_line("*")
if self.compare:
for key, value in self._compare_totals.items():
self.header_line(
f" {key} {self._status_meaning(key)} .....: {value:2d}"
)
self.header_line(
f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
)
self.header_line("*")
class Excel(BaseReport):
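    """Excel rendering of a result file, one worksheet per model.

    Minimal usage sketch (the file name is hypothetical):

        Excel("results_accuracy_model.json", compare=True).report()

    Passing an existing xlsxwriter Workbook as `book` adds a worksheet to
    it and leaves closing the book to the caller.
    """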
row = 6
# alternate lines colors
color1 = "#DCE6F1"
color2 = "#FDE9D9"
color3 = "#B1A0C7"
def __init__(self, file_name, compare=False, book=None):
super().__init__(file_name)
self.compare = compare
if self.compare:
self._load_best_results(
self.data["score_name"], self.data["model"]
)
self._compare_totals = {}
if book is None:
self.excel_file_name = self.file_name.replace(".json", ".xlsx")
self.book = xlsxwriter.Workbook(self.excel_file_name)
self.set_book_properties()
self.close = True
else:
self.book = book
self.close = False
self.sheet = self.book.add_worksheet(self.data["model"])
self.max_hyper_width = 0
self.col_hyperparams = 0
@staticmethod
def set_properties(book, title):
book.set_properties(
{
"title": title,
"subject": "Machine learning results",
"author": "Ricardo Montañana Gómez",
"manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
"company": "UCLM",
"comments": "Created with Python and XlsxWriter",
}
)
def set_book_properties(self):
self.set_properties(self.book, self.get_title())
def get_title(self):
return (
f" Report {self.data['model']} ver. {self.data['version']}"
f" with {self.data['folds']} Folds "
f"cross validation and {len(self.data['seeds'])} random seeds. "
f"{self.data['date']} {self.data['time']}"
)
def get_file_name(self):
return self.excel_file_name
def header(self):
merge_format = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 18,
"bg_color": self.color3,
}
)
merge_format_subheader = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
merge_format_subheader_left = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "left",
"valign": "vcenter",
"font_size": 12,
"bg_color": self.color1,
}
)
header_text = self.get_title()
self.sheet.merge_range(0, 0, 0, 11, header_text, merge_format)
self.sheet.merge_range(
1, 0, 1, 11, f" {self.data['title']}", merge_format_subheader
)
self.sheet.merge_range(
2,
0,
3,
0,
f" Score is {self.data['score_name']}",
merge_format_subheader,
)
self.sheet.merge_range(
2,
1,
3,
3,
" Execution time",
merge_format_subheader,
)
hours = self.data["duration"] / 3600
self.sheet.merge_range(
2,
4,
2,
5,
f"{self.data['duration']:7,.2f} s",
merge_format_subheader,
)
self.sheet.merge_range(
3,
4,
3,
5,
f" {hours:5.2f} h",
merge_format_subheader,
)
self.sheet.merge_range(
2,
6,
3,
6,
" ",
merge_format_subheader,
)
self.sheet.merge_range(
2,
7,
3,
7,
"Platform",
merge_format_subheader,
)
self.sheet.merge_range(
2,
8,
3,
8,
f"{self.data['platform']}",
merge_format_subheader,
)
self.sheet.merge_range(
2,
9,
2,
11,
f"Random seeds: {self.data['seeds']}",
merge_format_subheader_left,
)
self.sheet.merge_range(
3,
9,
3,
11,
f"Stratified: {self.data['stratified']}",
merge_format_subheader_left,
)
header_cols = [
("Dataset", 30),
("Samples", 10),
("Features", 7),
("Classes", 7),
("Nodes", 7),
("Leaves", 7),
("Depth", 7),
("Score", 12),
("Score Std.", 12),
("Time", 12),
("Time Std.", 12),
("Hyperparameters", 50),
]
if self.compare:
header_cols.insert(8, ("Stat", 3))
bold = self.book.add_format(
{
"bold": True,
"font_size": 14,
"bg_color": self.color3,
"border": 1,
}
)
i = 0
for item, length in header_cols:
self.sheet.write(5, i, item, bold)
self.sheet.set_column(i, i, length)
i += 1
def print_line(self, result):
size_n = 14
decimal = self.book.add_format(
{"num_format": "0.000000", "font_size": size_n, "border": 1}
)
integer = self.book.add_format(
{"num_format": "#,###", "font_size": size_n, "border": 1}
)
normal = self.book.add_format({"font_size": size_n, "border": 1})
col = 0
if self.row % 2 == 0:
normal.set_bg_color(self.color1)
decimal.set_bg_color(self.color1)
integer.set_bg_color(self.color1)
else:
normal.set_bg_color(self.color2)
decimal.set_bg_color(self.color2)
integer.set_bg_color(self.color2)
self.sheet.write(self.row, col, result["dataset"], normal)
self.sheet.write(self.row, col + 1, result["samples"], integer)
self.sheet.write(self.row, col + 2, result["features"], normal)
self.sheet.write(self.row, col + 3, result["classes"], normal)
self.sheet.write(self.row, col + 4, result["nodes"], normal)
self.sheet.write(self.row, col + 5, result["leaves"], normal)
self.sheet.write(self.row, col + 6, result["depth"], normal)
self.sheet.write(self.row, col + 7, result["score"], decimal)
if self.compare:
status = self._compute_status(result["dataset"], result["score"])
self.sheet.write(self.row, col + 8, status, normal)
col = 9
else:
col = 8
self.sheet.write(self.row, col, result["score_std"], decimal)
self.sheet.write(self.row, col + 1, result["time"], decimal)
self.sheet.write(self.row, col + 2, result["time_std"], decimal)
self.sheet.write(
self.row, col + 3, str(result["hyperparameters"]), normal
)
self.col_hyperparams = col + 3
self.max_hyper_width = max(
self.max_hyper_width, len(str(result["hyperparameters"]))
)
self.row += 1
def footer(self, accuracy):
if self.compare:
self.row += 2
bold = self.book.add_format({"bold": True, "font_size": 16})
for key, total in self._compare_totals.items():
self.sheet.write(self.row, 1, key, bold)
self.sheet.write(self.row, 2, total, bold)
self.sheet.write(self.row, 3, self._status_meaning(key), bold)
self.row += 1
message = (
f"** Accuracy compared to stree_default (liblinear-ovr) .: "
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
)
bold = self.book.add_format({"bold": True, "font_size": 14})
# set width of the hyperparams column with the maximum width
self.sheet.set_column(
self.col_hyperparams,
self.col_hyperparams,
max(self.max_hyper_width + 1, 23),
)
self.sheet.write(self.row + 1, 0, message, bold)
for c in range(self.row + 2):
self.sheet.set_row(c, 20)
self.sheet.set_row(0, 25)
self.sheet.freeze_panes(6, 1)
self.sheet.hide_gridlines()
if self.close:
self.book.close()
class SQL(BaseReport):
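    """Dump a result file as SQL replace-into statements, one per dataset."""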
table_name = "results"
def header(self):
file_name = self.file_name.replace(".json", ".sql")
self.file = open(file_name, "w")
def print_line(self, result):
attributes = [
"date",
"time",
"type",
"title",
"stratified",
"score_name",
"score",
"score_std",
"dataset",
"classifier",
"version",
"norm",
"stand",
"time_spent",
"time_spent_std",
"parameters",
"nodes",
"leaves",
"depth",
"platform",
"nfolds",
"seeds",
]
command_insert = (
f"replace into {self.table_name} ("
+ ",".join(attributes)
+ ") values("
+ ("'%s'," * len(attributes))[:-1]
+ ");\n"
)
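        # command_insert is a template of the form:
        #   replace into results (date,time,...) values('%s','%s',...);
        # one quoted '%s' placeholder per attribute, filled below with
        # old-style % formatting.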
values = (
self.data["date"],
self.data["time"],
"crossval",
self.data["title"],
"1" if self.data["stratified"] else "0",
self.data["score_name"],
result["score"],
result["score_std"],
result["dataset"],
self.data["model"],
self.data["version"],
0,
1,
result["time"],
result["time_std"],
str(result["hyperparameters"]).replace("'", '"'),
result["nodes"],
result["leaves"],
result["depth"],
self.data["platform"],
self.data["folds"],
str(self.data["seeds"]),
)
self.file.write(command_insert % values)
def footer(self, accuracy):
self.file.close()
class Benchmark:
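    """Cross-model benchmark over the best result file of each model.

    Typical sequence (sketch):

        benchmark = Benchmark("accuracy")
        benchmark.compile_results()
        benchmark.save_results()
        benchmark.report()    # console summary
        benchmark.excel()     # Excel workbook
        benchmark.exreport()  # Friedman & Holm tests via R's exreport
    """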
def __init__(self, score):
self._score = score
self._results = []
self._models = []
self._report = {}
self._datasets = set()
def get_result_file_name(self):
return os.path.join(Folders.results, Files.exreport(self._score))
def compile_results(self):
summary = Summary()
summary.acquire(given_score=self._score)
self._models = summary.get_models()
for model in self._models:
best = summary.best_result(
criterion="model", value=model, score=self._score
)
file_name = os.path.join(Folders.results, best["file"])
with open(file_name) as fi:
experiment = json.load(fi)
for result in experiment["results"]:
dataset = result["dataset"]
record = {
"model": model,
"dataset": dataset,
"score": result["score"],
"score_std": result["score_std"],
"file_name": file_name,
}
self._results.append(record)
if model not in self._report:
self._report[model] = {}
self._report[model][dataset] = record
self._datasets.add(dataset)
self._datasets = sorted(self._datasets)
def save_results(self):
# build Files.exreport
result_file_name = self.get_result_file_name()
with open(result_file_name, "w") as f:
f.write(f"classifier, dataset, {self._score}, stdev, file_name\n")
for record in self._results:
f.write(
f"{record['model']}, {record['dataset']}, "
f"{record['score']}, {record['score_std']}, "
f"{record['file_name']}\n"
)
def exreport(self):
def end_message(message, file):
length = 100
print("*" * length)
print(message)
print("*" * length)
with open(os.path.join(Folders.results, file)) as f:
data = f.read().splitlines()
for line in data:
print(line)
# Remove previous results
try:
shutil.rmtree(Folders.report)
os.remove(Files.exreport_pdf)
except FileNotFoundError:
pass
except OSError as os_error:
print("Error: %s : %s" % (Folders.report, os_error.strerror))
# Compute Friedman & Holm Tests
        with open(
            os.path.join(Folders.results, Files.exreport_output(self._score)),
            "w",
        ) as fout, open(
            os.path.join(Folders.results, Files.exreport_err(self._score)),
            "w",
        ) as ferr:
            result = subprocess.run(
                [
                    "Rscript",
                    os.path.join(Folders.src, Files.benchmark_r),
                    self._score,
                ],
                stdout=fout,
                stderr=ferr,
            )
if result.returncode != 0:
end_message(
"Error computing benchmark", Files.exreport_err(self._score)
)
else:
end_message("Benchmark Ok", Files.exreport_output(self._score))
Files.open(Files.exreport_pdf)
def report(self):
print(f"{'Dataset':30s} ", end="")
lines = "=" * 30 + " "
for model in self._models:
print(f"{model:^13s} ", end="")
lines += "=" * 13 + " "
print(f"\n{lines}")
for dataset in self._datasets:
print(f"{dataset:30s} ", end="")
for model in self._models:
result = self._report[model][dataset]
print(f"{float(result['score']):.5f}±", end="")
print(f"{float(result['score_std']):.3f} ", end="")
print("")
d_name = next(iter(self._datasets))
print(f"\n{'Model':30s} {'File Name':75s} Score")
print("=" * 30 + " " + "=" * 75 + " ========")
for model in self._models:
file_name = self._report[model][d_name]["file_name"]
report = StubReport(file_name)
report.report()
print(f"{model:^30s} {file_name:75s} {report.score:8.5f}")
def get_excel_file_name(self):
return os.path.join(
Folders.exreport, Files.exreport_excel(self._score)
)
def excel(self):
book = xlsxwriter.Workbook(self.get_excel_file_name())
Excel.set_properties(book, "Experimentation summary")
sheet = book.add_worksheet("Benchmark")
normal = book.add_format({"font_size": 14, "border": 1})
decimal = book.add_format(
{"num_format": "0.000000", "font_size": 14, "border": 1}
)
decimal_total = book.add_format(
{
"num_format": "0.000000",
"font_size": 14,
"border": 1,
"bold": True,
"bg_color": Excel.color3,
}
)
two_decimal_total = book.add_format(
{
"num_format": "0.00",
"font_size": 14,
"border": 1,
"bold": True,
"bg_color": Excel.color3,
}
)
merge_format_header = book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 14,
"bg_color": Excel.color1,
}
)
merge_format = book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 14,
"bg_color": Excel.color3,
}
)
merge_format_normal = book.add_format(
{
"border": 1,
"valign": "vcenter",
"font_size": 14,
}
)
row = row_init = 4
def header():
nonlocal row
sheet.merge_range(
0, 0, 1, 0, "Benchmark of Models", merge_format_header
)
sheet.merge_range(
0, 1, 1, 2, f"Score is {self._score}", merge_format_header
)
sheet.set_row(1, 20)
            # Set column widths: three columns (score/stdev/rank) per
            # model, and at least enough for the file list at the bottom
            sheet.set_column(0, 0, 40)
            for column in range(max(3 * len(self._models), 6)):
sheet.set_column(column + 1, column + 1, 15)
# Set report header
# Merge 2 rows
sheet.merge_range(row, 0, row + 1, 0, "Dataset", merge_format)
column = 1
for model in self._models:
# Merge 3 columns
sheet.merge_range(
row, column, row, column + 2, model, merge_format
)
column += 3
row += 1
column = 1
for _ in range(len(self._models)):
sheet.write(row, column, "Score", merge_format)
sheet.write(row, column + 1, "Stdev", merge_format)
sheet.write(row, column + 2, "Rank", merge_format)
column += 3
def body():
nonlocal row
for dataset in self._datasets:
row += 1
normal = book.add_format({"font_size": 14, "border": 1})
decimal = book.add_format(
{
"num_format": "0.000000",
"font_size": 14,
"border": 1,
}
)
if row % 2 == 0:
normal.set_bg_color(Excel.color1)
decimal.set_bg_color(Excel.color1)
else:
normal.set_bg_color(Excel.color2)
decimal.set_bg_color(Excel.color2)
sheet.write(row, 0, f"{dataset:30s}", normal)
column = 1
range_cells = ""
for col in range(0, len(self._models) * 3, 3):
range_cells += chr(ord("B") + col) + str(row + 1) + ","
range_cells = range_cells[:-1]
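                # range_cells now lists this row's score cells, one per
                # model, e.g. "B7,E7,H7" on the first dataset row with
                # three models; chr(ord("B") + col) assumes the scores
                # never reach columns beyond "Z"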
for model in self._models:
sheet.write(
row,
column,
float(self._report[model][dataset]["score"]),
decimal,
)
column += 1
sheet.write(
row,
column,
float(self._report[model][dataset]["score_std"]),
decimal,
)
column += 1
cell_target = chr(ord("B") + column - 3) + str(row + 1)
sheet.write_formula(
row,
column,
f"=rank({cell_target},({range_cells}))",
normal,
)
column += 1
def footer():
nonlocal row
for c in range(row_init, row + 2):
sheet.set_row(c, 20)
# Write totals
row += 1
sheet.write(row, 0, "Total", merge_format)
for col in range(0, len(self._models) * 3, 3):
                range_metric = (
                    f"{chr(ord('B') + col)}7:{chr(ord('B') + col)}{row}"
                )
sheet.write_formula(
row,
col + 1,
f"=sum({range_metric})/{BEST_ACCURACY_STREE}",
decimal_total,
)
range_rank = (
f"{chr(ord('B') + col + 2)}7:"
f"{chr(ord('B') + col + 2)}{row}"
)
sheet.write_formula(
row,
col + 3,
f"=average({range_rank})",
two_decimal_total,
)
row += 1
def models_files():
nonlocal row
row += 2
# Set report header
# Merge 2 rows
sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
row += 1
d_name = next(iter(self._datasets))
for model in self._models:
file_name = self._report[model][d_name]["file_name"]
report = StubReport(file_name)
report.report()
row += 1
sheet.write(
row,
0,
model,
normal,
)
sheet.merge_range(
row, 1, row, 5, file_name, merge_format_normal
)
sheet.write(
row,
6,
report.score,
decimal,
)
k = Excel(file_name=file_name, book=book)
k.report()
sheet.freeze_panes(6, 1)
sheet.hide_gridlines()
def exreport_output():
file_name = os.path.join(
Folders.results, Files.exreport_output(self._score)
)
sheet = book.add_worksheet("Exreport")
normal = book.add_format(
{
"font_size": 14,
"border": 1,
"font_color": "blue",
"font_name": "Courier",
"bold": True,
}
)
with open(file_name) as f:
lines = f.read().splitlines()
row = 0
for line in lines:
sheet.write(row, 0, line, normal)
row += 1
header()
body()
footer()
models_files()
exreport_output()
book.close()
class StubReport(BaseReport):
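    """Report that prints nothing; used to harvest title, duration and score."""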
def __init__(self, file_name):
super().__init__(file_name=file_name, best_file=False)
def print_line(self, line) -> None:
pass
def header(self) -> None:
self.title = self.data["title"]
self.duration = self.data["duration"]
def footer(self, accuracy: float) -> None:
self.accuracy = accuracy
self.score = accuracy / BEST_ACCURACY_STREE
class Summary:
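    """Inventory of every result file, with filtering and ranking helpers.

    Usage sketch (the model name is hypothetical):

        summary = Summary()
        summary.acquire(given_score="accuracy")
        summary.list_results(model="some_model", number=5)
    """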
def __init__(self, hidden=False) -> None:
self.results = Files().get_all_results(hidden=hidden)
self.data = []
self.datasets = {}
self.models = set()
self.hidden = hidden
def get_models(self):
return sorted(self.models)
def acquire(self, given_score="any") -> None:
"""Get all results"""
for result in self.results:
(
score,
model,
platform,
date,
time,
stratified,
) = Files().split_file_name(result)
if given_score in ("any", score):
self.models.add(model)
report = StubReport(
os.path.join(
Folders.hidden_results
if self.hidden
else Folders.results,
result,
)
)
report.report()
entry = dict(
score=score,
model=model,
title=report.title,
platform=platform,
date=date,
time=time,
stratified=stratified,
file=result,
metric=report.score,
duration=report.duration,
)
self.datasets[result] = report.lines
self.data.append(entry)
def list_results(
self,
score=None,
model=None,
input_data=None,
sort_key="date",
number=0,
) -> None:
"""Print the list of results"""
data = self.data.copy() if input_data is None else input_data
if score:
data = [x for x in data if x["score"] == score]
if model:
data = [x for x in data if x["model"] == model]
keys = (
itemgetter(sort_key, "time")
if sort_key == "date"
else itemgetter(sort_key, "date", "time")
)
data = sorted(data, key=keys, reverse=True)
if number > 0:
data = data[:number]
        if not data:
            print("** No results found **")
            return
        max_file = max(len(x["file"]) for x in data)
        max_title = max(len(x["title"]) for x in data)
if self.hidden:
color1 = TextColor.GREEN
color2 = TextColor.YELLOW
else:
color1 = TextColor.LINE1
color2 = TextColor.LINE2
print(color1, end="")
print(
f"{'Date':10s} {'File':{max_file}s} {'Score':7s} {'Time(h)':7s} "
f"{'Title':s}"
)
print(
"=" * 10
+ " "
+ "=" * max_file
+ " "
+ "=" * 8
+ " "
+ "=" * 7
+ " "
+ "=" * max_title
)
print(
"\n".join(
[
(color2 if n % 2 == 0 else color1)
+ f"{x['date']} {x['file']:{max_file}s} "
f"{x['metric']:8.5f} "
f"{x['duration']/3600:7.3f} "
f"{x['title']}"
for n, x in enumerate(data)
]
)
)
def show_result(self, data: dict, title: str = "") -> None:
def whites(n: int) -> str:
return " " * n + "*"
if data == {}:
print(f"** {title} has No data **")
return
file_name = data["file"]
metric = data["metric"]
result = StubReport(os.path.join(Folders.results, file_name))
length = 81
print("*" * length)
if title != "":
print(f"*{title:^{length - 2}s}*")
print("*" + "-" * (length - 2) + "*")
print("*" + whites(length - 2))
print(f"* {result.data['title']:^{length - 4}} *")
print("*" + whites(length - 2))
print(
f"* Model: {result.data['model']:15s} "
f"Ver. {result.data['version']:10s} "
f"Score: {result.data['score_name']:10s} "
f"Metric: {metric:10.7f}" + whites(length - 78)
)
print("*" + whites(length - 2))
print(
f"* Date : {result.data['date']:15s} Time: "
f"{result.data['time']:18s} Time Spent: "
f"{result.data['duration']:9,.2f} secs." + whites(length - 78)
)
seeds = str(result.data["seeds"])
seeds_len = len(seeds)
print(
f"* Seeds: {seeds:{seeds_len}s} Platform: "
f"{result.data['platform']:17s} " + whites(length - 79)
)
print(
f"* Stratified: {str(result.data['stratified']):15s}"
+ whites(length - 30)
)
print(f"* {file_name:60s}" + whites(length - 63))
print("*" + whites(length - 2))
print("*" * length)
def best_results(self, criterion=None, value=None, score="accuracy", n=10):
# First filter the same score results (accuracy, f1, ...)
haystack = [x for x in self.data if x["score"] == score]
haystack = (
haystack
if criterion is None or value is None
else [x for x in haystack if x[criterion] == value]
)
        # NaN metrics sort last; an empty haystack simply yields []
        return sorted(
            haystack,
            key=lambda x: -1.0 if math.isnan(x["metric"]) else x["metric"],
            reverse=True,
        )[:n]
def best_result(
self, criterion=None, value=None, score="accuracy"
) -> dict:
return self.best_results(criterion, value, score)[0]
def best_results_datasets(self, score="accuracy") -> dict:
"""Get the best results for each dataset"""
dt = Datasets()
best_results = {}
for dataset in dt:
best_results[dataset] = (1, "", "", "")
haystack = [x for x in self.data if x["score"] == score]
# Search for the best results for each dataset
for entry in haystack:
for dataset in self.datasets[entry["file"]]:
if dataset["score"] < best_results[dataset["dataset"]][0]:
best_results[dataset["dataset"]] = (
dataset["score"],
dataset["hyperparameters"],
entry["file"],
entry["title"],
)
return best_results
def show_top(self, score="accuracy", n=10):
self.list_results(
score=score,
input_data=self.best_results(score=score, n=n),
sort_key="metric",
)