Create benchmark

This commit is contained in:
2021-09-24 11:18:38 +02:00
parent ebe768f566
commit 2fc188adca
65 changed files with 27900 additions and 0 deletions

396
src/Results.py Normal file
View File

@@ -0,0 +1,396 @@
import os
import json
import abc
import xlsxwriter
from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols
class BaseReport(abc.ABC):
def __init__(self, file_name, best_file=False):
self.file_name = file_name
if not os.path.isfile(file_name):
raise ValueError(f"{file_name} does not exists!")
with open(file_name) as f:
self.data = json.load(f)
self.best_acc_file = best_file
self.lines = self.data if best_file else self.data["results"]
def _get_accuracy(self, item):
return self.data[item][0] if self.best_acc_file else item["accuracy"]
def report(self):
self.header()
accuracy_total = 0.0
for result in self.lines:
self.print_line(result)
accuracy_total += self._get_accuracy(result)
self.footer(accuracy_total)
def _load_best_results(self, model):
best = BestResults(model, Datasets())
self.best_results = best.load({})
def _compute_status(self, dataset, accuracy):
best = self.best_results[dataset][0]
status = " "
if accuracy == best:
status = Symbols.equal_best
elif accuracy > best:
status = Symbols.better_best
if status != " ":
if status not in self._compare_totals:
self._compare_totals[status] = 1
else:
self._compare_totals[status] += 1
return status
@staticmethod
def _status_meaning(status):
meaning = {
Symbols.equal_best: "Equal to best",
Symbols.better_best: "Better than best",
}
return meaning[status]
@abc.abstractmethod
def header(self):
pass
@abc.abstractmethod
def print_line(self, result):
pass
@abc.abstractmethod
def footer(self, accuracy):
pass
class Report(BaseReport):
header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 15, 15]
header_cols = [
"Dataset",
"Samp",
"Var",
"Cls",
"Nodes",
"Leaves",
"Depth",
"Accuracy",
"Time",
"Hyperparameters",
]
def __init__(self, file_name, compare=False):
super().__init__(file_name)
self.compare = compare
def header_line(self, text):
length = sum(self.header_lengths) + len(self.header_lengths) - 3
if text == "*":
print("*" * (length + 2))
else:
print(f"*{text:{length}s}*")
def print_line(self, result):
hl = self.header_lengths
i = 0
print(f"{result['dataset']:{hl[i]}s} ", end="")
i += 1
print(f"{result['samples']:{hl[i]},d} ", end="")
i += 1
print(f"{result['features']:{hl[i]}d} ", end="")
i += 1
print(f"{result['classes']:{hl[i]}d} ", end="")
i += 1
print(f"{result['nodes']:{hl[i]}.2f} ", end="")
i += 1
print(f"{result['leaves']:{hl[i]}.2f} ", end="")
i += 1
print(f"{result['depth']:{hl[i]}.2f} ", end="")
i += 1
if self.compare:
status = self._compute_status(
result["dataset"], result["accuracy"]
)
else:
status = " "
print(
f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
end="",
)
i += 1
print(
f"{result['time']:8.6f}±{result['time_std']:6.4f} ",
end="",
)
i += 1
print(f"{str(result['hyperparameters']):{hl[i]}s} ")
def header(self):
if self.compare:
self._load_best_results(self.data["model"])
self._compare_totals = {}
self.header_line("*")
self.header_line(
f" Report {self.data['model']} with {self.data['folds']} Folds "
f"cross validation and {len(self.data['seeds'])} random seeds"
)
self.header_line(f" Random seeds: {self.data['seeds']}")
self.header_line(
f" Execution took {self.data['duration']:7.2f} seconds on an "
f"{self.data['platform']}"
)
self.header_line("*")
print("")
line_col = ""
for field, underscore in zip(self.header_cols, self.header_lengths):
print(f"{field:{underscore}s} ", end="")
line_col += "=" * underscore + " "
print(f"\n{line_col}")
def footer(self, accuracy):
self.header_line("*")
if self.compare:
for key, value in self._compare_totals.items():
self.header_line(
f" {key} {self._status_meaning(key)} .....: {value:2d}"
)
self.header_line(
f" Accuracy compared to stree_default (liblinear-ovr) .: "
f"{accuracy/40.282203:7.4f}"
)
self.header_line("*")
class ReportBest(BaseReport):
header_lengths = [30, 8, 50, 35]
header_cols = [
"Dataset",
"Accuracy",
"File",
"Hyperparameters",
]
def __init__(self, model):
file_name = os.path.join(Folders.results, Files.best_results(model))
super().__init__(file_name, best_file=True)
self.compare = False
self.model = model
def header_line(self, text):
length = sum(self.header_lengths) + len(self.header_lengths) - 3
if text == "*":
print("*" * (length + 2))
else:
print(f"*{text:{length}s}*")
def print_line(self, result):
hl = self.header_lengths
print(f"{result:{hl[0]}s} ", end="")
print(
f"{self.data[result][0]:8.6f} ",
end="",
)
print(
f"{self.data[result][2]:{hl[2]}s} ",
end="",
)
print(f"{str(self.data[result][1]):{hl[1]}s} ")
def header(self):
self.header_line("*")
self.header_line(
f" Report Best Accuracies with {self.model}" f" in any platform"
)
self.header_line("*")
print("")
line_col = ""
for field, underscore in zip(self.header_cols, self.header_lengths):
print(f"{field:{underscore}s} ", end="")
line_col += "=" * underscore + " "
print(f"\n{line_col}")
def footer(self, accuracy):
self.header_line("*")
if self.compare:
for key, value in self._compare_totals.items():
self.header_line(
f" {key} {self._status_meaning(key)} .....: {value:2d}"
)
self.header_line(
f" Accuracy compared to stree_default (liblinear-ovr) .: "
f"{accuracy/40.282203:7.4f}"
)
self.header_line("*")
class Excel(BaseReport):
row = 4
def __init__(self, file_name, compare=False):
super().__init__(file_name)
self.compare = compare
def header(self):
if self.compare:
self._load_best_results(self.data["model"])
self._compare_totals = {}
file_name = self.file_name.replace(".json", ".xlsx")
self.book = xlsxwriter.Workbook(file_name)
self.sheet = self.book.add_worksheet(self.data["model"])
header = self.book.add_format()
header.set_font_size(18)
subheader = self.book.add_format()
subheader.set_font_size(16)
self.sheet.write(
0,
0,
f" Report {self.data['model']} with {self.data['folds']} Folds "
f"cross validation and {len(self.data['seeds'])} random seeds",
header,
)
self.sheet.write(
1,
0,
f" Execution took {self.data['duration']:7.2f} seconds on an "
f"{self.data['platform']}",
subheader,
)
self.sheet.write(
1, 5, f"Random seeds: {self.data['seeds']}", subheader
)
header_cols = [
("Dataset", 30),
("Samples", 10),
("Variables", 7),
("Classes", 7),
("Nodes", 7),
("Leaves", 7),
("Depth", 7),
("Accuracy", 10),
("Acc. Std.", 10),
("Time", 10),
("Time Std.", 10),
("Parameters", 50),
]
if self.compare:
header_cols.insert(8, ("Stat", 3))
bold = self.book.add_format({"bold": True, "font_size": 14})
i = 0
for item, length in header_cols:
self.sheet.write(3, i, item, bold)
self.sheet.set_column(i, i, length)
i += 1
def print_line(self, result):
size_n = 14
decimal = self.book.add_format(
{"num_format": "0.000000", "font_size": size_n}
)
integer = self.book.add_format(
{"num_format": "#,###", "font_size": size_n}
)
normal = self.book.add_format({"font_size": size_n})
col = 0
self.sheet.write(self.row, col, result["dataset"], normal)
self.sheet.write(self.row, col + 1, result["samples"], integer)
self.sheet.write(self.row, col + 2, result["features"], normal)
self.sheet.write(self.row, col + 3, result["classes"], normal)
self.sheet.write(self.row, col + 4, result["nodes"], normal)
self.sheet.write(self.row, col + 5, result["leaves"], normal)
self.sheet.write(self.row, col + 6, result["depth"], normal)
self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
if self.compare:
status = self._compute_status(
result["dataset"], result["accuracy"]
)
self.sheet.write(self.row, col + 8, status, normal)
col = 9
else:
col = 8
self.sheet.write(self.row, col, result["accuracy_std"], decimal)
self.sheet.write(self.row, col + 1, result["time"], decimal)
self.sheet.write(self.row, col + 2, result["time_std"], decimal)
self.sheet.write(
self.row, col + 3, str(result["hyperparameters"]), normal
)
self.row += 1
def footer(self, accuracy):
if self.compare:
self.row += 2
bold = self.book.add_format({"bold": True, "font_size": 16})
for key, total in self._compare_totals.items():
self.sheet.write(self.row, 1, key, bold)
self.sheet.write(self.row, 2, total, bold)
self.sheet.write(self.row, 3, self._status_meaning(key), bold)
self.row += 1
message = (
f"** Accuracy compared to stree_default (liblinear-ovr) .: "
f"{accuracy/40.282203:7.4f}"
)
bold = self.book.add_format({"bold": True, "font_size": 14})
self.sheet.write(self.row + 1, 0, message, bold)
self.book.close()
class SQL(BaseReport):
table_name = "results"
def header(self):
file_name = self.file_name.replace(".json", ".sql")
self.file = open(file_name, "w")
def print_line(self, result):
attributes = [
"date",
"time",
"type",
"accuracy",
"accuracy_std",
"dataset",
"classifier",
"norm",
"stand",
"time_spent",
"time_spent_std",
"parameters",
"nodes",
"leaves",
"depth",
"platform",
"nfolds",
"seeds",
]
command_insert = (
f"replace into {self.table_name} ("
+ ",".join(attributes)
+ ") values("
+ ("'%s'," * len(attributes))[:-1]
+ ");\n"
)
values = (
self.data["date"],
self.data["time"],
"crossval",
result["accuracy"],
result["accuracy_std"],
result["dataset"],
self.data["model"],
0,
1,
result["time"],
result["time_std"],
str(result["hyperparameters"]).replace("'", '"'),
result["nodes"],
result["leaves"],
result["depth"],
self.data["platform"],
self.data["folds"],
str(self.data["seeds"]),
)
self.file.write(command_insert % values)
def footer(self, accuracy):
self.file.close()