mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-17 16:35:54 +00:00
Create benchmark
This commit is contained in:
396
src/Results.py
Normal file
@@ -0,0 +1,396 @@
import os
import json
import abc
import xlsxwriter
from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols


class BaseReport(abc.ABC):
    """Base class for reports built from a results JSON file."""

    def __init__(self, file_name, best_file=False):
        self.file_name = file_name
        if not os.path.isfile(file_name):
            raise ValueError(f"{file_name} does not exist!")
        with open(file_name) as f:
            self.data = json.load(f)
        self.best_acc_file = best_file
        self.lines = self.data if best_file else self.data["results"]

    def _get_accuracy(self, item):
        return self.data[item][0] if self.best_acc_file else item["accuracy"]

    def report(self):
        self.header()
        accuracy_total = 0.0
        for result in self.lines:
            self.print_line(result)
            accuracy_total += self._get_accuracy(result)
        self.footer(accuracy_total)

    def _load_best_results(self, model):
        best = BestResults(model, Datasets())
        self.best_results = best.load({})

    def _compute_status(self, dataset, accuracy):
        best = self.best_results[dataset][0]
        status = " "
        if accuracy == best:
            status = Symbols.equal_best
        elif accuracy > best:
            status = Symbols.better_best
        if status != " ":
            if status not in self._compare_totals:
                self._compare_totals[status] = 1
            else:
                self._compare_totals[status] += 1
        return status

    @staticmethod
    def _status_meaning(status):
        meaning = {
            Symbols.equal_best: "Equal to best",
            Symbols.better_best: "Better than best",
        }
        return meaning[status]

    @abc.abstractmethod
    def header(self):
        pass

    @abc.abstractmethod
    def print_line(self, result):
        pass

    @abc.abstractmethod
    def footer(self, accuracy):
        pass


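# A minimal subclass sketch (hypothetical, illustration only): report() is a
# template method that calls header(), then print_line() once per result,
# then footer() with the accuracy total.
#
#   class PlainReport(BaseReport):
#       def header(self):
#           print(f"Model: {self.data['model']}")
#
#       def print_line(self, result):
#           print(result["dataset"], result["accuracy"])
#
#       def footer(self, accuracy):
#           print(f"Accuracy total: {accuracy:.4f}")
#
#   PlainReport("results/some_results.json").report()

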
class Report(BaseReport):
    header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 15, 15]
    header_cols = [
        "Dataset",
        "Samp",
        "Var",
        "Cls",
        "Nodes",
        "Leaves",
        "Depth",
        "Accuracy",
        "Time",
        "Hyperparameters",
    ]

    def __init__(self, file_name, compare=False):
        super().__init__(file_name)
        self.compare = compare

    def header_line(self, text):
        length = sum(self.header_lengths) + len(self.header_lengths) - 3
        if text == "*":
            print("*" * (length + 2))
        else:
            print(f"*{text:{length}s}*")

    def print_line(self, result):
        hl = self.header_lengths
        i = 0
        print(f"{result['dataset']:{hl[i]}s} ", end="")
        i += 1
        print(f"{result['samples']:{hl[i]},d} ", end="")
        i += 1
        print(f"{result['features']:{hl[i]}d} ", end="")
        i += 1
        print(f"{result['classes']:{hl[i]}d} ", end="")
        i += 1
        print(f"{result['nodes']:{hl[i]}.2f} ", end="")
        i += 1
        print(f"{result['leaves']:{hl[i]}.2f} ", end="")
        i += 1
        print(f"{result['depth']:{hl[i]}.2f} ", end="")
        i += 1
        if self.compare:
            status = self._compute_status(
                result["dataset"], result["accuracy"]
            )
        else:
            status = " "
        print(
            f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
            end="",
        )
        i += 1
        print(
            f"{result['time']:8.6f}±{result['time_std']:6.4f} ",
            end="",
        )
        i += 1
        print(f"{str(result['hyperparameters']):{hl[i]}s} ")

    def header(self):
        if self.compare:
            self._load_best_results(self.data["model"])
            self._compare_totals = {}
        self.header_line("*")
        self.header_line(
            f" Report {self.data['model']} with {self.data['folds']} Folds "
            f"cross validation and {len(self.data['seeds'])} random seeds"
        )
        self.header_line(f" Random seeds: {self.data['seeds']}")
        self.header_line(
            f" Execution took {self.data['duration']:7.2f} seconds on an "
            f"{self.data['platform']}"
        )
        self.header_line("*")
        print("")
        line_col = ""
        for field, underscore in zip(self.header_cols, self.header_lengths):
            print(f"{field:{underscore}s} ", end="")
            line_col += "=" * underscore + " "
        print(f"\n{line_col}")

    def footer(self, accuracy):
        self.header_line("*")
        if self.compare:
            for key, value in self._compare_totals.items():
                self.header_line(
                    f" {key} {self._status_meaning(key)} .....: {value:2d}"
                )
        # 40.282203: accuracy total of the stree_default (liblinear-ovr)
        # baseline named in the message below
        self.header_line(
            f" Accuracy compared to stree_default (liblinear-ovr) .: "
            f"{accuracy/40.282203:7.4f}"
        )
        self.header_line("*")


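# Usage sketch (hypothetical file name; expects a results JSON produced by
# the Experiments module):
#
#   Report("results/results_stree.json", compare=True).report()

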
class ReportBest(BaseReport):
    header_lengths = [30, 8, 50, 35]
    header_cols = [
        "Dataset",
        "Accuracy",
        "File",
        "Hyperparameters",
    ]

    def __init__(self, model):
        file_name = os.path.join(Folders.results, Files.best_results(model))
        super().__init__(file_name, best_file=True)
        self.compare = False
        self.model = model

    def header_line(self, text):
        length = sum(self.header_lengths) + len(self.header_lengths) - 3
        if text == "*":
            print("*" * (length + 2))
        else:
            print(f"*{text:{length}s}*")

    def print_line(self, result):
        hl = self.header_lengths
        print(f"{result:{hl[0]}s} ", end="")
        print(
            f"{self.data[result][0]:8.6f} ",
            end="",
        )
        print(
            f"{self.data[result][2]:{hl[2]}s} ",
            end="",
        )
        # hyperparameters take the last column width (hl[3])
        print(f"{str(self.data[result][1]):{hl[3]}s} ")

    def header(self):
        self.header_line("*")
        self.header_line(
            f" Report Best Accuracies with {self.model} on any platform"
        )
        self.header_line("*")
        print("")
        line_col = ""
        for field, underscore in zip(self.header_cols, self.header_lengths):
            print(f"{field:{underscore}s} ", end="")
            line_col += "=" * underscore + " "
        print(f"\n{line_col}")

    def footer(self, accuracy):
        self.header_line("*")
        if self.compare:
            for key, value in self._compare_totals.items():
                self.header_line(
                    f" {key} {self._status_meaning(key)} .....: {value:2d}"
                )
        self.header_line(
            f" Accuracy compared to stree_default (liblinear-ovr) .: "
            f"{accuracy/40.282203:7.4f}"
        )
        self.header_line("*")


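# Usage sketch (assumes the best-results file for the model exists under
# Folders.results, e.g. as produced by BestResults):
#
#   ReportBest("stree").report()

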
class Excel(BaseReport):
    row = 4

    def __init__(self, file_name, compare=False):
        super().__init__(file_name)
        self.compare = compare

    def header(self):
        if self.compare:
            self._load_best_results(self.data["model"])
            self._compare_totals = {}
        file_name = self.file_name.replace(".json", ".xlsx")
        self.book = xlsxwriter.Workbook(file_name)
        self.sheet = self.book.add_worksheet(self.data["model"])
        header = self.book.add_format()
        header.set_font_size(18)
        subheader = self.book.add_format()
        subheader.set_font_size(16)
        self.sheet.write(
            0,
            0,
            f" Report {self.data['model']} with {self.data['folds']} Folds "
            f"cross validation and {len(self.data['seeds'])} random seeds",
            header,
        )
        self.sheet.write(
            1,
            0,
            f" Execution took {self.data['duration']:7.2f} seconds on an "
            f"{self.data['platform']}",
            subheader,
        )
        self.sheet.write(
            1, 5, f"Random seeds: {self.data['seeds']}", subheader
        )
        header_cols = [
            ("Dataset", 30),
            ("Samples", 10),
            ("Variables", 7),
            ("Classes", 7),
            ("Nodes", 7),
            ("Leaves", 7),
            ("Depth", 7),
            ("Accuracy", 10),
            ("Acc. Std.", 10),
            ("Time", 10),
            ("Time Std.", 10),
            ("Parameters", 50),
        ]
        if self.compare:
            header_cols.insert(8, ("Stat", 3))
        bold = self.book.add_format({"bold": True, "font_size": 14})
        for i, (item, length) in enumerate(header_cols):
            self.sheet.write(3, i, item, bold)
            self.sheet.set_column(i, i, length)

    def print_line(self, result):
        size_n = 14
        decimal = self.book.add_format(
            {"num_format": "0.000000", "font_size": size_n}
        )
        integer = self.book.add_format(
            {"num_format": "#,###", "font_size": size_n}
        )
        normal = self.book.add_format({"font_size": size_n})
        col = 0
        self.sheet.write(self.row, col, result["dataset"], normal)
        self.sheet.write(self.row, col + 1, result["samples"], integer)
        self.sheet.write(self.row, col + 2, result["features"], normal)
        self.sheet.write(self.row, col + 3, result["classes"], normal)
        self.sheet.write(self.row, col + 4, result["nodes"], normal)
        self.sheet.write(self.row, col + 5, result["leaves"], normal)
        self.sheet.write(self.row, col + 6, result["depth"], normal)
        self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
        if self.compare:
            status = self._compute_status(
                result["dataset"], result["accuracy"]
            )
            self.sheet.write(self.row, col + 8, status, normal)
            col = 9
        else:
            col = 8
        self.sheet.write(self.row, col, result["accuracy_std"], decimal)
        self.sheet.write(self.row, col + 1, result["time"], decimal)
        self.sheet.write(self.row, col + 2, result["time_std"], decimal)
        self.sheet.write(
            self.row, col + 3, str(result["hyperparameters"]), normal
        )
        self.row += 1

    def footer(self, accuracy):
        if self.compare:
            self.row += 2
            bold = self.book.add_format({"bold": True, "font_size": 16})
            for key, total in self._compare_totals.items():
                self.sheet.write(self.row, 1, key, bold)
                self.sheet.write(self.row, 2, total, bold)
                self.sheet.write(self.row, 3, self._status_meaning(key), bold)
                self.row += 1
        message = (
            f"** Accuracy compared to stree_default (liblinear-ovr) .: "
            f"{accuracy/40.282203:7.4f}"
        )
        bold = self.book.add_format({"bold": True, "font_size": 14})
        self.sheet.write(self.row + 1, 0, message, bold)
        self.book.close()


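# Usage sketch (hypothetical file name; writes an .xlsx workbook next to the
# results JSON):
#
#   Excel("results/results_stree.json", compare=True).report()

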
class SQL(BaseReport):
    table_name = "results"

    def header(self):
        file_name = self.file_name.replace(".json", ".sql")
        self.file = open(file_name, "w")

    def print_line(self, result):
        attributes = [
            "date",
            "time",
            "type",
            "accuracy",
            "accuracy_std",
            "dataset",
            "classifier",
            "norm",
            "stand",
            "time_spent",
            "time_spent_std",
            "parameters",
            "nodes",
            "leaves",
            "depth",
            "platform",
            "nfolds",
            "seeds",
        ]
        # build one "replace into" statement per result row, with every
        # value single-quoted
        command_insert = (
            f"replace into {self.table_name} ("
            + ",".join(attributes)
            + ") values("
            + ("'%s'," * len(attributes))[:-1]
            + ");\n"
        )
        values = (
            self.data["date"],
            self.data["time"],
            "crossval",
            result["accuracy"],
            result["accuracy_std"],
            result["dataset"],
            self.data["model"],
            0,
            1,
            result["time"],
            result["time_std"],
            str(result["hyperparameters"]).replace("'", '"'),
            result["nodes"],
            result["leaves"],
            result["depth"],
            self.data["platform"],
            self.data["folds"],
            str(self.data["seeds"]),
        )
        self.file.write(command_insert % values)

    def footer(self, accuracy):
        self.file.close()
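# Usage sketch (hypothetical file name; writes a .sql file with one
# "replace into results" statement per result):
#
#   SQL("results/results_stree.json").report()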