import os
from operator import itemgetter
import math
import json
import abc
import shutil
import subprocess
import xlsxwriter
import numpy as np
from .Experiments import BestResults
from .Datasets import Datasets
from .Arguments import EnvData, ALL_METRICS
from .Utils import (
    Folders,
    Files,
    Symbols,
    TextColor,
    NO_RESULTS,
)


class BestResultsEver:
    def __init__(self):
        self.data = {}
        for i in ["Tanveer", "Surcov", "Arff"]:
            self.data[i] = {}
            for metric in ALL_METRICS:
                self.data[i][metric.replace("-", "_")] = ["self", 1.0]
                self.data[i][metric] = ["self", 1.0]
        self.data["Tanveer"]["accuracy"] = [
            "STree_default (liblinear-ovr)",
            40.282203,
        ]
        self.data["Arff"]["accuracy"] = [
            "STree_default (linear-ovo)",
            21.9765,
        ]

    def get_name_value(self, key, score):
        return self.data[key][score]


class BaseReport(abc.ABC):
    def __init__(self, file_name, best_file=False):
        self.file_name = file_name
        if not os.path.isfile(file_name):
            if not os.path.isfile(os.path.join(Folders.results, file_name)):
                raise FileNotFoundError(f"{file_name} does not exist!")
            else:
                self.file_name = os.path.join(Folders.results, file_name)
        with open(self.file_name) as f:
            self.data = json.load(f)
        self.best_acc_file = best_file
        if best_file:
            self.lines = self.data
        else:
            self.lines = self.data["results"]
            self.score_name = self.data["score_name"]
        self.__compute_best_results_ever()

    def __compute_best_results_ever(self):
        args = EnvData.load()
        key = args["source_data"]
        best = BestResultsEver()
        self.best_score_name, self.best_score_value = best.get_name_value(
            key, self.score_name
        )

    def _get_accuracy(self, item):
        return self.data[item][0] if self.best_acc_file else item["score"]

    def report(self):
        self.header()
        accuracy_total = 0.0
        for result in self.lines:
            self.print_line(result)
            accuracy_total += self._get_accuracy(result)
        self.footer(accuracy_total)

    def _load_best_results(self, score, model):
        best = BestResults(score, model, Datasets())
        self.best_results = best.load({})

    def _compute_status(self, dataset, accuracy: float):
        best = self.best_results[dataset][0]
        status = " "
        if accuracy == best:
            status = Symbols.equal_best
        elif accuracy > best:
            status = Symbols.better_best
        if status != " ":
            if status not in self._compare_totals:
                self._compare_totals[status] = 1
            else:
                self._compare_totals[status] += 1
        return status

    @staticmethod
    def _status_meaning(status):
        meaning = {
            Symbols.equal_best: "Equal to best",
            Symbols.better_best: "Better than best",
        }
        return meaning[status]

    def _get_best_accuracy(self):
        return self.best_score_value

    def _get_message_best_accuracy(self):
        return f"{self.score_name} compared to {self.best_score_name} .:"

    @abc.abstractmethod
    def header(self) -> None:
        pass

    @abc.abstractmethod
    def print_line(self, result) -> None:
        pass

    @abc.abstractmethod
    def footer(self, accuracy: float) -> None:
        pass


class Report(BaseReport):
    header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 16, 15]
    header_cols = [
        "Dataset",
        "Sampl.",
        "Feat.",
        "Cls",
        "Nodes",
        "Leaves",
        "Depth",
        "Score",
        "Time",
        "Hyperparameters",
    ]

    def __init__(self, file_name: str, compare: bool = False):
        super().__init__(file_name)
        self.nline = 0
        self.compare = compare

    def header_line(self, text: str) -> None:
        print(TextColor.LINE1, end="")
        length = sum(self.header_lengths) + len(self.header_lengths) - 3
        if text == "*":
            print("*" * (length + 2))
        else:
            print(f"*{text:{length}s}*")

    def print_line(self, result) -> None:
        self.nline += 1
        text_color = (
            TextColor.LINE1 if self.nline % 2 == 0 else TextColor.LINE2
        )
        print(text_color, end="")
        hl = self.header_lengths
        i = 0
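        # Each column below is padded to its width in header_lengths so the
        # body lines up with the column headers printed in header().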
print(f"{result['dataset']:{hl[i]}s} ", end="") i += 1 print(f"{result['samples']:{hl[i]},d} ", end="") i += 1 print(f"{result['features']:{hl[i]},d} ", end="") i += 1 print(f"{result['classes']:{hl[i]}d} ", end="") i += 1 print(f"{result['nodes']:{hl[i]}.2f} ", end="") i += 1 print(f"{result['leaves']:{hl[i]}.2f} ", end="") i += 1 print(f"{result['depth']:{hl[i]}.2f} ", end="") i += 1 if self.compare: status = self._compute_status(result["dataset"], result["score"]) else: status = " " print( f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}", end="", ) i += 1 print( f"{result['time']:9.6f}±{result['time_std']:6.4f} ", end="", ) i += 1 print(f"{str(result['hyperparameters']):{hl[i]}s} ") def header(self) -> None: if self.compare: self._load_best_results( self.data["score_name"], self.data["model"] ) self._compare_totals = {} self.header_line("*") self.header_line( f" Report {self.data['model']} ver. {self.data['version']}" f" with {self.data['folds']} Folds " f"cross validation and {len(self.data['seeds'])} random seeds. " f"{self.data['date']} {self.data['time']}" ) self.header_line(f" {self.data['title']}") self.header_line( f" Random seeds: {self.data['seeds']} Stratified: " f"{self.data['stratified']}" ) hours = self.data["duration"] / 3600 self.header_line( f" Execution took {self.data['duration']:7.2f} seconds, " f" {hours:5.2f} hours, on {self.data['platform']}" ) self.header_line(f" Score is {self.data['score_name']}") self.header_line("*") print("") line_col = "" for field, underscore in zip(self.header_cols, self.header_lengths): print(f"{field:{underscore}s} ", end="") line_col += "=" * underscore + " " print(f"\n{line_col}") def footer(self, accuracy: float) -> None: self.header_line("*") if self.compare: for key, value in self._compare_totals.items(): self.header_line( f" {key} {self._status_meaning(key)} .....: {value:2d}" ) self.header_line( f" {self._get_message_best_accuracy()} " f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") class ReportBest(BaseReport): header_lengths = [30, 8, 76, 45] header_cols = [ "Dataset", "Score", "File/Message", "Hyperparameters", ] def __init__(self, score, model, best, grid): name = ( Files.best_results(score, model) if best else Files.grid_output(score, model) ) file_name = os.path.join(Folders.results, name) self.best = best self.grid = grid self.score_name = score self.model = model super().__init__(file_name, best_file=True) def header_line(self, text: str) -> None: length = sum(self.header_lengths) + len(self.header_lengths) - 3 if text == "*": print("*" * (length + 2)) else: print(f"*{text:{length}s}*") def print_line(self, result): hl = self.header_lengths print(f"{result:{hl[0]}s} ", end="") print( f"{self.data[result][0]:8.6f} ", end="", ) print( f"{self.data[result][2]:{hl[2]}s} ", end="", ) print(f"{str(self.data[result][1]):{hl[1]}s} ") def header(self): self.header_line("*") kind = "Best" if self.best else "Grid" self.header_line( f" Report {kind} {self.score_name} Scores with {self.model} " "in any platform" ) self.header_line("*") print("") line_col = "" for field, underscore in zip(self.header_cols, self.header_lengths): print(f"{field:{underscore}s} ", end="") line_col += "=" * underscore + " " print(f"\n{line_col}") def footer(self, accuracy): self.header_line("*") self.header_line( f" {self._get_message_best_accuracy()} " f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") class Excel(BaseReport): row = 6 # alternate lines colors color1 = "#DCE6F1" color2 = "#FDE9D9" color3 = 
"#B1A0C7" def __init__(self, file_name, compare=False, book=None): super().__init__(file_name) self.compare = compare if self.compare: self._load_best_results( self.data["score_name"], self.data["model"] ) self._compare_totals = {} if book is None: self.excel_file_name = self.file_name.replace(".json", ".xlsx") self.book = xlsxwriter.Workbook( self.excel_file_name, {"nan_inf_to_errors": True} ) self.set_book_properties() self.close = True else: self.book = book self.close = False self.sheet = self.book.add_worksheet(self.data["model"]) self.max_hyper_width = 0 self.col_hyperparams = 0 @staticmethod def set_properties(book, title): book.set_properties( { "title": title, "subject": "Machine learning results", "author": "Ricardo Montañana Gómez", "manager": "Dr. J. A. Gámez, Dr. J. M. Puerta", "company": "UCLM", "comments": "Created with Python and XlsxWriter", } ) def set_book_properties(self): self.set_properties(self.book, self.get_title()) def get_title(self): return ( f" Report {self.data['model']} ver. {self.data['version']}" f" with {self.data['folds']} Folds " f"cross validation and {len(self.data['seeds'])} random seeds. " f"{self.data['date']} {self.data['time']}" ) def get_file_name(self): return self.excel_file_name def header(self): merge_format = self.book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 18, "bg_color": self.color3, } ) merge_format_subheader = self.book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 16, "bg_color": self.color1, } ) merge_format_subheader_left = self.book.add_format( { "border": 1, "bold": 1, "align": "left", "valign": "vcenter", "font_size": 12, "bg_color": self.color1, } ) header_text = self.get_title() self.sheet.merge_range(0, 0, 0, 11, header_text, merge_format) self.sheet.merge_range( 1, 0, 1, 11, f" {self.data['title']}", merge_format_subheader ) self.sheet.merge_range( 2, 0, 3, 0, f" Score is {self.data['score_name']}", merge_format_subheader, ) self.sheet.merge_range( 2, 1, 3, 3, " Execution time", merge_format_subheader, ) hours = self.data["duration"] / 3600 self.sheet.merge_range( 2, 4, 2, 5, f"{self.data['duration']:7,.2f} s", merge_format_subheader, ) self.sheet.merge_range( 3, 4, 3, 5, f" {hours:5.2f} h", merge_format_subheader, ) self.sheet.merge_range( 2, 6, 3, 6, " ", merge_format_subheader, ) self.sheet.merge_range( 2, 7, 3, 7, "Platform", merge_format_subheader, ) self.sheet.merge_range( 2, 8, 3, 8, f"{self.data['platform']}", merge_format_subheader, ) self.sheet.merge_range( 2, 9, 2, 11, f"Random seeds: {self.data['seeds']}", merge_format_subheader_left, ) self.sheet.merge_range( 3, 9, 3, 11, f"Stratified: {self.data['stratified']}", merge_format_subheader_left, ) header_cols = [ ("Dataset", 30), ("Samples", 10), ("Features", 7), ("Classes", 7), ("Nodes", 7), ("Leaves", 7), ("Depth", 7), ("Score", 12), ("Score Std.", 12), ("Time", 12), ("Time Std.", 12), ("Hyperparameters", 50), ] if self.compare: header_cols.insert(8, ("Stat", 3)) bold = self.book.add_format( { "bold": True, "font_size": 14, "bg_color": self.color3, "border": 1, } ) i = 0 for item, length in header_cols: self.sheet.write(5, i, item, bold) self.sheet.set_column(i, i, length) i += 1 def print_line(self, result): size_n = 14 decimal = self.book.add_format( {"num_format": "0.000000", "font_size": size_n, "border": 1} ) integer = self.book.add_format( {"num_format": "#,###", "font_size": size_n, "border": 1} ) normal = self.book.add_format({"font_size": size_n, "border": 1}) 
        col = 0
        if self.row % 2 == 0:
            normal.set_bg_color(self.color1)
            decimal.set_bg_color(self.color1)
            integer.set_bg_color(self.color1)
        else:
            normal.set_bg_color(self.color2)
            decimal.set_bg_color(self.color2)
            integer.set_bg_color(self.color2)
        self.sheet.write(self.row, col, result["dataset"], normal)
        self.sheet.write(self.row, col + 1, result["samples"], integer)
        self.sheet.write(self.row, col + 2, result["features"], integer)
        self.sheet.write(self.row, col + 3, result["classes"], normal)
        self.sheet.write(self.row, col + 4, result["nodes"], normal)
        self.sheet.write(self.row, col + 5, result["leaves"], normal)
        self.sheet.write(self.row, col + 6, result["depth"], normal)
        self.sheet.write(self.row, col + 7, result["score"], decimal)
        if self.compare:
            status = self._compute_status(result["dataset"], result["score"])
            self.sheet.write(self.row, col + 8, status, normal)
            col = 9
        else:
            col = 8
        self.sheet.write(self.row, col, result["score_std"], decimal)
        self.sheet.write(self.row, col + 1, result["time"], decimal)
        self.sheet.write(self.row, col + 2, result["time_std"], decimal)
        self.sheet.write(
            self.row, col + 3, str(result["hyperparameters"]), normal
        )
        self.col_hyperparams = col + 3
        self.max_hyper_width = max(
            self.max_hyper_width, len(str(result["hyperparameters"]))
        )
        self.row += 1

    def footer(self, accuracy):
        if self.compare:
            self.row += 2
            bold = self.book.add_format({"bold": True, "font_size": 16})
            for key, total in self._compare_totals.items():
                self.sheet.write(self.row, 1, key, bold)
                self.sheet.write(self.row, 2, total, bold)
                self.sheet.write(self.row, 3, self._status_meaning(key), bold)
                self.row += 1
        message = (
            f"** {self._get_message_best_accuracy()} "
            f"{accuracy/self._get_best_accuracy():7.4f}"
        )
        bold = self.book.add_format({"bold": True, "font_size": 14})
        # set width of the hyperparams column with the maximum width
        self.sheet.set_column(
            self.col_hyperparams,
            self.col_hyperparams,
            max(self.max_hyper_width + 1, 23),
        )
        self.sheet.write(self.row + 1, 0, message, bold)
        for c in range(self.row + 2):
            self.sheet.set_row(c, 20)
        self.sheet.set_row(0, 25)
        self.sheet.freeze_panes(6, 1)
        self.sheet.hide_gridlines()
        if self.close:
            self.book.close()


class ReportDatasets:
    @staticmethod
    def report():
        data_sets = Datasets()
        color_line = TextColor.LINE1
        print(color_line, end="")
        print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
        print("=" * 30 + " ====== ===== === " + "=" * 40)
        for dataset in data_sets:
            X, y = data_sets.load(dataset)
            color_line = (
                TextColor.LINE2
                if color_line == TextColor.LINE1
                else TextColor.LINE1
            )
            values, counts = np.unique(y, return_counts=True)
            comp = ""
            sep = ""
            for count in counts:
                comp += f"{sep}{count/sum(counts)*100:5.2f}%"
                sep = "/ "
            print(color_line, end="")
            print(
                f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} "
                f"{len(np.unique(y)):3d} {comp:40s}"
            )


class SQL(BaseReport):
    table_name = "results"

    def header(self):
        file_name = self.file_name.replace(".json", ".sql")
        self.file = open(file_name, "w")

    def print_line(self, result):
        attributes = [
            "date",
            "time",
            "type",
            "title",
            "stratified",
            "score_name",
            "score",
            "score_std",
            "dataset",
            "classifier",
            "version",
            "norm",
            "stand",
            "time_spent",
            "time_spent_std",
            "parameters",
            "nodes",
            "leaves",
            "depth",
            "platform",
            "nfolds",
            "seeds",
        ]
        command_insert = (
            f"replace into {self.table_name} ("
            + ",".join(attributes)
            + ") values("
            + ("'%s'," * len(attributes))[:-1]
            + ");\n"
        )
        values = (
            self.data["date"],
            self.data["time"],
            "crossval",
            self.data["title"],
            "1" if self.data["stratified"] else "0",
            self.data["score_name"],
            result["score"],
            result["score_std"],
            result["dataset"],
            self.data["model"],
            self.data["version"],
            0,
            1,
            result["time"],
            result["time_std"],
            str(result["hyperparameters"]).replace("'", '"'),
            result["nodes"],
            result["leaves"],
            result["depth"],
            self.data["platform"],
            self.data["folds"],
            str(self.data["seeds"]),
        )
        self.file.write(command_insert % values)

    def footer(self, accuracy):
        self.file.close()


class Benchmark:
    def __init__(self, score, visualize=True):
        self._score = score
        self._results = []
        self._models = []
        self._report = {}
        self._datasets = set()
        self.visualize = visualize
        self.__compute_best_results_ever()

    def __compute_best_results_ever(self):
        args = EnvData.load()
        key = args["source_data"]
        best = BestResultsEver()
        _, self.best_score_value = best.get_name_value(key, self._score)

    def get_result_file_name(self):
        return os.path.join(Folders.exreport, Files.exreport(self._score))

    def compile_results(self):
        summary = Summary()
        summary.acquire(given_score=self._score)
        self._models = summary.get_models()
        if self._models == []:
            raise ValueError(NO_RESULTS)
        for model in self._models:
            best = summary.best_result(
                criterion="model", value=model, score=self._score
            )
            file_name = os.path.join(Folders.results, best["file"])
            with open(file_name) as fi:
                experiment = json.load(fi)
            for result in experiment["results"]:
                dataset = result["dataset"]
                record = {
                    "model": model,
                    "dataset": dataset,
                    "score": result["score"],
                    "score_std": result["score_std"],
                    "file_name": file_name,
                }
                self._results.append(record)
                if model not in self._report:
                    self._report[model] = {}
                self._report[model][dataset] = record
                self._datasets.add(dataset)
        self._datasets = sorted(self._datasets)

    def save_results(self):
        # build Files.exreport
        result_file_name = self.get_result_file_name()
        with open(result_file_name, "w") as f:
            f.write(
                f"classifier, dataset, {self._score.replace('-','')}, "
                "stdev, file_name\n"
            )
            for record in self._results:
                f.write(
                    f"{record['model']}, {record['dataset']}, "
                    f"{record['score']}, {record['score_std']}, "
                    f"{record['file_name']}\n"
                )

    def exreport(self):
        def end_message(message, file):
            length = 100
            print("*" * length)
            print(message)
            print("*" * length)
            with open(os.path.join(Folders.exreport, file)) as f:
                data = f.read().splitlines()
            for line in data:
                print(line)

        # Remove previous results
        if os.path.exists(Folders.report):
            shutil.rmtree(Folders.report)
        if os.path.exists(Files.exreport_pdf):
            os.remove(Files.exreport_pdf)
        # Compute Friedman & Holm Tests
        fout = open(
            os.path.join(
                Folders.exreport, Files.exreport_output(self._score)
            ),
            "w",
        )
        ferr = open(
            os.path.join(Folders.exreport, Files.exreport_err(self._score)),
            "w",
        )
        result = subprocess.run(
            [
                "Rscript",
                os.path.join(Folders.src(), Files.benchmark_r),
                self._score.replace("-", ""),
                os.path.join(Folders.exreport, f"exreport_{self._score}"),
                "1" if self.visualize else "0",
            ],
            stdout=fout,
            stderr=ferr,
        )
        fout.close()
        ferr.close()
        if result.returncode != 0:
            end_message(
                "Error computing benchmark", Files.exreport_err(self._score)
            )
        else:
            end_message("Benchmark Ok", Files.exreport_output(self._score))
            Files.open(Files.exreport_pdf)

    def report(self, tex_output):
        # Report Header
        print(f"{'Dataset':30s} ", end="")
        lines = "=" * 30 + " "
        for model in self._models:
            print(f"{model:^13s} ", end="")
            lines += "=" * 13 + " "
        print(f"\n{lines}")
        if tex_output:
            self.print_tex_header()
        # Report Body
        for num, dataset in enumerate(self._datasets):
            print(f"{dataset:30s} ", end="")
            scores = []
            for model in self._models:
                result = self._report[model][dataset]
                score = float(result["score"])
                score_std = float(result["score_std"])
                print(f"{score:.5f}±", end="")
                print(f"{score_std:.3f} ", end="")
                scores.append((score, score_std))
            print("")
            if tex_output:
                self.print_tex_line(num, dataset, scores)
        if tex_output:
            self.print_tex_footer()
        # Summary of result files used
        d_name = next(iter(self._datasets))
        print(f"\n{'Model':30s} {'File Name':75s} Score")
        print("=" * 30 + " " + "=" * 75 + " ========")
        for model in self._models:
            file_name = self._report[model][d_name]["file_name"]
            report = StubReport(file_name)
            report.report()
            print(f"{model:^30s} {file_name:75s} {report.score:8.5f}")

    def get_tex_file(self):
        return os.path.join(Folders.exreport, Files.tex_output(self._score))

    def print_tex_header(self):
        with open(self.get_tex_file(), "w") as f:
            header_data = "# & Dataset & \\#S & \\#F & \\#L & " + " & ".join(
                self._models
            )
            tabular = "{rlrrr" + "c" * len(self._models) + "}"
            header = (
                "\\begin{sidewaystable}[ht]\n"
                "\\centering\n"
                "\\renewcommand{\\arraystretch}{1.2}\n"
                "\\renewcommand{\\tabcolsep}{0.07cm}\n"
                "\\caption{Accuracy results (mean ± std) for all the "
                "algorithms and datasets}\n"
                "\\label{table:datasets}\n"
                "\\resizebox{0.95\\textwidth}{!}{\n"
                "\\begin{tabular}" + tabular + "\\hline\n"
                "\\" + header_data + "\\\\\n"
                "\\hline\n"
            )
            f.write(header)

    def print_tex_line(self, num, dataset, scores):
        dt = Datasets()
        with open(self.get_tex_file(), "a") as f:
            X, y = dt.load(dataset)
            samples, features = X.shape
            n_classes = len(np.unique(y))
            dataset_name = dataset.replace("_", "\\_")
            print_line = (
                f"{num + 1} & {dataset_name} & {samples} & {features} "
                f"& {n_classes}"
            )
            max_value = max(scores)[0]
            for score, score_std in scores:
                # Add score and score_std
                value = f"{score:.4f}±{score_std:.3f}"
                value_formatted = (
                    "\\bfseries " + value + " "
                    if score == max_value
                    else value
                )
                print_line += " & " + value_formatted
            print_line += "\\\\"
            f.write(f"{print_line}\n")

    def print_tex_footer(self):
        with open(self.get_tex_file(), "a") as f:
            f.write("\\hline\n\\end{tabular}}\n\\end{sidewaystable}\n")

    def get_excel_file_name(self):
        return os.path.join(
            Folders.exreport, Files.exreport_excel(self._score)
        )

    def excel(self):
        book = xlsxwriter.Workbook(
            self.get_excel_file_name(), {"nan_inf_to_errors": True}
        )
        Excel.set_properties(book, "Experimentation summary")
        sheet = book.add_worksheet("Benchmark")
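        # Cell formats used throughout the benchmark sheet: plain cells,
        # six-decimal scores, and highlighted totals/headers.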
        normal = book.add_format({"font_size": 14, "border": 1})
        decimal = book.add_format(
            {"num_format": "0.000000", "font_size": 14, "border": 1}
        )
        decimal_total = book.add_format(
            {
                "num_format": "0.000000",
                "font_size": 14,
                "border": 1,
                "bold": True,
                "bg_color": Excel.color3,
            }
        )
        two_decimal_total = book.add_format(
            {
                "num_format": "0.00",
                "font_size": 14,
                "border": 1,
                "bold": True,
                "bg_color": Excel.color3,
            }
        )
        merge_format_header = book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 14,
                "bg_color": Excel.color1,
            }
        )
        merge_format = book.add_format(
            {
                "border": 1,
                "bold": 1,
                "align": "center",
                "valign": "vcenter",
                "font_size": 14,
                "bg_color": Excel.color3,
            }
        )
        merge_format_normal = book.add_format(
            {
                "border": 1,
                "valign": "vcenter",
                "font_size": 14,
            }
        )
        row = row_init = 4

        def header():
            nonlocal row
            sheet.merge_range(
                0, 0, 1, 0, "Benchmark of Models", merge_format_header
            )
            sheet.merge_range(
                0, 1, 1, 2, f"Score is {self._score}", merge_format_header
            )
            sheet.set_row(1, 20)
            # Set columns width
            sheet.set_column(0, 0, 40)
            for column in range(2 * len(self._results)):
                sheet.set_column(column + 1, column + 1, 15)
            # Set report header
            # Merge 2 rows
            sheet.merge_range(row, 0, row + 1, 0, "Dataset", merge_format)
            column = 1
            for model in self._models:
                # Merge 3 columns
                sheet.merge_range(
                    row, column, row, column + 2, model, merge_format
                )
                column += 3
            row += 1
            column = 1
            for _ in range(len(self._models)):
                sheet.write(row, column, "Score", merge_format)
                sheet.write(row, column + 1, "Stdev", merge_format)
                sheet.write(row, column + 2, "Rank", merge_format)
                column += 3

        def body():
            nonlocal row
            for dataset in self._datasets:
                row += 1
                normal = book.add_format({"font_size": 14, "border": 1})
                decimal = book.add_format(
                    {
                        "num_format": "0.000000",
                        "font_size": 14,
                        "border": 1,
                    }
                )
                if row % 2 == 0:
                    normal.set_bg_color(Excel.color1)
                    decimal.set_bg_color(Excel.color1)
                else:
                    normal.set_bg_color(Excel.color2)
                    decimal.set_bg_color(Excel.color2)
                sheet.write(row, 0, f"{dataset:30s}", normal)
                column = 1
                range_cells = ""
                for col in range(0, len(self._models) * 3, 3):
                    range_cells += chr(ord("B") + col) + str(row + 1) + ","
                range_cells = range_cells[:-1]
                for model in self._models:
                    sheet.write(
                        row,
                        column,
                        float(self._report[model][dataset]["score"]),
                        decimal,
                    )
                    column += 1
                    sheet.write(
                        row,
                        column,
                        float(self._report[model][dataset]["score_std"]),
                        decimal,
                    )
                    column += 1
                    cell_target = chr(ord("B") + column - 3) + str(row + 1)
                    sheet.write_formula(
                        row,
                        column,
                        f"=rank({cell_target},({range_cells}))",
                        normal,
                    )
                    column += 1

        def footer():
            nonlocal row
            for c in range(row_init, row + 2):
                sheet.set_row(c, 20)
            # Write totals
            row += 1
            sheet.write(row, 0, "Total", merge_format)
            for col in range(0, len(self._models) * 3, 3):
                range_metric = (
                    f"{chr(ord('B') + col)}7:{chr(ord('B') + col)}{row}"
                )
                sheet.write_formula(
                    row,
                    col + 1,
                    f"=sum({range_metric})/{self.best_score_value}",
                    decimal_total,
                )
                range_rank = (
                    f"{chr(ord('B') + col + 2)}7:"
                    f"{chr(ord('B') + col + 2)}{row}"
                )
                sheet.write_formula(
                    row,
                    col + 3,
                    f"=average({range_rank})",
                    two_decimal_total,
                )
            row += 1

        def models_files():
            nonlocal row
            row += 2
            # Set report header
            # Merge 2 rows
            sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format)
            sheet.merge_range(row, 1, row + 1, 5, "File", merge_format)
            sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format)
            row += 1
            d_name = next(iter(self._datasets))
            for model in self._models:
                file_name = self._report[model][d_name]["file_name"]
                report = StubReport(file_name)
                report.report()
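                # One row per model: name, result file and normalized score;
                # each result file also gets its own worksheet via Excel().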
                row += 1
                sheet.write(row, 0, model, normal)
                sheet.merge_range(
                    row, 1, row, 5, file_name, merge_format_normal
                )
                sheet.write(row, 6, report.score, decimal)
                k = Excel(file_name=file_name, book=book)
                k.report()
            sheet.freeze_panes(6, 1)
            sheet.hide_gridlines()

        def exreport_output():
            file_name = os.path.join(
                Folders.exreport, Files.exreport_output(self._score)
            )
            sheet = book.add_worksheet("Exreport")
            normal = book.add_format(
                {
                    "font_size": 14,
                    "border": 1,
                    "font_color": "blue",
                    "font_name": "Courier",
                    "bold": True,
                }
            )
            with open(file_name) as f:
                lines = f.read().splitlines()
            row = 0
            for line in lines:
                sheet.write(row, 0, line, normal)
                row += 1

        header()
        body()
        footer()
        models_files()
        exreport_output()
        book.close()


class StubReport(BaseReport):
    def __init__(self, file_name):
        super().__init__(file_name=file_name, best_file=False)

    def print_line(self, line) -> None:
        pass

    def header(self) -> None:
        self.title = self.data["title"]
        self.duration = self.data["duration"]

    def footer(self, accuracy: float) -> None:
        self.accuracy = accuracy
        self.score = accuracy / self._get_best_accuracy()


class Summary:
    def __init__(self, hidden=False) -> None:
        self.results = Files().get_all_results(hidden=hidden)
        self.data = []
        self.datasets = {}
        self.models = set()
        self.hidden = hidden

    def get_models(self):
        return sorted(self.models)

    def acquire(self, given_score="any") -> None:
        """Get all results"""
        for result in self.results:
            (
                score,
                model,
                platform,
                date,
                time,
                stratified,
            ) = Files().split_file_name(result)
            if given_score in ("any", score):
                self.models.add(model)
                report = StubReport(
                    os.path.join(
                        Folders.hidden_results
                        if self.hidden
                        else Folders.results,
                        result,
                    )
                )
                report.report()
                entry = dict(
                    score=score,
                    model=model,
                    title=report.title,
                    platform=platform,
                    date=date,
                    time=time,
                    stratified=stratified,
                    file=result,
                    metric=report.score,
                    duration=report.duration,
                )
                self.datasets[result] = report.lines
                self.data.append(entry)

    def get_results_criteria(
        self,
        score,
        model,
        input_data,
        sort_key,
        number,
    ):
        data = self.data.copy() if input_data is None else input_data
        if score:
            data = [x for x in data if x["score"] == score]
        if model:
            data = [x for x in data if x["model"] == model]
        keys = (
            itemgetter(sort_key, "time")
            if sort_key == "date"
            else itemgetter(sort_key, "date", "time")
        )
        data = sorted(data, key=keys, reverse=True)
        if number > 0:
            data = data[:number]
        return data

    def list_results(
        self,
        score=None,
        model=None,
        input_data=None,
        sort_key="date",
        number=0,
    ) -> None:
        """Print the list of results"""
        data = self.get_results_criteria(
            score, model, input_data, sort_key, number
        )
        if data == []:
            raise ValueError(NO_RESULTS)
        max_file = max(len(x["file"]) for x in data)
        max_title = max(len(x["title"]) for x in data)
        if self.hidden:
            color1 = TextColor.GREEN
            color2 = TextColor.YELLOW
        else:
            color1 = TextColor.LINE1
            color2 = TextColor.LINE2
        print(color1, end="")
        print(
            f"{'Date':10s} {'File':{max_file}s} {'Score':8s} {'Time(h)':7s} "
            f"{'Title':s}"
        )
        print(
            "=" * 10
            + " "
            + "=" * max_file
            + " "
            + "=" * 8
            + " "
            + "=" * 7
            + " "
            + "=" * max_title
        )
        print(
            "\n".join(
                [
                    (color2 if n % 2 == 0 else color1)
                    + f"{x['date']} {x['file']:{max_file}s} "
                    f"{x['metric']:8.5f} "
                    f"{x['duration']/3600:7.3f} "
                    f"{x['title']}"
                    for n, x in enumerate(data)
                ]
            )
        )

    def show_result(self, data: dict, title: str = "") -> None:
        def whites(n: int) -> str:
            return " " * n + color1 + "*"

        if data == {}:
            print(f"** {title} has no data **")
            return
        color1 = TextColor.CYAN
        color2 = TextColor.YELLOW
        file_name = data["file"]
        metric = data["metric"]
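        # Render the result as a fixed-width (81 column) framed box;
        # whites() pads each line with spaces up to the closing border.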
        result = StubReport(os.path.join(Folders.results, file_name))
        length = 81
        print(color1 + "*" * length)
        if title != "":
            print(
                "*"
                + color2
                + TextColor.BOLD
                + f"{title:^{length - 2}s}"
                + TextColor.ENDC
                + color1
                + "*"
            )
            print("*" + "-" * (length - 2) + "*")
        print("*" + whites(length - 2))
        print(
            "* "
            + color2
            + f"{result.data['title']:^{length - 4}}"
            + color1
            + " *"
        )
        print("*" + whites(length - 2))
        print(
            "* Model: "
            + color2
            + f"{result.data['model']:15s} "
            + color1
            + "Ver. "
            + color2
            + f"{result.data['version']:10s} "
            + color1
            + "Score: "
            + color2
            + f"{result.data['score_name']:10s} "
            + color1
            + "Metric: "
            + color2
            + f"{metric:10.7f}"
            + whites(length - 78)
        )
        print(color1 + "*" + whites(length - 2))
        print(
            "* Date : "
            + color2
            + f"{result.data['date']:15s}"
            + color1
            + " Time: "
            + color2
            + f"{result.data['time']:18s} "
            + color1
            + "Time Spent: "
            + color2
            + f"{result.data['duration']:9,.2f}"
            + color1
            + " secs."
            + whites(length - 78)
        )
        seeds = str(result.data["seeds"])
        seeds_len = len(seeds)
        print(
            "* Seeds: "
            + color2
            + f"{seeds:{seeds_len}s} "
            + color1
            + "Platform: "
            + color2
            + f"{result.data['platform']:17s} "
            + whites(length - 79)
        )
        print(
            "* Stratified: "
            + color2
            + f"{str(result.data['stratified']):15s}"
            + whites(length - 30)
        )
        print("* " + color2 + f"{file_name:60s}" + whites(length - 63))
        print(color1 + "*" + whites(length - 2))
        print(color1 + "*" * length)

    def best_results(
        self, criterion=None, value=None, score="accuracy", n=10
    ):
        # First filter the same score results (accuracy, f1, ...)
        haystack = [x for x in self.data if x["score"] == score]
        haystack = (
            haystack
            if criterion is None or value is None
            else [x for x in haystack if x[criterion] == value]
        )
        if haystack == []:
            raise ValueError(NO_RESULTS)
        return (
            sorted(
                haystack,
                key=lambda x: -1.0
                if math.isnan(x["metric"])
                else x["metric"],
                reverse=True,
            )[:n]
            if len(haystack) > 0
            else {}
        )

    def best_result(
        self, criterion=None, value=None, score="accuracy"
    ) -> dict:
        return self.best_results(criterion, value, score)[0]

    def best_results_datasets(self, score="accuracy") -> dict:
        """Get the best results for each dataset"""
        dt = Datasets()
        best_results = {}
        for dataset in dt:
            best_results[dataset] = (1, "", "", "")
        haystack = [x for x in self.data if x["score"] == score]
        # Search for the best results for each dataset
        for entry in haystack:
            for dataset in self.datasets[entry["file"]]:
                if dataset["score"] < best_results[dataset["dataset"]][0]:
                    best_results[dataset["dataset"]] = (
                        dataset["score"],
                        dataset["hyperparameters"],
                        entry["file"],
                        entry["title"],
                    )
        return best_results

    def show_top(self, score="accuracy", n=10):
        try:
            self.list_results(
                score=score,
                input_data=self.best_results(score=score, n=n),
                sort_key="metric",
            )
        except ValueError as e:
            print(e)


class PairCheck:
    def __init__(self, score, model_a, model_b, winners=False, losers=False):
        self.score = score
        self.model_a = model_a
        self.model_b = model_b
        self.show_winners = winners
        self.show_losers = losers
        self.winners = []
        self.losers = []
        self.tie = []

    def compute(self):
        summary = Summary()
        summary.acquire()
        best_a = summary.best_result(
            criterion="model", value=self.model_a, score=self.score
        )
        self.file_a = best_a["file"]
        best_b = summary.best_result(
            criterion="model", value=self.model_b, score=self.score
        )
        self.file_b = best_b["file"]
        report_a = StubReport(os.path.join(Folders.results, best_a["file"]))
        report_a.report()
        self.score_a = report_a.score
        report_b = StubReport(os.path.join(Folders.results, best_b["file"]))
        report_b.report()
        self.score_b = report_b.score
        for result_a, result_b in zip(report_a.lines, report_b.lines):
            result = result_a["score"] - result_b["score"]
            self._store_result(result, result_a["dataset"])

    def _store_result(self, result, dataset):
        if result > 0:
            self.winners.append(dataset)
        elif result < 0:
            self.losers.append(dataset)
        else:
            self.tie.append(dataset)

    def report(self):
        print(f"{'Model':<20} {'File':<70} {'Score':<10} Win Tie Lose")
        print("=" * 20 + " " + "=" * 70 + " " + "=" * 10 + " === === ====")
        print(f"{self.model_a:<20} {self.file_a:<70} {self.score_a:10.5f}")
        print(
            f"{self.model_b:<20} {self.file_b:<70} "
            f"{self.score_b:10.5f} "
            f"{TextColor.GREEN}{len(self.winners):3d} {TextColor.YELLOW}"
            f"{len(self.tie):3d} {TextColor.RED}{len(self.losers):4d}"
        )
        if self.show_winners:
            print(TextColor.GREEN + "Winners:")
            print(self.winners)
        if self.show_losers:
            print(TextColor.RED + "Losers:")
            print(self.losers)
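

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes an experiment results file exists under Folders.results; the
# file name "results_accuracy_model.json" below is hypothetical. Run it as a
# module (e.g. `python -m <package>.Results`) so the relative imports above
# resolve.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Console report of one experiment, comparing every dataset score
    # against the stored best results.
    report = Report("results_accuracy_model.json", compare=True)
    report.report()

    # Export the same experiment to an .xlsx workbook.
    excel = Excel("results_accuracy_model.json")
    excel.report()
    print(f"Workbook written to {excel.get_file_name()}")

    # List every stored result, newest first.
    summary = Summary()
    summary.acquire()
    summary.list_results()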