import os from operator import itemgetter import math import json import abc import shutil import subprocess import xlsxwriter from Experiments import Datasets, BestResults from Utils import Folders, Files, Symbols, BEST_ACCURACY_STREE, TextColor class BaseReport(abc.ABC): def __init__(self, file_name, best_file=False): self.file_name = file_name if not os.path.isfile(file_name): if not os.path.isfile(os.path.join(Folders.results, file_name)): raise FileNotFoundError(f"{file_name} does not exists!") else: file_name = os.path.join(Folders.results, file_name) with open(file_name) as f: self.data = json.load(f) self.best_acc_file = best_file self.lines = self.data if best_file else self.data["results"] def _get_accuracy(self, item): return self.data[item][0] if self.best_acc_file else item["score"] def report(self): self.header() accuracy_total = 0.0 for result in self.lines: self.print_line(result) accuracy_total += self._get_accuracy(result) self.footer(accuracy_total) def _load_best_results(self, score, model): best = BestResults(score, model, Datasets()) self.best_results = best.load({}) def _compute_status(self, dataset, accuracy: float): best = self.best_results[dataset][0] status = " " if accuracy == best: status = Symbols.equal_best elif accuracy > best: status = Symbols.better_best if status != " ": if status not in self._compare_totals: self._compare_totals[status] = 1 else: self._compare_totals[status] += 1 return status @staticmethod def _status_meaning(status): meaning = { Symbols.equal_best: "Equal to best", Symbols.better_best: "Better than best", } return meaning[status] @abc.abstractmethod def header(self) -> None: pass @abc.abstractmethod def print_line(self, result) -> None: pass @abc.abstractmethod def footer(self, accuracy: float) -> None: pass class Report(BaseReport): header_lengths = [30, 5, 3, 3, 7, 7, 7, 15, 16, 15] header_cols = [ "Dataset", "Samp", "Var", "Cls", "Nodes", "Leaves", "Depth", "Score", "Time", "Hyperparameters", ] def __init__(self, file_name: str, compare: bool = False): super().__init__(file_name) self.nline = 0 self.compare = compare def header_line(self, text: str) -> None: print(TextColor.LINE1, end="") length = sum(self.header_lengths) + len(self.header_lengths) - 3 if text == "*": print("*" * (length + 2)) else: print(f"*{text:{length}s}*") def print_line(self, result) -> None: self.nline += 1 text_color = ( TextColor.LINE1 if self.nline % 2 == 0 else TextColor.LINE2 ) print(text_color, end="") hl = self.header_lengths i = 0 print(f"{result['dataset']:{hl[i]}s} ", end="") i += 1 print(f"{result['samples']:{hl[i]},d} ", end="") i += 1 print(f"{result['features']:{hl[i]}d} ", end="") i += 1 print(f"{result['classes']:{hl[i]}d} ", end="") i += 1 print(f"{result['nodes']:{hl[i]}.2f} ", end="") i += 1 print(f"{result['leaves']:{hl[i]}.2f} ", end="") i += 1 print(f"{result['depth']:{hl[i]}.2f} ", end="") i += 1 if self.compare: status = self._compute_status(result["dataset"], result["score"]) else: status = " " print( f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}", end="", ) i += 1 print( f"{result['time']:9.6f}±{result['time_std']:6.4f} ", end="", ) i += 1 print(f"{str(result['hyperparameters']):{hl[i]}s} ") def header(self) -> None: if self.compare: self._load_best_results( self.data["score_name"], self.data["model"] ) self._compare_totals = {} self.header_line("*") self.header_line( f" Report {self.data['model']} ver. {self.data['version']}" f" with {self.data['folds']} Folds " f"cross validation and {len(self.data['seeds'])} random seeds. " f"{self.data['date']} {self.data['time']}" ) self.header_line(f" {self.data['title']}") self.header_line( f" Random seeds: {self.data['seeds']} Stratified: " f"{self.data['stratified']}" ) hours = self.data["duration"] / 3600 self.header_line( f" Execution took {self.data['duration']:7.2f} seconds, " f" {hours:5.2f} hours, on {self.data['platform']}" ) self.header_line(f" Score is {self.data['score_name']}") self.header_line("*") print("") line_col = "" for field, underscore in zip(self.header_cols, self.header_lengths): print(f"{field:{underscore}s} ", end="") line_col += "=" * underscore + " " print(f"\n{line_col}") def footer(self, accuracy: float) -> None: self.header_line("*") if self.compare: for key, value in self._compare_totals.items(): self.header_line( f" {key} {self._status_meaning(key)} .....: {value:2d}" ) self.header_line( f" Accuracy compared to stree_default (liblinear-ovr) .: " f"{accuracy/BEST_ACCURACY_STREE:7.4f}" ) self.header_line("*") class ReportBest(BaseReport): header_lengths = [30, 8, 76, 45] header_cols = [ "Dataset", "Score", "File/Message", "Hyperparameters", ] def __init__(self, score, model, best, grid): name = ( Files.best_results(score, model) if best else Files.grid_output(score, model) ) self.best = best self.grid = grid file_name = os.path.join(Folders.results, name) super().__init__(file_name, best_file=True) self.compare = False self.score_name = score self.model = model def header_line(self, text: str) -> None: length = sum(self.header_lengths) + len(self.header_lengths) - 3 if text == "*": print("*" * (length + 2)) else: print(f"*{text:{length}s}*") def print_line(self, result): hl = self.header_lengths print(f"{result:{hl[0]}s} ", end="") print( f"{self.data[result][0]:8.6f} ", end="", ) print( f"{self.data[result][2]:{hl[2]}s} ", end="", ) print(f"{str(self.data[result][1]):{hl[1]}s} ") def header(self): self.header_line("*") kind = "Best" if self.best else "Grid" self.header_line( f" Report {kind} {self.score_name} Scores with {self.model} " "in any platform" ) self.header_line("*") print("") line_col = "" for field, underscore in zip(self.header_cols, self.header_lengths): print(f"{field:{underscore}s} ", end="") line_col += "=" * underscore + " " print(f"\n{line_col}") def footer(self, accuracy): self.header_line("*") if self.compare: for key, value in self._compare_totals.items(): self.header_line( f" {key} {self._status_meaning(key)} .....: {value:2d}" ) self.header_line( f" Scores compared to stree_default accuracy (liblinear-ovr) .: " f"{accuracy/BEST_ACCURACY_STREE:7.4f}" ) self.header_line("*") class Excel(BaseReport): row = 6 # alternate lines colors color1 = "#DCE6F1" color2 = "#FDE9D9" color3 = "#B1A0C7" def __init__(self, file_name, compare=False, book=None): super().__init__(file_name) self.compare = compare if self.compare: self._load_best_results( self.data["score_name"], self.data["model"] ) self._compare_totals = {} if book is None: self.excel_file_name = self.file_name.replace(".json", ".xlsx") self.book = xlsxwriter.Workbook(self.excel_file_name) self.set_book_properties() self.close = True else: self.book = book self.close = False self.sheet = self.book.add_worksheet(self.data["model"]) self.max_hyper_width = 0 self.col_hyperparams = 0 @staticmethod def set_properties(book, title): book.set_properties( { "title": title, "subject": "Machine learning results", "author": "Ricardo Montañana Gómez", "manager": "Dr. J. A. Gámez, Dr. J. M. Puerta", "company": "UCLM", "comments": "Created with Python and XlsxWriter", } ) def set_book_properties(self): self.set_properties(self.book, self.get_title()) def get_title(self): return ( f" Report {self.data['model']} ver. {self.data['version']}" f" with {self.data['folds']} Folds " f"cross validation and {len(self.data['seeds'])} random seeds. " f"{self.data['date']} {self.data['time']}" ) def get_file_name(self): return self.excel_file_name def header(self): merge_format = self.book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 18, "bg_color": self.color3, } ) merge_format_subheader = self.book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 16, "bg_color": self.color1, } ) merge_format_subheader_left = self.book.add_format( { "border": 1, "bold": 1, "align": "left", "valign": "vcenter", "font_size": 12, "bg_color": self.color1, } ) header_text = self.get_title() self.sheet.merge_range(0, 0, 0, 11, header_text, merge_format) self.sheet.merge_range( 1, 0, 1, 11, f" {self.data['title']}", merge_format_subheader ) self.sheet.merge_range( 2, 0, 3, 0, f" Score is {self.data['score_name']}", merge_format_subheader, ) self.sheet.merge_range( 2, 1, 3, 3, " Execution time", merge_format_subheader, ) hours = self.data["duration"] / 3600 self.sheet.merge_range( 2, 4, 2, 5, f"{self.data['duration']:7,.2f} s", merge_format_subheader, ) self.sheet.merge_range( 3, 4, 3, 5, f" {hours:5.2f} h", merge_format_subheader, ) self.sheet.merge_range( 2, 6, 3, 6, " ", merge_format_subheader, ) self.sheet.merge_range( 2, 7, 3, 7, "Platform", merge_format_subheader, ) self.sheet.merge_range( 2, 8, 3, 8, f"{self.data['platform']}", merge_format_subheader, ) self.sheet.merge_range( 2, 9, 2, 11, f"Random seeds: {self.data['seeds']}", merge_format_subheader_left, ) self.sheet.merge_range( 3, 9, 3, 11, f"Stratified: {self.data['stratified']}", merge_format_subheader_left, ) header_cols = [ ("Dataset", 30), ("Samples", 10), ("Features", 7), ("Classes", 7), ("Nodes", 7), ("Leaves", 7), ("Depth", 7), ("Score", 12), ("Score Std.", 12), ("Time", 12), ("Time Std.", 12), ("Hyperparameters", 50), ] if self.compare: header_cols.insert(8, ("Stat", 3)) bold = self.book.add_format( { "bold": True, "font_size": 14, "bg_color": self.color3, "border": 1, } ) i = 0 for item, length in header_cols: self.sheet.write(5, i, item, bold) self.sheet.set_column(i, i, length) i += 1 def print_line(self, result): size_n = 14 decimal = self.book.add_format( {"num_format": "0.000000", "font_size": size_n, "border": 1} ) integer = self.book.add_format( {"num_format": "#,###", "font_size": size_n, "border": 1} ) normal = self.book.add_format({"font_size": size_n, "border": 1}) col = 0 if self.row % 2 == 0: normal.set_bg_color(self.color1) decimal.set_bg_color(self.color1) integer.set_bg_color(self.color1) else: normal.set_bg_color(self.color2) decimal.set_bg_color(self.color2) integer.set_bg_color(self.color2) self.sheet.write(self.row, col, result["dataset"], normal) self.sheet.write(self.row, col + 1, result["samples"], integer) self.sheet.write(self.row, col + 2, result["features"], normal) self.sheet.write(self.row, col + 3, result["classes"], normal) self.sheet.write(self.row, col + 4, result["nodes"], normal) self.sheet.write(self.row, col + 5, result["leaves"], normal) self.sheet.write(self.row, col + 6, result["depth"], normal) self.sheet.write(self.row, col + 7, result["score"], decimal) if self.compare: status = self._compute_status(result["dataset"], result["score"]) self.sheet.write(self.row, col + 8, status, normal) col = 9 else: col = 8 self.sheet.write(self.row, col, result["score_std"], decimal) self.sheet.write(self.row, col + 1, result["time"], decimal) self.sheet.write(self.row, col + 2, result["time_std"], decimal) self.sheet.write( self.row, col + 3, str(result["hyperparameters"]), normal ) self.col_hyperparams = col + 3 self.max_hyper_width = max( self.max_hyper_width, len(str(result["hyperparameters"])) ) self.row += 1 def footer(self, accuracy): if self.compare: self.row += 2 bold = self.book.add_format({"bold": True, "font_size": 16}) for key, total in self._compare_totals.items(): self.sheet.write(self.row, 1, key, bold) self.sheet.write(self.row, 2, total, bold) self.sheet.write(self.row, 3, self._status_meaning(key), bold) self.row += 1 message = ( f"** Accuracy compared to stree_default (liblinear-ovr) .: " f"{accuracy/BEST_ACCURACY_STREE:7.4f}" ) bold = self.book.add_format({"bold": True, "font_size": 14}) # set width of the hyperparams column with the maximum width self.sheet.set_column( self.col_hyperparams, self.col_hyperparams, max(self.max_hyper_width + 1, 23), ) self.sheet.write(self.row + 1, 0, message, bold) for c in range(self.row + 2): self.sheet.set_row(c, 20) self.sheet.set_row(0, 25) self.sheet.freeze_panes(6, 1) self.sheet.hide_gridlines() if self.close: self.book.close() class SQL(BaseReport): table_name = "results" def header(self): file_name = self.file_name.replace(".json", ".sql") self.file = open(file_name, "w") def print_line(self, result): attributes = [ "date", "time", "type", "title", "stratified", "score_name", "score", "score_std", "dataset", "classifier", "version", "norm", "stand", "time_spent", "time_spent_std", "parameters", "nodes", "leaves", "depth", "platform", "nfolds", "seeds", ] command_insert = ( f"replace into {self.table_name} (" + ",".join(attributes) + ") values(" + ("'%s'," * len(attributes))[:-1] + ");\n" ) values = ( self.data["date"], self.data["time"], "crossval", self.data["title"], "1" if self.data["stratified"] else "0", self.data["score_name"], result["score"], result["score_std"], result["dataset"], self.data["model"], self.data["version"], 0, 1, result["time"], result["time_std"], str(result["hyperparameters"]).replace("'", '"'), result["nodes"], result["leaves"], result["depth"], self.data["platform"], self.data["folds"], str(self.data["seeds"]), ) self.file.write(command_insert % values) def footer(self, accuracy): self.file.close() class Benchmark: def __init__(self, score): self._score = score self._results = [] self._models = [] self._report = {} self._datasets = set() def get_result_file_name(self): return os.path.join(Folders.results, Files.exreport(self._score)) def compile_results(self): summary = Summary() summary.acquire(given_score=self._score) self._models = summary.get_models() for model in self._models: best = summary.best_result( criterion="model", value=model, score=self._score ) file_name = os.path.join(Folders.results, best["file"]) with open(file_name) as fi: experiment = json.load(fi) for result in experiment["results"]: dataset = result["dataset"] record = { "model": model, "dataset": dataset, "score": result["score"], "score_std": result["score_std"], "file_name": file_name, } self._results.append(record) if model not in self._report: self._report[model] = {} self._report[model][dataset] = record self._datasets.add(dataset) self._datasets = sorted(self._datasets) def save_results(self): # build Files.exreport result_file_name = self.get_result_file_name() with open(result_file_name, "w") as f: f.write(f"classifier, dataset, {self._score}, stdev, file_name\n") for record in self._results: f.write( f"{record['model']}, {record['dataset']}, " f"{record['score']}, {record['score_std']}, " f"{record['file_name']}\n" ) def exreport(self): def end_message(message, file): length = 100 print("*" * length) print(message) print("*" * length) with open(os.path.join(Folders.results, file)) as f: data = f.read().splitlines() for line in data: print(line) # Remove previous results try: shutil.rmtree(Folders.report) os.remove(Files.exreport_pdf) except FileNotFoundError: pass except OSError as os_error: print("Error: %s : %s" % (Folders.report, os_error.strerror)) # Compute Friedman & Holm Tests fout = open( os.path.join(Folders.results, Files.exreport_output(self._score)), "w", ) ferr = open( os.path.join(Folders.results, Files.exreport_err(self._score)), "w" ) result = subprocess.run( [ "Rscript", os.path.join(Folders.src, Files.benchmark_r), self._score, ], stdout=fout, stderr=ferr, ) fout.close() ferr.close() if result.returncode != 0: end_message( "Error computing benchmark", Files.exreport_err(self._score) ) else: end_message("Benchmark Ok", Files.exreport_output(self._score)) Files.open(Files.exreport_pdf) def report(self): print(f"{'Dataset':30s} ", end="") lines = "=" * 30 + " " for model in self._models: print(f"{model:^13s} ", end="") lines += "=" * 13 + " " print(f"\n{lines}") for dataset in self._datasets: print(f"{dataset:30s} ", end="") for model in self._models: result = self._report[model][dataset] print(f"{float(result['score']):.5f}±", end="") print(f"{float(result['score_std']):.3f} ", end="") print("") d_name = next(iter(self._datasets)) print(f"\n{'Model':30s} {'File Name':75s} Score") print("=" * 30 + " " + "=" * 75 + " ========") for model in self._models: file_name = self._report[model][d_name]["file_name"] report = StubReport(file_name) report.report() print(f"{model:^30s} {file_name:75s} {report.score:8.5f}") def get_excel_file_name(self): return os.path.join( Folders.exreport, Files.exreport_excel(self._score) ) def excel(self): book = xlsxwriter.Workbook(self.get_excel_file_name()) Excel.set_properties(book, "Experimentation summary") sheet = book.add_worksheet("Benchmark") normal = book.add_format({"font_size": 14, "border": 1}) decimal = book.add_format( {"num_format": "0.000000", "font_size": 14, "border": 1} ) decimal_total = book.add_format( { "num_format": "0.000000", "font_size": 14, "border": 1, "bold": True, "bg_color": Excel.color3, } ) two_decimal_total = book.add_format( { "num_format": "0.00", "font_size": 14, "border": 1, "bold": True, "bg_color": Excel.color3, } ) merge_format_header = book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 14, "bg_color": Excel.color1, } ) merge_format = book.add_format( { "border": 1, "bold": 1, "align": "center", "valign": "vcenter", "font_size": 14, "bg_color": Excel.color3, } ) merge_format_normal = book.add_format( { "border": 1, "valign": "vcenter", "font_size": 14, } ) row = row_init = 4 def header(): nonlocal row sheet.merge_range( 0, 0, 1, 0, "Benchmark of Models", merge_format_header ) sheet.merge_range( 0, 1, 1, 2, f"Score is {self._score}", merge_format_header ) sheet.set_row(1, 20) # Set columns width sheet.set_column(0, 0, 40) for column in range(2 * len(self._results)): sheet.set_column(column + 1, column + 1, 15) # Set report header # Merge 2 rows sheet.merge_range(row, 0, row + 1, 0, "Dataset", merge_format) column = 1 for model in self._models: # Merge 3 columns sheet.merge_range( row, column, row, column + 2, model, merge_format ) column += 3 row += 1 column = 1 for _ in range(len(self._models)): sheet.write(row, column, "Score", merge_format) sheet.write(row, column + 1, "Stdev", merge_format) sheet.write(row, column + 2, "Rank", merge_format) column += 3 def body(): nonlocal row for dataset in self._datasets: row += 1 normal = book.add_format({"font_size": 14, "border": 1}) decimal = book.add_format( { "num_format": "0.000000", "font_size": 14, "border": 1, } ) if row % 2 == 0: normal.set_bg_color(Excel.color1) decimal.set_bg_color(Excel.color1) else: normal.set_bg_color(Excel.color2) decimal.set_bg_color(Excel.color2) sheet.write(row, 0, f"{dataset:30s}", normal) column = 1 range_cells = "" for col in range(0, len(self._models) * 3, 3): range_cells += chr(ord("B") + col) + str(row + 1) + "," range_cells = range_cells[:-1] for model in self._models: sheet.write( row, column, float(self._report[model][dataset]["score"]), decimal, ) column += 1 sheet.write( row, column, float(self._report[model][dataset]["score_std"]), decimal, ) column += 1 cell_target = chr(ord("B") + column - 3) + str(row + 1) sheet.write_formula( row, column, f"=rank({cell_target},({range_cells}))", normal, ) column += 1 def footer(): nonlocal row for c in range(row_init, row + 2): sheet.set_row(c, 20) # Write totals row += 1 sheet.write(row, 0, "Total", merge_format) for col in range(0, len(self._models) * 3, 3): range_metric = ( f"{chr(ord('B') + col )}7:{chr(ord('B') + col )}{row}" ) sheet.write_formula( row, col + 1, f"=sum({range_metric})/{BEST_ACCURACY_STREE}", decimal_total, ) range_rank = ( f"{chr(ord('B') + col + 2)}7:" f"{chr(ord('B') + col + 2)}{row}" ) sheet.write_formula( row, col + 3, f"=average({range_rank})", two_decimal_total, ) row += 1 def models_files(): nonlocal row row += 2 # Set report header # Merge 2 rows sheet.merge_range(row, 0, row + 1, 0, "Model", merge_format) sheet.merge_range(row, 1, row + 1, 5, "File", merge_format) sheet.merge_range(row, 6, row + 1, 6, "Score", merge_format) row += 1 d_name = next(iter(self._datasets)) for model in self._models: file_name = self._report[model][d_name]["file_name"] report = StubReport(file_name) report.report() row += 1 sheet.write( row, 0, model, normal, ) sheet.merge_range( row, 1, row, 5, file_name, merge_format_normal ) sheet.write( row, 6, report.score, decimal, ) k = Excel(file_name=file_name, book=book) k.report() sheet.freeze_panes(6, 1) sheet.hide_gridlines() def exreport_output(): file_name = os.path.join( Folders.results, Files.exreport_output(self._score) ) sheet = book.add_worksheet("Exreport") normal = book.add_format( { "font_size": 14, "border": 1, "font_color": "blue", "font_name": "Courier", "bold": True, } ) with open(file_name) as f: lines = f.read().splitlines() row = 0 for line in lines: sheet.write(row, 0, line, normal) row += 1 header() body() footer() models_files() exreport_output() book.close() class StubReport(BaseReport): def __init__(self, file_name): super().__init__(file_name=file_name, best_file=False) def print_line(self, line) -> None: pass def header(self) -> None: self.title = self.data["title"] self.duration = self.data["duration"] def footer(self, accuracy: float) -> None: self.accuracy = accuracy self.score = accuracy / BEST_ACCURACY_STREE class Summary: def __init__(self, hidden=False) -> None: self.results = Files().get_all_results(hidden=hidden) self.data = [] self.datasets = {} self.models = set() self.hidden = hidden def get_models(self): return sorted(self.models) def acquire(self, given_score="any") -> None: """Get all results""" for result in self.results: ( score, model, platform, date, time, stratified, ) = Files().split_file_name(result) if given_score in ("any", score): self.models.add(model) report = StubReport( os.path.join( Folders.hidden_results if self.hidden else Folders.results, result, ) ) report.report() entry = dict( score=score, model=model, title=report.title, platform=platform, date=date, time=time, stratified=stratified, file=result, metric=report.score, duration=report.duration, ) self.datasets[result] = report.lines self.data.append(entry) def list_results( self, score=None, model=None, input_data=None, sort_key="date", number=0, ) -> None: """Print the list of results""" data = self.data.copy() if input_data is None else input_data if score: data = [x for x in data if x["score"] == score] if model: data = [x for x in data if x["model"] == model] keys = ( itemgetter(sort_key, "time") if sort_key == "date" else itemgetter(sort_key, "date", "time") ) data = sorted(data, key=keys, reverse=True) if number > 0: data = data[:number] max_file = max(len(x["file"]) for x in data) max_title = max(len(x["title"]) for x in data) if self.hidden: color1 = TextColor.GREEN color2 = TextColor.YELLOW else: color1 = TextColor.LINE1 color2 = TextColor.LINE2 print(color1, end="") print( f"{'Date':10s} {'File':{max_file}s} {'Score':7s} {'Time(h)':7s} " f"{'Title':s}" ) print( "=" * 10 + " " + "=" * max_file + " " + "=" * 8 + " " + "=" * 7 + " " + "=" * max_title ) print( "\n".join( [ (color2 if n % 2 == 0 else color1) + f"{x['date']} {x['file']:{max_file}s} " f"{x['metric']:8.5f} " f"{x['duration']/3600:7.3f} " f"{x['title']}" for n, x in enumerate(data) ] ) ) def show_result(self, data: dict, title: str = "") -> None: def whites(n: int) -> str: return " " * n + "*" if data == {}: print(f"** {title} has No data **") return file_name = data["file"] metric = data["metric"] result = StubReport(os.path.join(Folders.results, file_name)) length = 81 print("*" * length) if title != "": print(f"*{title:^{length - 2}s}*") print("*" + "-" * (length - 2) + "*") print("*" + whites(length - 2)) print(f"* {result.data['title']:^{length - 4}} *") print("*" + whites(length - 2)) print( f"* Model: {result.data['model']:15s} " f"Ver. {result.data['version']:10s} " f"Score: {result.data['score_name']:10s} " f"Metric: {metric:10.7f}" + whites(length - 78) ) print("*" + whites(length - 2)) print( f"* Date : {result.data['date']:15s} Time: " f"{result.data['time']:18s} Time Spent: " f"{result.data['duration']:9,.2f} secs." + whites(length - 78) ) seeds = str(result.data["seeds"]) seeds_len = len(seeds) print( f"* Seeds: {seeds:{seeds_len}s} Platform: " f"{result.data['platform']:17s} " + whites(length - 79) ) print( f"* Stratified: {str(result.data['stratified']):15s}" + whites(length - 30) ) print(f"* {file_name:60s}" + whites(length - 63)) print("*" + whites(length - 2)) print("*" * length) def best_results(self, criterion=None, value=None, score="accuracy", n=10): # First filter the same score results (accuracy, f1, ...) haystack = [x for x in self.data if x["score"] == score] haystack = ( haystack if criterion is None or value is None else [x for x in haystack if x[criterion] == value] ) return ( sorted( haystack, key=lambda x: -1.0 if math.isnan(x["metric"]) else x["metric"], reverse=True, )[:n] if len(haystack) > 0 else {} ) def best_result( self, criterion=None, value=None, score="accuracy" ) -> dict: return self.best_results(criterion, value, score)[0] def best_results_datasets(self, score="accuracy") -> dict: """Get the best results for each dataset""" dt = Datasets() best_results = {} for dataset in dt: best_results[dataset] = (1, "", "", "") haystack = [x for x in self.data if x["score"] == score] # Search for the best results for each dataset for entry in haystack: for dataset in self.datasets[entry["file"]]: if dataset["score"] < best_results[dataset["dataset"]][0]: best_results[dataset["dataset"]] = ( dataset["score"], dataset["hyperparameters"], entry["file"], entry["title"], ) return best_results def show_top(self, score="accuracy", n=10): self.list_results( score=score, input_data=self.best_results(score=score, n=n), sort_key="metric", )