mirror of https://github.com/Doctorado-ML/benchmark.git, synced 2025-08-15 23:45:54 +00:00
#!/usr/bin/env python
"""Build an on-screen report of a result file, optionally generate Excel and
SQL files, and optionally compare the report with the best results obtained
by the model.

If neither a result file nor best results are requested, display the datasets
and their characteristics.
"""
import argparse

import numpy as np

from benchmark.Experiments import Datasets
from benchmark.Results import Report, Excel, SQL, ReportBest
from benchmark.Utils import ALL_METRICS, Files, TextColor, EnvDefault

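# Illustrative usage (a sketch, not part of the original script; the script
# and file names are placeholders). "-m" and "-s" may also be supplied through
# the "model" and "score" environment variables via EnvDefault.
#
#   python <this_script> -m <model> -s accuracy
#       No result file or best results given: print the dataset summary.
#   python <this_script> -m <model> -s accuracy -f <result_file> -c -x
#       Report a result file, compare it with the best results, and also
#       write an Excel version of the report.
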
def parse_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-f",
        "--file",
        type=str,
        required=False,
        help="Result file",
    )
    ap.add_argument(
        "-x",
        "--excel",
        action="store_true",
        required=False,
        help="Generate Excel file",
    )
    ap.add_argument(
        "-q",
        "--sql",
        action="store_true",
        required=False,
        help="Generate SQL file",
    )
    ap.add_argument(
        "-c",
        "--compare",
        action="store_true",
        required=False,
        help="Compare accuracy with best results",
    )
    ap.add_argument(
        "-b",
        "--best",
        type=str,
        required=False,
        help="Best results of models",
    )
    ap.add_argument(
        "-g",
        "--grid",
        type=str,
        required=False,
        help="Grid results of model",
    )
    ap.add_argument(
        "-m",
        "--model",
        action=EnvDefault,
        envvar="model",
        type=str,
        required=True,
        help="Model name",
    )
    ap.add_argument(
        "-s",
        "--score",
        action=EnvDefault,
        envvar="score",
        type=str,
        required=True,
        choices=ALL_METRICS,
        help="Score name {accuracy, f1_macro, ...}",
    )
    args = ap.parse_args()
    return (
        args.file,
        args.excel,
        args.sql,
        args.compare,
        args.best,
        args.grid,
        args.score,
        args.model,
    )

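# benchmark.Utils.EnvDefault is imported above but not defined in this file.
# The class below is a hypothetical stand-in (an assumption about what such a
# custom argparse action typically does, not the actual implementation): it
# falls back to an environment variable when the option is omitted on the
# command line. It is never used here; parse_arguments() keeps the real
# EnvDefault.
class _EnvDefaultSketch(argparse.Action):
    def __init__(self, envvar, required=True, default=None, **kwargs):
        import os  # local import: only this illustrative sketch needs it

        # Take the default from the environment when the variable is set,
        # and drop the "required" constraint once a default is available.
        if not default and envvar in os.environ:
            default = os.environ[envvar]
        if required and default:
            required = False
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
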
def default_report():
    sets = Datasets()
    color_line = TextColor.LINE1
    print(color_line, end="")
    print(f"{'Dataset':30s} Samp. Feat Cls Balance")
    print("=" * 30 + " ===== ==== === " + "=" * 40)
    for line in sets:
        X, y = sets.load(line)
        # Alternate the line color to make the listing easier to read.
        color_line = (
            TextColor.LINE2
            if color_line == TextColor.LINE1
            else TextColor.LINE1
        )
        # Class balance: percentage of samples belonging to each class.
        values, counts = np.unique(y, return_counts=True)
        comp = ""
        sep = ""
        for value, count in zip(values, counts):
            comp += f"{sep}{count/sum(counts)*100:5.2f}%"
            sep = "/ "
        print(color_line, end="")
        print(
            f"{line:30s} {X.shape[0]:5,d} {X.shape[1]:4d} "
            f"{len(np.unique(y)):3d} {comp:40s}"
        )

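# Worked example of the Balance column above (synthetic labels, not a real
# dataset): for y = [0, 0, 0, 1], np.unique returns counts (3, 1), so the
# column reads "75.00%/ 25.00%".
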
if __name__ == "__main__":
    (file, excel, sql, compare, best, grid, score, model) = parse_arguments()
    if grid:
        best = False
    if file is None and best is None:
        # No result file and no best results requested: list the datasets.
        default_report()
    else:
        if best is not None or grid is not None:
            # Report the best (or grid search) results stored for the model.
            report = ReportBest(score, model, best, grid)
            report.report()
        else:
            # Report the given result file on screen.
            report = Report(file, compare)
            report.report()
            if excel:
                excel = Excel(file, compare)
                excel.report()
                Files.open(excel.get_file_name())
            if sql:
                sql = SQL(file)
                sql.report()