diff --git a/src/Experiments.py b/src/Experiments.py
index 0f4a478..919ed57 100644
--- a/src/Experiments.py
+++ b/src/Experiments.py
@@ -7,7 +7,7 @@ from datetime import datetime
 from tqdm import tqdm
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import StratifiedKFold, cross_validate
+from sklearn.model_selection import StratifiedKFold, KFold, cross_validate
 from Utils import Folders, Files
 from Models import Models
 
@@ -117,6 +117,7 @@ class Experiment:
         self,
         score_name,
         model_name,
+        stratified,
         datasets,
         hyperparams_dict,
         hyperparams_file,
@@ -130,11 +131,18 @@ class Experiment:
         self.output_file = os.path.join(
             Folders.results,
             Files.results(
-                score_name, model_name, platform, self.date, self.time
+                score_name,
+                model_name,
+                platform,
+                self.date,
+                self.time,
+                stratified,
             ),
         )
         self.score_name = score_name
         self.model_name = model_name
+        self.stratified = stratified == "1"
+        self.stratified_class = StratifiedKFold if self.stratified else KFold
         self.model = Models.get_model(model_name)
         self.datasets = datasets
         dictionary = json.loads(hyperparams_dict)
@@ -185,7 +193,7 @@ class Experiment:
             loop.set_description(f"Seed({random_state:4d})")
             random.seed(random_state)
             np.random.seed(random_state)
-            kfold = StratifiedKFold(
+            kfold = self.stratified_class(
                 shuffle=True, random_state=random_state, n_splits=self.folds
             )
             clf = self._build_classifier(random_state, hyperparameters)
@@ -229,6 +237,7 @@ class Experiment:
         output = {}
         output["score_name"] = self.score_name
         output["model"] = self.model_name
+        output["stratified"] = self.stratified
         output["folds"] = self.folds
         output["date"] = self.date
         output["time"] = self.time
diff --git a/src/Results.py b/src/Results.py
index 78fa0b1..5047128 100644
--- a/src/Results.py
+++ b/src/Results.py
@@ -139,7 +139,10 @@ class Report(BaseReport):
             f" Report {self.data['model']} with {self.data['folds']} Folds "
             f"cross validation and {len(self.data['seeds'])} random seeds"
         )
-        self.header_line(f" Random seeds: {self.data['seeds']}")
+        self.header_line(
+            f" Random seeds: {self.data['seeds']} Stratified: "
+            f"{self.data['stratified']}"
+        )
         self.header_line(
             f" Execution took {self.data['duration']:7.2f} seconds on an "
             f"{self.data['platform']}"
@@ -271,11 +274,20 @@ class Excel(BaseReport):
             subheader,
         )
         self.sheet.write(
-            1, 5, f"Random seeds: {self.data['seeds']}", subheader
+            1,
+            5,
+            f"Random seeds: {self.data['seeds']}",
+            subheader,
         )
         self.sheet.write(
             2, 0, f" Score is {self.data['score_name']}", subheader
         )
+        self.sheet.write(
+            2,
+            5,
+            f"Stratified: {self.data['stratified']}",
+            subheader,
+        )
         header_cols = [
             ("Dataset", 30),
             ("Samples", 10),
@@ -364,6 +376,7 @@ class SQL(BaseReport):
             "date",
             "time",
             "type",
+            "stratified",
             "score_name",
             "score",
             "score_std",
@@ -392,6 +405,7 @@ class SQL(BaseReport):
             self.data["date"],
             self.data["time"],
             "crossval",
+            self.data["stratified"],
             self.data["score_name"],
             result["score"],
             result["score_std"],
diff --git a/src/Utils.py b/src/Utils.py
index 9e2c4d2..2ff94f2 100644
--- a/src/Utils.py
+++ b/src/Utils.py
@@ -1,5 +1,6 @@
 import os
 import subprocess
+import argparse
 
 
 class Folders:
@@ -17,6 +18,7 @@ class Files:
     cmd_open_linux = "/usr/bin/xdg-open"
     exreport_pdf = "Rplots.pdf"
     benchmark_r = "benchmark.r"
+    arguments = ".env"
 
     @staticmethod
     def exreport_output(score):
@@ -39,8 +41,11 @@ class Files:
         return f"best_results_{score}_{model}.json"
 
     @staticmethod
-    def results(score, model, platform, date, time):
-        return f"results_{score}_{model}_{platform}_{date}_{time}.json"
+    def results(score, model, platform, date, time, stratified):
+        return (
+            f"results_{score}_{model}_{platform}_{date}_{time}_"
+            f"{stratified}.json"
+        )
 
     @staticmethod
     def results_suffixes(score="", model=""):
@@ -77,3 +82,23 @@ class Symbols:
     black_star = "\N{black star}"
     equal_best = check_mark
     better_best = black_star
+
+
+class EnvDefault(argparse.Action):
+    # Thanks to https://stackoverflow.com/users/445507/russell-heilling
+    def __init__(self, envvar, required=True, default=None, **kwargs):
+        self._args = {}
+        with open(Files.arguments) as f:
+            for line in f.read().splitlines():
+                key, value = line.split("=")
+                self._args[key] = value
+        if not default and envvar in self._args:
+            default = self._args[envvar]
+        if required and default:
+            required = False
+        super(EnvDefault, self).__init__(
+            default=default, required=required, **kwargs
+        )
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, values)
diff --git a/src/benchmark.py b/src/benchmark.py
index 345bc70..21b977c 100644
--- a/src/benchmark.py
+++ b/src/benchmark.py
@@ -1,5 +1,5 @@
 from Results import Benchmark
-from Utils import Files
+from Utils import Files, EnvDefault
 import argparse
 
 
@@ -8,6 +8,8 @@ def parse_arguments():
     ap.add_argument(
         "-s",
         "--score",
+        action=EnvDefault,
+        envvar="score",
         type=str,
         required=True,
         help="score name {accuracy, f1_macro, ...}",
diff --git a/src/build_best.py b/src/build_best.py
index d3be595..fcf1f50 100644
--- a/src/build_best.py
+++ b/src/build_best.py
@@ -1,6 +1,7 @@
 import argparse
 from Results import ReportBest
 from Experiments import Datasets, BestResults
+from Utils import EnvDefault
 
 """Build a json file with the best results of a model and its hyperparameters
 """
@@ -11,6 +12,8 @@ def parse_arguments():
     ap.add_argument(
         "-s",
         "--score",
+        action=EnvDefault,
+        envvar="score",
         type=str,
         required=True,
         help="score name {accuracy, f1_macro, ...}",
@@ -18,10 +21,11 @@ def parse_arguments():
     ap.add_argument(
         "-m",
         "--model",
+        action=EnvDefault,
+        envvar="model",
         type=str,
-        required=False,
-        default="STree",
-        help="model name, dfault STree",
+        required=True,
+        help="model name.",
     )
     ap.add_argument(
         "-r",
diff --git a/src/main.py b/src/main.py
index 156b7c8..e81f775 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,6 +1,7 @@
 import argparse
 from Experiments import Experiment, Datasets
 from Results import Report
+from Utils import EnvDefault
 
 """Do experiment and build result file, optionally print report with results
 """
@@ -11,6 +12,8 @@ def parse_arguments():
     ap.add_argument(
         "-s",
         "--score",
+        action=EnvDefault,
+        envvar="score",
         type=str,
         required=True,
         help="score name {accuracy, f1_macro, ...}",
@@ -18,6 +21,8 @@ def parse_arguments():
     ap.add_argument(
         "-P",
         "--platform",
+        action=EnvDefault,
+        envvar="platform",
         type=str,
         required=True,
         help="Platform where the test is run",
@@ -26,16 +31,16 @@ def parse_arguments():
         "-m",
         "--model",
         type=str,
-        required=False,
-        default="STree",
-        help="model name, dfault STree",
+        required=True,
+        help="model name",
     )
     ap.add_argument(
         "-n",
         "--n_folds",
+        action=EnvDefault,
+        envvar="n_folds",
         type=int,
-        required=False,
-        default=5,
+        required=True,
         help="number of folds",
     )
     ap.add_argument(
@@ -60,8 +65,18 @@ def parse_arguments():
         required=False,
         help="Report results",
     )
+    ap.add_argument(
+        "-t",
+        "--stratified",
+        action=EnvDefault,
+        envvar="stratified",
+        type=str,
+        required=True,
+        help="Stratified",
+    )
     args = ap.parse_args()
     return (
+        args.stratified,
         args.score,
         args.model,
         args.n_folds,
@@ -74,6 +89,7 @@ def parse_arguments():
 
 
 (
+    stratified,
     score,
     model,
     folds,
@@ -86,6 +102,7 @@ def parse_arguments():
 job = Experiment(
     score_name=score,
     model_name=model,
+    stratified=stratified,
     datasets=Datasets(),
     hyperparams_dict=hyperparameters,
     hyperparams_file=paramfile,
diff --git a/src/report.py b/src/report.py
index 379c64f..fb1090b 100644
--- a/src/report.py
+++ b/src/report.py
@@ -56,6 +56,7 @@ def parse_arguments():
         help="score used in best results model",
     )
     args = ap.parse_args()
+
     return (
         args.file,
         args.excel,
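
Usage sketch (reviewer note, not part of the patch): the EnvDefault action added in src/Utils.py lets required command-line options fall back to values read from the .env file named by Files.arguments, so scripts such as main.py no longer need every flag repeated on each run. The snippet below is a minimal, hypothetical illustration of that behaviour; the .env contents and the parse_args([]) call are assumptions for demonstration only.

# Assumes a .env file in the working directory containing, e.g.:
#   score=accuracy
#   stratified=0
import argparse
from Utils import EnvDefault

ap = argparse.ArgumentParser()
ap.add_argument(
    "-s",
    "--score",
    action=EnvDefault,
    envvar="score",
    type=str,
    required=True,
    help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(
    "-t",
    "--stratified",
    action=EnvDefault,
    envvar="stratified",
    type=str,
    required=True,
    help="Stratified",
)
# Because EnvDefault found both keys in .env, the options are no longer
# required and the file values become the argparse defaults.
args = ap.parse_args([])
print(args.score, args.stratified)  # accuracy 0

Passing an explicit flag (for example "-s f1_macro") still overrides the file value, since EnvDefault.__call__ simply stores whatever value argparse receives on the command line.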