Add .env parameters file support

This commit is contained in:
2021-09-30 11:41:15 +02:00
parent 1056fcd42e
commit 339a948bfc
7 changed files with 88 additions and 16 deletions

View File

@@ -7,7 +7,7 @@ from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.model_selection import StratifiedKFold, KFold, cross_validate
from Utils import Folders, Files
from Models import Models
@@ -117,6 +117,7 @@ class Experiment:
self,
score_name,
model_name,
stratified,
datasets,
hyperparams_dict,
hyperparams_file,
@@ -130,11 +131,18 @@ class Experiment:
self.output_file = os.path.join(
Folders.results,
Files.results(
score_name, model_name, platform, self.date, self.time
score_name,
model_name,
platform,
self.date,
self.time,
stratified,
),
)
self.score_name = score_name
self.model_name = model_name
self.stratified = stratified == "1"
self.stratified_class = StratifiedKFold if self.stratified else KFold
self.model = Models.get_model(model_name)
self.datasets = datasets
dictionary = json.loads(hyperparams_dict)
@@ -185,7 +193,7 @@ class Experiment:
loop.set_description(f"Seed({random_state:4d})")
random.seed(random_state)
np.random.seed(random_state)
kfold = StratifiedKFold(
kfold = self.stratified_class(
shuffle=True, random_state=random_state, n_splits=self.folds
)
clf = self._build_classifier(random_state, hyperparameters)
@@ -229,6 +237,7 @@ class Experiment:
output = {}
output["score_name"] = self.score_name
output["model"] = self.model_name
output["stratified"] = self.stratified
output["folds"] = self.folds
output["date"] = self.date
output["time"] = self.time

View File

@@ -139,7 +139,10 @@ class Report(BaseReport):
f" Report {self.data['model']} with {self.data['folds']} Folds "
f"cross validation and {len(self.data['seeds'])} random seeds"
)
self.header_line(f" Random seeds: {self.data['seeds']}")
self.header_line(
f" Random seeds: {self.data['seeds']} Stratified: "
f"{self.data['stratified']}"
)
self.header_line(
f" Execution took {self.data['duration']:7.2f} seconds on an "
f"{self.data['platform']}"
@@ -271,11 +274,20 @@ class Excel(BaseReport):
subheader,
)
self.sheet.write(
1, 5, f"Random seeds: {self.data['seeds']}", subheader
1,
5,
f"Random seeds: {self.data['seeds']}",
subheader,
)
self.sheet.write(
2, 0, f" Score is {self.data['score_name']}", subheader
)
self.sheet.write(
2,
5,
f"Stratified: {self.data['stratified']}",
subheader,
)
header_cols = [
("Dataset", 30),
("Samples", 10),
@@ -364,6 +376,7 @@ class SQL(BaseReport):
"date",
"time",
"type",
"stratified",
"score_name",
"score",
"score_std",
@@ -392,6 +405,7 @@ class SQL(BaseReport):
self.data["date"],
self.data["time"],
"crossval",
self.data["stratified"],
self.data["score_name"],
result["score"],
result["score_std"],

View File

@@ -1,5 +1,6 @@
import os
import subprocess
import argparse
class Folders:
@@ -17,6 +18,7 @@ class Files:
cmd_open_linux = "/usr/bin/xdg-open"
exreport_pdf = "Rplots.pdf"
benchmark_r = "benchmark.r"
arguments = ".env"
@staticmethod
def exreport_output(score):
@@ -39,8 +41,11 @@ class Files:
return f"best_results_{score}_{model}.json"
@staticmethod
def results(score, model, platform, date, time):
return f"results_{score}_{model}_{platform}_{date}_{time}.json"
def results(score, model, platform, date, time, stratified):
return (
f"results_{score}_{model}_{platform}_{date}_{time}_"
f"{stratified}.json"
)
@staticmethod
def results_suffixes(score="", model=""):
@@ -77,3 +82,23 @@ class Symbols:
black_star = "\N{black star}"
equal_best = check_mark
better_best = black_star
class EnvDefault(argparse.Action):
    """argparse action whose default value is looked up in the arguments file.

    The file named by ``Files.arguments`` is read as ``key=value`` lines.
    When the option has no explicit default and ``envvar`` is a key of that
    file, the stored value becomes the option's default, and an option
    declared as required stops being required because a value is available.
    """

    # Thanks to https://stackoverflow.com/users/445507/russell-heilling

    @staticmethod
    def _parse_env(lines):
        """Turn an iterable of ``key=value`` text lines into a dict.

        Blank lines and lines starting with ``#`` are ignored. Only the
        first ``=`` separates key from value, so values may themselves
        contain ``=``. Keys and values are stripped of surrounding
        whitespace.

        Raises:
            ValueError: if a non-blank, non-comment line has no ``=``.
        """
        parsed = {}
        for raw in lines:
            entry = raw.strip()
            if not entry or entry.startswith("#"):
                continue
            key, separator, value = entry.partition("=")
            if not separator:
                raise ValueError(
                    f"Malformed line in arguments file: {entry!r}"
                )
            parsed[key.strip()] = value.strip()
        return parsed

    def __init__(self, envvar, required=True, default=None, **kwargs):
        """Resolve the default for ``envvar`` and initialise the action.

        Args:
            envvar: key to look up in the arguments file.
            required: whether the option must be supplied; reset to False
                when a default value is available.
            default: explicit default; when truthy it takes precedence over
                the value stored in the arguments file.
            **kwargs: forwarded unchanged to ``argparse.Action``.
        """
        try:
            with open(Files.arguments) as f:
                self._args = self._parse_env(f)
        except FileNotFoundError:
            # No arguments file: nothing to pre-fill, so required options
            # simply have to be given on the command line.
            self._args = {}
        if not default and envvar in self._args:
            default = self._args[envvar]
        if required and default:
            required = False
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the value given on the command line under ``self.dest``."""
        setattr(namespace, self.dest, values)

View File

@@ -1,5 +1,5 @@
from Results import Benchmark
from Utils import Files
from Utils import Files, EnvDefault
import argparse
@@ -8,6 +8,8 @@ def parse_arguments():
ap.add_argument(
"-s",
"--score",
action=EnvDefault,
envvar="score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",

View File

@@ -1,6 +1,7 @@
import argparse
from Results import ReportBest
from Experiments import Datasets, BestResults
from Utils import EnvDefault
"""Build a json file with the best results of a model and its hyperparameters
"""
@@ -11,6 +12,8 @@ def parse_arguments():
ap.add_argument(
"-s",
"--score",
action=EnvDefault,
envvar="score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
@@ -18,10 +21,11 @@ def parse_arguments():
ap.add_argument(
"-m",
"--model",
action=EnvDefault,
envvar="model",
type=str,
required=False,
default="STree",
help="model name, dfault STree",
required=True,
help="model name.",
)
ap.add_argument(
"-r",

View File

@@ -1,6 +1,7 @@
import argparse
from Experiments import Experiment, Datasets
from Results import Report
from Utils import EnvDefault
"""Do experiment and build result file, optionally print report with results
"""
@@ -11,6 +12,8 @@ def parse_arguments():
ap.add_argument(
"-s",
"--score",
action=EnvDefault,
envvar="score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
@@ -18,6 +21,8 @@ def parse_arguments():
ap.add_argument(
"-P",
"--platform",
action=EnvDefault,
envvar="platform",
type=str,
required=True,
help="Platform where the test is run",
@@ -26,16 +31,16 @@ def parse_arguments():
"-m",
"--model",
type=str,
required=False,
default="STree",
help="model name, dfault STree",
required=True,
help="model name",
)
ap.add_argument(
"-n",
"--n_folds",
action=EnvDefault,
envvar="n_folds",
type=int,
required=False,
default=5,
required=True,
help="number of folds",
)
ap.add_argument(
@@ -60,8 +65,18 @@ def parse_arguments():
required=False,
help="Report results",
)
ap.add_argument(
"-t",
"--stratified",
action=EnvDefault,
envvar="stratified",
type=str,
required=True,
help="Stratified",
)
args = ap.parse_args()
return (
args.stratified,
args.score,
args.model,
args.n_folds,
@@ -74,6 +89,7 @@ def parse_arguments():
(
stratified,
score,
model,
folds,
@@ -86,6 +102,7 @@ def parse_arguments():
job = Experiment(
score_name=score,
model_name=model,
stratified=stratified,
datasets=Datasets(),
hyperparams_dict=hyperparameters,
hyperparams_file=paramfile,

View File

@@ -56,6 +56,7 @@ def parse_arguments():
help="score used in best results model",
)
args = ap.parse_args()
return (
args.file,
args.excel,