Add .env params file support

2025-08-15 23:45:54 +00:00 · 2021-09-30 11:41:15 +02:00
parent 1056fcd42e
commit 339a948bfc
7 changed files with 88 additions and 16 deletions
--- a/src/Experiments.py
+++ b/src/Experiments.py
@@ -7,7 +7,7 @@ from datetime import datetime
 from tqdm import tqdm
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import StratifiedKFold, cross_validate
+from sklearn.model_selection import StratifiedKFold, KFold, cross_validate
 from Utils import Folders, Files
 from Models import Models

@@ -117,6 +117,7 @@ class Experiment:
        self,
        score_name,
        model_name,
+        stratified,
        datasets,
        hyperparams_dict,
        hyperparams_file,
@@ -130,11 +131,18 @@ class Experiment:
        self.output_file = os.path.join(
            Folders.results,
            Files.results(
-                score_name, model_name, platform, self.date, self.time
+                score_name,
+                model_name,
+                platform,
+                self.date,
+                self.time,
+                stratified,
            ),
        )
        self.score_name = score_name
        self.model_name = model_name
+        self.stratified = stratified == "1"
+        self.stratified_class = StratifiedKFold if self.stratified else KFold
        self.model = Models.get_model(model_name)
        self.datasets = datasets
        dictionary = json.loads(hyperparams_dict)
@@ -185,7 +193,7 @@ class Experiment:
            loop.set_description(f"Seed({random_state:4d})")
            random.seed(random_state)
            np.random.seed(random_state)
-            kfold = StratifiedKFold(
+            kfold = self.stratified_class(
                shuffle=True, random_state=random_state, n_splits=self.folds
            )
            clf = self._build_classifier(random_state, hyperparameters)
@@ -229,6 +237,7 @@ class Experiment:
        output = {}
        output["score_name"] = self.score_name
        output["model"] = self.model_name
+        output["stratified"] = self.stratified
        output["folds"] = self.folds
        output["date"] = self.date
        output["time"] = self.time
--- a/src/Results.py
+++ b/src/Results.py
@@ -139,7 +139,10 @@ class Report(BaseReport):
            f" Report {self.data['model']} with {self.data['folds']} Folds "
            f"cross validation and {len(self.data['seeds'])} random seeds"
        )
-        self.header_line(f" Random seeds: {self.data['seeds']}")
+        self.header_line(
+            f" Random seeds: {self.data['seeds']} Stratified: "
+            f"{self.data['stratified']}"
+        )
        self.header_line(
            f" Execution took {self.data['duration']:7.2f} seconds on an "
            f"{self.data['platform']}"
@@ -271,11 +274,20 @@ class Excel(BaseReport):
            subheader,
        )
        self.sheet.write(
-            1, 5, f"Random seeds: {self.data['seeds']}", subheader
+            1,
+            5,
+            f"Random seeds: {self.data['seeds']}",
+            subheader,
        )
        self.sheet.write(
            2, 0, f" Score is {self.data['score_name']}", subheader
        )
+        self.sheet.write(
+            2,
+            5,
+            f"Stratified: {self.data['stratified']}",
+            subheader,
+        )
        header_cols = [
            ("Dataset", 30),
            ("Samples", 10),
@@ -364,6 +376,7 @@ class SQL(BaseReport):
            "date",
            "time",
            "type",
+            "stratified",
            "score_name",
            "score",
            "score_std",
@@ -392,6 +405,7 @@ class SQL(BaseReport):
            self.data["date"],
            self.data["time"],
            "crossval",
+            self.data["stratified"],
            self.data["score_name"],
            result["score"],
            result["score_std"],
--- a/src/Utils.py
+++ b/src/Utils.py
@@ -1,5 +1,6 @@
 import os
 import subprocess
+import argparse


 class Folders:
@@ -17,6 +18,7 @@ class Files:
    cmd_open_linux = "/usr/bin/xdg-open"
    exreport_pdf = "Rplots.pdf"
    benchmark_r = "benchmark.r"
+    arguments = ".env"

    @staticmethod
    def exreport_output(score):
@@ -39,8 +41,11 @@ class Files:
        return f"best_results_{score}_{model}.json"

    @staticmethod
-    def results(score, model, platform, date, time):
-        return f"results_{score}_{model}_{platform}_{date}_{time}.json"
+    def results(score, model, platform, date, time, stratified):
+        # results_<score>_<model>_<platform>_<date>_<time>_<stratified>.json
+        stem = f"results_{score}_{model}_{platform}_{date}_{time}"
+        suffix = f"_{stratified}.json"
+        return stem + suffix

    @staticmethod
    def results_suffixes(score="", model=""):
@@ -77,3 +82,46 @@ class Symbols:
    black_star = "\N{black star}"
    equal_best = check_mark
    better_best = black_star
+
+
+class EnvDefault(argparse.Action):
+    """argparse action whose default value is read from the ``.env`` file.
+
+    The file named by ``Files.arguments`` holds ``key=value`` lines. When the
+    option has no explicit default and ``envvar`` is a key in that file, the
+    stored value becomes the default and the option stops being required.
+    """
+
+    # Thanks to https://stackoverflow.com/users/445507/russell-heilling
+    def __init__(self, envvar, required=True, default=None, **kwargs):
+        self._args = self._load_arguments()
+        if not default and envvar in self._args:
+            default = self._args[envvar]
+        if required and default:
+            # A usable default means the user may omit the option.
+            required = False
+        super().__init__(default=default, required=required, **kwargs)
+
+    @staticmethod
+    def _load_arguments():
+        """Return the key/value pairs of the parameters file as a dict."""
+        arguments = {}
+        try:
+            with open(Files.arguments) as f:
+                lines = f.read().splitlines()
+        except FileNotFoundError:
+            # No parameters file: every value must come from the command line.
+            return arguments
+        for line in lines:
+            key, separator, value = line.partition("=")
+            key = key.strip()
+            if not separator or not key or key.startswith("#"):
+                # Blank lines, comments and lines without "=" are skipped.
+                continue
+            # partition keeps any further "=" inside the value.
+            arguments[key] = value.strip()
+        return arguments
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        """Store the value given on the command line in the namespace."""
+        setattr(namespace, self.dest, values)
--- a/src/benchmark.py
+++ b/src/benchmark.py
@@ -1,5 +1,5 @@
 from Results import Benchmark
-from Utils import Files
+from Utils import Files, EnvDefault
 import argparse


@@ -8,6 +8,8 @@ def parse_arguments():
    ap.add_argument(
        "-s",
        "--score",
+        action=EnvDefault,
+        envvar="score",
        type=str,
        required=True,
        help="score name {accuracy, f1_macro, ...}",
--- a/src/build_best.py
+++ b/src/build_best.py
@@ -1,6 +1,7 @@
 import argparse
 from Results import ReportBest
 from Experiments import Datasets, BestResults
+from Utils import EnvDefault

 """Build a json file with the best results of a model and its hyperparameters
 """
@@ -11,6 +12,8 @@ def parse_arguments():
    ap.add_argument(
        "-s",
        "--score",
+        action=EnvDefault,
+        envvar="score",
        type=str,
        required=True,
        help="score name {accuracy, f1_macro, ...}",
@@ -18,10 +21,11 @@ def parse_arguments():
    ap.add_argument(
        "-m",
        "--model",
+        action=EnvDefault,
+        envvar="model",
        type=str,
-        required=False,
-        default="STree",
-        help="model name, dfault STree",
+        required=True,
+        help="model name.",
    )
    ap.add_argument(
        "-r",
--- a/src/main.py
+++ b/src/main.py
@@ -1,6 +1,7 @@
 import argparse
 from Experiments import Experiment, Datasets
 from Results import Report
+from Utils import EnvDefault

 """Do experiment and build result file, optionally print report with results
 """
@@ -11,6 +12,8 @@ def parse_arguments():
    ap.add_argument(
        "-s",
        "--score",
+        action=EnvDefault,
+        envvar="score",
        type=str,
        required=True,
        help="score name {accuracy, f1_macro, ...}",
@@ -18,6 +21,8 @@ def parse_arguments():
    ap.add_argument(
        "-P",
        "--platform",
+        action=EnvDefault,
+        envvar="platform",
        type=str,
        required=True,
        help="Platform where the test is run",
@@ -26,16 +31,16 @@ def parse_arguments():
        "-m",
        "--model",
        type=str,
-        required=False,
-        default="STree",
-        help="model name, dfault STree",
+        required=True,
+        help="model name",
    )
    ap.add_argument(
        "-n",
        "--n_folds",
+        action=EnvDefault,
+        envvar="n_folds",
        type=int,
-        required=False,
-        default=5,
+        required=True,
        help="number of folds",
    )
    ap.add_argument(
@@ -60,8 +65,18 @@ def parse_arguments():
        required=False,
        help="Report results",
    )
+    ap.add_argument(
+        "-t",
+        "--stratified",
+        action=EnvDefault,
+        envvar="stratified",
+        type=str,
+        required=True,
+        help="Stratified",
+    )
    args = ap.parse_args()
    return (
+        args.stratified,
        args.score,
        args.model,
        args.n_folds,
@@ -74,6 +89,7 @@ def parse_arguments():


 (
+    stratified,
    score,
    model,
    folds,
@@ -86,6 +102,7 @@ def parse_arguments():
 job = Experiment(
    score_name=score,
    model_name=model,
+    stratified=stratified,
    datasets=Datasets(),
    hyperparams_dict=hyperparameters,
    hyperparams_file=paramfile,
--- a/src/report.py
+++ b/src/report.py
@@ -56,6 +56,7 @@ def parse_arguments():
        help="score used in best results model",
    )
    args = ap.parse_args()
+
    return (
        args.file,
        args.excel,