diff --git a/benchmark/Models.py b/benchmark/Models.py
index 1378218..9b363c3 100644
--- a/benchmark/Models.py
+++ b/benchmark/Models.py
@@ -4,6 +4,7 @@ from sklearn.ensemble import (
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
+    GradientBoostingClassifier,
 )
 from sklearn.svm import SVC
 from stree import Stree
@@ -14,50 +15,48 @@ from xgboost import XGBClassifier
 
 class Models:
     @staticmethod
-    def get_model(name, random_state=None):
-        if name == "STree":
-            return Stree(random_state=random_state)
-        if name == "Cart":
-            return DecisionTreeClassifier(random_state=random_state)
-        if name == "ExtraTree":
-            return ExtraTreeClassifier(random_state=random_state)
-        if name == "Wodt":
-            return Wodt(random_state=random_state)
-        if name == "SVC":
-            return SVC(random_state=random_state)
-        if name == "ODTE":
-            return Odte(
+    def define_models(random_state):
+        return {
+            "STree": Stree(random_state=random_state),
+            "Cart": DecisionTreeClassifier(random_state=random_state),
+            "ExtraTree": ExtraTreeClassifier(random_state=random_state),
+            "Wodt": Wodt(random_state=random_state),
+            "SVC": SVC(random_state=random_state),
+            "ODTE": Odte(
                 base_estimator=Stree(random_state=random_state),
                 random_state=random_state,
-            )
-        if name == "BaggingStree":
-            clf = Stree(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "BaggingWodt":
-            clf = Wodt(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "XGBoost":
-            return XGBClassifier(random_state=random_state)
-        if name == "AdaBoostStree":
-            clf = Stree(
+            ),
+            "BaggingStree": BaggingClassifier(
+                base_estimator=Stree(random_state=random_state),
                 random_state=random_state,
-            )
-            return AdaBoostClassifier(
-                base_estimator=clf,
+            ),
+            "BaggingWodt": BaggingClassifier(
+                base_estimator=Wodt(random_state=random_state),
+                random_state=random_state,
+            ),
+            "XGBoost": XGBClassifier(random_state=random_state),
+            "AdaBoostStree": AdaBoostClassifier(
+                base_estimator=Stree(
+                    random_state=random_state,
+                ),
                 algorithm="SAMME",
                 random_state=random_state,
-            )
-        if name == "RandomForest":
-            return RandomForestClassifier(random_state=random_state)
-        msg = f"No model recognized {name}"
-        if name in ("Stree", "stree"):
-            msg += ", did you mean STree?"
-        elif name in ("odte", "Odte"):
-            msg += ", did you mean ODTE?"
+            ),
+            "GBC": GradientBoostingClassifier(random_state=random_state),
+            "RandomForest": RandomForestClassifier(random_state=random_state),
+        }
+
+    @staticmethod
+    def get_model(name, random_state=None):
+        try:
+            models = Models.define_models(random_state)
+            return models[name]
+        except KeyError:
+            msg = f"No model recognized {name}"
+            if name in ("Stree", "stree"):
+                msg += ", did you mean STree?"
+            elif name in ("odte", "Odte"):
+                msg += ", did you mean ODTE?"
         raise ValueError(msg)
 
     @staticmethod
@@ -80,6 +79,10 @@ class Models:
             leaves = mean([x.get_n_leaves() for x in result.estimators_])
             depth = mean([x.get_depth() for x in result.estimators_])
             nodes = mean([x.tree_.node_count for x in result.estimators_])
+        elif name == "GBC":
+            leaves = mean([x[0].get_n_leaves() for x in result.estimators_])
+            depth = mean([x[0].get_depth() for x in result.estimators_])
+            nodes = mean([x[0].tree_.node_count for x in result.estimators_])
         elif name == "SVC" or name == "XGBoost":
             nodes = leaves = depth = 0
         else:
diff --git a/benchmark/Utils.py b/benchmark/Utils.py
index 238eed6..b67f8e9 100644
--- a/benchmark/Utils.py
+++ b/benchmark/Utils.py
@@ -3,6 +3,13 @@ import subprocess
 import argparse
 
 BEST_ACCURACY_STREE = 40.282203
+ALL_METRICS = (
+    "accuracy",
+    "f1-macro",
+    "f1-micro",
+    "f1-weighted",
+    "roc-auc-ovr",
+)
 
 
 class Folders:
diff --git a/benchmark/scripts/be_benchmark b/benchmark/scripts/be_benchmark
index a4dac54..12b0fd6 100755
--- a/benchmark/scripts/be_benchmark
+++ b/benchmark/scripts/be_benchmark
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 from benchmark.Results import Benchmark
-from benchmark.Utils import Files, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, EnvDefault
 import argparse
 
 
@@ -13,6 +13,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
diff --git a/benchmark/scripts/be_best b/benchmark/scripts/be_best
index b6a8312..2e32aed 100755
--- a/benchmark/scripts/be_best
+++ b/benchmark/scripts/be_best
@@ -2,7 +2,7 @@
 import argparse
 import json
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 
 def parse_arguments():
@@ -14,8 +14,8 @@
         action=EnvDefault,
         envvar="score",
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     args = ap.parse_args()
     return (args.score,)
@@ -23,15 +23,7 @@
 
 (score,) = parse_arguments()
 
-all_metrics = [
-    "accuracy",
-    "f1-macro",
-    "f1-micro",
-    "f1-weighted",
-    "roc-auc-ovr",
-]
-
-metrics = all_metrics if score == "all" else [score]
+metrics = ALL_METRICS if score == "all" else [score]
 summary = Summary()
 summary.acquire()
 
diff --git a/benchmark/scripts/be_build_best b/benchmark/scripts/be_build_best
index 9ee6aa5..6faa3dd 100755
--- a/benchmark/scripts/be_build_best
+++ b/benchmark/scripts/be_build_best
@@ -2,7 +2,7 @@
 import argparse
 from benchmark.Results import ReportBest
 from benchmark.Experiments import Datasets, BestResults
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault
 
 """Build a json file with the best results of a model and its hyperparameters
 """
@@ -17,6 +17,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
diff --git a/benchmark/scripts/be_grid b/benchmark/scripts/be_grid
index 579cb30..4e5686b 100755
--- a/benchmark/scripts/be_grid
+++ b/benchmark/scripts/be_grid
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Experiments import GridSearch, Datasets
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 """Do experiment and build result file, optionally print report with results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
diff --git a/benchmark/scripts/be_list b/benchmark/scripts/be_list
index d903590..400576d 100755
--- a/benchmark/scripts/be_list
+++ b/benchmark/scripts/be_list
@@ -1,8 +1,9 @@
 #! /usr/bin/env python
 import os
 import argparse
+from benchmark.Experiments import Models
 from benchmark.Results import Summary
-from benchmark.Utils import Folders
+from benchmark.Utils import ALL_METRICS, Folders
 
 """List experiments of a model
 """
@@ -21,14 +22,18 @@ def parse_arguments():
         "--score",
         type=str,
         required=False,
-        help="score used in experiment",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=False,
-        help="model used in experiment",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-k",
diff --git a/benchmark/scripts/be_main b/benchmark/scripts/be_main
index cb9cfea..1a57372 100755
--- a/benchmark/scripts/be_main
+++ b/benchmark/scripts/be_main
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 import os
 import argparse
-from benchmark.Experiments import Experiment, Datasets
+from benchmark.Experiments import Experiment, Datasets, Models
 from benchmark.Results import Report
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 """Do experiment and build result file, optionally print report with results
 """
@@ -17,9 +17,9 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         type=str,
+        choices=ALL_METRICS,
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     ap.add_argument(
         "-P",
@@ -30,12 +30,15 @@ def parse_arguments():
         required=True,
         help="Platform where the test is run",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=True,
-        help="model name",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-n",
diff --git a/benchmark/scripts/be_pair_check b/benchmark/scripts/be_pair_check
index 5944d4e..eef879d 100755
--- a/benchmark/scripts/be_pair_check
+++ b/benchmark/scripts/be_pair_check
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import PairCheck
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault
 
 """Check best results of two models giving scores and win-tie-loose results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
diff --git a/benchmark/scripts/be_report b/benchmark/scripts/be_report
index 7ecf6d2..a523472 100755
--- a/benchmark/scripts/be_report
+++ b/benchmark/scripts/be_report
@@ -3,7 +3,7 @@ import argparse
 import numpy as np
 from benchmark.Experiments import Datasets
 from benchmark.Results import Report, Excel, SQL, ReportBest
-from benchmark.Utils import Files, TextColor, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, TextColor, EnvDefault
 
 
 """Build report on screen of a result file, optionally generate excel and sql
@@ -72,6 +72,7 @@
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     args = ap.parse_args()
diff --git a/benchmark/scripts/be_summary b/benchmark/scripts/be_summary
index 28c8a39..8b88640 100755
--- a/benchmark/scripts/be_summary
+++ b/benchmark/scripts/be_summary
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 
 def parse_arguments():
@@ -22,6 +22,7 @@
         action=EnvDefault,
         envvar="score",
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_micro, f1_macro, all}",
     )
     args = ap.parse_args()
diff --git a/benchmark/tests/Models_test.py b/benchmark/tests/Models_test.py
index 9f27a11..706dcc1 100644
--- a/benchmark/tests/Models_test.py
+++ b/benchmark/tests/Models_test.py
@@ -2,6 +2,7 @@ import warnings
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from sklearn.ensemble import (
+    GradientBoostingClassifier,
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
@@ -27,6 +28,7 @@ class ModelTest(TestBase):
             "RandomForest": RandomForestClassifier,
             "ExtraTree": ExtraTreeClassifier,
             "XGBoost": XGBClassifier,
+            "GBC": GradientBoostingClassifier,
         }
         for key, value in test.items():
             self.assertIsInstance(Models.get_model(key), value)
@@ -64,19 +66,30 @@ class ModelTest(TestBase):
     def test_get_complexity(self):
         warnings.filterwarnings("ignore", category=ConvergenceWarning)
         test = {
-            "STree": (11, 6, 4),
-            "Wodt": (303, 152, 50),
-            "ODTE": (7.86, 4.43, 3.37),
-            "Cart": (23, 12, 5),
-            "SVC": (0, 0, 0),
-            "RandomForest": (21.3, 11, 5.26),
-            "ExtraTree": (0, 38, 0),
-            "BaggingStree": (8.4, 4.7, 3.5),
-            "BaggingWodt": (272, 136.5, 50),
+            "STree": ((11, 6, 4), 1.0),
+            "Wodt": ((303, 152, 50), 0.9382022471910112),
+            "ODTE": ((7.86, 4.43, 3.37), 1.0),
+            "Cart": ((23, 12, 5), 1.0),
+            "SVC": ((0, 0, 0), 0.7078651685393258),
+            "RandomForest": ((21.3, 11, 5.26), 1.0),
+            "ExtraTree": ((0, 38, 0), 1.0),
+            "BaggingStree": ((8.4, 4.7, 3.5), 1.0),
+            "BaggingWodt": ((272, 136.5, 50), 0.9101123595505618),
+            "AdaBoostStree": ((12.25, 6.625, 4.75), 1.0),
+            "XGBoost": ((0, 0, 0), 1.0),
+            "GBC": ((15, 8, 3), 1.0),
         }
         X, y = load_wine(return_X_y=True)
-        for key, value in test.items():
+        print("")
+        for key, (value, score_expected) in test.items():
             clf = Models.get_model(key, random_state=1)
             clf.fit(X, y)
-            # print(key, Models.get_complexity(key, clf))
+            score_computed = clf.score(X, y)
+            # print(
+            #     key,
+            #     Models.get_complexity(key, clf),
+            #     score_expected,
+            #     score_computed,
+            # )
             self.assertSequenceEqual(Models.get_complexity(key, clf), value)
+            self.assertEqual(score_computed, score_expected, key)
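
Usage note (not part of the patch): a minimal sketch of how the refactored interface could be exercised, assuming the import path used by the scripts above (Models re-exported from benchmark.Experiments) and scikit-learn's wine dataset; everything here other than define_models/get_model is illustrative.

from sklearn.datasets import load_wine
from benchmark.Experiments import Models

# define_models() builds the whole name -> estimator dictionary, so its keys
# double as the argparse "choices" lists added to be_list and be_main.
print(sorted(Models.define_models(0)))

# get_model() now looks the name up in that dictionary and keeps the
# "did you mean ...?" hint for near-miss spellings.
X, y = load_wine(return_X_y=True)
clf = Models.get_model("GBC", random_state=1)
clf.fit(X, y)
print(clf.score(X, y))

try:
    Models.get_model("Stree")
except ValueError as error:
    print(error)  # No model recognized Stree, did you mean STree?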