Add GradientBoostingClassifier and fix metrics

commit 7f2033193e
parent 0f5ca402e2
Date: 2022-05-03 15:54:24 +02:00

12 changed files with 105 additions and 76 deletions

View File

@@ -4,6 +4,7 @@ from sklearn.ensemble import (
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
+    GradientBoostingClassifier,
 )
 from sklearn.svm import SVC
 from stree import Stree
@@ -14,45 +15,43 @@ from xgboost import XGBClassifier
 class Models:
     @staticmethod
-    def get_model(name, random_state=None):
-        if name == "STree":
-            return Stree(random_state=random_state)
-        if name == "Cart":
-            return DecisionTreeClassifier(random_state=random_state)
-        if name == "ExtraTree":
-            return ExtraTreeClassifier(random_state=random_state)
-        if name == "Wodt":
-            return Wodt(random_state=random_state)
-        if name == "SVC":
-            return SVC(random_state=random_state)
-        if name == "ODTE":
-            return Odte(
-                base_estimator=Stree(random_state=random_state),
-                random_state=random_state,
-            )
-        if name == "BaggingStree":
-            clf = Stree(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "BaggingWodt":
-            clf = Wodt(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "XGBoost":
-            return XGBClassifier(random_state=random_state)
-        if name == "AdaBoostStree":
-            clf = Stree(
-                random_state=random_state,
-            )
-            return AdaBoostClassifier(
-                base_estimator=clf,
-                algorithm="SAMME",
-                random_state=random_state,
-            )
-        if name == "RandomForest":
-            return RandomForestClassifier(random_state=random_state)
-        msg = f"No model recognized {name}"
-        if name in ("Stree", "stree"):
-            msg += ", did you mean STree?"
+    def define_models(random_state):
+        return {
+            "STree": Stree(random_state=random_state),
+            "Cart": DecisionTreeClassifier(random_state=random_state),
+            "ExtraTree": ExtraTreeClassifier(random_state=random_state),
+            "Wodt": Wodt(random_state=random_state),
+            "SVC": SVC(random_state=random_state),
+            "ODTE": Odte(
+                base_estimator=Stree(random_state=random_state),
+                random_state=random_state,
+            ),
+            "BaggingStree": BaggingClassifier(
+                base_estimator=Stree(random_state=random_state),
+                random_state=random_state,
+            ),
+            "BaggingWodt": BaggingClassifier(
+                base_estimator=Wodt(random_state=random_state),
+                random_state=random_state,
+            ),
+            "XGBoost": XGBClassifier(random_state=random_state),
+            "AdaBoostStree": AdaBoostClassifier(
+                base_estimator=Stree(
+                    random_state=random_state,
+                ),
+                algorithm="SAMME",
+                random_state=random_state,
+            ),
+            "GBC": GradientBoostingClassifier(random_state=random_state),
+            "RandomForest": RandomForestClassifier(random_state=random_state),
+        }
+
+    @staticmethod
+    def get_model(name, random_state=None):
+        try:
+            models = Models.define_models(random_state)
+            return models[name]
+        except KeyError:
+            msg = f"No model recognized {name}"
+            if name in ("Stree", "stree"):
+                msg += ", did you mean STree?"
@@ -80,6 +79,10 @@ class Models:
             leaves = mean([x.get_n_leaves() for x in result.estimators_])
             depth = mean([x.get_depth() for x in result.estimators_])
             nodes = mean([x.tree_.node_count for x in result.estimators_])
+        elif name == "GBC":
+            leaves = mean([x[0].get_n_leaves() for x in result.estimators_])
+            depth = mean([x[0].get_depth() for x in result.estimators_])
+            nodes = mean([x[0].tree_.node_count for x in result.estimators_])
         elif name == "SVC" or name == "XGBoost":
             nodes = leaves = depth = 0
         else:
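
A note on the new "GBC" branch in get_complexity above: GradientBoostingClassifier keeps its fitted trees in estimators_, a 2-D array of DecisionTreeRegressor with one row per boosting stage, so x[0] picks the first tree of each stage before averaging leaves, depth and node count. A minimal standalone sketch of that computation (load_wine and random_state=1 are borrowed from the test file; the module this diff belongs to is not named here):

    from statistics import mean

    from sklearn.datasets import load_wine
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = load_wine(return_X_y=True)
    clf = GradientBoostingClassifier(random_state=1).fit(X, y)
    # estimators_ has shape (n_boosting_stages, K): index 0 selects the
    # first regression tree of each stage, mirroring the x[0] above.
    leaves = mean(stage[0].get_n_leaves() for stage in clf.estimators_)
    depth = mean(stage[0].get_depth() for stage in clf.estimators_)
    nodes = mean(stage[0].tree_.node_count for stage in clf.estimators_)
    print(leaves, depth, nodes)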

View File

@@ -3,6 +3,13 @@ import subprocess
 import argparse

 BEST_ACCURACY_STREE = 40.282203
+ALL_METRICS = (
+    "accuracy",
+    "f1-macro",
+    "f1-micro",
+    "f1-weighted",
+    "roc-auc-ovr",
+)


 class Folders:
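
The rest of the commit wires this shared ALL_METRICS tuple into each script's --score option as argparse choices, so an unsupported metric is rejected at parse time instead of failing later. A minimal sketch of the pattern, using only the standard library (the parser below is a stand-in, not one of the scripts in this commit):

    import argparse

    ALL_METRICS = (
        "accuracy",
        "f1-macro",
        "f1-micro",
        "f1-weighted",
        "roc-auc-ovr",
    )

    ap = argparse.ArgumentParser()
    ap.add_argument("--score", type=str, required=True, choices=ALL_METRICS)

    print(ap.parse_args(["--score", "f1-macro"]).score)  # f1-macro
    # ap.parse_args(["--score", "foo"]) exits with: invalid choice: 'foo'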

View File

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 from benchmark.Results import Benchmark
-from benchmark.Utils import Files, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, EnvDefault
 import argparse
@@ -13,6 +13,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(

View File

@@ -2,7 +2,7 @@
 import argparse
 import json
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS


 def parse_arguments():
@@ -14,8 +14,8 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     args = ap.parse_args()
     return (args.score,)
@@ -23,15 +23,7 @@ def parse_arguments():
 (score,) = parse_arguments()
-all_metrics = [
-    "accuracy",
-    "f1-macro",
-    "f1-micro",
-    "f1-weighted",
-    "roc-auc-ovr",
-]
-metrics = all_metrics if score == "all" else [score]
+metrics = ALL_METRICS if score == "all" else [score]
 summary = Summary()
 summary.acquire()

View File

@@ -2,7 +2,7 @@
 import argparse
 from benchmark.Results import ReportBest
 from benchmark.Experiments import Datasets, BestResults
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault

 """Build a json file with the best results of a model and its hyperparameters
 """
@@ -17,6 +17,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Experiments import GridSearch, Datasets
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS

 """Do experiment and build result file, optionally print report with results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(

View File

@@ -1,8 +1,9 @@
 #! /usr/bin/env python
 import os
 import argparse
+from benchmark.Experiments import Models
 from benchmark.Results import Summary
-from benchmark.Utils import Folders
+from benchmark.Utils import ALL_METRICS, Folders

 """List experiments of a model
 """
@@ -21,14 +22,18 @@ def parse_arguments():
         "--score",
         type=str,
         required=False,
-        help="score used in experiment",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=False,
-        help="model used in experiment",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-k",

View File

@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 import os
 import argparse
-from benchmark.Experiments import Experiment, Datasets
+from benchmark.Experiments import Experiment, Datasets, Models
 from benchmark.Results import Report
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS

 """Do experiment and build result file, optionally print report with results
 """
@@ -17,9 +17,9 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         type=str,
+        choices=ALL_METRICS,
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     ap.add_argument(
         "-P",
@@ -30,12 +30,15 @@ def parse_arguments():
         required=True,
         help="Platform where the test is run",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=True,
-        help="model name",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-n",

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import PairCheck
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault

 """Check best results of two models giving scores and win-tie-loose results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(

View File

@@ -3,7 +3,7 @@ import argparse
 import numpy as np
 from benchmark.Experiments import Datasets
 from benchmark.Results import Report, Excel, SQL, ReportBest
-from benchmark.Utils import Files, TextColor, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, TextColor, EnvDefault


 """Build report on screen of a result file, optionally generate excel and sql
@@ -72,6 +72,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     args = ap.parse_args()

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS


 def parse_arguments():
@@ -22,6 +22,7 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_micro, f1_macro, all}",
     )
     args = ap.parse_args()

View File

@@ -2,6 +2,7 @@ import warnings
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from sklearn.ensemble import (
+    GradientBoostingClassifier,
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
@@ -27,6 +28,7 @@ class ModelTest(TestBase):
             "RandomForest": RandomForestClassifier,
             "ExtraTree": ExtraTreeClassifier,
             "XGBoost": XGBClassifier,
+            "GBC": GradientBoostingClassifier,
         }
         for key, value in test.items():
             self.assertIsInstance(Models.get_model(key), value)
@@ -64,19 +66,30 @@ class ModelTest(TestBase):
     def test_get_complexity(self):
         warnings.filterwarnings("ignore", category=ConvergenceWarning)
         test = {
-            "STree": (11, 6, 4),
-            "Wodt": (303, 152, 50),
-            "ODTE": (7.86, 4.43, 3.37),
-            "Cart": (23, 12, 5),
-            "SVC": (0, 0, 0),
-            "RandomForest": (21.3, 11, 5.26),
-            "ExtraTree": (0, 38, 0),
-            "BaggingStree": (8.4, 4.7, 3.5),
-            "BaggingWodt": (272, 136.5, 50),
+            "STree": ((11, 6, 4), 1.0),
+            "Wodt": ((303, 152, 50), 0.9382022471910112),
+            "ODTE": ((7.86, 4.43, 3.37), 1.0),
+            "Cart": ((23, 12, 5), 1.0),
+            "SVC": ((0, 0, 0), 0.7078651685393258),
+            "RandomForest": ((21.3, 11, 5.26), 1.0),
+            "ExtraTree": ((0, 38, 0), 1.0),
+            "BaggingStree": ((8.4, 4.7, 3.5), 1.0),
+            "BaggingWodt": ((272, 136.5, 50), 0.9101123595505618),
+            "AdaBoostStree": ((12.25, 6.625, 4.75), 1.0),
+            "XGBoost": ((0, 0, 0), 1.0),
+            "GBC": ((15, 8, 3), 1.0),
         }
         X, y = load_wine(return_X_y=True)
-        for key, value in test.items():
+        print("")
+        for key, (value, score_expected) in test.items():
             clf = Models.get_model(key, random_state=1)
             clf.fit(X, y)
-            # print(key, Models.get_complexity(key, clf))
+            score_computed = clf.score(X, y)
+            # print(
+            #     key,
+            #     Models.get_complexity(key, clf),
+            #     score_expected,
+            #     score_computed,
+            # )
             self.assertSequenceEqual(Models.get_complexity(key, clf), value)
+            self.assertEqual(score_computed, score_expected, key)