Add GradientBoostingClassifier and fix metrics

2022-05-03 15:54:24 +02:00
parent 0f5ca402e2
commit 7f2033193e
12 changed files with 105 additions and 76 deletions

View File

@@ -4,6 +4,7 @@ from sklearn.ensemble import (
RandomForestClassifier,
BaggingClassifier,
AdaBoostClassifier,
GradientBoostingClassifier,
)
from sklearn.svm import SVC
from stree import Stree
@@ -14,50 +15,48 @@ from xgboost import XGBClassifier
class Models:
@staticmethod
def get_model(name, random_state=None):
if name == "STree":
return Stree(random_state=random_state)
if name == "Cart":
return DecisionTreeClassifier(random_state=random_state)
if name == "ExtraTree":
return ExtraTreeClassifier(random_state=random_state)
if name == "Wodt":
return Wodt(random_state=random_state)
if name == "SVC":
return SVC(random_state=random_state)
if name == "ODTE":
return Odte(
def define_models(random_state):
return {
"STree": Stree(random_state=random_state),
"Cart": DecisionTreeClassifier(random_state=random_state),
"ExtraTree": ExtraTreeClassifier(random_state=random_state),
"Wodt": Wodt(random_state=random_state),
"SVC": SVC(random_state=random_state),
"ODTE": Odte(
base_estimator=Stree(random_state=random_state),
random_state=random_state,
)
if name == "BaggingStree":
clf = Stree(random_state=random_state)
return BaggingClassifier(
base_estimator=clf, random_state=random_state
)
if name == "BaggingWodt":
clf = Wodt(random_state=random_state)
return BaggingClassifier(
base_estimator=clf, random_state=random_state
)
if name == "XGBoost":
return XGBClassifier(random_state=random_state)
if name == "AdaBoostStree":
clf = Stree(
),
"BaggingStree": BaggingClassifier(
base_estimator=Stree(random_state=random_state),
random_state=random_state,
)
return AdaBoostClassifier(
base_estimator=clf,
),
"BaggingWodt": BaggingClassifier(
base_estimator=Wodt(random_state=random_state),
random_state=random_state,
),
"XGBoost": XGBClassifier(random_state=random_state),
"AdaBoostStree": AdaBoostClassifier(
base_estimator=Stree(
random_state=random_state,
),
algorithm="SAMME",
random_state=random_state,
)
if name == "RandomForest":
return RandomForestClassifier(random_state=random_state)
msg = f"No model recognized {name}"
if name in ("Stree", "stree"):
msg += ", did you mean STree?"
elif name in ("odte", "Odte"):
msg += ", did you mean ODTE?"
),
"GBC": GradientBoostingClassifier(random_state=random_state),
"RandomForest": RandomForestClassifier(random_state=random_state),
}
@staticmethod
def get_model(name, random_state=None):
try:
models = Models.define_models(random_state)
return models[name]
except KeyError:
msg = f"No model recognized {name}"
if name in ("Stree", "stree"):
msg += ", did you mean STree?"
elif name in ("odte", "Odte"):
msg += ", did you mean ODTE?"
raise ValueError(msg)
@staticmethod
@@ -80,6 +79,10 @@ class Models:
leaves = mean([x.get_n_leaves() for x in result.estimators_])
depth = mean([x.get_depth() for x in result.estimators_])
nodes = mean([x.tree_.node_count for x in result.estimators_])
elif name == "GBC":
leaves = mean([x[0].get_n_leaves() for x in result.estimators_])
depth = mean([x[0].get_depth() for x in result.estimators_])
nodes = mean([x[0].tree_.node_count for x in result.estimators_])
elif name == "SVC" or name == "XGBoost":
nodes = leaves = depth = 0
else:
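
Note on the refactor above: get_model no longer walks an if-chain; it builds the name-to-estimator dictionary once via define_models and looks the name up, turning a KeyError into the friendly ValueError. For "GBC" complexity, estimators_ of a fitted GradientBoostingClassifier is a 2-D array of regression trees (one per boosting stage and class), hence the x[0] indexing to take each stage's first tree. A minimal usage sketch, assuming the import path used by the scripts further down in this commit:

from benchmark.Experiments import Models  # import path taken from the scripts below

clf = Models.get_model("GBC", random_state=1)   # -> GradientBoostingClassifier(random_state=1)
names = list(Models.define_models(0))           # every registered model name, "GBC" included
# Unknown names still raise ValueError with the spelling hint, e.g.
#   Models.get_model("Stree")  ->  ValueError: No model recognized Stree, did you mean STree?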

View File

@@ -3,6 +3,13 @@ import subprocess
import argparse
BEST_ACCURACY_STREE = 40.282203
ALL_METRICS = (
"accuracy",
"f1-macro",
"f1-micro",
"f1-weighted",
"roc-auc-ovr",
)
class Folders:
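
ALL_METRICS above becomes the single source of truth for metric names: each CLI below imports it and passes it to argparse via choices=ALL_METRICS, so an invalid score such as "f1_macro" (underscores) is rejected at parse time instead of failing later. A minimal sketch of the pattern, with illustrative flag names:

import argparse
from benchmark.Utils import ALL_METRICS

ap = argparse.ArgumentParser()
ap.add_argument("-s", "--score", type=str, required=True, choices=ALL_METRICS)
args = ap.parse_args(["-s", "f1-macro"])    # accepted
# ap.parse_args(["-s", "f1_macro"])         # would exit with "invalid choice"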

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
from benchmark.Results import Benchmark
from benchmark.Utils import Files, EnvDefault
from benchmark.Utils import ALL_METRICS, Files, EnvDefault
import argparse
@@ -13,6 +13,7 @@ def parse_arguments():
envvar="score",
type=str,
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(

View File

@@ -2,7 +2,7 @@
import argparse
import json
from benchmark.Results import Summary
from benchmark.Utils import EnvDefault
from benchmark.Utils import EnvDefault, ALL_METRICS
def parse_arguments():
@@ -14,8 +14,8 @@ def parse_arguments():
action=EnvDefault,
envvar="score",
required=True,
help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
"roc-auc-ovr, all}",
choices=ALL_METRICS,
help="score name {accuracy, f1-macro, f1-micro, f1-weighted, roc-auc-ovr}"
)
args = ap.parse_args()
return (args.score,)
@@ -23,15 +23,7 @@ def parse_arguments():
(score,) = parse_arguments()
all_metrics = [
"accuracy",
"f1-macro",
"f1-micro",
"f1-weighted",
"roc-auc-ovr",
]
metrics = all_metrics if score == "all" else [score]
metrics = ALL_METRICS if score == "all" else [score]
summary = Summary()
summary.acquire()

View File

@@ -2,7 +2,7 @@
import argparse
from benchmark.Results import ReportBest
from benchmark.Experiments import Datasets, BestResults
from benchmark.Utils import EnvDefault
from benchmark.Utils import ALL_METRICS, EnvDefault
"""Build a json file with the best results of a model and its hyperparameters
"""
@@ -17,6 +17,7 @@ def parse_arguments():
envvar="score",
type=str,
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
import argparse
from benchmark.Experiments import GridSearch, Datasets
from benchmark.Utils import EnvDefault
from benchmark.Utils import EnvDefault, ALL_METRICS
"""Do experiment and build result file, optionally print report with results
"""
@@ -16,6 +16,7 @@ def parse_arguments():
envvar="score",
type=str,
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(

View File

@@ -1,8 +1,9 @@
#! /usr/bin/env python
import os
import argparse
from benchmark.Experiments import Models
from benchmark.Results import Summary
from benchmark.Utils import Folders
from benchmark.Utils import ALL_METRICS, Folders
"""List experiments of a model
"""
@@ -21,14 +22,18 @@ def parse_arguments():
"--score",
type=str,
required=False,
help="score used in experiment",
choices=ALL_METRICS,
help="score name {accuracy, f1-macro, f1-micro, f1-weighted, roc-auc-ovr}"
)
models_data = Models.define_models(0)
models = "{" + ", ".join(models_data) + "}"
ap.add_argument(
"-m",
"--model",
type=str,
required=False,
help="model used in experiment",
choices=list(models_data),
help=f"model name: {models}",
)
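
The --model argument above takes the same approach: define_models(0) is called only for its keys (the random_state value is irrelevant here), so newly registered models such as "GBC" show up in both choices and the help text automatically. What the CLI now sees:

from benchmark.Experiments import Models

models_data = Models.define_models(0)   # only the keys are used
print(list(models_data))
# ['STree', 'Cart', 'ExtraTree', 'Wodt', 'SVC', 'ODTE', 'BaggingStree',
#  'BaggingWodt', 'XGBoost', 'AdaBoostStree', 'GBC', 'RandomForest']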
ap.add_argument(
"-k",

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
import os
import argparse
from benchmark.Experiments import Experiment, Datasets
from benchmark.Experiments import Experiment, Datasets, Models
from benchmark.Results import Report
from benchmark.Utils import EnvDefault
from benchmark.Utils import EnvDefault, ALL_METRICS
"""Do experiment and build result file, optionally print report with results
"""
@@ -17,9 +17,9 @@ def parse_arguments():
action=EnvDefault,
envvar="score",
type=str,
choices=ALL_METRICS,
required=True,
help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
"roc-auc-ovr, all}",
help="score name {accuracy, f1-macro, f1-micro, f1-weighted, roc-auc-ovr}"
)
ap.add_argument(
"-P",
@@ -30,12 +30,15 @@ def parse_arguments():
required=True,
help="Platform where the test is run",
)
models_data = Models.define_models(0)
models = "{" + ", ".join(models_data) + "}"
ap.add_argument(
"-m",
"--model",
type=str,
required=True,
help="model name",
choices=list(models_data),
help=f"model name: {models}",
)
ap.add_argument(
"-n",

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
import argparse
from benchmark.Results import PairCheck
from benchmark.Utils import EnvDefault
from benchmark.Utils import ALL_METRICS, EnvDefault
"""Check best results of two models giving scores and win-tie-loose results
"""
@@ -16,6 +16,7 @@ def parse_arguments():
envvar="score",
type=str,
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(

View File

@@ -3,7 +3,7 @@ import argparse
import numpy as np
from benchmark.Experiments import Datasets
from benchmark.Results import Report, Excel, SQL, ReportBest
from benchmark.Utils import Files, TextColor, EnvDefault
from benchmark.Utils import ALL_METRICS, Files, TextColor, EnvDefault
"""Build report on screen of a result file, optionally generate excel and sql
@@ -72,6 +72,7 @@ def parse_arguments():
envvar="score",
type=str,
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_macro, ...}",
)
args = ap.parse_args()

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python
import argparse
from benchmark.Results import Summary
from benchmark.Utils import EnvDefault
from benchmark.Utils import EnvDefault, ALL_METRICS
def parse_arguments():
@@ -22,6 +22,7 @@ def parse_arguments():
action=EnvDefault,
envvar="score",
required=True,
choices=ALL_METRICS,
help="score name {accuracy, f1_micro, f1_macro, all}",
)
args = ap.parse_args()

View File

@@ -2,6 +2,7 @@ import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import (
GradientBoostingClassifier,
RandomForestClassifier,
BaggingClassifier,
AdaBoostClassifier,
@@ -27,6 +28,7 @@ class ModelTest(TestBase):
"RandomForest": RandomForestClassifier,
"ExtraTree": ExtraTreeClassifier,
"XGBoost": XGBClassifier,
"GBC": GradientBoostingClassifier,
}
for key, value in test.items():
self.assertIsInstance(Models.get_model(key), value)
@@ -64,19 +66,30 @@ class ModelTest(TestBase):
def test_get_complexity(self):
warnings.filterwarnings("ignore", category=ConvergenceWarning)
test = {
"STree": (11, 6, 4),
"Wodt": (303, 152, 50),
"ODTE": (7.86, 4.43, 3.37),
"Cart": (23, 12, 5),
"SVC": (0, 0, 0),
"RandomForest": (21.3, 11, 5.26),
"ExtraTree": (0, 38, 0),
"BaggingStree": (8.4, 4.7, 3.5),
"BaggingWodt": (272, 136.5, 50),
"STree": ((11, 6, 4), 1.0),
"Wodt": ((303, 152, 50), 0.9382022471910112),
"ODTE": ((7.86, 4.43, 3.37), 1.0),
"Cart": ((23, 12, 5), 1.0),
"SVC": ((0, 0, 0), 0.7078651685393258),
"RandomForest": ((21.3, 11, 5.26), 1.0),
"ExtraTree": ((0, 38, 0), 1.0),
"BaggingStree": ((8.4, 4.7, 3.5), 1.0),
"BaggingWodt": ((272, 136.5, 50), 0.9101123595505618),
"AdaBoostStree": ((12.25, 6.625, 4.75), 1.0),
"XGBoost": ((0, 0, 0), 1.0),
"GBC": ((15, 8, 3), 1.0),
}
X, y = load_wine(return_X_y=True)
for key, value in test.items():
print("")
for key, (value, score_expected) in test.items():
clf = Models.get_model(key, random_state=1)
clf.fit(X, y)
# print(key, Models.get_complexity(key, clf))
score_computed = clf.score(X, y)
# print(
# key,
# Models.get_complexity(key, clf),
# score_expected,
# score_computed,
# )
self.assertSequenceEqual(Models.get_complexity(key, clf), value)
self.assertEqual(score_computed, score_expected, key)
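
The complexity test now asserts two things per model: the (leaves, depth, nodes) triple and the training-set score on wine, so a regression in either shows up immediately. To run just this test locally (discovery from the repository root is an assumption about the test layout):

python -m unittest -k test_get_complexity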