Mirror of https://github.com/Doctorado-ML/benchmark.git, synced 2025-08-15 15:35:52 +00:00
Add GradientBoostingClassifier and fix metrics
@@ -4,6 +4,7 @@ from sklearn.ensemble import (
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
+    GradientBoostingClassifier,
 )
 from sklearn.svm import SVC
 from stree import Stree
@@ -14,50 +15,48 @@ from xgboost import XGBClassifier
 class Models:
     @staticmethod
-    def get_model(name, random_state=None):
-        if name == "STree":
-            return Stree(random_state=random_state)
-        if name == "Cart":
-            return DecisionTreeClassifier(random_state=random_state)
-        if name == "ExtraTree":
-            return ExtraTreeClassifier(random_state=random_state)
-        if name == "Wodt":
-            return Wodt(random_state=random_state)
-        if name == "SVC":
-            return SVC(random_state=random_state)
-        if name == "ODTE":
-            return Odte(
-                base_estimator=Stree(random_state=random_state),
-                random_state=random_state,
-            )
-        if name == "BaggingStree":
-            clf = Stree(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "BaggingWodt":
-            clf = Wodt(random_state=random_state)
-            return BaggingClassifier(
-                base_estimator=clf, random_state=random_state
-            )
-        if name == "XGBoost":
-            return XGBClassifier(random_state=random_state)
-        if name == "AdaBoostStree":
-            clf = Stree(
-                random_state=random_state,
-            )
-            return AdaBoostClassifier(
-                base_estimator=clf,
-                algorithm="SAMME",
-                random_state=random_state,
-            )
-        if name == "RandomForest":
-            return RandomForestClassifier(random_state=random_state)
-        msg = f"No model recognized {name}"
-        if name in ("Stree", "stree"):
-            msg += ", did you mean STree?"
-        elif name in ("odte", "Odte"):
-            msg += ", did you mean ODTE?"
-        raise ValueError(msg)
+    def define_models(random_state):
+        return {
+            "STree": Stree(random_state=random_state),
+            "Cart": DecisionTreeClassifier(random_state=random_state),
+            "ExtraTree": ExtraTreeClassifier(random_state=random_state),
+            "Wodt": Wodt(random_state=random_state),
+            "SVC": SVC(random_state=random_state),
+            "ODTE": Odte(
+                base_estimator=Stree(random_state=random_state),
+                random_state=random_state,
+            ),
+            "BaggingStree": BaggingClassifier(
+                base_estimator=Stree(random_state=random_state),
+                random_state=random_state,
+            ),
+            "BaggingWodt": BaggingClassifier(
+                base_estimator=Wodt(random_state=random_state),
+                random_state=random_state,
+            ),
+            "XGBoost": XGBClassifier(random_state=random_state),
+            "AdaBoostStree": AdaBoostClassifier(
+                base_estimator=Stree(
+                    random_state=random_state,
+                ),
+                algorithm="SAMME",
+                random_state=random_state,
+            ),
+            "GBC": GradientBoostingClassifier(random_state=random_state),
+            "RandomForest": RandomForestClassifier(random_state=random_state),
+        }
+
+    @staticmethod
+    def get_model(name, random_state=None):
+        try:
+            models = Models.define_models(random_state)
+            return models[name]
+        except KeyError:
+            msg = f"No model recognized {name}"
+            if name in ("Stree", "stree"):
+                msg += ", did you mean STree?"
+            elif name in ("odte", "Odte"):
+                msg += ", did you mean ODTE?"
+            raise ValueError(msg)
 
     @staticmethod
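The change above replaces the if/elif chain with a single registry: define_models builds a dict of freshly seeded estimators and get_model just indexes it, so an unknown key turns into the same ValueError with a spelling hint. A minimal usage sketch of that behaviour (the names come straight from the dict in the diff; nothing else is assumed):

    from benchmark.Experiments import Models

    # A seeded GradientBoostingClassifier through the new registry
    clf = Models.get_model("GBC", random_state=0)

    # The registered names are just the dict keys
    available = list(Models.define_models(0))
    print(available)  # ['STree', 'Cart', ..., 'GBC', 'RandomForest']

    try:
        Models.get_model("stree")
    except ValueError as error:
        print(error)  # No model recognized stree, did you mean STree?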
@@ -80,6 +79,10 @@ class Models:
             leaves = mean([x.get_n_leaves() for x in result.estimators_])
             depth = mean([x.get_depth() for x in result.estimators_])
             nodes = mean([x.tree_.node_count for x in result.estimators_])
+        elif name == "GBC":
+            leaves = mean([x[0].get_n_leaves() for x in result.estimators_])
+            depth = mean([x[0].get_depth() for x in result.estimators_])
+            nodes = mean([x[0].tree_.node_count for x in result.estimators_])
         elif name == "SVC" or name == "XGBoost":
             nodes = leaves = depth = 0
         else:
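The new GBC branch indexes every element with [0] because GradientBoostingClassifier.estimators_ is not a flat list of trees as in RandomForest or Bagging: it is a 2-D numpy array of DecisionTreeRegressor objects, one tree per boosting stage and class. A small stand-alone sketch of the same computation, assuming the wine dataset used by the tests further down and statistics.mean in place of whatever mean the module imports:

    from statistics import mean
    from sklearn.datasets import load_wine
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = load_wine(return_X_y=True)
    clf = GradientBoostingClassifier(random_state=1).fit(X, y)

    # estimators_ has shape (n_estimators, n_classes), e.g. (100, 3) on wine,
    # so each row is indexed with [0] to reach an actual tree
    print(clf.estimators_.shape)
    leaves = mean([x[0].get_n_leaves() for x in clf.estimators_])
    depth = mean([x[0].get_depth() for x in clf.estimators_])
    nodes = mean([x[0].tree_.node_count for x in clf.estimators_])
    print(nodes, leaves, depth)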
@@ -3,6 +3,13 @@ import subprocess
 import argparse
 
 BEST_ACCURACY_STREE = 40.282203
+ALL_METRICS = (
+    "accuracy",
+    "f1-macro",
+    "f1-micro",
+    "f1-weighted",
+    "roc-auc-ovr",
+)
 
 
 class Folders:
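ALL_METRICS gives every command-line script below a single source of truth for the metric names it accepts: each one now passes the tuple to argparse as choices, so a misspelled score is rejected before any experiment runs. A throwaway sketch of that wiring (this parser is illustrative, not one of the project scripts):

    import argparse
    from benchmark.Utils import ALL_METRICS

    ap = argparse.ArgumentParser()
    ap.add_argument("--score", type=str, required=True, choices=ALL_METRICS)

    args = ap.parse_args(["--score", "f1-macro"])  # accepted
    print(args.score)
    # ap.parse_args(["--score", "f1_macro"]) would exit with an "invalid choice" error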
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 from benchmark.Results import Benchmark
-from benchmark.Utils import Files, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, EnvDefault
 import argparse
 
 
@@ -13,6 +13,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
@@ -2,7 +2,7 @@
 import argparse
 import json
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 
 def parse_arguments():
@@ -14,8 +14,8 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     args = ap.parse_args()
     return (args.score,)
@@ -23,15 +23,7 @@ def parse_arguments():
 
 (score,) = parse_arguments()
 
-all_metrics = [
-    "accuracy",
-    "f1-macro",
-    "f1-micro",
-    "f1-weighted",
-    "roc-auc-ovr",
-]
-
-metrics = all_metrics if score == "all" else [score]
+metrics = ALL_METRICS if score == "all" else [score]
 
 summary = Summary()
 summary.acquire()
@@ -2,7 +2,7 @@
 import argparse
 from benchmark.Results import ReportBest
 from benchmark.Experiments import Datasets, BestResults
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault
 
 """Build a json file with the best results of a model and its hyperparameters
 """
@@ -17,6 +17,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Experiments import GridSearch, Datasets
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 """Do experiment and build result file, optionally print report with results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
@@ -1,8 +1,9 @@
 #! /usr/bin/env python
 import os
 import argparse
+from benchmark.Experiments import Models
 from benchmark.Results import Summary
-from benchmark.Utils import Folders
+from benchmark.Utils import ALL_METRICS, Folders
 
 """List experiments of a model
 """
@@ -21,14 +22,18 @@ def parse_arguments():
         "--score",
         type=str,
         required=False,
-        help="score used in experiment",
+        choices=ALL_METRICS,
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=False,
-        help="model used in experiment",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-k",
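This script and the experiment runner below build their -m options from the registry itself: define_models(0) is called only for its keys, and joining them yields the help text. A quick sketch of the two derived values, assuming the dict from the Models diff above:

    from benchmark.Experiments import Models

    models_data = Models.define_models(0)  # random_state only seeds the estimators
    print(list(models_data))
    # ['STree', 'Cart', 'ExtraTree', ..., 'GBC', 'RandomForest']
    print("{" + ", ".join(models_data) + "}")
    # {STree, Cart, ExtraTree, ..., GBC, RandomForest}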
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 import os
 import argparse
-from benchmark.Experiments import Experiment, Datasets
+from benchmark.Experiments import Experiment, Datasets, Models
 from benchmark.Results import Report
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 """Do experiment and build result file, optionally print report with results
 """
@@ -17,9 +17,9 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         type=str,
+        choices=ALL_METRICS,
         required=True,
-        help="score name {accuracy, f1-micro, f1-macro, f1-weighted, "
-        "roc-auc-ovr, all}",
+        help="score name {accuracy, f1-macro, f1-weighted, roc-auc-ovr}",
     )
     ap.add_argument(
         "-P",
@@ -30,12 +30,15 @@ def parse_arguments():
         required=True,
         help="Platform where the test is run",
     )
+    models_data = Models.define_models(0)
+    models = "{" + ", ".join(models_data) + "}"
     ap.add_argument(
         "-m",
         "--model",
         type=str,
         required=True,
-        help="model name",
+        choices=list(models_data),
+        help=f"model name: {models}",
     )
     ap.add_argument(
         "-n",
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import PairCheck
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import ALL_METRICS, EnvDefault
 
 """Check best results of two models giving scores and win-tie-loose results
 """
@@ -16,6 +16,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     ap.add_argument(
@@ -3,7 +3,7 @@ import argparse
 import numpy as np
 from benchmark.Experiments import Datasets
 from benchmark.Results import Report, Excel, SQL, ReportBest
-from benchmark.Utils import Files, TextColor, EnvDefault
+from benchmark.Utils import ALL_METRICS, Files, TextColor, EnvDefault
 
 
 """Build report on screen of a result file, optionally generate excel and sql
@@ -72,6 +72,7 @@ def parse_arguments():
         envvar="score",
         type=str,
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_macro, ...}",
     )
     args = ap.parse_args()
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import argparse
 from benchmark.Results import Summary
-from benchmark.Utils import EnvDefault
+from benchmark.Utils import EnvDefault, ALL_METRICS
 
 
 def parse_arguments():
@@ -22,6 +22,7 @@ def parse_arguments():
         action=EnvDefault,
         envvar="score",
         required=True,
+        choices=ALL_METRICS,
         help="score name {accuracy, f1_micro, f1_macro, all}",
     )
     args = ap.parse_args()
@@ -2,6 +2,7 @@ import warnings
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from sklearn.ensemble import (
+    GradientBoostingClassifier,
     RandomForestClassifier,
     BaggingClassifier,
     AdaBoostClassifier,
@@ -27,6 +28,7 @@ class ModelTest(TestBase):
             "RandomForest": RandomForestClassifier,
             "ExtraTree": ExtraTreeClassifier,
             "XGBoost": XGBClassifier,
+            "GBC": GradientBoostingClassifier,
         }
         for key, value in test.items():
             self.assertIsInstance(Models.get_model(key), value)
@@ -64,19 +66,30 @@ class ModelTest(TestBase):
     def test_get_complexity(self):
         warnings.filterwarnings("ignore", category=ConvergenceWarning)
         test = {
-            "STree": (11, 6, 4),
-            "Wodt": (303, 152, 50),
-            "ODTE": (7.86, 4.43, 3.37),
-            "Cart": (23, 12, 5),
-            "SVC": (0, 0, 0),
-            "RandomForest": (21.3, 11, 5.26),
-            "ExtraTree": (0, 38, 0),
-            "BaggingStree": (8.4, 4.7, 3.5),
-            "BaggingWodt": (272, 136.5, 50),
+            "STree": ((11, 6, 4), 1.0),
+            "Wodt": ((303, 152, 50), 0.9382022471910112),
+            "ODTE": ((7.86, 4.43, 3.37), 1.0),
+            "Cart": ((23, 12, 5), 1.0),
+            "SVC": ((0, 0, 0), 0.7078651685393258),
+            "RandomForest": ((21.3, 11, 5.26), 1.0),
+            "ExtraTree": ((0, 38, 0), 1.0),
+            "BaggingStree": ((8.4, 4.7, 3.5), 1.0),
+            "BaggingWodt": ((272, 136.5, 50), 0.9101123595505618),
+            "AdaBoostStree": ((12.25, 6.625, 4.75), 1.0),
+            "XGBoost": ((0, 0, 0), 1.0),
+            "GBC": ((15, 8, 3), 1.0),
         }
         X, y = load_wine(return_X_y=True)
-        for key, value in test.items():
+        print("")
+        for key, (value, score_expected) in test.items():
             clf = Models.get_model(key, random_state=1)
             clf.fit(X, y)
-            # print(key, Models.get_complexity(key, clf))
+            score_computed = clf.score(X, y)
+            # print(
+            #     key,
+            #     Models.get_complexity(key, clf),
+            #     score_expected,
+            #     score_computed,
+            # )
             self.assertSequenceEqual(Models.get_complexity(key, clf), value)
+            self.assertEqual(score_computed, score_expected, key)
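The reworked test pins two expectations per model: the complexity triple returned by Models.get_complexity and the training-set accuracy on load_wine with random_state=1. A hedged sketch of reproducing one entry outside the test harness; the exact numbers depend on the library versions the project pins:

    from sklearn.datasets import load_wine
    from benchmark.Experiments import Models

    X, y = load_wine(return_X_y=True)
    clf = Models.get_model("GBC", random_state=1)
    clf.fit(X, y)

    print(Models.get_complexity("GBC", clf))  # (15, 8, 3) in the table above
    print(clf.score(X, y))                    # 1.0 in the table above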