add kernel hyperparameter subset in gridsearch

This commit is contained in:
2020-11-29 00:00:27 +01:00
parent b14edf4303
commit 2decec05fb
4 changed files with 98 additions and 36 deletions

View File

@@ -55,11 +55,24 @@ def report_line(line):
def report_footer(agg):
print(TextColor.GREEN + f"we have better results {agg['better']:2d} times")
print(TextColor.RED + f"we have worse results {agg['worse']:2d} times")
print(
TextColor.GREEN
+ f"we have better results {agg['better']['items']:2d} times"
)
print(
TextColor.RED
+ f"we have worse results {agg['worse']['items']:2d} times"
)
color = TextColor.LINE1
for item in models:
print(color + f"{item:10s} used {agg[item]:2d} times")
print(
color + f"{item:10s} used {agg[item]['items']:2d} times ", end=""
)
print(
color + f"better {agg[item]['better']:2d} times ",
end="",
)
print(color + f"worse {agg[item]['worse']:2d} times ")
color = (
TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
)
@@ -77,7 +90,10 @@ for item in [
"better",
"worse",
] + models:
agg[item] = 0
agg[item] = {}
agg[item]["items"] = 0
agg[item]["better"] = 0
agg[item]["worse"] = 0
for dataset in dt:
find_one = False
line = {"dataset": color + dataset[0]}
@@ -91,13 +107,15 @@ for dataset in dt:
reference = record[10]
accuracy = record[5]
find_one = True
agg[model] += 1
agg[model]["items"] += 1
if accuracy > reference:
sign = "+"
agg["better"] += 1
agg["better"]["items"] += 1
agg[model]["better"] += 1
else:
sign = "-"
agg["worse"] += 1
agg["worse"]["items"] += 1
agg[model]["worse"] += 1
item = f"{accuracy:9.7} {sign}"
line["reference"] = f"{reference:9.7}"
line[model] = (

View File

@@ -47,6 +47,20 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
help="Experiment: {gridsearch, gridbest, crossval, report_grid, "
"report_cross}",
)
ap.add_argument(
"-k",
"--kernel",
type=str,
choices=[
"linear",
"poly",
"rbf",
"any",
],
required=False,
default="any",
help="Kernel: {linear, poly, rbf, any} only used in gridsearch",
)
ap.add_argument(
"-d",
"--dataset",
@@ -88,6 +102,7 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
args.normalize,
args.standardize,
args.excludeparams,
args.kernel,
)
@@ -100,10 +115,15 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
normalize,
standardize,
exclude_params,
kernel,
) = parse_arguments()
experiment = Experiment(
random_state=1, model=model, host=host, set_of_files=set_of_files
random_state=1,
model=model,
host=host,
set_of_files=set_of_files,
kernel=kernel,
)
if experiment_type[0:6] == "report":
bd = (

View File

@@ -12,7 +12,12 @@ from .Sets import Datasets
class Experiment:
def __init__(
self, random_state: int, model: str, host: str, set_of_files: str
self,
random_state: int,
model: str,
host: str,
set_of_files: str,
kernel: str,
) -> None:
self._random_state = random_state
self._model_name = model
@@ -26,6 +31,7 @@ class Experiment:
# used in gridsearch with ensembles to take best hyperparams of
# base class or gridsearch these hyperparams as well
self._base_params = "any"
self._kernel = kernel
def set_base_params(self, base_params: str) -> None:
self._base_params = base_params
@@ -73,6 +79,12 @@ class Experiment:
"""
hyperparams = Hyperparameters(host=self._host, model=self._model_name)
model = self._clf.get_model()
if self._kernel != "any":
# set parameters grid to only one kernel
if isinstance(self._clf, Models.Ensemble):
self._clf._base_model.select_params(self._kernel)
else:
self._clf.select_params(self._kernel)
hyperparameters = self._clf.get_parameters()
grid_type = "gridsearch"
if (
@@ -111,7 +123,8 @@ class Experiment:
model,
return_train_score=True,
param_grid=hyperparameters,
n_jobs=-1,
n_jobs=1,
verbose=1,
)
start_time = time.time()
grid_search.fit(X, y)

View File

@@ -49,35 +49,46 @@ class ModelStree(ModelBase):
gamma = [1e-1, 1, 1e1]
max_features = [None, "auto"]
split_criteria = ["impurity", "max_samples"]
self._linear = {
"random_state": [self._random_state],
"C": C,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
}
self._poly = {
"random_state": [self._random_state],
"kernel": ["rbf"],
"C": C,
"gamma": gamma,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
}
self._rbf = {
"random_state": [self._random_state],
"kernel": ["poly"],
"degree": [3, 5],
"C": C,
"gamma": gamma,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
}
self._param_grid = [
{
"random_state": [self._random_state],
"C": C,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
},
{
"random_state": [self._random_state],
"kernel": ["rbf"],
"C": C,
"gamma": gamma,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
},
{
"random_state": [self._random_state],
"kernel": ["poly"],
"degree": [3, 5],
"C": C,
"gamma": gamma,
"max_iter": max_iter,
"split_criteria": split_criteria,
"max_features": max_features,
},
self._linear,
self._poly,
self._rbf,
]
def select_params(self, kernel: str) -> None:
if kernel == "linear":
self._param_grid = [self._linear]
elif kernel == "poly":
self._param_grid = [self._poly]
else:
self._param_grid = [self._rbf]
class ModelSVC(ModelBase):
def __init__(self, random_state: Optional[int] = None) -> None: