From 2decec05fb8b777217039eb7c9c96d385f1d6591 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Sun, 29 Nov 2020 00:00:27 +0100
Subject: [PATCH] add kernel hyperparameter subset in gridsearch

---
Review note (ignored by git-am): in experimentation/Models.py the grids
stored in self._poly and self._rbf were swapped with respect to the
"kernel" value they contain, so "-k poly" searched the rbf grid and
"-k rbf" searched the poly grid. Each attribute is now named after the
kernel it holds; the default grid order (linear, rbf, poly) is unchanged.

 analysis_mysql.py              | 32 +++++++++++++----
 experiment.py                  | 22 +++++++++++-
 experimentation/Experiments.py | 17 +++++++--
 experimentation/Models.py      | 63 ++++++++++++++++++++--------------
 4 files changed, 98 insertions(+), 36 deletions(-)

diff --git a/analysis_mysql.py b/analysis_mysql.py
index 76ada06..08c72c1 100644
--- a/analysis_mysql.py
+++ b/analysis_mysql.py
@@ -55,11 +55,24 @@ def report_line(line):
 
 
 def report_footer(agg):
-    print(TextColor.GREEN + f"we have better results {agg['better']:2d} times")
-    print(TextColor.RED + f"we have worse results {agg['worse']:2d} times")
+    print(
+        TextColor.GREEN
+        + f"we have better results {agg['better']['items']:2d} times"
+    )
+    print(
+        TextColor.RED
+        + f"we have worse results {agg['worse']['items']:2d} times"
+    )
     color = TextColor.LINE1
     for item in models:
-        print(color + f"{item:10s} used {agg[item]:2d} times")
+        print(
+            color + f"{item:10s} used {agg[item]['items']:2d} times ", end=""
+        )
+        print(
+            color + f"better {agg[item]['better']:2d} times ",
+            end="",
+        )
+        print(color + f"worse {agg[item]['worse']:2d} times ")
         color = (
             TextColor.LINE2 if color == TextColor.LINE1 else TextColor.LINE1
         )
@@ -77,7 +90,10 @@ for item in [
     "better",
     "worse",
 ] + models:
-    agg[item] = 0
+    agg[item] = {}
+    agg[item]["items"] = 0
+    agg[item]["better"] = 0
+    agg[item]["worse"] = 0
 for dataset in dt:
     find_one = False
     line = {"dataset": color + dataset[0]}
@@ -91,13 +107,15 @@ for dataset in dt:
             reference = record[10]
             accuracy = record[5]
             find_one = True
-            agg[model] += 1
+            agg[model]["items"] += 1
             if accuracy > reference:
                 sign = "+"
-                agg["better"] += 1
+                agg["better"]["items"] += 1
+                agg[model]["better"] += 1
             else:
                 sign = "-"
-                agg["worse"] += 1
+                agg["worse"]["items"] += 1
+                agg[model]["worse"] += 1
             item = f"{accuracy:9.7} {sign}"
             line["reference"] = f"{reference:9.7}"
             line[model] = (
diff --git a/experiment.py b/experiment.py
index a7ce5b8..3d24e41 100644
--- a/experiment.py
+++ b/experiment.py
@@ -47,6 +47,20 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
         help="Experiment: {gridsearch, gridbest, crossval, report_grid, "
         "report_cross}",
     )
+    ap.add_argument(
+        "-k",
+        "--kernel",
+        type=str,
+        choices=[
+            "linear",
+            "poly",
+            "rbf",
+            "any",
+        ],
+        required=False,
+        default="any",
+        help="Kernel: {linear, poly, rbf, any} only used in gridsearch",
+    )
     ap.add_argument(
         "-d",
         "--dataset",
@@ -88,6 +102,7 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
         args.normalize,
         args.standardize,
         args.excludeparams,
+        args.kernel,
     )
 
 
@@ -100,10 +115,15 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]:
     normalize,
     standardize,
     exclude_params,
+    kernel,
 ) = parse_arguments()
 
 experiment = Experiment(
-    random_state=1, model=model, host=host, set_of_files=set_of_files
+    random_state=1,
+    model=model,
+    host=host,
+    set_of_files=set_of_files,
+    kernel=kernel,
 )
 if experiment_type[0:6] == "report":
     bd = (
diff --git a/experimentation/Experiments.py b/experimentation/Experiments.py
index 291037d..4ff2f2a 100644
--- a/experimentation/Experiments.py
+++ b/experimentation/Experiments.py
@@ -12,7 +12,12 @@ from .Sets import Datasets
 
 class Experiment:
     def __init__(
-        self, random_state: int, model: str, host: str, set_of_files: str
+        self,
+        random_state: int,
+        model: str,
+        host: str,
+        set_of_files: str,
+        kernel: str,
     ) -> None:
         self._random_state = random_state
         self._model_name = model
@@ -26,6 +31,7 @@ class Experiment:
         # used in gridsearch with ensembles to take best hyperparams of
         # base class or gridsearch these hyperparams as well
         self._base_params = "any"
+        self._kernel = kernel
 
     def set_base_params(self, base_params: str) -> None:
         self._base_params = base_params
@@ -73,6 +79,12 @@ class Experiment:
         """
         hyperparams = Hyperparameters(host=self._host, model=self._model_name)
         model = self._clf.get_model()
+        if self._kernel != "any":
+            # set parameters grid to only one kernel
+            if isinstance(self._clf, Models.Ensemble):
+                self._clf._base_model.select_params(self._kernel)
+            else:
+                self._clf.select_params(self._kernel)
         hyperparameters = self._clf.get_parameters()
         grid_type = "gridsearch"
         if (
@@ -111,7 +123,8 @@ class Experiment:
             model,
             return_train_score=True,
             param_grid=hyperparameters,
-            n_jobs=-1,
+            n_jobs=1,
+            verbose=1,
         )
         start_time = time.time()
         grid_search.fit(X, y)
diff --git a/experimentation/Models.py b/experimentation/Models.py
index 7496f2e..6935c7d 100644
--- a/experimentation/Models.py
+++ b/experimentation/Models.py
@@ -49,35 +49,46 @@ class ModelStree(ModelBase):
         gamma = [1e-1, 1, 1e1]
         max_features = [None, "auto"]
         split_criteria = ["impurity", "max_samples"]
+        self._linear = {
+            "random_state": [self._random_state],
+            "C": C,
+            "max_iter": max_iter,
+            "split_criteria": split_criteria,
+            "max_features": max_features,
+        }
+        self._rbf = {
+            "random_state": [self._random_state],
+            "kernel": ["rbf"],
+            "C": C,
+            "gamma": gamma,
+            "max_iter": max_iter,
+            "split_criteria": split_criteria,
+            "max_features": max_features,
+        }
+        self._poly = {
+            "random_state": [self._random_state],
+            "kernel": ["poly"],
+            "degree": [3, 5],
+            "C": C,
+            "gamma": gamma,
+            "max_iter": max_iter,
+            "split_criteria": split_criteria,
+            "max_features": max_features,
+        }
         self._param_grid = [
-            {
-                "random_state": [self._random_state],
-                "C": C,
-                "max_iter": max_iter,
-                "split_criteria": split_criteria,
-                "max_features": max_features,
-            },
-            {
-                "random_state": [self._random_state],
-                "kernel": ["rbf"],
-                "C": C,
-                "gamma": gamma,
-                "max_iter": max_iter,
-                "split_criteria": split_criteria,
-                "max_features": max_features,
-            },
-            {
-                "random_state": [self._random_state],
-                "kernel": ["poly"],
-                "degree": [3, 5],
-                "C": C,
-                "gamma": gamma,
-                "max_iter": max_iter,
-                "split_criteria": split_criteria,
-                "max_features": max_features,
-            },
+            self._linear,
+            self._rbf,
+            self._poly,
         ]
 
+    def select_params(self, kernel: str) -> None:
+        if kernel == "linear":
+            self._param_grid = [self._linear]
+        elif kernel == "poly":
+            self._param_grid = [self._poly]
+        else:
+            self._param_grid = [self._rbf]
+
 
 class ModelSVC(ModelBase):
     def __init__(self, random_state: Optional[int] = None) -> None: