mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-18 08:55:53 +00:00
Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
0e724f2c6b
|
|||
|
cf8fd3454e | ||
|
162cdc2da1 | ||
|
765112073c | ||
69e21584bd
|
|||
419c899c94
|
|||
2a2ed81a6c
|
|||
4c5502611a
|
|||
|
70f1da5fc7 |
@@ -1,12 +1,9 @@
|
|||||||
[](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml)
|
|
||||||
[](https://codecov.io/gh/Doctorado-ML/benchmark)
|
[](https://codecov.io/gh/Doctorado-ML/benchmark)
|
||||||
[](https://sonar.rmontanana.es/dashboard?id=benchmark)
|
|
||||||
[](https://sonar.rmontanana.es/dashboard?id=benchmark)
|
|
||||||

|

|
||||||
|
|
||||||
# benchmark
|
# benchmark
|
||||||
|
|
||||||
Benchmarking models
|
Benchmarking Python models
|
||||||
|
|
||||||
## Experimentation
|
## Experimentation
|
||||||
|
|
||||||
|
@@ -160,6 +160,15 @@ class Arguments(argparse.ArgumentParser):
|
|||||||
"help": "Ignore nan results",
|
"help": "Ignore nan results",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
"iwss": [
|
||||||
|
("--iwss",),
|
||||||
|
{
|
||||||
|
"default": False,
|
||||||
|
"action": "store_true",
|
||||||
|
"required": False,
|
||||||
|
"help": "Do IWSS with training set and then apply to test set",
|
||||||
|
},
|
||||||
|
],
|
||||||
"key": [
|
"key": [
|
||||||
("-k", "--key"),
|
("-k", "--key"),
|
||||||
{
|
{
|
||||||
|
@@ -32,6 +32,8 @@ class DatasetsArff:
|
|||||||
def get_range_features(X, c_features):
|
def get_range_features(X, c_features):
|
||||||
if c_features.strip() == "all":
|
if c_features.strip() == "all":
|
||||||
return list(range(X.shape[1]))
|
return list(range(X.shape[1]))
|
||||||
|
if c_features.strip() == "none":
|
||||||
|
return []
|
||||||
return json.loads(c_features)
|
return json.loads(c_features)
|
||||||
|
|
||||||
def load(self, name, class_name):
|
def load(self, name, class_name):
|
||||||
@@ -129,29 +131,28 @@ class Datasets:
|
|||||||
|
|
||||||
def _init_names(self, dataset_name):
|
def _init_names(self, dataset_name):
|
||||||
file_name = os.path.join(self.dataset.folder(), Files.index)
|
file_name = os.path.join(self.dataset.folder(), Files.index)
|
||||||
default_class = "class"
|
|
||||||
self.continuous_features = {}
|
self.continuous_features = {}
|
||||||
with open(file_name) as f:
|
with open(file_name) as f:
|
||||||
sets = f.read().splitlines()
|
sets = f.read().splitlines()
|
||||||
sets = [x for x in sets if not x.startswith("#")]
|
sets = [x for x in sets if not x.startswith("#")]
|
||||||
class_names = [default_class] * len(sets)
|
results = []
|
||||||
if "," in sets[0]:
|
|
||||||
result = []
|
|
||||||
class_names = []
|
class_names = []
|
||||||
for data in sets:
|
for set_name in sets:
|
||||||
name, class_name, features = data.split(",", 2)
|
try:
|
||||||
result.append(name)
|
name, class_name, features = set_name.split(";")
|
||||||
|
except ValueError:
|
||||||
|
class_name = "class"
|
||||||
|
features = "all"
|
||||||
|
name = set_name
|
||||||
|
results.append(name)
|
||||||
class_names.append(class_name)
|
class_names.append(class_name)
|
||||||
|
features = features.strip()
|
||||||
self.continuous_features[name] = features
|
self.continuous_features[name] = features
|
||||||
sets = result
|
|
||||||
else:
|
|
||||||
for name in sets:
|
|
||||||
self.continuous_features[name] = None
|
|
||||||
# Set as dataset list the dataset passed as argument
|
# Set as dataset list the dataset passed as argument
|
||||||
if dataset_name is None:
|
if dataset_name is None:
|
||||||
return class_names, sets
|
return class_names, results
|
||||||
try:
|
try:
|
||||||
class_name = class_names[sets.index(dataset_name)]
|
class_name = class_names[results.index(dataset_name)]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise ValueError(f"Unknown dataset: {dataset_name}")
|
raise ValueError(f"Unknown dataset: {dataset_name}")
|
||||||
return [class_name], [dataset_name]
|
return [class_name], [dataset_name]
|
||||||
|
@@ -7,12 +7,17 @@ import time
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from mufs import MUFS
|
||||||
from sklearn.model_selection import (
|
from sklearn.model_selection import (
|
||||||
StratifiedKFold,
|
StratifiedKFold,
|
||||||
KFold,
|
KFold,
|
||||||
GridSearchCV,
|
GridSearchCV,
|
||||||
cross_validate,
|
|
||||||
)
|
)
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
from sklearn.feature_selection import SelectFromModel
|
||||||
|
from sklearn.preprocessing import label_binarize
|
||||||
|
from sklearn.base import clone
|
||||||
|
from sklearn.metrics import check_scoring, roc_auc_score
|
||||||
from .Utils import Folders, Files, NO_RESULTS
|
from .Utils import Folders, Files, NO_RESULTS
|
||||||
from .Datasets import Datasets
|
from .Datasets import Datasets
|
||||||
from .Models import Models
|
from .Models import Models
|
||||||
@@ -115,6 +120,7 @@ class Experiment:
|
|||||||
ignore_nan=True,
|
ignore_nan=True,
|
||||||
fit_features=None,
|
fit_features=None,
|
||||||
discretize=None,
|
discretize=None,
|
||||||
|
iwss=False,
|
||||||
folds=5,
|
folds=5,
|
||||||
):
|
):
|
||||||
env_data = EnvData().load()
|
env_data = EnvData().load()
|
||||||
@@ -176,6 +182,7 @@ class Experiment:
|
|||||||
self.random_seeds = Randomized.seeds()
|
self.random_seeds = Randomized.seeds()
|
||||||
self.results = []
|
self.results = []
|
||||||
self.duration = 0
|
self.duration = 0
|
||||||
|
self.iwss = iwss
|
||||||
self._init_experiment()
|
self._init_experiment()
|
||||||
|
|
||||||
def get_output_file(self):
|
def get_output_file(self):
|
||||||
@@ -212,48 +219,358 @@ class Experiment:
|
|||||||
res["state_names"] = states
|
res["state_names"] = states
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
# def _n_fold_crossval(self, name, X, y, hyperparameters):
|
||||||
|
# if self.scores != []:
|
||||||
|
# raise ValueError("Must init experiment before!")
|
||||||
|
|
||||||
|
# loop = tqdm(
|
||||||
|
# self.random_seeds,
|
||||||
|
# position=1,
|
||||||
|
# leave=False,
|
||||||
|
# disable=not self.progress_bar,
|
||||||
|
# )
|
||||||
|
|
||||||
|
# for random_state in loop:
|
||||||
|
# loop.set_description(f"Seed({random_state:4d})")
|
||||||
|
# random.seed(random_state)
|
||||||
|
# np.random.seed(random_state)
|
||||||
|
|
||||||
|
# kfold = self.stratified_class(
|
||||||
|
# shuffle=True, random_state=random_state, n_splits=self.folds
|
||||||
|
# )
|
||||||
|
|
||||||
|
# clf = self._build_classifier(random_state, hyperparameters)
|
||||||
|
# fit_params = self._build_fit_params(name)
|
||||||
|
# self.version = Models.get_version(self.model_name, clf)
|
||||||
|
|
||||||
|
# with warnings.catch_warnings():
|
||||||
|
# warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
|
# if self.iwss:
|
||||||
|
# # Manual cross-validation with IWSS feature selection
|
||||||
|
# fold_scores = []
|
||||||
|
# fold_times = []
|
||||||
|
# fold_estimators = []
|
||||||
|
|
||||||
|
# for train_idx, test_idx in kfold.split(X, y):
|
||||||
|
# # Split data
|
||||||
|
# X_train, X_test = X[train_idx], X[test_idx]
|
||||||
|
# y_train, y_test = y[train_idx], y[test_idx]
|
||||||
|
|
||||||
|
# # Apply IWSS feature selection
|
||||||
|
# transformer = MUFS()
|
||||||
|
# transformer.iwss(X_train, y_train, 0.5)
|
||||||
|
# X_train_selected = X_train[
|
||||||
|
# :, transformer.get_results()
|
||||||
|
# ]
|
||||||
|
# X_test_selected = X_test[:, transformer.get_results()]
|
||||||
|
# # print("Selected features:", transformer.get_results())
|
||||||
|
# # print(
|
||||||
|
# # f"Number of selected features: {X_train_selected.shape[1]}"
|
||||||
|
# # )
|
||||||
|
|
||||||
|
# # Clone classifier to avoid data leakage between folds
|
||||||
|
# clf_fold = clone(clf)
|
||||||
|
|
||||||
|
# # Fit the classifier
|
||||||
|
# start_time = time.time()
|
||||||
|
# clf_fold.fit(X_train_selected, y_train)
|
||||||
|
# fit_time = time.time() - start_time
|
||||||
|
|
||||||
|
# # Score on test set
|
||||||
|
# score_func = get_scorer(
|
||||||
|
# self.score_name.replace("-", "_")
|
||||||
|
# )
|
||||||
|
# # Handle scoring based on the metric type
|
||||||
|
# if self.score_name in [
|
||||||
|
# "roc_auc",
|
||||||
|
# "log_loss",
|
||||||
|
# "roc_auc_ovr",
|
||||||
|
# "roc_auc_ovo",
|
||||||
|
# ]:
|
||||||
|
# # These metrics need probabilities
|
||||||
|
# if hasattr(clf_fold, "predict_proba"):
|
||||||
|
# y_score = clf_fold.predict_proba(
|
||||||
|
# X_test_selected
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # Handle missing classes in the fold
|
||||||
|
# if len(unique_train_classes) < len(
|
||||||
|
# unique_all_classes
|
||||||
|
# ):
|
||||||
|
# # Create a full probability matrix with zeros for missing classes
|
||||||
|
# y_score_full = np.zeros(
|
||||||
|
# (len(y_test), len(unique_all_classes))
|
||||||
|
# )
|
||||||
|
# for i, class_label in enumerate(
|
||||||
|
# unique_train_classes
|
||||||
|
# ):
|
||||||
|
# class_idx = np.where(
|
||||||
|
# unique_all_classes == class_label
|
||||||
|
# )[0][0]
|
||||||
|
# y_score_full[:, class_idx] = y_score[
|
||||||
|
# :, i
|
||||||
|
# ]
|
||||||
|
# y_score = y_score_full
|
||||||
|
# else:
|
||||||
|
# # Fallback to decision_function for SVM-like models
|
||||||
|
# y_score = clf_fold.decision_function(
|
||||||
|
# X_test_selected
|
||||||
|
# )
|
||||||
|
|
||||||
|
# test_score = score_func._score_func(
|
||||||
|
# y_test, y_score
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# # For metrics that use predictions (accuracy, f1, etc.)
|
||||||
|
# test_score = score_func(
|
||||||
|
# clf_fold, X_test_selected, y_test
|
||||||
|
# )
|
||||||
|
|
||||||
|
# fold_scores.append(test_score)
|
||||||
|
# fold_times.append(fit_time)
|
||||||
|
# fold_estimators.append(clf_fold)
|
||||||
|
|
||||||
|
# # Package results to match cross_validate output format
|
||||||
|
# res = {
|
||||||
|
# "test_score": np.array(fold_scores),
|
||||||
|
# "fit_time": np.array(fold_times),
|
||||||
|
# "estimator": fold_estimators,
|
||||||
|
# }
|
||||||
|
# else:
|
||||||
|
# # Original cross_validate approach
|
||||||
|
# res = cross_validate(
|
||||||
|
# clf,
|
||||||
|
# X,
|
||||||
|
# y,
|
||||||
|
# cv=kfold,
|
||||||
|
# fit_params=fit_params,
|
||||||
|
# return_estimator=True,
|
||||||
|
# scoring=self.score_name.replace("-", "_"),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # Handle NaN values
|
||||||
|
# if np.isnan(res["test_score"]).any():
|
||||||
|
# if not self.ignore_nan:
|
||||||
|
# print(res["test_score"])
|
||||||
|
# raise ValueError("NaN in results")
|
||||||
|
# results = res["test_score"][~np.isnan(res["test_score"])]
|
||||||
|
# else:
|
||||||
|
# results = res["test_score"]
|
||||||
|
|
||||||
|
# # Store results
|
||||||
|
# self.scores.extend(results)
|
||||||
|
# self.times.extend(res["fit_time"])
|
||||||
|
|
||||||
|
# for result_item in res["estimator"]:
|
||||||
|
# nodes_item, leaves_item, depth_item = (
|
||||||
|
# Models.get_complexity(self.model_name, result_item)
|
||||||
|
# )
|
||||||
|
# self.nodes.append(nodes_item)
|
||||||
|
# self.leaves.append(leaves_item)
|
||||||
|
# self.depths.append(depth_item)
|
||||||
|
|
||||||
|
# from sklearn.base import clone
|
||||||
|
# import numpy as np
|
||||||
|
# import time
|
||||||
|
# import warnings
|
||||||
|
# from tqdm import tqdm
|
||||||
|
|
||||||
def _n_fold_crossval(self, name, X, y, hyperparameters):
|
def _n_fold_crossval(self, name, X, y, hyperparameters):
|
||||||
if self.scores != []:
|
if self.scores != []:
|
||||||
raise ValueError("Must init experiment before!")
|
raise ValueError("Must init experiment before!")
|
||||||
|
|
||||||
|
# Get all unique classes and check data
|
||||||
|
unique_all_classes = np.sort(np.unique(y))
|
||||||
|
n_classes = len(unique_all_classes)
|
||||||
|
|
||||||
|
# Check if we have enough samples per class for stratified k-fold
|
||||||
|
min_samples_per_class = np.min(np.bincount(y))
|
||||||
|
if min_samples_per_class < self.folds:
|
||||||
|
warnings.warn(
|
||||||
|
f"Class imbalance detected: minimum class has {min_samples_per_class} samples. "
|
||||||
|
f"Consider using fewer folds or handling imbalanced data."
|
||||||
|
)
|
||||||
|
|
||||||
loop = tqdm(
|
loop = tqdm(
|
||||||
self.random_seeds,
|
self.random_seeds,
|
||||||
position=1,
|
position=1,
|
||||||
leave=False,
|
leave=False,
|
||||||
disable=not self.progress_bar,
|
disable=not self.progress_bar,
|
||||||
)
|
)
|
||||||
|
|
||||||
for random_state in loop:
|
for random_state in loop:
|
||||||
loop.set_description(f"Seed({random_state:4d})")
|
loop.set_description(f"Seed({random_state:4d})")
|
||||||
random.seed(random_state)
|
|
||||||
np.random.seed(random_state)
|
np.random.seed(random_state)
|
||||||
|
|
||||||
kfold = self.stratified_class(
|
kfold = self.stratified_class(
|
||||||
shuffle=True, random_state=random_state, n_splits=self.folds
|
shuffle=True, random_state=random_state, n_splits=self.folds
|
||||||
)
|
)
|
||||||
|
|
||||||
clf = self._build_classifier(random_state, hyperparameters)
|
clf = self._build_classifier(random_state, hyperparameters)
|
||||||
fit_params = self._build_fit_params(name)
|
fit_params = self._build_fit_params(name)
|
||||||
self.version = Models.get_version(self.model_name, clf)
|
self.version = Models.get_version(self.model_name, clf)
|
||||||
|
|
||||||
|
# Check if the classifier supports probability predictions
|
||||||
|
scorer = check_scoring(clf, scoring="roc_auc_ovr")
|
||||||
|
if not hasattr(clf, "predict_proba") and not hasattr(
|
||||||
|
clf, "decision_function"
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
f"Classifier {self.model_name} doesn't support probability predictions "
|
||||||
|
"required for ROC-AUC scoring"
|
||||||
|
)
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
res = cross_validate(
|
|
||||||
clf,
|
fold_scores = []
|
||||||
X,
|
fold_times = []
|
||||||
y,
|
fold_estimators = []
|
||||||
cv=kfold,
|
|
||||||
fit_params=fit_params,
|
for fold_idx, (train_idx, test_idx) in enumerate(
|
||||||
return_estimator=True,
|
kfold.split(X, y)
|
||||||
scoring=self.score_name.replace("-", "_"),
|
):
|
||||||
|
# Split data
|
||||||
|
X_train, X_test = X[train_idx], X[test_idx]
|
||||||
|
y_train, y_test = y[train_idx], y[test_idx]
|
||||||
|
|
||||||
|
# Check classes in this fold
|
||||||
|
unique_test_classes = np.unique(y_test)
|
||||||
|
n_test_classes = len(unique_test_classes)
|
||||||
|
|
||||||
|
# Skip fold if we don't have at least 2 classes in test set
|
||||||
|
if n_test_classes < 2:
|
||||||
|
warnings.warn(
|
||||||
|
f"Fold {fold_idx}: Test set has only {n_test_classes} class(es). "
|
||||||
|
f"Skipping this fold for ROC-AUC calculation."
|
||||||
)
|
)
|
||||||
if np.isnan(res["test_score"]).any():
|
fold_scores.append(np.nan)
|
||||||
if not self.ignore_nan:
|
fold_times.append(np.nan)
|
||||||
print(res["test_score"])
|
fold_estimators.append(None)
|
||||||
raise ValueError("NaN in results")
|
continue
|
||||||
results = res["test_score"][~np.isnan(res["test_score"])]
|
|
||||||
|
# Apply IWSS feature selection if enabled
|
||||||
|
if self.iwss:
|
||||||
|
# transformer = (
|
||||||
|
# MUFS(discrete=False)
|
||||||
|
# if "cli_rad" in name
|
||||||
|
# else MUFS(discrete=True)
|
||||||
|
# )
|
||||||
|
# transformer.iwss(X_train, y_train, 0.5)
|
||||||
|
# selected_features = transformer.get_results()
|
||||||
|
# Apply L1-based feature selection
|
||||||
|
# Using LinearSVC with L1 penalty
|
||||||
|
lsvc = LinearSVC(
|
||||||
|
C=0.1, # Regularization parameter - adjust this for more/fewer features
|
||||||
|
penalty="l1",
|
||||||
|
dual=False,
|
||||||
|
max_iter=2000,
|
||||||
|
random_state=random_state,
|
||||||
|
)
|
||||||
|
selector = SelectFromModel(lsvc, prefit=False)
|
||||||
|
selector.fit(X_train, y_train)
|
||||||
|
|
||||||
|
# Transform the data
|
||||||
|
X_train_selected = selector.transform(X_train)
|
||||||
|
X_test_selected = selector.transform(X_test)
|
||||||
|
|
||||||
|
# Get information about selected features
|
||||||
|
selected_features = selector.get_support(indices=True)
|
||||||
|
n_selected = len(selected_features)
|
||||||
|
if len(selected_features) == 0:
|
||||||
|
warnings.warn(
|
||||||
|
f"Fold {fold_idx}: No features selected by IWSS. Using all features."
|
||||||
|
)
|
||||||
|
X_train_selected = X_train
|
||||||
|
X_test_selected = X_test
|
||||||
else:
|
else:
|
||||||
results = res["test_score"]
|
X_train_selected = X_train[:, selected_features]
|
||||||
self.scores.extend(results)
|
X_test_selected = X_test[:, selected_features]
|
||||||
self.times.extend(res["fit_time"])
|
else:
|
||||||
for result_item in res["estimator"]:
|
X_train_selected = X_train
|
||||||
nodes_item, leaves_item, depth_item = Models.get_complexity(
|
X_test_selected = X_test
|
||||||
self.model_name, result_item
|
|
||||||
|
# Clone and fit classifier
|
||||||
|
clf_fold = clone(clf)
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
clf_fold.fit(X_train_selected, y_train)
|
||||||
|
fit_time = time.time() - start_time
|
||||||
|
|
||||||
|
# Get probability predictions
|
||||||
|
y_proba = clf_fold.predict_proba(X_test_selected)
|
||||||
|
|
||||||
|
# Calculate ROC-AUC score
|
||||||
|
# Handle case where test set doesn't have all classes
|
||||||
|
if len(clf_fold.classes_) != len(unique_test_classes):
|
||||||
|
# Map probabilities to only test classes
|
||||||
|
test_class_indices = [
|
||||||
|
np.where(clf_fold.classes_ == c)[0][0]
|
||||||
|
for c in unique_test_classes
|
||||||
|
if c in clf_fold.classes_
|
||||||
|
]
|
||||||
|
y_proba = y_proba[:, test_class_indices]
|
||||||
|
|
||||||
|
# Binarize labels for multi-class ROC-AUC
|
||||||
|
y_test_binarized = label_binarize(
|
||||||
|
y_test, classes=unique_test_classes
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate ROC-AUC with OVR strategy
|
||||||
|
if n_test_classes == 2:
|
||||||
|
# Binary classification
|
||||||
|
test_score = roc_auc_score(y_test, y_proba[:, 1])
|
||||||
|
else:
|
||||||
|
# Multi-class with macro-average
|
||||||
|
test_score = roc_auc_score(
|
||||||
|
y_test_binarized,
|
||||||
|
y_proba,
|
||||||
|
multi_class="ovr",
|
||||||
|
average="macro",
|
||||||
|
)
|
||||||
|
|
||||||
|
fold_scores.append(test_score)
|
||||||
|
fold_times.append(fit_time)
|
||||||
|
fold_estimators.append(clf_fold)
|
||||||
|
|
||||||
|
# Filter out NaN scores if ignore_nan is True
|
||||||
|
scores_array = np.array(fold_scores)
|
||||||
|
times_array = np.array(fold_times)
|
||||||
|
|
||||||
|
if np.isnan(scores_array).any():
|
||||||
|
if not self.ignore_nan:
|
||||||
|
nan_folds = np.where(np.isnan(scores_array))[0]
|
||||||
|
raise ValueError(
|
||||||
|
f"NaN scores in folds {nan_folds}. "
|
||||||
|
f"Set ignore_nan=True to skip these folds."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Filter out NaN values
|
||||||
|
valid_mask = ~np.isnan(scores_array)
|
||||||
|
scores_array = scores_array[valid_mask]
|
||||||
|
times_array = times_array[valid_mask]
|
||||||
|
fold_estimators = [
|
||||||
|
e
|
||||||
|
for e, valid in zip(fold_estimators, valid_mask)
|
||||||
|
if valid
|
||||||
|
]
|
||||||
|
|
||||||
|
if len(scores_array) == 0:
|
||||||
|
warnings.warn(
|
||||||
|
f"All folds resulted in NaN for seed {random_state}. Skipping."
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Store results
|
||||||
|
self.scores.extend(scores_array)
|
||||||
|
self.times.extend(times_array)
|
||||||
|
|
||||||
|
# Store complexity metrics
|
||||||
|
for estimator in fold_estimators:
|
||||||
|
if estimator is not None:
|
||||||
|
nodes_item, leaves_item, depth_item = (
|
||||||
|
Models.get_complexity(self.model_name, estimator)
|
||||||
)
|
)
|
||||||
self.nodes.append(nodes_item)
|
self.nodes.append(nodes_item)
|
||||||
self.leaves.append(leaves_item)
|
self.leaves.append(leaves_item)
|
||||||
|
@@ -71,6 +71,7 @@ class Models:
|
|||||||
algorithm="SAMME",
|
algorithm="SAMME",
|
||||||
random_state=random_state,
|
random_state=random_state,
|
||||||
),
|
),
|
||||||
|
"AdaBoost": AdaBoostClassifier(random_state=random_state),
|
||||||
"GBC": GradientBoostingClassifier(random_state=random_state),
|
"GBC": GradientBoostingClassifier(random_state=random_state),
|
||||||
"RandomForest": RandomForestClassifier(random_state=random_state),
|
"RandomForest": RandomForestClassifier(random_state=random_state),
|
||||||
"Mock": MockModel(random_state=random_state),
|
"Mock": MockModel(random_state=random_state),
|
||||||
@@ -99,13 +100,13 @@ class Models:
|
|||||||
nodes = 0
|
nodes = 0
|
||||||
leaves = result.get_n_leaves()
|
leaves = result.get_n_leaves()
|
||||||
depth = 0
|
depth = 0
|
||||||
elif name.startswith("Bagging") or name.startswith("AdaBoost"):
|
elif name.startswith("Bagging") or name == "AdaBoostStree":
|
||||||
nodes, leaves = list(
|
nodes, leaves = list(
|
||||||
zip(*[x.nodes_leaves() for x in result.estimators_])
|
zip(*[x.nodes_leaves() for x in result.estimators_])
|
||||||
)
|
)
|
||||||
nodes, leaves = mean(nodes), mean(leaves)
|
nodes, leaves = mean(nodes), mean(leaves)
|
||||||
depth = mean([x.depth_ for x in result.estimators_])
|
depth = mean([x.depth_ for x in result.estimators_])
|
||||||
elif name == "RandomForest":
|
elif name == "RandomForest" or name == "AdaBoost":
|
||||||
leaves = mean([x.get_n_leaves() for x in result.estimators_])
|
leaves = mean([x.get_n_leaves() for x in result.estimators_])
|
||||||
depth = mean([x.get_depth() for x in result.estimators_])
|
depth = mean([x.get_depth() for x in result.estimators_])
|
||||||
nodes = mean([x.tree_.node_count for x in result.estimators_])
|
nodes = mean([x.tree_.node_count for x in result.estimators_])
|
||||||
|
@@ -108,10 +108,12 @@ class BaseReport(abc.ABC):
|
|||||||
status = (
|
status = (
|
||||||
Symbols.cross
|
Symbols.cross
|
||||||
if accuracy <= max_value
|
if accuracy <= max_value
|
||||||
else Symbols.upward_arrow
|
else (
|
||||||
|
Symbols.upward_arrow
|
||||||
if accuracy > max_value
|
if accuracy > max_value
|
||||||
else " "
|
else " "
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if status != " ":
|
if status != " ":
|
||||||
if status not in self._compare_totals:
|
if status not in self._compare_totals:
|
||||||
self._compare_totals[status] = 1
|
self._compare_totals[status] = 1
|
||||||
@@ -161,6 +163,11 @@ class StubReport(BaseReport):
|
|||||||
def header(self) -> None:
|
def header(self) -> None:
|
||||||
self.title = self.data["title"]
|
self.title = self.data["title"]
|
||||||
self.duration = self.data["duration"]
|
self.duration = self.data["duration"]
|
||||||
|
self.model = self.data["model"]
|
||||||
|
self.date = self.data["date"]
|
||||||
|
self.time = self.data["time"]
|
||||||
|
self.metric = self.data["score_name"]
|
||||||
|
self.platform = self.data["platform"]
|
||||||
|
|
||||||
def footer(self, accuracy: float) -> None:
|
def footer(self, accuracy: float) -> None:
|
||||||
self.accuracy = accuracy
|
self.accuracy = accuracy
|
||||||
@@ -195,9 +202,11 @@ class Summary:
|
|||||||
self.models.add(model)
|
self.models.add(model)
|
||||||
report = StubReport(
|
report = StubReport(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
|
(
|
||||||
Folders.hidden_results
|
Folders.hidden_results
|
||||||
if self.hidden
|
if self.hidden
|
||||||
else Folders.results,
|
else Folders.results
|
||||||
|
),
|
||||||
result,
|
result,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@@ -10,7 +10,7 @@ from .Results import Report
|
|||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
__author__ = "Ricardo Montañana Gómez"
|
__author__ = "Ricardo Montañana Gómez"
|
||||||
__copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
|
__copyright__ = "Copyright 2020-2024, Ricardo Montañana Gómez"
|
||||||
__license__ = "MIT License"
|
__license__ = "MIT License"
|
||||||
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
||||||
|
|
||||||
|
@@ -1 +1 @@
|
|||||||
__version__ = "0.5.0"
|
__version__ = "1.0.1"
|
||||||
|
@@ -88,7 +88,7 @@
|
|||||||
<button type="button"
|
<button type="button"
|
||||||
class="btn-close"
|
class="btn-close"
|
||||||
aria-label="Close"
|
aria-label="Close"
|
||||||
onclick="location.href = '/index/{{ compare }}'"></button>
|
onclick="location.href = '{{ back }}'"></button>
|
||||||
<h7>
|
<h7>
|
||||||
<b>
|
<b>
|
||||||
Total score: {{ "%.6f" % (data.results | sum(attribute="score") ) }}
|
Total score: {{ "%.6f" % (data.results | sum(attribute="score") ) }}
|
||||||
|
@@ -90,7 +90,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
<h2 class="has-text-white has-background-primary">
|
<h2 class="has-text-white has-background-primary">
|
||||||
<b>
|
<b>
|
||||||
<button class="delete" onclick="location.href = '/index/{{ compare }}'"></button>
|
<button class="delete" onclick="location.href = '{{ back }}'"></button>
|
||||||
Total score: {{ "%.6f" % (data.results | sum(attribute="score") ) }}
|
Total score: {{ "%.6f" % (data.results | sum(attribute="score") ) }}
|
||||||
</b>
|
</b>
|
||||||
</h2>
|
</h2>
|
||||||
|
@@ -14,7 +14,7 @@ def main(args_test=None):
|
|||||||
arguments.xset("stratified").xset("score").xset("model", mandatory=True)
|
arguments.xset("stratified").xset("score").xset("model", mandatory=True)
|
||||||
arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
|
arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
|
||||||
arguments.xset("report").xset("ignore_nan").xset("discretize")
|
arguments.xset("report").xset("ignore_nan").xset("discretize")
|
||||||
arguments.xset("fit_features")
|
arguments.xset("fit_features").xset("iwss")
|
||||||
arguments.add_exclusive(
|
arguments.add_exclusive(
|
||||||
["grid_paramfile", "best_paramfile", "hyperparameters"]
|
["grid_paramfile", "best_paramfile", "hyperparameters"]
|
||||||
)
|
)
|
||||||
@@ -43,6 +43,7 @@ def main(args_test=None):
|
|||||||
folds=args.n_folds,
|
folds=args.n_folds,
|
||||||
fit_features=args.fit_features,
|
fit_features=args.fit_features,
|
||||||
discretize=args.discretize,
|
discretize=args.discretize,
|
||||||
|
iwss=args.iwss,
|
||||||
)
|
)
|
||||||
job.do_experiment()
|
job.do_experiment()
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
|
@@ -68,7 +68,7 @@ class ArgumentsTest(TestBase):
|
|||||||
test_args = ["-n", "3", "-k", "date"]
|
test_args = ["-n", "3", "-k", "date"]
|
||||||
with self.assertRaises(SystemExit):
|
with self.assertRaises(SystemExit):
|
||||||
arguments.parse(test_args)
|
arguments.parse(test_args)
|
||||||
self.assertRegexpMatches(
|
self.assertRegex(
|
||||||
stderr.getvalue(),
|
stderr.getvalue(),
|
||||||
r"error: the following arguments are required: -m/--model",
|
r"error: the following arguments are required: -m/--model",
|
||||||
)
|
)
|
||||||
@@ -79,7 +79,7 @@ class ArgumentsTest(TestBase):
|
|||||||
test_args = ["-n", "3", "-m", "SVC"]
|
test_args = ["-n", "3", "-m", "SVC"]
|
||||||
with self.assertRaises(SystemExit):
|
with self.assertRaises(SystemExit):
|
||||||
arguments.parse(test_args)
|
arguments.parse(test_args)
|
||||||
self.assertRegexpMatches(
|
self.assertRegex(
|
||||||
stderr.getvalue(),
|
stderr.getvalue(),
|
||||||
r"error: the following arguments are required: -k/--key",
|
r"error: the following arguments are required: -k/--key",
|
||||||
)
|
)
|
||||||
@@ -114,7 +114,7 @@ class ArgumentsTest(TestBase):
|
|||||||
test_args = None
|
test_args = None
|
||||||
with self.assertRaises(SystemExit):
|
with self.assertRaises(SystemExit):
|
||||||
arguments.parse(test_args)
|
arguments.parse(test_args)
|
||||||
self.assertRegexpMatches(
|
self.assertRegex(
|
||||||
stderr.getvalue(),
|
stderr.getvalue(),
|
||||||
r"error: the following arguments are required: -m/--model, "
|
r"error: the following arguments are required: -m/--model, "
|
||||||
"-k/--key, --title",
|
"-k/--key, --title",
|
||||||
|
@@ -102,7 +102,7 @@ class ModelTest(TestBase):
|
|||||||
test = {
|
test = {
|
||||||
"STree": ((11, 6, 4), 1.0),
|
"STree": ((11, 6, 4), 1.0),
|
||||||
"Wodt": ((303, 152, 50), 0.9382022471910112),
|
"Wodt": ((303, 152, 50), 0.9382022471910112),
|
||||||
"ODTE": ((7.86, 4.43, 3.37), 1.0),
|
"ODTE": ((786, 443, 337), 1.0),
|
||||||
"Cart": ((23, 12, 5), 1.0),
|
"Cart": ((23, 12, 5), 1.0),
|
||||||
"SVC": ((0, 0, 0), 0.7078651685393258),
|
"SVC": ((0, 0, 0), 0.7078651685393258),
|
||||||
"RandomForest": ((21.3, 11, 5.26), 1.0),
|
"RandomForest": ((21.3, 11, 5.26), 1.0),
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
iris,class,all
|
iris;class;all
|
||||||
wine,class,[0, 1]
|
wine;class;[0, 1]
|
||||||
|
@@ -6,7 +6,7 @@
|
|||||||
"kernel": "liblinear",
|
"kernel": "liblinear",
|
||||||
"multiclass_strategy": "ovr"
|
"multiclass_strategy": "ovr"
|
||||||
},
|
},
|
||||||
"v. 1.3.1, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
"v. 1.4.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||||
],
|
],
|
||||||
"balloons": [
|
"balloons": [
|
||||||
0.625,
|
0.625,
|
||||||
@@ -15,6 +15,6 @@
|
|||||||
"kernel": "linear",
|
"kernel": "linear",
|
||||||
"multiclass_strategy": "ovr"
|
"multiclass_strategy": "ovr"
|
||||||
},
|
},
|
||||||
"v. 1.3.1, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
"v. 1.4.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||||
]
|
]
|
||||||
}
|
}
|
@@ -120,7 +120,7 @@ class BeMainTest(TestBase):
|
|||||||
module.main(parameter)
|
module.main(parameter)
|
||||||
self.assertEqual(msg.exception.code, 2)
|
self.assertEqual(msg.exception.code, 2)
|
||||||
self.assertEqual(stderr.getvalue(), "")
|
self.assertEqual(stderr.getvalue(), "")
|
||||||
self.assertRegexpMatches(stdout.getvalue(), message)
|
self.assertRegex(stdout.getvalue(), message)
|
||||||
|
|
||||||
def test_be_main_best_params_non_existent(self):
|
def test_be_main_best_params_non_existent(self):
|
||||||
model = "GBC"
|
model = "GBC"
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
1;1;"Datasets used in benchmark ver. 0.5.0"
|
1;1;"Datasets used in benchmark ver. 1.0.1"
|
||||||
2;1;" Default score accuracy"
|
2;1;" Default score accuracy"
|
||||||
2;2;"Cross validation"
|
2;2;"Cross validation"
|
||||||
2;6;"5 Folds"
|
2;6;"5 Folds"
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
[94mDatasets used in benchmark ver. 0.5.0
|
[94mDatasets used in benchmark ver. 1.0.1
|
||||||
|
|
||||||
Dataset Sampl. Feat. Cont Cls Balance
|
Dataset Sampl. Feat. Cont Cls Balance
|
||||||
============================== ====== ===== ==== === ==========================================
|
============================== ====== ===== ==== === ==========================================
|
||||||
|
Reference in New Issue
Block a user