Mirror of https://github.com/Doctorado-ML/Stree_datasets.git (synced 2025-08-15 23:46:03 +00:00)
Add nodes, leaves, depth to mysql
Add nodes, leaves, depth, samples, features and classes to analysis
@@ -55,22 +55,30 @@ def parse_arguments():
    return (args.set_of_files, args.model, args.dataset, args.sql, args.param)


def nodes_leaves(clf):
    nodes = 0
    leaves = 0
    for node in clf:
        if node.is_leaf():
            leaves += 1
        else:
            nodes += 1
    return nodes, leaves


def compute_auto_hyperparams(X, y):
    params = {"max_iter": 1e4, "C": 0.1}
    classes = len(np.unique(y))
    if classes > 2:
        params["split_criteria"] = "max_samples"
    """Proposed auto-configuration of hyperparameters
    max_it = 10e4 (1 value)
    split = impurity if classes == 2 and split = max_samples if classes > 2 (1 value)
    kernel = linear or polynomial (2 values)
    C = 0.1, 0.5 and 1.0 (3 values)
    Case 1: C=1, max_iter=1e4 + conditional split_max, linear kernel
    Case 2: C=0.5, max_iter=1e4 + conditional split_max, linear kernel
    Case 3: C=0.1, max_iter=1e4 + conditional split_max, linear kernel
    Case 4: C=1, max_iter=1e4 + conditional split_max, poly kernel
    Case 5: C=0.5, max_iter=1e4 + conditional split_max, poly kernel
    Case 6: C=0.1, max_iter=1e4 + conditional split_max, poly kernel
    Case 7: C=1, max_iter=1e4 + conditional + rbf kernel
    Case 8: rbf kernel
    """
    # params = {"max_iter": 1e4, "kernel": "rbf"}
    # classes = len(np.unique(y))
    # if classes > 2:
    #     params["split_criteria"] = "max_samples"
    params = {"kernel": "rbf"}
    return params

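Not part of the commit, just an illustration: the translated docstring above enumerates eight candidate configurations (three C values times two kernels with max_iter=1e4, plus two rbf cases, with the split criterion switched for multiclass data). A minimal sketch of how that grid could be generated is given below; candidate_hyperparams is a hypothetical helper, and the parameter names simply mirror the ones passed to Stree in this diff.

from itertools import product

import numpy as np


def candidate_hyperparams(y):
    """Build the eight proposed configurations for a target vector y (hypothetical helper)."""
    cases = []
    # Cases 1-6: C in {1, 0.5, 0.1} x kernel in {linear, poly}, max_iter = 1e4
    for kernel, C in product(("linear", "poly"), (1.0, 0.5, 0.1)):
        cases.append({"C": C, "max_iter": 1e4, "kernel": kernel})
    # Case 7: C = 1, max_iter = 1e4, rbf kernel
    cases.append({"C": 1.0, "max_iter": 1e4, "kernel": "rbf"})
    # Case 8: rbf kernel with the estimator defaults
    cases.append({"kernel": "rbf"})
    # Conditional split criterion, applied to cases 1-7 only when the problem is multiclass
    if len(np.unique(y)) > 2:
        for params in cases[:-1]:
            params["split_criteria"] = "max_samples"
    return cases
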
@@ -97,7 +105,7 @@ def process_dataset(dataset, verbose, model, auto_params):
    clf = Stree(random_state=random_state)
    clf.set_params(**hyperparameters)
    res = cross_validate(clf, X, y, cv=kfold, return_estimator=True)
-   nodes, leaves = nodes_leaves(res["estimator"][0])
+   nodes, leaves = res["estimator"][0].nodes_leaves()
    depth = res["estimator"][0].depth_
    scores.append(res["test_score"])
    times.append(res["fit_time"])

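As a hedged aside (not part of the commit): the hunk above reads the tree structure from the first fitted estimator returned by cross_validate. The sketch below shows a variation that averages nodes, leaves and depth over all folds; collect_structure is a hypothetical helper, the import path for Stree and the stratified 5-fold split are assumptions, while nodes_leaves() and depth_ are the Stree members used in the diff.

import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_validate
from stree import Stree  # assumed import path for the Stree estimator


def collect_structure(X, y, hyperparameters, random_state=0):
    # Fit one Stree per fold and keep the estimators, as the diff does.
    clf = Stree(random_state=random_state)
    clf.set_params(**hyperparameters)
    # The original kfold is defined elsewhere in the script; a stratified
    # 5-fold split is assumed here.
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    res = cross_validate(clf, X, y, cv=kfold, return_estimator=True)
    # nodes_leaves() and depth_ are the members used in the hunk above;
    # unlike the commit (first fold only), this averages over every fold.
    nodes, leaves, depths = zip(
        *((*est.nodes_leaves(), est.depth_) for est in res["estimator"])
    )
    return {
        "accuracy": np.mean(res["test_score"]),
        "fit_time": np.mean(res["fit_time"]),
        "nodes": np.mean(nodes),
        "leaves": np.mean(leaves),
        "depth": np.mean(depths),
    }
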
@@ -222,6 +230,9 @@ if dataset == "all":
    parameters = json.loads("{}")
    accuracy_best = 0.0
    acc_best_std = 0.0
    if auto_params:
        # show parameters computed
        parameters = json.loads(hyperparameters)
    accuracy_computed = np.mean(scores)
    diff = accuracy_best - accuracy_computed
    print(

@@ -243,12 +254,12 @@ else:
    accuracy_best = record[5] if record is not None else 0.0
    acc_best_std = record[11] if record is not None else 0.0
    print(
-       f"* Accuracy Computed : {accuracy:6.4f}±{np.std(scores):6.4f} "
+       f"* Accuracy Computed .: {accuracy:6.4f}±{np.std(scores):6.4f} "
        f"{np.mean(times):5.3f}s"
    )
    print(f"* Accuracy Best .....: {accuracy_best:6.4f}±{acc_best_std:6.4f}")
    print(f"* Difference ........: {accuracy_best - accuracy:6.4f}")
-   print(f"* Nodes/Leaves/Depth :{nodes:2d} {leaves:2d} " f"{depth:2d} ")
+   print(f"* Nodes/Leaves/Depth : {nodes:2d} {leaves:2d} " f"{depth:2d} ")
    stop = time.time()
    print(f"- Auto Hyperparams ..: {hyperparameters}")
    hours, rem = divmod(stop - start, 3600)
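For context (not part of the commit): the last line of the hunk is the start of the standard divmod pattern for reporting elapsed time. A minimal sketch of the full pattern, under the assumption that the script formats it as HH:MM:SS:

import time

start = time.time()
# ... experiment runs here ...
stop = time.time()
hours, rem = divmod(stop - start, 3600)   # line appearing in the hunk above
minutes, seconds = divmod(rem, 60)        # assumed continuation
print(f"Time: {int(hours):02d}:{int(minutes):02d}:{seconds:05.2f}")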