Add nodes, leaves, depth to mysql

Add nodes, leaves, depth, samples, features and classes to analysis
This commit is contained in:
2021-03-24 12:38:11 +01:00
parent ae6e2ba374
commit 66bceff179
5 changed files with 124 additions and 74 deletions

View File

@@ -55,22 +55,30 @@ def parse_arguments():
return (args.set_of_files, args.model, args.dataset, args.sql, args.param)
def nodes_leaves(clf):
    """Count the internal nodes and the leaves reachable by iterating ``clf``.

    Every object yielded by ``clf`` is asked ``is_leaf()``; a truthy answer
    counts as a leaf, anything else counts as a (non-leaf) node.

    :param clf: iterable yielding node objects that expose ``is_leaf()``
    :return: tuple ``(nodes, leaves)``; ``nodes`` excludes the leaves
    """
    nodes = 0
    leaves = 0
    for node in clf:
        if node.is_leaf():
            leaves += 1
        else:
            nodes += 1
    return nodes, leaves
def compute_auto_hyperparams(X, y):
    """Return the hyperparameters used for automatic configuration.

    Proposal for automatic hyperparameter configuration:

    max_it = 10e4
        (1 value)
    split = impurity if classes == 2 and split = max_samples if classes > 2
        (1 value)
    kernel = linear or polynomial
        (2 values)
    C = 0.1, 0.5 and 1.0
        (3 values)

    Case 1: C=1, max_iter=1e4 + conditional split_max, linear kernel
    Case 2: C=0.5, max_iter=1e4 + conditional split_max, linear kernel
    Case 3: C=0.1, max_iter=1e4 + conditional split_max, linear kernel
    Case 4: C=1, max_iter=1e4 + conditional split_max, poly kernel
    Case 5: C=0.5, max_iter=1e4 + conditional split_max, poly kernel
    Case 6: C=0.1, max_iter=1e4 + conditional split_max, poly kernel
    Case 7: C=1, max_iter=1e4 + conditional + rbf kernel
    Case 8: rbf kernel

    :param X: feature matrix; not used by the configuration currently active
    :param y: target labels; not used by the configuration currently active
    :return: dict of hyperparameters (currently the "Case 8" setting)
    """
    # Alternative that depends on the number of classes, kept for reference:
    # params = {"max_iter": 1e4, "kernel": "rbf"}
    # classes = len(np.unique(y))
    # if classes > 2:
    #     params["split_criteria"] = "max_samples"
    params = {"kernel": "rbf"}
    return params
@@ -97,7 +105,7 @@ def process_dataset(dataset, verbose, model, auto_params):
clf = Stree(random_state=random_state)
clf.set_params(**hyperparameters)
res = cross_validate(clf, X, y, cv=kfold, return_estimator=True)
nodes, leaves = nodes_leaves(res["estimator"][0])
nodes, leaves = res["estimator"][0].nodes_leaves()
depth = res["estimator"][0].depth_
scores.append(res["test_score"])
times.append(res["fit_time"])
@@ -222,6 +230,9 @@ if dataset == "all":
parameters = json.loads("{}")
accuracy_best = 0.0
acc_best_std = 0.0
if auto_params:
# show parameters computed
parameters = json.loads(hyperparameters)
accuracy_computed = np.mean(scores)
diff = accuracy_best - accuracy_computed
print(
@@ -243,12 +254,12 @@ else:
accuracy_best = record[5] if record is not None else 0.0
acc_best_std = record[11] if record is not None else 0.0
print(
f"* Accuracy Computed : {accuracy:6.4f}±{np.std(scores):6.4f} "
f"* Accuracy Computed .: {accuracy:6.4f}±{np.std(scores):6.4f} "
f"{np.mean(times):5.3f}s"
)
print(f"* Accuracy Best .....: {accuracy_best:6.4f}±{acc_best_std:6.4f}")
print(f"* Difference ........: {accuracy_best - accuracy:6.4f}")
print(f"* Nodes/Leaves/Depth :{nodes:2d} {leaves:2d} " f"{depth:2d} ")
print(f"* Nodes/Leaves/Depth : {nodes:2d} {leaves:2d} " f"{depth:2d} ")
stop = time.time()
print(f"- Auto Hyperparams ..: {hyperparameters}")
hours, rem = divmod(stop - start, 3600)