mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-16 07:56:07 +00:00
Add nodes, leaves, depth to mysql
Add nodes, leaves, depth, samples, features and classes to analysis
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import argparse
|
||||
from typing import Tuple
|
||||
import numpy as np
|
||||
from experimentation.Sets import Datasets
|
||||
from experimentation.Utils import TextColor
|
||||
from experimentation.Database import MySQL
|
||||
@@ -14,8 +15,10 @@ models_tree = [
|
||||
"baseRaF",
|
||||
]
|
||||
models_ensemble = ["odte", "adaBoost", "bagging", "TBRaF", "TBRoF", "TBRRoF"]
|
||||
description = ["samp", "var", "cls"]
|
||||
complexity = ["nodes", "leaves", "depth"]
|
||||
title = "Best model results"
|
||||
lengths = (30, 12, 12, 12, 12, 12, 12)
|
||||
lengths = (30, 4, 3, 3, 3, 3, 3, 12, 12, 12, 12, 12, 12)
|
||||
|
||||
|
||||
def parse_arguments() -> Tuple[str, str, str, bool, bool]:
|
||||
@@ -79,9 +82,11 @@ def report_header(title, experiment, model_type):
|
||||
|
||||
def report_line(line):
|
||||
output = f"{line['dataset']:{lengths[0] + 5}s} "
|
||||
for key, item in enumerate(description + complexity):
|
||||
output += f"{line[item]:{lengths[key + 1]}d} "
|
||||
data = models.copy()
|
||||
for key, model in enumerate(data):
|
||||
output += f"{line[model]:{lengths[key + 1]}s} "
|
||||
output += f"{line[model]:{lengths[key + 7]}s} "
|
||||
return output
|
||||
|
||||
|
||||
@@ -101,7 +106,15 @@ def report_footer(agg):
|
||||
dbh = MySQL()
|
||||
database = dbh.get_connection()
|
||||
dt = Datasets(False, False, "tanveer")
|
||||
fields = ("Dataset",)
|
||||
fields = (
|
||||
"Dataset",
|
||||
"Samp",
|
||||
"Var",
|
||||
"Cls",
|
||||
"Nod",
|
||||
"Lea",
|
||||
"Dep",
|
||||
)
|
||||
models = models_tree if model_type == "tree" else models_ensemble
|
||||
for item in models:
|
||||
fields += (f"{item}",)
|
||||
@@ -121,13 +134,23 @@ for dataset in dt:
|
||||
find_one = False
|
||||
# Look for max accuracy for any given dataset
|
||||
line = {"dataset": color + dataset[0]}
|
||||
X, y = dt.load(dataset[0]) # type: ignore
|
||||
line["samp"], line["var"] = X.shape
|
||||
line["cls"] = len(np.unique(y))
|
||||
record = dbh.find_best(dataset[0], models, experiment)
|
||||
max_accuracy = 0.0 if record is None else record[5]
|
||||
line["nodes"] = 0
|
||||
line["leaves"] = 0
|
||||
line["depth"] = 0
|
||||
for model in models:
|
||||
record = dbh.find_best(dataset[0], model, experiment)
|
||||
if record is None:
|
||||
line[model] = color + "-" * 12
|
||||
else:
|
||||
if model == "stree":
|
||||
line["nodes"] = record[12]
|
||||
line["leaves"] = record[13]
|
||||
line["depth"] = record[14]
|
||||
reference = record[13]
|
||||
accuracy = record[5]
|
||||
acc_std = record[11]
|
||||
|
Reference in New Issue
Block a user