mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-15 07:26:02 +00:00
96 lines
2.7 KiB
Python
Executable File
96 lines
2.7 KiB
Python
Executable File
import os
|
|
from datetime import datetime
|
|
from experimentation.Sets import Datasets
|
|
from experimentation.Database import MySQL
|
|
from statistics import mean, stdev
|
|
|
|
|
|
def store_result(database, dataset, accuracy, time_spent, nodes, leaves, depth):
    """Write one ``insert into results`` statement for *dataset*.

    The statement is printed to *database*, which only needs to be usable as
    the ``file`` argument of ``print`` (for instance an open text file).

    Args:
        database: Writable text stream receiving the SQL statement.
        dataset: Value stored in the ``dataset`` column.
        accuracy: Accuracy samples; their mean and sample standard
            deviation are stored.
        time_spent: Timing samples; their mean and sample standard
            deviation are stored.
        nodes: Value stored in the ``nodes`` column.
        leaves: Value stored in the ``leaves`` column.
        depth: Value stored in the ``depth`` column.

    Raises:
        statistics.StatisticsError: If *accuracy* or *time_spent* holds too
            few samples for ``mean``/``stdev``.
    """
    timestamp = datetime.now()
    # Column name -> value, listed in the order used by the statement.
    record = {
        "date": timestamp.strftime("%Y-%m-%d"),
        "time": timestamp.strftime("%H:%M:%S"),
        "type": "crossval",
        "accuracy": mean(accuracy),
        "accuracy_std": stdev(accuracy),
        "dataset": dataset,
        "classifier": "oc1",
        "norm": 1,
        "stand": 0,
        "time_spent": mean(time_spent),
        "time_spent_std": stdev(time_spent),
        "parameters": "{}",
        "nodes": nodes,
        "leaves": leaves,
        "depth": depth,
    }
    columns = ",".join(record)
    # Every value is rendered with %s inside double quotes.
    placeholders = ",".join(['"%s"'] * len(record))
    statement = f"insert into results ({columns}) values({placeholders});"
    print(statement % tuple(record.values()), file=database)
    # Variant for a live connection instead of a text stream:
    # cursor = database.cursor()
    # cursor.execute(statement, tuple(record.values()))
    # database.commit()
|
|
|
|
|
|
def get_result(name, seeds=(57, 31, 1714, 17, 23, 79, 83, 97, 7, 1)):
    """Collect the OC1 results stored on disk for dataset *name*.

    For every seed the file ``data/oc1output/{name}_{seed}.txt`` is read.
    Lines ``[-9:-4]`` of each file are split on ``***``; fields 2 and 3 are
    taken as accuracy and time and appended to the returned lists.  The last
    three lines are expected to look like ``key=value`` and provide, in this
    order, leaves, nodes and depth.

    NOTE(review): nodes, leaves and depth are overwritten on every seed, so
    the returned values are those of the last seed only - confirm that this
    is intended rather than an average over all seeds.

    Args:
        name: Dataset name used to build the result file names.
        seeds: Seeds whose result files are read, in order.  Defaults to the
            ten seeds originally hard-coded here.

    Returns:
        Tuple ``(accuracy, time_spent, nodes, leaves, depth)`` where the
        first two items are lists of floats and the rest are floats.

    Raises:
        ValueError: If *seeds* is empty, or a field cannot be parsed as float.
        OSError: If a result file cannot be opened.
        IndexError: If a result file has fewer lines/fields than expected.
    """
    if not seeds:
        # Without this guard the return statement would fail with an
        # UnboundLocalError, which hides the real cause.
        raise ValueError("at least one seed is required")
    accuracy = []
    time_spent = []
    for seed in seeds:
        filename = os.path.join("data", "oc1output", f"{name}_{seed}.txt")
        # The context manager releases the handle even if reading fails.
        with open(filename, "r") as result_file:
            lines = result_file.readlines()
        for line in lines[-9:-4]:
            acc, time_s = line.split("***")[2:4]
            accuracy.append(float(acc))
            time_spent.append(float(time_s))
        leaves = float(lines[-3].split("=")[1])
        nodes = float(lines[-2].split("=")[1])
        depth = float(lines[-1].split("=")[1])
    return accuracy, time_spent, nodes, leaves, depth
|
|
|
|
|
|
# Script body: for every dataset yielded by Datasets(False, False, "tanveer"),
# read its stored OC1 results, print a one-line summary and append the
# matching SQL insert statement to "oc1.sql".
#
# The statements are written to a SQL script rather than sent through a live
# connection (the MySQL() / get_connection() path is intentionally unused).
# The `with` block guarantees the script file is flushed and closed; the
# previous `database.close` lacked the call parentheses and closed nothing.
with open("oc1.sql", "w") as database:
    dt = Datasets(False, False, "tanveer")
    for dataset in dt:
        # The first element of each item is used as the dataset name.
        print(f"Processing {dataset[0]:30s}", end=" ")
        accuracy, time_spent, nodes, leaves, depth = get_result(dataset[0])
        accuracy_mean, accuracy_std = mean(accuracy), stdev(accuracy)
        time_mean, time_std = mean(time_spent), stdev(time_spent)
        print(
            f"nodes={nodes:6.2f} leaves={leaves:6.2f} depth={depth:5.2f} "
            f"accuracy=[{accuracy_mean:05.3f}\u00B1{accuracy_std:05.3f}] "
            f"time=[{time_mean:05.3f}\u00B1{time_std:05.3f}] "
            f"elements=[{len(accuracy)} {len(time_spent)}]"
        )
        store_result(
            database, dataset[0], accuracy, time_spent, nodes, leaves, depth
        )
|