mirror of https://github.com/Doctorado-ML/benchmark.git
Add build exreport.csv
@@ -8,7 +8,7 @@ from tqdm import tqdm
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import StratifiedKFold, cross_validate
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from stree import Stree
 from Utils import Folders, Files
 
@@ -24,6 +24,8 @@ class Models:
             return Stree
         elif name == "Cart":
             return DecisionTreeClassifier
+        elif name == "ExtraTree":
+            return ExtraTreeClassifier
         else:
             msg = f"No model recognized {name}"
             if name == "Stree" or name == "stree":
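For context, a standalone sketch of the same name-to-class dispatch shown in this hunk (it is not the repository's Models class, and Stree is left out so the snippet only needs scikit-learn; the actual method name and structure of Models are not assumed):

# Standalone sketch, not the repository's Models class.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

def select_model(name):
    mapping = {"Cart": DecisionTreeClassifier, "ExtraTree": ExtraTreeClassifier}
    if name not in mapping:
        raise ValueError(f"No model recognized {name}")
    return mapping[name]

X, y = load_iris(return_X_y=True)
clf = select_model("ExtraTree")(random_state=0).fit(X, y)
print(clf.get_n_leaves())  # ExtraTree exposes the same tree API as Cart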
@@ -176,6 +178,20 @@ class Experiment:
         self.leaves = []
         self.depths = []
 
+    def _get_complexity(self, result):
+        if self.model_name == "Cart":
+            nodes = result.tree_.node_count
+            depth = result.tree_.max_depth
+            leaves = result.get_n_leaves()
+        elif self.model_name == "ExtraTree":
+            nodes = 0
+            leaves = result.get_n_leaves()
+            depth = 0
+        else:
+            nodes, leaves = result.nodes_leaves()
+            depth = result.depth_ if hasattr(result, "depth_") else 0
+        return nodes, leaves, depth
+
     def _n_fold_crossval(self, X, y, hyperparameters):
         if self.scores != []:
             raise ValueError("Must init experiment before!")
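The Cart and ExtraTree branches above read standard scikit-learn attributes, while the Stree branch relies on nodes_leaves() and depth_ from the stree package. A minimal standalone sketch of the scikit-learn side (iris data used only for illustration):

# Standalone sketch of the sklearn fields read by _get_complexity.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

X, y = load_iris(return_X_y=True)
cart = DecisionTreeClassifier(random_state=0).fit(X, y)
print(cart.tree_.node_count)  # all nodes, internal plus leaves
print(cart.tree_.max_depth)   # depth of the fitted tree
print(cart.get_n_leaves())    # leaves only

extra = ExtraTreeClassifier(random_state=0).fit(X, y)
print(extra.get_n_leaves())   # the only estimator field kept for ExtraTree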
@@ -201,17 +217,9 @@
         self.scores.append(res["test_score"])
         self.times.append(res["fit_time"])
         for result_item in res["estimator"]:
-            if self.model_name == "Cart":
-                nodes_item = result_item.tree_.node_count
-                depth_item = result_item.tree_.max_depth
-                leaves_item = result_item.get_n_leaves()
-            else:
-                nodes_item, leaves_item = result_item.nodes_leaves()
-                depth_item = (
-                    result_item.depth_
-                    if hasattr(result_item, "depth_")
-                    else 0
-                )
+            nodes_item, leaves_item, depth_item = self._get_complexity(
+                result_item
+            )
             self.nodes.append(nodes_item)
             self.leaves.append(leaves_item)
             self.depths.append(depth_item)
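The loop above iterates res["estimator"], which only exists when cross_validate is called with return_estimator=True; a standalone sketch of that pattern (not the Experiment class itself):

# Standalone sketch: per-fold estimators from cross_validate.
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
res = cross_validate(
    DecisionTreeClassifier(random_state=0),
    X,
    y,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
    return_estimator=True,  # exposes the fitted estimator of each fold
)
nodes = [est.tree_.node_count for est in res["estimator"]]
print(res["test_score"], res["fit_time"], nodes)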
@@ -1,7 +1,10 @@
 import os
 import json
 import abc
+import shutil
+import subprocess
 import xlsxwriter
+from tqdm import tqdm
 from Experiments import Datasets, BestResults
 from Utils import Folders, Files
 
@@ -201,7 +204,7 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model}" f" in any platform"
+            f" Report Best Accuracies with {self.model} in any platform"
         )
         self.header_line("*")
         print("")
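The removed line only worked because Python joins adjacent string literals at compile time; the new single f-string says the same thing directly. A standalone illustration:

model = "STree"  # illustrative value only
old = f" Report Best Accuracies with {model}" f" in any platform"
new = f" Report Best Accuracies with {model} in any platform"
assert old == new  # adjacent literals are concatenated into one string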
@@ -394,3 +397,77 @@ class SQL(BaseReport):
 
     def footer(self, accuracy):
         self.file.close()
+
+
+class Benchmark:
+    @staticmethod
+    def _process_dataset(results, data):
+        model = data["model"]
+        for record in data["results"]:
+            dataset = record["dataset"]
+            if (model, dataset) in results:
+                if record["accuracy"] > results[model, dataset]:
+                    results[model, dataset] = record["accuracy"]
+            else:
+                results[model, dataset] = record["accuracy"]
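A standalone sketch of the aggregation performed by _process_dataset, keeping the best accuracy per (model, dataset) pair; the model name, dataset names, and accuracies below are invented for illustration:

# Invented values; mirrors the result-file structure used above.
results = {}
data = {
    "model": "STree",
    "results": [
        {"dataset": "iris", "accuracy": 0.95},
        {"dataset": "iris", "accuracy": 0.97},  # higher, replaces 0.95
        {"dataset": "wine", "accuracy": 0.91},
    ],
}
for record in data["results"]:
    key = (data["model"], record["dataset"])
    if key not in results or record["accuracy"] > results[key]:
        results[key] = record["accuracy"]
print(results)  # {('STree', 'iris'): 0.97, ('STree', 'wine'): 0.91}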
+
+    @staticmethod
+    def compile_results():
+        # build Files.exreport
+        result_file_name = os.path.join(Folders.results, Files.exreport)
+        results = {}
+        init_suffix, end_suffix = Files.results_suffixes("")
+        all_files = list(os.walk(Folders.results))
+        for root, _, files in tqdm(all_files, desc="files"):
+            for name in files:
+                if name.startswith(init_suffix) and name.endswith(end_suffix):
+                    file_name = os.path.join(root, name)
+                    with open(file_name) as fp:
+                        data = json.load(fp)
+                        Benchmark._process_dataset(results, data)
+
+        with open(result_file_name, "w") as f:
+            f.write("classifier, dataset, accuracy\n")
+            for (model, dataset), accuracy in results.items():
+                f.write(f"{model}, {dataset}, {accuracy}\n")
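With the invented values from the sketch above, the generated Files.exreport CSV would contain one row per best (classifier, dataset) pair, for example:

classifier, dataset, accuracy
STree, iris, 0.97
STree, wine, 0.91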
+
+    @staticmethod
+    def report():
+        def end_message(message, file):
+            length = 100
+            print("*" * length)
+            print(message)
+            print("*" * length)
+            with open(os.path.join(Folders.results, file)) as f:
+                data = f.read().splitlines()
+            for line in data:
+                print(line)
+
+        def is_exe(fpath):
+            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+        # Remove previous results
+        try:
+            shutil.rmtree(Folders.report)
+            os.remove(Files.exreport_pdf)
+        except FileNotFoundError:
+            pass
+        except OSError as e:
+            print("Error: %s : %s" % (Folders.report, e.strerror))
+        # Compute Friedman & Holm Tests
+        fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
+        ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
+        result = subprocess.run(
+            ["Rscript", os.path.join(Folders.src, "benchmark.r")],
+            stdout=fout,
+            stderr=ferr,
+        )
+        fout.close()
+        ferr.close()
+        if result.returncode != 0:
+            end_message("Error computing benchmark", Files.exreport_err)
+        else:
+            end_message("Benchmark Ok", Files.exreport_output)
+
+        if is_exe(Files.cmd_open):
+            subprocess.run([Files.cmd_open, Files.exreport_pdf])
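As a design note only: fout and ferr above are opened and closed by hand; the same Rscript call can be written with context managers so the log files are closed even if subprocess.run raises. The sketch below is a possible rewrite of those lines and relies on the module's existing imports and on the Folders/Files constants shown in this commit:

# Sketch only; same call as above, with the files managed by `with`.
out_path = os.path.join(Folders.results, Files.exreport_output)
err_path = os.path.join(Folders.results, Files.exreport_err)
with open(out_path, "w") as fout, open(err_path, "w") as ferr:
    result = subprocess.run(
        ["Rscript", os.path.join(Folders.src, "benchmark.r")],
        stdout=fout,
        stderr=ferr,
    )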
@@ -10,6 +10,7 @@ class Folders:
 
 class Files:
     index = "all.txt"
+    exreport = "exreport.csv"
     exreport_output = "exreport.txt"
     exreport_err = "exreport_err.txt"
     cmd_open = "/usr/bin/open"
@@ -25,7 +26,10 @@ class Files:
 
     @staticmethod
     def results_suffixes(model):
-        return f"results_{model}_", ".json"
+        if model == "":
+            return "results_", ".json"
+        else:
+            return f"results_{model}_", ".json"
 
     @staticmethod
     def dataset(name):
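A standalone sketch of how these suffixes drive the file matching in Benchmark.compile_results; the prefix and extension are the values results_suffixes("") returns above, and the file names are invented for illustration:

# Invented file names; shows the startswith/endswith filter used above.
prefix, suffix = "results_", ".json"
names = [
    "results_STree_iris.json",   # kept
    "best_results_STree.json",   # rejected: wrong prefix
    "results_STree_iris.csv",    # rejected: wrong extension
]
print([n for n in names if n.startswith(prefix) and n.endswith(suffix)])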
@@ -1,46 +1,5 @@
-import os
-import shutil
-import subprocess
-from Utils import Files, Folders
+from Results import Benchmark
 
-
-def end_message(message, file):
-    length = 100
-    print("*" * length)
-    print(message)
-    print("*" * length)
-    with open(os.path.join(Folders.results, file)) as f:
-        data = f.read().splitlines()
-    for line in data:
-        print(line)
-
-
-def is_exe(fpath):
-    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
-
-
-# Remove previous results
-try:
-    shutil.rmtree(Folders.report)
-    os.remove(Files.exreport_pdf)
-except FileNotFoundError:
-    pass
-except OSError as e:
-    print("Error: %s : %s" % (Folders.report, e.strerror))
-# Compute Friedman & Holm Tests
-fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
-ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
-result = subprocess.run(
-    ["Rscript", os.path.join(Folders.src, "benchmark.r")],
-    stdout=fout,
-    stderr=ferr,
-)
-fout.close()
-ferr.close()
-if result.returncode != 0:
-    end_message("Error computing benchmark", Files.exreport_err)
-else:
-    end_message("Benchmark Ok", Files.exreport_output)
-
-if is_exe(Files.cmd_open):
-    subprocess.run([Files.cmd_open, Files.exreport_pdf])
+benchmark = Benchmark()
+benchmark.compile_results()
+benchmark.report()