Mirror of https://github.com/Doctorado-ML/benchmark.git, synced 2025-08-17 00:15:55 +00:00
Add build exreport.csv
@@ -8,7 +8,7 @@ from tqdm import tqdm
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import StratifiedKFold, cross_validate
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from stree import Stree
 from Utils import Folders, Files
 
@@ -24,6 +24,8 @@ class Models:
             return Stree
         elif name == "Cart":
             return DecisionTreeClassifier
+        elif name == "ExtraTree":
+            return ExtraTreeClassifier
         else:
             msg = f"No model recognized {name}"
         if name == "Stree" or name == "stree":
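
For context, a minimal runnable sketch of the factory this hunk extends. The method's signature lies outside the hunk, so `get_model` is a hypothetical stand-in name, and the `raise` completes the `msg = ...` line whose follow-up is not shown:

    from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier


    def get_model(name):  # hypothetical name; the real signature is not in the hunk
        if name == "Stree" or name == "stree":
            from stree import Stree  # project dependency, imported lazily here
            return Stree
        elif name == "Cart":
            return DecisionTreeClassifier
        elif name == "ExtraTree":
            return ExtraTreeClassifier
        else:
            # the hunk only shows msg being built; raising it is assumed
            raise ValueError(f"No model recognized {name}")


    print(get_model("ExtraTree")())  # an unfitted ExtraTreeClassifier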
@@ -176,6 +178,20 @@ class Experiment:
         self.leaves = []
         self.depths = []
 
+    def _get_complexity(self, result):
+        if self.model_name == "Cart":
+            nodes = result.tree_.node_count
+            depth = result.tree_.max_depth
+            leaves = result.get_n_leaves()
+        elif self.model_name == "ExtraTree":
+            nodes = 0
+            leaves = result.get_n_leaves()
+            depth = 0
+        else:
+            nodes, leaves = result.nodes_leaves()
+            depth = result.depth_ if hasattr(result, "depth_") else 0
+        return nodes, leaves, depth
+
     def _n_fold_crossval(self, X, y, hyperparameters):
         if self.scores != []:
             raise ValueError("Must init experiment before!")
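
The new `_get_complexity` helper centralizes the per-model bookkeeping: the Cart branch reads scikit-learn's fitted-tree attributes, the ExtraTree branch keeps only the leaf count, and the `else` branch falls back to Stree's own `nodes_leaves()` and `depth_`. The two name checks must be mutually exclusive (`elif`), otherwise a Cart estimator would fall through to the `else` branch and hit `nodes_leaves()`, which `DecisionTreeClassifier` does not provide. A quick check of the scikit-learn attributes the first two branches rely on:

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

    X, y = load_iris(return_X_y=True)

    cart = DecisionTreeClassifier(random_state=0).fit(X, y)
    # Attributes read by the "Cart" branch:
    print(cart.tree_.node_count, cart.get_n_leaves(), cart.tree_.max_depth)

    extra = ExtraTreeClassifier(random_state=0).fit(X, y)
    # The "ExtraTree" branch keeps only the leaf count, reporting 0 for
    # nodes and depth:
    print(0, extra.get_n_leaves(), 0)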
@@ -201,17 +217,9 @@ class Experiment:
         self.scores.append(res["test_score"])
         self.times.append(res["fit_time"])
         for result_item in res["estimator"]:
-            if self.model_name == "Cart":
-                nodes_item = result_item.tree_.node_count
-                depth_item = result_item.tree_.max_depth
-                leaves_item = result_item.get_n_leaves()
-            else:
-                nodes_item, leaves_item = result_item.nodes_leaves()
-                depth_item = (
-                    result_item.depth_
-                    if hasattr(result_item, "depth_")
-                    else 0
-                )
+            nodes_item, leaves_item, depth_item = self._get_complexity(
+                result_item
+            )
             self.nodes.append(nodes_item)
             self.leaves.append(leaves_item)
             self.depths.append(depth_item)
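
The loop consumes `res["estimator"]`, which `cross_validate` only returns when called with `return_estimator=True`. A minimal sketch of that call shape (the dataset and splitter parameters are illustrative):

    from sklearn.datasets import load_iris
    from sklearn.model_selection import StratifiedKFold, cross_validate
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    res = cross_validate(
        DecisionTreeClassifier(random_state=0),
        X,
        y,
        cv=kfold,
        return_estimator=True,  # required for res["estimator"] below
    )
    # One fitted estimator per fold, as iterated by the loop above:
    for est in res["estimator"]:
        print(est.tree_.node_count, est.get_n_leaves(), est.tree_.max_depth)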
@@ -1,7 +1,10 @@
 import os
 import json
 import abc
+import shutil
+import subprocess
 import xlsxwriter
+from tqdm import tqdm
 from Experiments import Datasets, BestResults
 from Utils import Folders, Files, Symbols
 
@@ -201,7 +204,7 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model}" f" in any platform"
+            f" Report Best Accuracies with {self.model} in any platform"
         )
         self.header_line("*")
         print("")
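
The old line relied on implicit concatenation of two adjacent f-string literals (a formatter leftover); merging them is purely cosmetic:

    model = "STree"
    old = f" Report Best Accuracies with {model}" f" in any platform"
    new = f" Report Best Accuracies with {model} in any platform"
    assert old == new  # adjacent string literals concatenate at parse time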
@@ -394,3 +397,77 @@ class SQL(BaseReport):
 
     def footer(self, accuracy):
         self.file.close()
+
+
+class Benchmark:
+    @staticmethod
+    def _process_dataset(results, data):
+        model = data["model"]
+        for record in data["results"]:
+            dataset = record["dataset"]
+            if (model, dataset) in results:
+                if record["accuracy"] > results[model, dataset]:
+                    results[model, dataset] = record["accuracy"]
+            else:
+                results[model, dataset] = record["accuracy"]
+
+    @staticmethod
+    def compile_results():
+        # build Files.exreport
+        result_file_name = os.path.join(Folders.results, Files.exreport)
+        results = {}
+        init_suffix, end_suffix = Files.results_suffixes("")
+        all_files = list(os.walk(Folders.results))
+        for root, _, files in tqdm(all_files, desc="files"):
+            for name in files:
+                if name.startswith(init_suffix) and name.endswith(end_suffix):
+                    file_name = os.path.join(root, name)
+                    with open(file_name) as fp:
+                        data = json.load(fp)
+                    Benchmark._process_dataset(results, data)
+
+        with open(result_file_name, "w") as f:
+            f.write("classifier, dataset, accuracy\n")
+            for (model, dataset), accuracy in results.items():
+                f.write(f"{model}, {dataset}, {accuracy}\n")
+
+    @staticmethod
+    def report():
+        def end_message(message, file):
+            length = 100
+            print("*" * length)
+            print(message)
+            print("*" * length)
+            with open(os.path.join(Folders.results, file)) as f:
+                data = f.read().splitlines()
+            for line in data:
+                print(line)
+
+        def is_exe(fpath):
+            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+        # Remove previous results
+        try:
+            shutil.rmtree(Folders.report)
+            os.remove(Files.exreport_pdf)
+        except FileNotFoundError:
+            pass
+        except OSError as e:
+            print("Error: %s : %s" % (Folders.report, e.strerror))
+        # Compute Friedman & Holm Tests
+        fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
+        ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
+        result = subprocess.run(
+            ["Rscript", os.path.join(Folders.src, "benchmark.r")],
+            stdout=fout,
+            stderr=ferr,
+        )
+        fout.close()
+        ferr.close()
+        if result.returncode != 0:
+            end_message("Error computing benchmark", Files.exreport_err)
+        else:
+            end_message("Benchmark Ok", Files.exreport_output)
+
+        if is_exe(Files.cmd_open):
+            subprocess.run([Files.cmd_open, Files.exreport_pdf])
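
This file must be the Results module, since the rewritten entry-point script below imports `Benchmark` from `Results`. `compile_results` assumes each matching JSON file carries a `model` key and a `results` list of `dataset`/`accuracy` records; that shape is inferred from `_process_dataset`, not documented elsewhere in the diff. A toy run of its best-accuracy reduction:

    results = {}
    data = {  # hypothetical result file contents
        "model": "Stree",
        "results": [
            {"dataset": "iris", "accuracy": 0.95},
            {"dataset": "iris", "accuracy": 0.97},  # better score wins
            {"dataset": "wine", "accuracy": 0.91},
        ],
    }
    model = data["model"]
    for record in data["results"]:
        key = (model, record["dataset"])
        # Equivalent to the nested if/else: keep the best accuracy seen.
        if key not in results or record["accuracy"] > results[key]:
            results[key] = record["accuracy"]
    print(results)  # {('Stree', 'iris'): 0.97, ('Stree', 'wine'): 0.91}

In `report`, the `fout`/`ferr` handles could equally be opened in a `with` block, which would close them even if `subprocess.run` raised.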
@@ -10,6 +10,7 @@ class Folders:
 
 class Files:
     index = "all.txt"
+    exreport = "exreport.csv"
     exreport_output = "exreport.txt"
     exreport_err = "exreport_err.txt"
     cmd_open = "/usr/bin/open"
@@ -25,7 +26,10 @@ class Files:
 
     @staticmethod
    def results_suffixes(model):
-        return f"results_{model}_", ".json"
+        if model == "":
+            return "results_", ".json"
+        else:
+            return f"results_{model}_", ".json"
 
     @staticmethod
     def dataset(name):
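
These two hunks belong to the Utils module (`from Utils import Folders, Files`). The empty-model case exists for `Benchmark.compile_results`, which calls `results_suffixes("")` to match every model's result files; without it the prefix would be `results__` (double underscore) and match nothing. A small demonstration with hypothetical file names:

    def results_suffixes(model):  # copy of the updated helper
        if model == "":
            return "results_", ".json"
        else:
            return f"results_{model}_", ".json"

    init_suffix, end_suffix = results_suffixes("")  # matches every model
    for name in ("results_Stree_iris.json", "results_Cart_wine.json"):
        print(name, name.startswith(init_suffix) and name.endswith(end_suffix))
    # both print True; without the special case the prefix would be
    # "results__" and neither name would match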
@@ -1,46 +1,5 @@
-import os
-import shutil
-import subprocess
-from Utils import Files, Folders
-
-
-def end_message(message, file):
-    length = 100
-    print("*" * length)
-    print(message)
-    print("*" * length)
-    with open(os.path.join(Folders.results, file)) as f:
-        data = f.read().splitlines()
-    for line in data:
-        print(line)
-
-
-def is_exe(fpath):
-    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
-
-
-# Remove previous results
-try:
-    shutil.rmtree(Folders.report)
-    os.remove(Files.exreport_pdf)
-except FileNotFoundError:
-    pass
-except OSError as e:
-    print("Error: %s : %s" % (Folders.report, e.strerror))
-# Compute Friedman & Holm Tests
-fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
-ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
-result = subprocess.run(
-    ["Rscript", os.path.join(Folders.src, "benchmark.r")],
-    stdout=fout,
-    stderr=ferr,
-)
-fout.close()
-ferr.close()
-if result.returncode != 0:
-    end_message("Error computing benchmark", Files.exreport_err)
-else:
-    end_message("Benchmark Ok", Files.exreport_output)
-
-if is_exe(Files.cmd_open):
-    subprocess.run([Files.cmd_open, Files.exreport_pdf])
+from Results import Benchmark
+
+benchmark = Benchmark()
+benchmark.compile_results()
+benchmark.report()
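
With this change the entry-point script reduces to the two `Benchmark` calls: `compile_results` merges every `results_*.json` under `Folders.results` into `exreport.csv`, and `report` runs `benchmark.r` for the Friedman and Holm tests (logging to `exreport.txt` and `exreport_err.txt`), prints the relevant log, and opens the generated PDF when `/usr/bin/open` is available.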