Add build exreport.csv

2021-09-24 17:07:40 +02:00
parent 2fc188adca
commit d630dfaeab
4 changed files with 107 additions and 59 deletions

View File

@@ -8,7 +8,7 @@ from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from stree import Stree
from Utils import Folders, Files
@@ -24,6 +24,8 @@ class Models:
            return Stree
        elif name == "Cart":
            return DecisionTreeClassifier
        elif name == "ExtraTree":
            return ExtraTreeClassifier
        else:
            msg = f"No model recognized {name}"
            if name == "Stree" or name == "stree":
@@ -176,6 +178,20 @@ class Experiment:
        self.leaves = []
        self.depths = []

    def _get_complexity(self, result):
        # Return the complexity (nodes, leaves, depth) of a fitted estimator
        if self.model_name == "Cart":
            nodes = result.tree_.node_count
            depth = result.tree_.max_depth
            leaves = result.get_n_leaves()
        elif self.model_name == "ExtraTree":
            nodes = 0
            leaves = result.get_n_leaves()
            depth = 0
        else:
            nodes, leaves = result.nodes_leaves()
            depth = result.depth_ if hasattr(result, "depth_") else 0
        return nodes, leaves, depth

    def _n_fold_crossval(self, X, y, hyperparameters):
        if self.scores != []:
            raise ValueError("Must init experiment before!")
@@ -201,17 +217,9 @@ class Experiment:
            self.scores.append(res["test_score"])
            self.times.append(res["fit_time"])
            for result_item in res["estimator"]:
                if self.model_name == "Cart":
                    nodes_item = result_item.tree_.node_count
                    depth_item = result_item.tree_.max_depth
                    leaves_item = result_item.get_n_leaves()
                else:
                    nodes_item, leaves_item = result_item.nodes_leaves()
                    depth_item = (
                        result_item.depth_
                        if hasattr(result_item, "depth_")
                        else 0
                    )
                nodes_item, leaves_item, depth_item = self._get_complexity(
                    result_item
                )
                self.nodes.append(nodes_item)
                self.leaves.append(leaves_item)
                self.depths.append(depth_item)
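
As an aside (not part of the commit), here is a minimal sketch of where these complexity figures come from for the scikit-learn trees handled above; the iris toy data and variable names are illustrative only, and Stree is assumed to expose nodes_leaves() and depth_ exactly as the helper relies on:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

X, y = load_iris(return_X_y=True)
for Model in (DecisionTreeClassifier, ExtraTreeClassifier):
    clf = Model(random_state=0).fit(X, y)
    # tree_.node_count counts internal nodes plus leaves, get_n_leaves()
    # only the leaves, and tree_.max_depth the depth of the fitted tree
    print(
        Model.__name__,
        clf.tree_.node_count,
        clf.get_n_leaves(),
        clf.tree_.max_depth,
    )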

View File

@@ -1,7 +1,10 @@
import os
import json
import abc
import shutil
import subprocess
import xlsxwriter
from tqdm import tqdm
from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols
@@ -201,7 +204,7 @@ class ReportBest(BaseReport):
    def header(self):
        self.header_line("*")
        self.header_line(
            f" Report Best Accuracies with {self.model}" f" in any platform"
            f" Report Best Accuracies with {self.model} in any platform"
        )
        self.header_line("*")
        print("")
@@ -394,3 +397,77 @@ class SQL(BaseReport):
    def footer(self, accuracy):
        self.file.close()


class Benchmark:
    @staticmethod
    def _process_dataset(results, data):
        model = data["model"]
        for record in data["results"]:
            dataset = record["dataset"]
            if (model, dataset) in results:
                if record["accuracy"] > results[model, dataset]:
                    results[model, dataset] = record["accuracy"]
            else:
                results[model, dataset] = record["accuracy"]

    @staticmethod
    def compile_results():
        # build Files.exreport
        result_file_name = os.path.join(Folders.results, Files.exreport)
        results = {}
        init_suffix, end_suffix = Files.results_suffixes("")
        all_files = list(os.walk(Folders.results))
        for root, _, files in tqdm(all_files, desc="files"):
            for name in files:
                if name.startswith(init_suffix) and name.endswith(end_suffix):
                    file_name = os.path.join(root, name)
                    with open(file_name) as fp:
                        data = json.load(fp)
                    Benchmark._process_dataset(results, data)
        with open(result_file_name, "w") as f:
            f.write("classifier, dataset, accuracy\n")
            for (model, dataset), accuracy in results.items():
                f.write(f"{model}, {dataset}, {accuracy}\n")

    @staticmethod
    def report():
        def end_message(message, file):
            length = 100
            print("*" * length)
            print(message)
            print("*" * length)
            with open(os.path.join(Folders.results, file)) as f:
                data = f.read().splitlines()
            for line in data:
                print(line)

        def is_exe(fpath):
            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

        # Remove previous results
        try:
            shutil.rmtree(Folders.report)
            os.remove(Files.exreport_pdf)
        except FileNotFoundError:
            pass
        except OSError as e:
            print("Error: %s : %s" % (Folders.report, e.strerror))
        # Compute Friedman & Holm Tests
        fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
        ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
        result = subprocess.run(
            ["Rscript", os.path.join(Folders.src, "benchmark.r")],
            stdout=fout,
            stderr=ferr,
        )
        fout.close()
        ferr.close()
        if result.returncode != 0:
            end_message("Error computing benchmark", Files.exreport_err)
        else:
            end_message("Benchmark Ok", Files.exreport_output)
        if is_exe(Files.cmd_open):
            subprocess.run([Files.cmd_open, Files.exreport_pdf])
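
For reference, a small sketch of what the aggregation above produces. The payload below is made up, but it uses the same keys ("model", "results", "dataset", "accuracy") that _process_dataset reads, and the commented CSV lines mirror what compile_results writes to Files.exreport:

from Results import Benchmark

results = {}
data = {
    "model": "Stree",
    "results": [
        {"dataset": "iris", "accuracy": 0.95},
        {"dataset": "iris", "accuracy": 0.97},  # best score per pair is kept
        {"dataset": "wine", "accuracy": 0.91},
    ],
}
Benchmark._process_dataset(results, data)
# results == {("Stree", "iris"): 0.97, ("Stree", "wine"): 0.91}
# compile_results() would then emit:
#   classifier, dataset, accuracy
#   Stree, iris, 0.97
#   Stree, wine, 0.91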

View File

@@ -10,6 +10,7 @@ class Folders:
class Files:
index = "all.txt"
exreport = "exreport.csv"
exreport_output = "exreport.txt"
exreport_err = "exreport_err.txt"
cmd_open = "/usr/bin/open"
@@ -25,7 +26,10 @@ class Files:
    @staticmethod
    def results_suffixes(model):
        return f"results_{model}_", ".json"
        if model == "":
            return "results_", ".json"
        else:
            return f"results_{model}_", ".json"

    @staticmethod
    def dataset(name):
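
A short illustration of the new empty-model case; the file name below is hypothetical, only the prefix/suffix matching mirrors how compile_results selects result files:

from Utils import Files

prefix, suffix = Files.results_suffixes("")        # ("results_", ".json")
name = "results_Stree_something.json"              # made-up file name
print(name.startswith(prefix) and name.endswith(suffix))  # True
print(Files.results_suffixes("Stree"))             # ("results_Stree_", ".json")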

View File

@@ -1,46 +1,5 @@
import os
import shutil
import subprocess
from Utils import Files, Folders
from Results import Benchmark


def end_message(message, file):
    length = 100
    print("*" * length)
    print(message)
    print("*" * length)
    with open(os.path.join(Folders.results, file)) as f:
        data = f.read().splitlines()
    for line in data:
        print(line)


def is_exe(fpath):
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)


# Remove previous results
try:
    shutil.rmtree(Folders.report)
    os.remove(Files.exreport_pdf)
except FileNotFoundError:
    pass
except OSError as e:
    print("Error: %s : %s" % (Folders.report, e.strerror))
# Compute Friedman & Holm Tests
fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
result = subprocess.run(
    ["Rscript", os.path.join(Folders.src, "benchmark.r")],
    stdout=fout,
    stderr=ferr,
)
fout.close()
ferr.close()
if result.returncode != 0:
    end_message("Error computing benchmark", Files.exreport_err)
else:
    end_message("Benchmark Ok", Files.exreport_output)
if is_exe(Files.cmd_open):
    subprocess.run([Files.cmd_open, Files.exreport_pdf])

benchmark = Benchmark()
benchmark.compile_results()
benchmark.report()