Add build exreport.csv

2021-09-24 17:07:40 +02:00
parent 2fc188adca
commit d630dfaeab
4 changed files with 107 additions and 59 deletions

Experiments.py

@@ -8,7 +8,7 @@ from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_validate
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from stree import Stree
from Utils import Folders, Files
@@ -24,6 +24,8 @@ class Models:
            return Stree
        elif name == "Cart":
            return DecisionTreeClassifier
+        elif name == "ExtraTree":
+            return ExtraTreeClassifier
        else:
            msg = f"No model recognized {name}"
            if name == "Stree" or name == "stree":
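For reference, a minimal standalone sketch of the dispatcher pattern this hunk extends; the function name get_model and the error handling are assumptions for illustration (the hunk only shows the method body), and Stree is omitted so the sketch runs with scikit-learn alone:

    from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

    def get_model(name):
        # name -> classifier class, mirroring the mapping in the hunk above
        if name == "Cart":
            return DecisionTreeClassifier
        elif name == "ExtraTree":
            return ExtraTreeClassifier
        raise ValueError(f"No model recognized {name}")

    clf = get_model("ExtraTree")(random_state=0)  # returns the class, then instantiate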
@@ -176,6 +178,20 @@ class Experiment:
        self.leaves = []
        self.depths = []
+    def _get_complexity(self, result):
+        if self.model_name == "Cart":
+            nodes = result.tree_.node_count
+            depth = result.tree_.max_depth
+            leaves = result.get_n_leaves()
+        elif self.model_name == "ExtraTree":
+            # only the leaf count is recorded for ExtraTree
+            nodes = 0
+            leaves = result.get_n_leaves()
+            depth = 0
+        else:
+            nodes, leaves = result.nodes_leaves()
+            depth = result.depth_ if hasattr(result, "depth_") else 0
+        return nodes, leaves, depth
    def _n_fold_crossval(self, X, y, hyperparameters):
        if self.scores != []:
            raise ValueError("Must init experiment before!")
@@ -201,17 +217,9 @@ class Experiment:
        self.scores.append(res["test_score"])
        self.times.append(res["fit_time"])
        for result_item in res["estimator"]:
-            if self.model_name == "Cart":
-                nodes_item = result_item.tree_.node_count
-                depth_item = result_item.tree_.max_depth
-                leaves_item = result_item.get_n_leaves()
-            else:
-                nodes_item, leaves_item = result_item.nodes_leaves()
-                depth_item = (
-                    result_item.depth_
-                    if hasattr(result_item, "depth_")
-                    else 0
-                )
+            nodes_item, leaves_item, depth_item = self._get_complexity(
+                result_item
+            )
            self.nodes.append(nodes_item)
            self.leaves.append(leaves_item)
            self.depths.append(depth_item)
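For context, a minimal sketch of the pattern _n_fold_crossval relies on: cross_validate(..., return_estimator=True) returns each fold's fitted estimator, whose complexity _get_complexity then reads. The dataset and classifier below are illustrative only, not part of the commit:

    from sklearn.datasets import load_iris
    from sklearn.model_selection import StratifiedKFold, cross_validate
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    res = cross_validate(
        DecisionTreeClassifier(random_state=0),
        X,
        y,
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
        return_estimator=True,  # expose the fitted trees, as the loop above expects
    )
    for clf in res["estimator"]:
        # the same attributes _get_complexity reads for the "Cart" model
        print(clf.tree_.node_count, clf.get_n_leaves(), clf.tree_.max_depth)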

Results.py

@@ -1,7 +1,10 @@
import os
import json
import abc
+import shutil
+import subprocess
import xlsxwriter
+from tqdm import tqdm

from Experiments import Datasets, BestResults
from Utils import Folders, Files, Symbols
@@ -201,7 +204,7 @@ class ReportBest(BaseReport):
    def header(self):
        self.header_line("*")
        self.header_line(
-            f" Report Best Accuracies with {self.model}" f" in any platform"
+            f" Report Best Accuracies with {self.model} in any platform"
        )
        self.header_line("*")
        print("")
@@ -394,3 +397,77 @@ class SQL(BaseReport):
    def footer(self, accuracy):
        self.file.close()
+class Benchmark:
+    @staticmethod
+    def _process_dataset(results, data):
+        model = data["model"]
+        for record in data["results"]:
+            dataset = record["dataset"]
+            if (model, dataset) in results:
+                if record["accuracy"] > results[model, dataset]:
+                    results[model, dataset] = record["accuracy"]
+            else:
+                results[model, dataset] = record["accuracy"]
+
+    @staticmethod
+    def compile_results():
+        # build Files.exreport
+        result_file_name = os.path.join(Folders.results, Files.exreport)
+        results = {}
+        init_suffix, end_suffix = Files.results_suffixes("")
+        all_files = list(os.walk(Folders.results))
+        for root, _, files in tqdm(all_files, desc="files"):
+            for name in files:
+                if name.startswith(init_suffix) and name.endswith(end_suffix):
+                    file_name = os.path.join(root, name)
+                    with open(file_name) as fp:
+                        data = json.load(fp)
+                    Benchmark._process_dataset(results, data)
+        with open(result_file_name, "w") as f:
+            f.write("classifier, dataset, accuracy\n")
+            for (model, dataset), accuracy in results.items():
+                f.write(f"{model}, {dataset}, {accuracy}\n")
+
+    @staticmethod
+    def report():
+        def end_message(message, file):
+            length = 100
+            print("*" * length)
+            print(message)
+            print("*" * length)
+            with open(os.path.join(Folders.results, file)) as f:
+                data = f.read().splitlines()
+            for line in data:
+                print(line)
+
+        def is_exe(fpath):
+            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+        # Remove previous results
+        try:
+            shutil.rmtree(Folders.report)
+            os.remove(Files.exreport_pdf)
+        except FileNotFoundError:
+            pass
+        except OSError as e:
+            print("Error: %s : %s" % (Folders.report, e.strerror))
+        # Compute Friedman & Holm Tests
+        fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
+        ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
+        result = subprocess.run(
+            ["Rscript", os.path.join(Folders.src, "benchmark.r")],
+            stdout=fout,
+            stderr=ferr,
+        )
+        fout.close()
+        ferr.close()
+        if result.returncode != 0:
+            end_message("Error computing benchmark", Files.exreport_err)
+        else:
+            end_message("Benchmark Ok", Files.exreport_output)
+            if is_exe(Files.cmd_open):
+                subprocess.run([Files.cmd_open, Files.exreport_pdf])
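A minimal sketch of the best-accuracy bookkeeping _process_dataset performs; the inline record mirrors the JSON shape the code reads (model, results, dataset, accuracy), with made-up values:

    results = {}
    data = {
        "model": "Stree",
        "results": [
            {"dataset": "iris", "accuracy": 0.95},
            {"dataset": "iris", "accuracy": 0.97},  # better run replaces the first
            {"dataset": "wine", "accuracy": 0.90},
        ],
    }
    model = data["model"]
    for record in data["results"]:
        key = (model, record["dataset"])
        # keep only the best accuracy per (model, dataset) pair
        if key not in results or record["accuracy"] > results[key]:
            results[key] = record["accuracy"]
    print(results)  # {('Stree', 'iris'): 0.97, ('Stree', 'wine'): 0.9}

Each surviving (classifier, dataset, accuracy) triple then becomes one row of exreport.csv under the header written by compile_results.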

Utils.py

@@ -10,6 +10,7 @@ class Folders:
class Files:
    index = "all.txt"
+    exreport = "exreport.csv"
    exreport_output = "exreport.txt"
    exreport_err = "exreport_err.txt"
    cmd_open = "/usr/bin/open"
@@ -25,7 +26,10 @@ class Files:
    @staticmethod
    def results_suffixes(model):
-        return f"results_{model}_", ".json"
+        if model == "":
+            return "results_", ".json"
+        else:
+            return f"results_{model}_", ".json"

    @staticmethod
    def dataset(name):

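The new branch matters because the old code returned the prefix "results__" for an empty model name, which matches no files; with the fix, Benchmark.compile_results can call results_suffixes("") to scan every model's result files. A runnable sketch of the updated logic:

    def results_suffixes(model):
        # same logic as the updated Files.results_suffixes
        if model == "":
            return "results_", ".json"
        return f"results_{model}_", ".json"

    assert results_suffixes("") == ("results_", ".json")  # matches any model's files
    assert results_suffixes("Stree") == ("results_Stree_", ".json")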
View File

@@ -1,46 +1,5 @@
-import os
-import shutil
-import subprocess
-
-from Utils import Files, Folders
-
-
-def end_message(message, file):
-    length = 100
-    print("*" * length)
-    print(message)
-    print("*" * length)
-    with open(os.path.join(Folders.results, file)) as f:
-        data = f.read().splitlines()
-    for line in data:
-        print(line)
-
-
-def is_exe(fpath):
-    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
-
-
-# Remove previous results
-try:
-    shutil.rmtree(Folders.report)
-    os.remove(Files.exreport_pdf)
-except FileNotFoundError:
-    pass
-except OSError as e:
-    print("Error: %s : %s" % (Folders.report, e.strerror))
-# Compute Friedman & Holm Tests
-fout = open(os.path.join(Folders.results, Files.exreport_output), "w")
-ferr = open(os.path.join(Folders.results, Files.exreport_err), "w")
-result = subprocess.run(
-    ["Rscript", os.path.join(Folders.src, "benchmark.r")],
-    stdout=fout,
-    stderr=ferr,
-)
-fout.close()
-ferr.close()
-if result.returncode != 0:
-    end_message("Error computing benchmark", Files.exreport_err)
-else:
-    end_message("Benchmark Ok", Files.exreport_output)
-    if is_exe(Files.cmd_open):
-        subprocess.run([Files.cmd_open, Files.exreport_pdf])
+from Results import Benchmark
+
+benchmark = Benchmark()
+benchmark.compile_results()
+benchmark.report()