diff --git a/src/Experiments.py b/src/Experiments.py
index e41c826..a1f6c2e 100644
--- a/src/Experiments.py
+++ b/src/Experiments.py
@@ -8,7 +8,7 @@ from tqdm import tqdm
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import StratifiedKFold, cross_validate
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
 from stree import Stree
 from Utils import Folders, Files
 
@@ -24,6 +24,8 @@ class Models:
             return Stree
         elif name == "Cart":
             return DecisionTreeClassifier
+        elif name == "ExtraTree":
+            return ExtraTreeClassifier
         else:
             msg = f"No model recognized {name}"
             if name == "Stree" or name == "stree":
@@ -176,6 +178,22 @@
         self.leaves = []
         self.depths = []
 
+    def _get_complexity(self, result):
+        # Extract (nodes, leaves, depth) from a fitted estimator; each
+        # model family exposes its complexity through a different API.
+        if self.model_name == "Cart":
+            nodes = result.tree_.node_count
+            depth = result.tree_.max_depth
+            leaves = result.get_n_leaves()
+        elif self.model_name == "ExtraTree":
+            nodes = 0  # node count and depth are not reported here
+            leaves = result.get_n_leaves()
+            depth = 0
+        else:
+            nodes, leaves = result.nodes_leaves()
+            depth = result.depth_ if hasattr(result, "depth_") else 0
+        return nodes, leaves, depth
+
     def _n_fold_crossval(self, X, y, hyperparameters):
         if self.scores != []:
             raise ValueError("Must init experiment before!")
@@ -201,17 +219,9 @@
         self.scores.append(res["test_score"])
         self.times.append(res["fit_time"])
         for result_item in res["estimator"]:
-            if self.model_name == "Cart":
-                nodes_item = result_item.tree_.node_count
-                depth_item = result_item.tree_.max_depth
-                leaves_item = result_item.get_n_leaves()
-            else:
-                nodes_item, leaves_item = result_item.nodes_leaves()
-                depth_item = (
-                    result_item.depth_
-                    if hasattr(result_item, "depth_")
-                    else 0
-                )
+            nodes_item, leaves_item, depth_item = self._get_complexity(
+                result_item
+            )
             self.nodes.append(nodes_item)
             self.leaves.append(leaves_item)
             self.depths.append(depth_item)
diff --git a/src/Results.py b/src/Results.py
index 897fc7a..eddca0d 100644
--- a/src/Results.py
+++ b/src/Results.py
@@ -1,7 +1,10 @@
 import os
 import json
 import abc
+import shutil
+import subprocess
 import xlsxwriter
+from tqdm import tqdm
 from Experiments import Datasets, BestResults
 from Utils import Folders, Files, Symbols
 
@@ -201,7 +204,7 @@
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model}" f" in any platform"
+            f" Report Best Accuracies with {self.model} in any platform"
         )
         self.header_line("*")
         print("")
@@ -394,3 +397,79 @@ class SQL(BaseReport):
 
     def footer(self, accuracy):
         self.file.close()
+
+
+class Benchmark:
+    @staticmethod
+    def _process_dataset(results, data):
+        # Keep only the best accuracy seen for each (model, dataset) pair
+        model = data["model"]
+        for record in data["results"]:
+            dataset = record["dataset"]
+            if (model, dataset) in results:
+                if record["accuracy"] > results[model, dataset]:
+                    results[model, dataset] = record["accuracy"]
+            else:
+                results[model, dataset] = record["accuracy"]
+
+    @staticmethod
+    def compile_results():
+        # Build Files.exreport as a CSV with the best score per pair
+        result_file_name = os.path.join(Folders.results, Files.exreport)
+        results = {}
+        init_suffix, end_suffix = Files.results_suffixes("")
+        all_files = list(os.walk(Folders.results))
+        for root, _, files in tqdm(all_files, desc="files"):
+            for name in files:
+                if name.startswith(init_suffix) and name.endswith(end_suffix):
+                    file_name = os.path.join(root, name)
+                    with open(file_name) as fp:
+                        data = json.load(fp)
+                    Benchmark._process_dataset(results, data)
+
+        with open(result_file_name, "w") as f:
f.write("classifier, dataset, accuracy\n") + for (model, dataset), accuracy in results.items(): + f.write(f"{model}, {dataset}, {accuracy}\n") + + @staticmethod + def report(): + def end_message(message, file): + length = 100 + print("*" * length) + print(message) + print("*" * length) + with open(os.path.join(Folders.results, file)) as f: + data = f.read().splitlines() + for line in data: + print(line) + + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + # Remove previous results + try: + shutil.rmtree(Folders.report) + os.remove(Files.exreport_pdf) + except FileNotFoundError: + pass + except OSError as e: + print("Error: %s : %s" % (Folders.report, e.strerror)) + # Compute Friedman & Holm Tests + fout = open(os.path.join(Folders.results, Files.exreport_output), "w") + ferr = open(os.path.join(Folders.results, Files.exreport_err), "w") + result = subprocess.run( + ["Rscript", os.path.join(Folders.src, "benchmark.r")], + stdout=fout, + stderr=ferr, + ) + fout.close() + ferr.close() + if result.returncode != 0: + end_message("Error computing benchmark", Files.exreport_err) + else: + end_message("Benchmark Ok", Files.exreport_output) + + if is_exe(Files.cmd_open): + subprocess.run([Files.cmd_open, Files.exreport_pdf]) diff --git a/src/Utils.py b/src/Utils.py index 4bc9545..c5ceaa1 100644 --- a/src/Utils.py +++ b/src/Utils.py @@ -10,6 +10,7 @@ class Folders: class Files: index = "all.txt" + exreport = "exreport.csv" exreport_output = "exreport.txt" exreport_err = "exreport_err.txt" cmd_open = "/usr/bin/open" @@ -25,7 +26,10 @@ class Files: @staticmethod def results_suffixes(model): - return f"results_{model}_", ".json" + if model == "": + return "results_", ".json" + else: + return f"results_{model}_", ".json" @staticmethod def dataset(name): diff --git a/src/benchmark.py b/src/benchmark.py index 16f6538..d6a66a4 100644 --- a/src/benchmark.py +++ b/src/benchmark.py @@ -1,46 +1,5 @@ -import os -import shutil -import subprocess -from Utils import Files, Folders +from Results import Benchmark - -def end_message(message, file): - length = 100 - print("*" * length) - print(message) - print("*" * length) - with open(os.path.join(Folders.results, file)) as f: - data = f.read().splitlines() - for line in data: - print(line) - - -def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - -# Remove previous results -try: - shutil.rmtree(Folders.report) - os.remove(Files.exreport_pdf) -except FileNotFoundError: - pass -except OSError as e: - print("Error: %s : %s" % (Folders.report, e.strerror)) -# Compute Friedman & Holm Tests -fout = open(os.path.join(Folders.results, Files.exreport_output), "w") -ferr = open(os.path.join(Folders.results, Files.exreport_err), "w") -result = subprocess.run( - ["Rscript", os.path.join(Folders.src, "benchmark.r")], - stdout=fout, - stderr=ferr, -) -fout.close() -ferr.close() -if result.returncode != 0: - end_message("Error computing benchmark", Files.exreport_err) -else: - end_message("Benchmark Ok", Files.exreport_output) - -if is_exe(Files.cmd_open): - subprocess.run([Files.cmd_open, Files.exreport_pdf]) +benchmark = Benchmark() +benchmark.compile_results() +benchmark.report()