Add score hyperparameter

2021-09-27 16:32:17 +02:00
parent 7a4409bb1f
commit 50cbc95919
7 changed files with 137 additions and 65 deletions
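Note: the commit threads a scikit-learn scorer name (e.g. accuracy or f1_macro) from the command line through the experiment, report and benchmark code, and renames the stored "accuracy" fields to score-neutral names. A minimal standalone sketch of the central mechanism, using a plain scikit-learn classifier rather than one of the project's models:

# Minimal sketch (not the project's code): the change forwards a scorer-name
# string straight to scikit-learn's cross_validate.
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
score_name = "f1_macro"  # any scikit-learn scorer name, e.g. "accuracy"
res = cross_validate(
    DecisionTreeClassifier(random_state=0),
    X,
    y,
    cv=5,
    return_estimator=True,
    scoring=score_name,  # the keyword the Experiment class now passes
)
print(res["test_score"].mean(), res["fit_time"].mean())

Any name accepted by cross_validate's scoring parameter works here, which is why the rest of the commit is largely a renaming and file-naming exercise.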

View File

@@ -46,13 +46,16 @@ class Datasets:
 class BestResults:
-    def __init__(self, model, datasets):
+    def __init__(self, score, model, datasets):
+        self.score_name = score
         self.datasets = datasets
         self.model = model
         self.data = {}

     def _get_file_name(self):
-        return os.path.join(Folders.results, Files.best_results(self.model))
+        return os.path.join(
+            Folders.results, Files.best_results(self.score_name, self.model)
+        )

     def load(self, dictionary):
         self.file_name = self._get_file_name()
@@ -75,7 +78,7 @@ class BestResults:
         for record in data["results"]:
             dataset = record["dataset"]
             if dataset in results:
-                if record["accuracy"] > results[dataset]["accuracy"]:
+                if record["score"] > results[dataset]["score"]:
                     record["file_name"] = file_name
                     results[dataset] = record
             else:
@@ -84,7 +87,9 @@ class BestResults:
     def build(self):
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes(self.model)
+        init_suffix, end_suffix = Files.results_suffixes(
+            score=self.score_name, model=self.model
+        )
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -98,7 +103,7 @@ class BestResults:
         datasets = Datasets()
         for name in tqdm(list(datasets), desc="datasets"):
             output[name] = (
-                results[name]["accuracy"],
+                results[name]["score"],
                 results[name]["hyperparameters"],
                 results[name]["file_name"],
             )
@@ -110,6 +115,7 @@ class BestResults:
 class Experiment:
     def __init__(
         self,
+        score_name,
         model_name,
         datasets,
         hyperparams_dict,
@@ -123,13 +129,18 @@ class Experiment:
         self.date = today.strftime("%Y-%m-%d")
         self.output_file = os.path.join(
             Folders.results,
-            Files.results(model_name, platform, self.date, self.time),
+            Files.results(
+                score_name, model_name, platform, self.date, self.time
+            ),
         )
+        self.score_name = score_name
         self.model_name = model_name
         self.model = Models.get_model(model_name)
         self.datasets = datasets
         dictionary = json.loads(hyperparams_dict)
-        hyper = BestResults(model=model_name, datasets=datasets)
+        hyper = BestResults(
+            score=score_name, model=model_name, datasets=datasets
+        )
         if hyperparams_file:
             self.hyperparameters_dict = hyper.load(
                 dictionary=dictionary,
@@ -181,7 +192,12 @@ class Experiment:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore")
                 res = cross_validate(
-                    clf, X, y, cv=kfold, return_estimator=True
+                    clf,
+                    X,
+                    y,
+                    cv=kfold,
+                    return_estimator=True,
+                    scoring=self.score_name,
                 )
                 self.scores.append(res["test_score"])
                 self.times.append(res["fit_time"])
@@ -203,14 +219,15 @@ class Experiment:
         record["nodes"] = np.mean(self.nodes)
         record["leaves"] = np.mean(self.leaves)
         record["depth"] = np.mean(self.depths)
-        record["accuracy"] = np.mean(self.scores)
-        record["accuracy_std"] = np.std(self.scores)
+        record["score"] = np.mean(self.scores)
+        record["score_std"] = np.std(self.scores)
         record["time"] = np.mean(self.times)
         record["time_std"] = np.std(self.times)
         self.results.append(record)

     def _output_results(self):
         output = {}
+        output["score_name"] = self.score_name
         output["model"] = self.model_name
         output["folds"] = self.folds
         output["date"] = self.date

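For orientation, a hedged illustration of the per-dataset record the Experiment class above now writes: the aggregation is unchanged, only the accuracy/accuracy_std keys become the score-neutral score/score_std, and the output header gains a score_name entry (all values below are made up):

# Hedged illustration of the renamed result keys; fold values are made up.
import numpy as np

fold_scores = np.array([0.91, 0.93, 0.90])  # res["test_score"] per run
fold_times = np.array([0.12, 0.11, 0.13])   # res["fit_time"] per run
record = {
    "score": float(np.mean(fold_scores)),
    "score_std": float(np.std(fold_scores)),
    "time": float(np.mean(fold_times)),
    "time_std": float(np.std(fold_times)),
}
output = {"score_name": "accuracy", "results": [record]}
print(output)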
View File

@@ -20,7 +20,7 @@ class BaseReport(abc.ABC):
         self.lines = self.data if best_file else self.data["results"]

     def _get_accuracy(self, item):
-        return self.data[item][0] if self.best_acc_file else item["accuracy"]
+        return self.data[item][0] if self.best_acc_file else item["score"]

     def report(self):
         self.header()
@@ -30,8 +30,8 @@ class BaseReport(abc.ABC):
             accuracy_total += self._get_accuracy(result)
         self.footer(accuracy_total)

-    def _load_best_results(self, model):
-        best = BestResults(model, Datasets())
+    def _load_best_results(self, score, model):
+        best = BestResults(score, model, Datasets())
         self.best_results = best.load({})

     def _compute_status(self, dataset, accuracy):
@@ -79,7 +79,7 @@ class Report(BaseReport):
         "Nodes",
         "Leaves",
         "Depth",
-        "Accuracy",
+        "Score",
         "Time",
         "Hyperparameters",
     ]
@@ -113,13 +113,11 @@ class Report(BaseReport):
         print(f"{result['depth']:{hl[i]}.2f} ", end="")
         i += 1
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
         else:
             status = " "
         print(
-            f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
+            f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
             end="",
         )
         i += 1
@@ -132,7 +130,9 @@ class Report(BaseReport):
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.header_line("*")
         self.header_line(
@@ -144,6 +144,7 @@ class Report(BaseReport):
             f" Execution took {self.data['duration']:7.2f} seconds on an "
            f"{self.data['platform']}"
         )
+        self.header_line(f" Score is {self.data['score_name']}")
         self.header_line("*")
         print("")
         line_col = ""
@@ -170,15 +171,18 @@ class ReportBest(BaseReport):
     header_lengths = [30, 8, 50, 35]
     header_cols = [
         "Dataset",
-        "Accuracy",
+        "Score",
         "File",
         "Hyperparameters",
     ]

-    def __init__(self, model):
-        file_name = os.path.join(Folders.results, Files.best_results(model))
+    def __init__(self, score, model):
+        file_name = os.path.join(
+            Folders.results, Files.best_results(score, model)
+        )
         super().__init__(file_name, best_file=True)
         self.compare = False
+        self.score_name = score
         self.model = model

     def header_line(self, text):
@@ -204,7 +208,8 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model} in any platform"
+            f" Report Best {self.score_name} Scores with {self.model} in any "
+            "platform"
         )
         self.header_line("*")
         print("")
@@ -222,14 +227,14 @@ class ReportBest(BaseReport):
                 f" {key} {self._status_meaning(key)} .....: {value:2d}"
             )
         self.header_line(
-            f" Accuracy compared to stree_default (liblinear-ovr) .: "
+            f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
             f"{accuracy/40.282203:7.4f}"
         )
         self.header_line("*")


 class Excel(BaseReport):
-    row = 4
+    row = 5

     def __init__(self, file_name, compare=False):
         super().__init__(file_name)
@@ -240,7 +245,9 @@ class Excel(BaseReport):
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.excel_file_name = self.file_name.replace(".json", ".xlsx")
         self.book = xlsxwriter.Workbook(self.excel_file_name)
@@ -266,6 +273,9 @@ class Excel(BaseReport):
         self.sheet.write(
             1, 5, f"Random seeds: {self.data['seeds']}", subheader
         )
+        self.sheet.write(
+            2, 0, f" Score is {self.data['score_name']}", subheader
+        )
         header_cols = [
             ("Dataset", 30),
             ("Samples", 10),
@@ -274,8 +284,8 @@ class Excel(BaseReport):
             ("Nodes", 7),
             ("Leaves", 7),
             ("Depth", 7),
-            ("Accuracy", 10),
-            ("Acc. Std.", 10),
+            ("Score", 10),
+            ("Score Std.", 10),
             ("Time", 10),
             ("Time Std.", 10),
             ("Parameters", 50),
@@ -285,7 +295,7 @@ class Excel(BaseReport):
         bold = self.book.add_format({"bold": True, "font_size": 14})
         i = 0
         for item, length in header_cols:
-            self.sheet.write(3, i, item, bold)
+            self.sheet.write(4, i, item, bold)
             self.sheet.set_column(i, i, length)
             i += 1
@@ -306,16 +316,14 @@ class Excel(BaseReport):
         self.sheet.write(self.row, col + 4, result["nodes"], normal)
         self.sheet.write(self.row, col + 5, result["leaves"], normal)
         self.sheet.write(self.row, col + 6, result["depth"], normal)
-        self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
+        self.sheet.write(self.row, col + 7, result["score"], decimal)
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
             self.sheet.write(self.row, col + 8, status, normal)
             col = 9
         else:
             col = 8
-        self.sheet.write(self.row, col, result["accuracy_std"], decimal)
+        self.sheet.write(self.row, col, result["score_std"], decimal)
         self.sheet.write(self.row, col + 1, result["time"], decimal)
         self.sheet.write(self.row, col + 2, result["time_std"], decimal)
         self.sheet.write(
@@ -355,8 +363,9 @@ class SQL(BaseReport):
             "date",
             "time",
             "type",
-            "accuracy",
-            "accuracy_std",
+            "score_name",
+            "score",
+            "score_std",
             "dataset",
             "classifier",
             "norm",
@@ -382,8 +391,9 @@ class SQL(BaseReport):
             self.data["date"],
             self.data["time"],
             "crossval",
-            result["accuracy"],
-            result["accuracy_std"],
+            self.data["score_name"],
+            result["score"],
+            result["score_std"],
             result["dataset"],
             self.data["model"],
             0,
@@ -406,8 +416,8 @@ class SQL(BaseReport):
 class Benchmark:
     @staticmethod
-    def get_result_file_name():
-        return os.path.join(Folders.results, Files.exreport)
+    def get_result_file_name(score):
+        return os.path.join(Folders.results, Files.exreport(score))

     @staticmethod
     def _process_dataset(results, data):
@@ -415,23 +425,23 @@ class Benchmark:
         for record in data["results"]:
             dataset = record["dataset"]
             if (model, dataset) in results:
-                if record["accuracy"] > results[model, dataset][0]:
+                if record["score"] > results[model, dataset][0]:
                     results[model, dataset] = (
-                        record["accuracy"],
-                        record["accuracy_std"],
+                        record["score"],
+                        record["score_std"],
                     )
             else:
                 results[model, dataset] = (
-                    record["accuracy"],
-                    record["accuracy_std"],
+                    record["score"],
+                    record["score_std"],
                 )

     @staticmethod
-    def compile_results():
+    def compile_results(score):
         # build Files.exreport
-        result_file_name = Benchmark.get_result_file_name()
+        result_file_name = Benchmark.get_result_file_name(score)
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes("")
+        init_suffix, end_suffix = Files.results_suffixes(score=score)
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -557,7 +567,7 @@ class Benchmark:
         row += 1
         column = 1
         for _ in range(len(results)):
-            sheet.write(row, column, "Accuracy", merge_format)
+            sheet.write(row, column, "Score", merge_format)
             sheet.write(row, column + 1, "Stdev", merge_format)
             column += 2

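The reporting side reads two kinds of files: best-results files map a dataset name to a (score, hyperparameters, file_name) tuple, while experiment result files carry a list of records with a "score" key. A small hedged sketch of the dual lookup performed by _get_accuracy above; the dataset name, numbers and file name are placeholders:

# Hedged sketch of the dual lookup; all values below are placeholders.
best_data = {
    "iris": (0.9533, {"C": 1.0}, "results_accuracy_STree_placeholder.json")
}
result_item = {"dataset": "iris", "score": 0.9533, "score_std": 0.0125}

def get_score(data, item, best_file):
    # item is a dataset name for best-results files, a result record otherwise
    return data[item][0] if best_file else item["score"]

print(get_score(best_data, "iris", best_file=True))        # 0.9533
print(get_score(best_data, result_item, best_file=False))  # 0.9533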
View File

@@ -12,7 +12,7 @@ class Folders:
 class Files:
     index = "all.txt"
-    exreport = "exreport.csv"
     exreport_output = "exreport.txt"
     exreport_err = "exreport_err.txt"
     exreport_excel = "exreport.xlsx"
@@ -22,19 +22,26 @@ class Files:
     benchmark_r = "benchmark.r"

     @staticmethod
-    def best_results(model):
-        return f"best_results_{model}.json"
+    def exreport(score):
+        return f"exreport_{score}.csv"

     @staticmethod
-    def results(model, platform, date, time):
-        return f"results_{model}_{platform}_{date}_{time}.json"
+    def best_results(score, model):
+        return f"best_results_{score}_{model}.json"

     @staticmethod
-    def results_suffixes(model):
-        if model == "":
-            return "results_", ".json"
+    def results(score, model, platform, date, time):
+        return f"results_{score}_{model}_{platform}_{date}_{time}.json"
+
+    @staticmethod
+    def results_suffixes(score="", model=""):
+        suffix = ".json"
+        if model == "" and score == "":
+            return "results_", suffix
+        elif model == "":
+            return f"results_{score}_", suffix
         else:
-            return f"results_{model}_", ".json"
+            return f"results_{score}_{model}_", suffix

     @staticmethod
     def dataset(name):

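A standalone sketch of the resulting score-aware naming scheme, copying just two of the helpers changed above so their outputs can be seen; the example values are arbitrary:

# Standalone copies of the renaming helpers above, for illustration only.
def best_results(score, model):
    return f"best_results_{score}_{model}.json"

def results_suffixes(score="", model=""):
    suffix = ".json"
    if model == "" and score == "":
        return "results_", suffix
    elif model == "":
        return f"results_{score}_", suffix
    else:
        return f"results_{score}_{model}_", suffix

print(best_results("accuracy", "STree"))
# -> best_results_accuracy_STree.json
print(results_suffixes(score="f1_macro"))
# -> ('results_f1_macro_', '.json')
print(results_suffixes(score="f1_macro", model="STree"))
# -> ('results_f1_macro_STree_', '.json')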
View File

@@ -5,6 +5,13 @@ import argparse
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-x",
         "--excel",
@@ -13,12 +20,12 @@ def parse_arguments():
         help="Generate Excel File",
     )
     args = ap.parse_args()
-    return args.excel
+    return (args.score, args.excel)


-excel = parse_arguments()
+(score, excel) = parse_arguments()
 benchmark = Benchmark()
-benchmark.compile_results()
+benchmark.compile_results(score)
 benchmark.report()
 benchmark.exreport()
 if excel:

View File

@@ -8,6 +8,13 @@ from Experiments import Datasets, BestResults
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-m",
         "--model",
@@ -24,13 +31,13 @@ def parse_arguments():
         help="Generate Report",
     )
     args = ap.parse_args()
-    return (args.model, args.report)
+    return (args.score, args.model, args.report)


-(model, report) = parse_arguments()
+(score, model, report) = parse_arguments()
 datasets = Datasets()
-best = BestResults(model, datasets)
+best = BestResults(score, model, datasets)
 best.build()
 if report:
-    report = ReportBest(model)
+    report = ReportBest(score, model)
     report.report()

View File

@@ -8,6 +8,13 @@ from Results import Report
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-P",
         "--platform",
@@ -55,6 +62,7 @@ def parse_arguments():
     )
     args = ap.parse_args()
     return (
+        args.score,
         args.model,
         args.n_folds,
         args.platform,
@@ -66,6 +74,7 @@ def parse_arguments():
 (
+    score,
     model,
     folds,
     platform,
@@ -75,6 +84,7 @@ def parse_arguments():
     report,
 ) = parse_arguments()
 job = Experiment(
+    score_name=score,
     model_name=model,
     datasets=Datasets(),
     hyperparams_dict=hyperparameters,

test.sh (new executable file, 14 additions)
View File

@@ -0,0 +1,14 @@
+#!/bin/bash
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/main.py -s $a -P iMac27 -m $i -r 1
+    done
+done
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/build_best.py -s $a -m $i -r 1
+    done
+done
+for a in accuracy f1_macro; do
+    python src/benchmark.py -s $a
+done