Add score hyperparameter

2021-09-27 16:32:17 +02:00
parent 7a4409bb1f
commit 50cbc95919
7 changed files with 137 additions and 65 deletions


@@ -46,13 +46,16 @@ class Datasets:
class BestResults:
def __init__(self, model, datasets):
def __init__(self, score, model, datasets):
self.score_name = score
self.datasets = datasets
self.model = model
self.data = {}
def _get_file_name(self):
return os.path.join(Folders.results, Files.best_results(self.model))
return os.path.join(
Folders.results, Files.best_results(self.score_name, self.model)
)
def load(self, dictionary):
self.file_name = self._get_file_name()
@@ -75,7 +78,7 @@ class BestResults:
for record in data["results"]:
dataset = record["dataset"]
if dataset in results:
if record["accuracy"] > results[dataset]["accuracy"]:
if record["score"] > results[dataset]["score"]:
record["file_name"] = file_name
results[dataset] = record
else:
@@ -84,7 +87,9 @@ class BestResults:
def build(self):
results = {}
init_suffix, end_suffix = Files.results_suffixes(self.model)
init_suffix, end_suffix = Files.results_suffixes(
score=self.score_name, model=self.model
)
all_files = list(os.walk(Folders.results))
for root, _, files in tqdm(all_files, desc="files"):
for name in files:
@@ -98,7 +103,7 @@ class BestResults:
datasets = Datasets()
for name in tqdm(list(datasets), desc="datasets"):
output[name] = (
results[name]["accuracy"],
results[name]["score"],
results[name]["hyperparameters"],
results[name]["file_name"],
)
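For reference, build() leaves a plain mapping from dataset name to a (score, hyperparameters, file_name) triple, so the saved best-results file can be consumed with nothing more than json. A minimal sketch, assuming the file sits under a results/ folder and carries the name produced by Files.best_results(score, model):

import json
import os

# Hypothetical location; the real path is Folders.results plus
# Files.best_results(score, model).
best_file = os.path.join("results", "best_results_accuracy_STree.json")
with open(best_file) as f:
    best = json.load(f)
for dataset, (score, hyperparameters, file_name) in best.items():
    print(f"{dataset}: {score:.6f} from {file_name} with {hyperparameters}")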
@@ -110,6 +115,7 @@ class BestResults:
class Experiment:
def __init__(
self,
score_name,
model_name,
datasets,
hyperparams_dict,
@@ -123,13 +129,18 @@ class Experiment:
self.date = today.strftime("%Y-%m-%d")
self.output_file = os.path.join(
Folders.results,
Files.results(model_name, platform, self.date, self.time),
Files.results(
score_name, model_name, platform, self.date, self.time
),
)
self.score_name = score_name
self.model_name = model_name
self.model = Models.get_model(model_name)
self.datasets = datasets
dictionary = json.loads(hyperparams_dict)
hyper = BestResults(model=model_name, datasets=datasets)
hyper = BestResults(
score=score_name, model=model_name, datasets=datasets
)
if hyperparams_file:
self.hyperparameters_dict = hyper.load(
dictionary=dictionary,
@@ -181,7 +192,12 @@ class Experiment:
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
res = cross_validate(
clf, X, y, cv=kfold, return_estimator=True
clf,
X,
y,
cv=kfold,
return_estimator=True,
scoring=self.score_name,
)
self.scores.append(res["test_score"])
self.times.append(res["fit_time"])
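Passing self.score_name straight through to scikit-learn means any built-in scorer string (accuracy, f1_macro, ...) selects the metric without further code changes. A standalone sketch of the same call, using a plain DecisionTreeClassifier and the iris data instead of the project's models:

from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=57)
res = cross_validate(
    DecisionTreeClassifier(random_state=57),
    X,
    y,
    cv=kfold,
    return_estimator=True,
    scoring="f1_macro",  # any scorer name accepted by scikit-learn
)
print(res["test_score"].mean(), res["fit_time"].mean())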
@@ -203,14 +219,15 @@ class Experiment:
record["nodes"] = np.mean(self.nodes)
record["leaves"] = np.mean(self.leaves)
record["depth"] = np.mean(self.depths)
record["accuracy"] = np.mean(self.scores)
record["accuracy_std"] = np.std(self.scores)
record["score"] = np.mean(self.scores)
record["score_std"] = np.std(self.scores)
record["time"] = np.mean(self.times)
record["time_std"] = np.std(self.times)
self.results.append(record)
def _output_results(self):
output = {}
output["score_name"] = self.score_name
output["model"] = self.model_name
output["folds"] = self.folds
output["date"] = self.date
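With the record keys renamed from accuracy/accuracy_std to score/score_std and the header carrying score_name, a consumer of a results file only needs the fields shown above. A sketch, with a hypothetical file name of the shape produced by Files.results():

import json

# Hypothetical file name; real names come from Files.results(score, model, ...).
with open("results_accuracy_STree_iMac27_2021-09-27_12.00.00.json") as f:
    data = json.load(f)
print(f"{data['model']} evaluated with {data['score_name']} over {data['folds']} folds")
for record in data["results"]:
    print(
        f"{record['dataset']}: {record['score']:.6f}±{record['score_std']:.4f}"
        f" in {record['time']:.2f}s"
    )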


@@ -20,7 +20,7 @@ class BaseReport(abc.ABC):
self.lines = self.data if best_file else self.data["results"]
def _get_accuracy(self, item):
return self.data[item][0] if self.best_acc_file else item["accuracy"]
return self.data[item][0] if self.best_acc_file else item["score"]
def report(self):
self.header()
@@ -30,8 +30,8 @@ class BaseReport(abc.ABC):
accuracy_total += self._get_accuracy(result)
self.footer(accuracy_total)
def _load_best_results(self, model):
best = BestResults(model, Datasets())
def _load_best_results(self, score, model):
best = BestResults(score, model, Datasets())
self.best_results = best.load({})
def _compute_status(self, dataset, accuracy):
@@ -79,7 +79,7 @@ class Report(BaseReport):
"Nodes",
"Leaves",
"Depth",
"Accuracy",
"Score",
"Time",
"Hyperparameters",
]
@@ -113,13 +113,11 @@ class Report(BaseReport):
print(f"{result['depth']:{hl[i]}.2f} ", end="")
i += 1
if self.compare:
status = self._compute_status(
result["dataset"], result["accuracy"]
)
status = self._compute_status(result["dataset"], result["score"])
else:
status = " "
print(
f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
end="",
)
i += 1
@@ -132,7 +130,9 @@ class Report(BaseReport):
def header(self):
if self.compare:
self._load_best_results(self.data["model"])
self._load_best_results(
self.data["score_name"], self.data["model"]
)
self._compare_totals = {}
self.header_line("*")
self.header_line(
@@ -144,6 +144,7 @@ class Report(BaseReport):
f" Execution took {self.data['duration']:7.2f} seconds on an "
f"{self.data['platform']}"
)
self.header_line(f" Score is {self.data['score_name']}")
self.header_line("*")
print("")
line_col = ""
@@ -170,15 +171,18 @@ class ReportBest(BaseReport):
header_lengths = [30, 8, 50, 35]
header_cols = [
"Dataset",
"Accuracy",
"Score",
"File",
"Hyperparameters",
]
def __init__(self, model):
file_name = os.path.join(Folders.results, Files.best_results(model))
def __init__(self, score, model):
file_name = os.path.join(
Folders.results, Files.best_results(score, model)
)
super().__init__(file_name, best_file=True)
self.compare = False
self.score_name = score
self.model = model
def header_line(self, text):
@@ -204,7 +208,8 @@ class ReportBest(BaseReport):
def header(self):
self.header_line("*")
self.header_line(
f" Report Best Accuracies with {self.model} in any platform"
f" Report Best {self.score_name} Scores with {self.model} in any "
"platform"
)
self.header_line("*")
print("")
@@ -222,14 +227,14 @@ class ReportBest(BaseReport):
f" {key} {self._status_meaning(key)} .....: {value:2d}"
)
self.header_line(
f" Accuracy compared to stree_default (liblinear-ovr) .: "
f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
f"{accuracy/40.282203:7.4f}"
)
self.header_line("*")
class Excel(BaseReport):
row = 4
row = 5
def __init__(self, file_name, compare=False):
super().__init__(file_name)
@@ -240,7 +245,9 @@ class Excel(BaseReport):
def header(self):
if self.compare:
self._load_best_results(self.data["model"])
self._load_best_results(
self.data["score_name"], self.data["model"]
)
self._compare_totals = {}
self.excel_file_name = self.file_name.replace(".json", ".xlsx")
self.book = xlsxwriter.Workbook(self.excel_file_name)
@@ -266,6 +273,9 @@ class Excel(BaseReport):
self.sheet.write(
1, 5, f"Random seeds: {self.data['seeds']}", subheader
)
self.sheet.write(
2, 0, f" Score is {self.data['score_name']}", subheader
)
header_cols = [
("Dataset", 30),
("Samples", 10),
@@ -274,8 +284,8 @@ class Excel(BaseReport):
("Nodes", 7),
("Leaves", 7),
("Depth", 7),
("Accuracy", 10),
("Acc. Std.", 10),
("Score", 10),
("Score Std.", 10),
("Time", 10),
("Time Std.", 10),
("Parameters", 50),
@@ -285,7 +295,7 @@ class Excel(BaseReport):
bold = self.book.add_format({"bold": True, "font_size": 14})
i = 0
for item, length in header_cols:
self.sheet.write(3, i, item, bold)
self.sheet.write(4, i, item, bold)
self.sheet.set_column(i, i, length)
i += 1
@@ -306,16 +316,14 @@ class Excel(BaseReport):
self.sheet.write(self.row, col + 4, result["nodes"], normal)
self.sheet.write(self.row, col + 5, result["leaves"], normal)
self.sheet.write(self.row, col + 6, result["depth"], normal)
self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
self.sheet.write(self.row, col + 7, result["score"], decimal)
if self.compare:
status = self._compute_status(
result["dataset"], result["accuracy"]
)
status = self._compute_status(result["dataset"], result["score"])
self.sheet.write(self.row, col + 8, status, normal)
col = 9
else:
col = 8
self.sheet.write(self.row, col, result["accuracy_std"], decimal)
self.sheet.write(self.row, col, result["score_std"], decimal)
self.sheet.write(self.row, col + 1, result["time"], decimal)
self.sheet.write(self.row, col + 2, result["time_std"], decimal)
self.sheet.write(
@@ -355,8 +363,9 @@ class SQL(BaseReport):
"date",
"time",
"type",
"accuracy",
"accuracy_std",
"score_name",
"score",
"score_std",
"dataset",
"classifier",
"norm",
@@ -382,8 +391,9 @@ class SQL(BaseReport):
self.data["date"],
self.data["time"],
"crossval",
result["accuracy"],
result["accuracy_std"],
self.data["score_name"],
result["score"],
result["score_std"],
result["dataset"],
self.data["model"],
0,
@@ -406,8 +416,8 @@ class SQL(BaseReport):
class Benchmark:
@staticmethod
def get_result_file_name():
return os.path.join(Folders.results, Files.exreport)
def get_result_file_name(score):
return os.path.join(Folders.results, Files.exreport(score))
@staticmethod
def _process_dataset(results, data):
@@ -415,23 +425,23 @@ class Benchmark:
for record in data["results"]:
dataset = record["dataset"]
if (model, dataset) in results:
if record["accuracy"] > results[model, dataset][0]:
if record["score"] > results[model, dataset][0]:
results[model, dataset] = (
record["accuracy"],
record["accuracy_std"],
record["score"],
record["score_std"],
)
else:
results[model, dataset] = (
record["accuracy"],
record["accuracy_std"],
record["score"],
record["score_std"],
)
@staticmethod
def compile_results():
def compile_results(score):
# build Files.exreport
result_file_name = Benchmark.get_result_file_name()
result_file_name = Benchmark.get_result_file_name(score)
results = {}
init_suffix, end_suffix = Files.results_suffixes("")
init_suffix, end_suffix = Files.results_suffixes(score=score)
all_files = list(os.walk(Folders.results))
for root, _, files in tqdm(all_files, desc="files"):
for name in files:
@@ -557,7 +567,7 @@ class Benchmark:
row += 1
column = 1
for _ in range(len(results)):
sheet.write(row, column, "Accuracy", merge_format)
sheet.write(row, column, "Score", merge_format)
sheet.write(row, column + 1, "Stdev", merge_format)
column += 2


@@ -12,7 +12,7 @@ class Folders:
class Files:
index = "all.txt"
exreport = "exreport.csv"
exreport_output = "exreport.txt"
exreport_err = "exreport_err.txt"
exreport_excel = "exreport.xlsx"
@@ -22,19 +22,26 @@ class Files:
benchmark_r = "benchmark.r"
@staticmethod
def best_results(model):
return f"best_results_{model}.json"
def exreport(score):
return f"exreport_{score}.csv"
@staticmethod
def results(model, platform, date, time):
return f"results_{model}_{platform}_{date}_{time}.json"
def best_results(score, model):
return f"best_results_{score}_{model}.json"
@staticmethod
def results_suffixes(model):
if model == "":
return "results_", ".json"
def results(score, model, platform, date, time):
return f"results_{score}_{model}_{platform}_{date}_{time}.json"
@staticmethod
def results_suffixes(score="", model=""):
suffix = ".json"
if model == "" and score == "":
return "results_", suffix
elif model == "":
return f"results_{score}_", suffix
else:
return f"results_{model}_", ".json"
return f"results_{score}_{model}_", suffix
@staticmethod
def dataset(name):

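With the score woven into every file name, the helpers resolve as follows. A quick sketch; the module name Utils is an assumption, since file names are not visible in this view:

from Utils import Files  # assumed module name, not shown in this diff

print(Files.exreport("accuracy"))                # exreport_accuracy.csv
print(Files.best_results("f1_macro", "STree"))   # best_results_f1_macro_STree.json
print(Files.results_suffixes())                  # ('results_', '.json')
print(Files.results_suffixes(score="accuracy"))  # ('results_accuracy_', '.json')
print(Files.results_suffixes(score="accuracy", model="STree"))
# ('results_accuracy_STree_', '.json')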

@@ -5,6 +5,13 @@ import argparse
def parse_arguments():
ap = argparse.ArgumentParser()
ap.add_argument(
"-s",
"--score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(
"-x",
"--excel",
@@ -13,12 +20,12 @@ def parse_arguments():
help="Generate Excel File",
)
args = ap.parse_args()
return args.excel
return (args.score, args.excel)
excel = parse_arguments()
(score, excel) = parse_arguments()
benchmark = Benchmark()
benchmark.compile_results()
benchmark.compile_results(score)
benchmark.report()
benchmark.exreport()
if excel:


@@ -8,6 +8,13 @@ from Experiments import Datasets, BestResults
def parse_arguments():
ap = argparse.ArgumentParser()
ap.add_argument(
"-s",
"--score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(
"-m",
"--model",
@@ -24,13 +31,13 @@ def parse_arguments():
help="Generate Report",
)
args = ap.parse_args()
return (args.model, args.report)
return (args.score, args.model, args.report)
(model, report) = parse_arguments()
(score, model, report) = parse_arguments()
datasets = Datasets()
best = BestResults(model, datasets)
best = BestResults(score, model, datasets)
best.build()
if report:
report = ReportBest(model)
report = ReportBest(score, model)
report.report()


@@ -8,6 +8,13 @@ from Results import Report
def parse_arguments():
ap = argparse.ArgumentParser()
ap.add_argument(
"-s",
"--score",
type=str,
required=True,
help="score name {accuracy, f1_macro, ...}",
)
ap.add_argument(
"-P",
"--platform",
@@ -55,6 +62,7 @@ def parse_arguments():
)
args = ap.parse_args()
return (
args.score,
args.model,
args.n_folds,
args.platform,
@@ -66,6 +74,7 @@ def parse_arguments():
(
score,
model,
folds,
platform,
@@ -75,6 +84,7 @@ def parse_arguments():
report,
) = parse_arguments()
job = Experiment(
score_name=score,
model_name=model,
datasets=Datasets(),
hyperparams_dict=hyperparameters,

test.sh Executable file

@@ -0,0 +1,14 @@
#!/bin/bash
for i in STree Wodt Cart SVC ExtraTree; do
for a in accuracy f1_macro; do
python src/main.py -s $a -P iMac27 -m $i -r 1
done
done
for i in STree Wodt Cart SVC ExtraTree; do
for a in accuracy f1_macro; do
python src/build_best.py -s $a -m $i -r 1
done
done
for a in accuracy f1_macro; do
python src/benchmark.py -s $a
done