mirror of https://github.com/Doctorado-ML/benchmark.git
Add score hyperparameter
@@ -46,13 +46,16 @@ class Datasets:
 
 
 class BestResults:
-    def __init__(self, model, datasets):
+    def __init__(self, score, model, datasets):
+        self.score_name = score
         self.datasets = datasets
         self.model = model
         self.data = {}
 
     def _get_file_name(self):
-        return os.path.join(Folders.results, Files.best_results(self.model))
+        return os.path.join(
+            Folders.results, Files.best_results(self.score_name, self.model)
+        )
 
     def load(self, dictionary):
         self.file_name = self._get_file_name()
@@ -75,7 +78,7 @@ class BestResults:
         for record in data["results"]:
             dataset = record["dataset"]
             if dataset in results:
-                if record["accuracy"] > results[dataset]["accuracy"]:
+                if record["score"] > results[dataset]["score"]:
                     record["file_name"] = file_name
                     results[dataset] = record
             else:
@@ -84,7 +87,9 @@ class BestResults:
 
     def build(self):
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes(self.model)
+        init_suffix, end_suffix = Files.results_suffixes(
+            score=self.score_name, model=self.model
+        )
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -98,7 +103,7 @@ class BestResults:
         datasets = Datasets()
         for name in tqdm(list(datasets), desc="datasets"):
             output[name] = (
-                results[name]["accuracy"],
+                results[name]["score"],
                 results[name]["hyperparameters"],
                 results[name]["file_name"],
             )
@@ -110,6 +115,7 @@ class BestResults:
 class Experiment:
     def __init__(
         self,
+        score_name,
         model_name,
         datasets,
         hyperparams_dict,
@@ -123,13 +129,18 @@ class Experiment:
         self.date = today.strftime("%Y-%m-%d")
         self.output_file = os.path.join(
             Folders.results,
-            Files.results(model_name, platform, self.date, self.time),
+            Files.results(
+                score_name, model_name, platform, self.date, self.time
+            ),
         )
+        self.score_name = score_name
         self.model_name = model_name
         self.model = Models.get_model(model_name)
         self.datasets = datasets
         dictionary = json.loads(hyperparams_dict)
-        hyper = BestResults(model=model_name, datasets=datasets)
+        hyper = BestResults(
+            score=score_name, model=model_name, datasets=datasets
+        )
         if hyperparams_file:
             self.hyperparameters_dict = hyper.load(
                 dictionary=dictionary,
@@ -181,7 +192,12 @@ class Experiment:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore")
                 res = cross_validate(
-                    clf, X, y, cv=kfold, return_estimator=True
+                    clf,
+                    X,
+                    y,
+                    cv=kfold,
+                    return_estimator=True,
+                    scoring=self.score_name,
                 )
             self.scores.append(res["test_score"])
             self.times.append(res["fit_time"])
@@ -203,14 +219,15 @@ class Experiment:
         record["nodes"] = np.mean(self.nodes)
         record["leaves"] = np.mean(self.leaves)
         record["depth"] = np.mean(self.depths)
-        record["accuracy"] = np.mean(self.scores)
-        record["accuracy_std"] = np.std(self.scores)
+        record["score"] = np.mean(self.scores)
+        record["score_std"] = np.std(self.scores)
         record["time"] = np.mean(self.times)
         record["time_std"] = np.std(self.times)
         self.results.append(record)
 
     def _output_results(self):
         output = {}
+        output["score_name"] = self.score_name
         output["model"] = self.model_name
         output["folds"] = self.folds
         output["date"] = self.date
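
The `scoring` argument added to the `cross_validate` call above is passed straight through to scikit-learn, so any scorer name the library accepts ("accuracy", "f1_macro", ...) works as the new score setting. A minimal standalone sketch of the same call pattern; the classifier, dataset and seed below are placeholders for illustration, not what the experiment actually uses:

    # Standalone sketch of cross_validate with a named scorer (illustrative only).
    from sklearn.datasets import load_iris
    from sklearn.model_selection import StratifiedKFold, cross_validate
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    res = cross_validate(
        DecisionTreeClassifier(random_state=0),
        X,
        y,
        cv=kfold,
        return_estimator=True,
        scoring="f1_macro",  # the value Experiment now forwards as self.score_name
    )
    print(res["test_score"].mean(), res["fit_time"].mean())
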
@@ -20,7 +20,7 @@ class BaseReport(abc.ABC):
         self.lines = self.data if best_file else self.data["results"]
 
     def _get_accuracy(self, item):
-        return self.data[item][0] if self.best_acc_file else item["accuracy"]
+        return self.data[item][0] if self.best_acc_file else item["score"]
 
     def report(self):
         self.header()
@@ -30,8 +30,8 @@ class BaseReport(abc.ABC):
             accuracy_total += self._get_accuracy(result)
         self.footer(accuracy_total)
 
-    def _load_best_results(self, model):
-        best = BestResults(model, Datasets())
+    def _load_best_results(self, score, model):
+        best = BestResults(score, model, Datasets())
         self.best_results = best.load({})
 
     def _compute_status(self, dataset, accuracy):
@@ -79,7 +79,7 @@ class Report(BaseReport):
         "Nodes",
         "Leaves",
         "Depth",
-        "Accuracy",
+        "Score",
         "Time",
         "Hyperparameters",
     ]
@@ -113,13 +113,11 @@ class Report(BaseReport):
         print(f"{result['depth']:{hl[i]}.2f} ", end="")
         i += 1
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
         else:
            status = " "
         print(
-            f"{result['accuracy']:8.6f}±{result['accuracy_std']:6.4f}{status}",
+            f"{result['score']:8.6f}±{result['score_std']:6.4f}{status}",
             end="",
         )
         i += 1
@@ -132,7 +130,9 @@ class Report(BaseReport):
 
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.header_line("*")
         self.header_line(
@@ -144,6 +144,7 @@ class Report(BaseReport):
             f" Execution took {self.data['duration']:7.2f} seconds on an "
             f"{self.data['platform']}"
         )
+        self.header_line(f" Score is {self.data['score_name']}")
         self.header_line("*")
         print("")
         line_col = ""
@@ -170,15 +171,18 @@ class ReportBest(BaseReport):
     header_lengths = [30, 8, 50, 35]
     header_cols = [
         "Dataset",
-        "Accuracy",
+        "Score",
         "File",
         "Hyperparameters",
     ]
 
-    def __init__(self, model):
-        file_name = os.path.join(Folders.results, Files.best_results(model))
+    def __init__(self, score, model):
+        file_name = os.path.join(
+            Folders.results, Files.best_results(score, model)
+        )
         super().__init__(file_name, best_file=True)
         self.compare = False
+        self.score_name = score
         self.model = model
 
     def header_line(self, text):
@@ -204,7 +208,8 @@ class ReportBest(BaseReport):
     def header(self):
         self.header_line("*")
         self.header_line(
-            f" Report Best Accuracies with {self.model} in any platform"
+            f" Report Best {self.score_name} Scores with {self.model} in any "
+            "platform"
         )
         self.header_line("*")
         print("")
@@ -222,14 +227,14 @@ class ReportBest(BaseReport):
                 f" {key} {self._status_meaning(key)} .....: {value:2d}"
             )
         self.header_line(
-            f" Accuracy compared to stree_default (liblinear-ovr) .: "
+            f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
            f"{accuracy/40.282203:7.4f}"
         )
         self.header_line("*")
 
 
 class Excel(BaseReport):
-    row = 4
+    row = 5
 
     def __init__(self, file_name, compare=False):
         super().__init__(file_name)
@@ -240,7 +245,9 @@ class Excel(BaseReport):
 
     def header(self):
         if self.compare:
-            self._load_best_results(self.data["model"])
+            self._load_best_results(
+                self.data["score_name"], self.data["model"]
+            )
             self._compare_totals = {}
         self.excel_file_name = self.file_name.replace(".json", ".xlsx")
         self.book = xlsxwriter.Workbook(self.excel_file_name)
@@ -266,6 +273,9 @@ class Excel(BaseReport):
         self.sheet.write(
             1, 5, f"Random seeds: {self.data['seeds']}", subheader
         )
+        self.sheet.write(
+            2, 0, f" Score is {self.data['score_name']}", subheader
+        )
         header_cols = [
             ("Dataset", 30),
             ("Samples", 10),
@@ -274,8 +284,8 @@ class Excel(BaseReport):
             ("Nodes", 7),
             ("Leaves", 7),
             ("Depth", 7),
-            ("Accuracy", 10),
-            ("Acc. Std.", 10),
+            ("Score", 10),
+            ("Score Std.", 10),
             ("Time", 10),
             ("Time Std.", 10),
             ("Parameters", 50),
@@ -285,7 +295,7 @@ class Excel(BaseReport):
         bold = self.book.add_format({"bold": True, "font_size": 14})
         i = 0
         for item, length in header_cols:
-            self.sheet.write(3, i, item, bold)
+            self.sheet.write(4, i, item, bold)
             self.sheet.set_column(i, i, length)
             i += 1
 
@@ -306,16 +316,14 @@ class Excel(BaseReport):
         self.sheet.write(self.row, col + 4, result["nodes"], normal)
         self.sheet.write(self.row, col + 5, result["leaves"], normal)
         self.sheet.write(self.row, col + 6, result["depth"], normal)
-        self.sheet.write(self.row, col + 7, result["accuracy"], decimal)
+        self.sheet.write(self.row, col + 7, result["score"], decimal)
         if self.compare:
-            status = self._compute_status(
-                result["dataset"], result["accuracy"]
-            )
+            status = self._compute_status(result["dataset"], result["score"])
             self.sheet.write(self.row, col + 8, status, normal)
             col = 9
         else:
             col = 8
-        self.sheet.write(self.row, col, result["accuracy_std"], decimal)
+        self.sheet.write(self.row, col, result["score_std"], decimal)
         self.sheet.write(self.row, col + 1, result["time"], decimal)
         self.sheet.write(self.row, col + 2, result["time_std"], decimal)
         self.sheet.write(
@@ -355,8 +363,9 @@ class SQL(BaseReport):
             "date",
             "time",
             "type",
-            "accuracy",
-            "accuracy_std",
+            "score_name",
+            "score",
+            "score_std",
             "dataset",
             "classifier",
             "norm",
@@ -382,8 +391,9 @@ class SQL(BaseReport):
             self.data["date"],
             self.data["time"],
             "crossval",
-            result["accuracy"],
-            result["accuracy_std"],
+            self.data["score_name"],
+            result["score"],
+            result["score_std"],
             result["dataset"],
             self.data["model"],
             0,
@@ -406,8 +416,8 @@ class SQL(BaseReport):
 
 class Benchmark:
     @staticmethod
-    def get_result_file_name():
-        return os.path.join(Folders.results, Files.exreport)
+    def get_result_file_name(score):
+        return os.path.join(Folders.results, Files.exreport(score))
 
     @staticmethod
     def _process_dataset(results, data):
@@ -415,23 +425,23 @@ class Benchmark:
         for record in data["results"]:
             dataset = record["dataset"]
             if (model, dataset) in results:
-                if record["accuracy"] > results[model, dataset][0]:
+                if record["score"] > results[model, dataset][0]:
                     results[model, dataset] = (
-                        record["accuracy"],
-                        record["accuracy_std"],
+                        record["score"],
+                        record["score_std"],
                     )
             else:
                 results[model, dataset] = (
-                    record["accuracy"],
-                    record["accuracy_std"],
+                    record["score"],
+                    record["score_std"],
                 )
 
     @staticmethod
-    def compile_results():
+    def compile_results(score):
         # build Files.exreport
-        result_file_name = Benchmark.get_result_file_name()
+        result_file_name = Benchmark.get_result_file_name(score)
         results = {}
-        init_suffix, end_suffix = Files.results_suffixes("")
+        init_suffix, end_suffix = Files.results_suffixes(score=score)
         all_files = list(os.walk(Folders.results))
         for root, _, files in tqdm(all_files, desc="files"):
             for name in files:
@@ -557,7 +567,7 @@ class Benchmark:
             row += 1
             column = 1
             for _ in range(len(results)):
-                sheet.write(row, column, "Accuracy", merge_format)
+                sheet.write(row, column, "Score", merge_format)
                 sheet.write(row, column + 1, "Stdev", merge_format)
                 column += 2
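
All of the report classes above read the same result JSON, so the accuracy-to-score renames are really a schema change. A sketch of the layout they now expect; the field names come from the diff, every value is invented purely for illustration:

    # Illustrative result file after this change (values are made up).
    result_file = {
        "score_name": "f1_macro",  # new top-level field written by Experiment
        "model": "STree",
        "platform": "iMac27",
        "folds": 5,
        "date": "2022-01-14",
        # ... other header fields ("duration", "seeds", "time") omitted
        "results": [
            {
                "dataset": "balance-scale",
                "score": 0.9612,        # was "accuracy"
                "score_std": 0.0143,    # was "accuracy_std"
                "time": 0.21,
                "time_std": 0.02,
                "nodes": 7.0,
                "leaves": 4.0,
                "depth": 3.0,
                "hyperparameters": {"C": 7, "kernel": "linear"},
            }
        ],
    }
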
src/Utils.py (25 lines changed)
@@ -12,7 +12,7 @@ class Folders:
 
 class Files:
     index = "all.txt"
-    exreport = "exreport.csv"
     exreport_output = "exreport.txt"
     exreport_err = "exreport_err.txt"
     exreport_excel = "exreport.xlsx"
@@ -22,19 +22,26 @@ class Files:
     benchmark_r = "benchmark.r"
 
     @staticmethod
-    def best_results(model):
-        return f"best_results_{model}.json"
+    def exreport(score):
+        return f"exreport_{score}.csv"
 
     @staticmethod
-    def results(model, platform, date, time):
-        return f"results_{model}_{platform}_{date}_{time}.json"
+    def best_results(score, model):
+        return f"best_results_{score}_{model}.json"
 
     @staticmethod
-    def results_suffixes(model):
-        if model == "":
-            return "results_", ".json"
-        return f"results_{model}_", ".json"
+    def results(score, model, platform, date, time):
+        return f"results_{score}_{model}_{platform}_{date}_{time}.json"
+
+    @staticmethod
+    def results_suffixes(score="", model=""):
+        suffix = ".json"
+        if model == "" and score == "":
+            return "results_", suffix
+        elif model == "":
+            return f"results_{score}_", suffix
+        else:
+            return f"results_{score}_{model}_", suffix
 
     @staticmethod
     def dataset(name):
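
Taken together, the helpers now embed the score in every generated file name. A few illustrative calls (the platform, date and time values are placeholders):

    Files.best_results("accuracy", "STree")
    # -> "best_results_accuracy_STree.json"
    Files.results("f1_macro", "STree", "iMac27", "2022-01-14", "10:52:20")
    # -> "results_f1_macro_STree_iMac27_2022-01-14_10:52:20.json"
    Files.exreport("f1_macro")
    # -> "exreport_f1_macro.csv"
    Files.results_suffixes(score="accuracy")
    # -> ("results_accuracy_", ".json")
    Files.results_suffixes(score="accuracy", model="STree")
    # -> ("results_accuracy_STree_", ".json")
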
@@ -5,6 +5,13 @@ import argparse
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-x",
         "--excel",
@@ -13,12 +20,12 @@ def parse_arguments():
         help="Generate Excel File",
     )
     args = ap.parse_args()
-    return args.excel
+    return (args.score, args.excel)
 
 
-excel = parse_arguments()
+(score, excel) = parse_arguments()
 benchmark = Benchmark()
-benchmark.compile_results()
+benchmark.compile_results(score)
 benchmark.report()
 benchmark.exreport()
 if excel:
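
The prefix/suffix pair that `compile_results` now requests with `Files.results_suffixes(score=score)` is presumably what restricts the scan to a single score's result files; the filtering itself happens outside the hunks shown, so the loop body below is only a guess at that step (it assumes `Files` and `Folders` can be imported from src/Utils.py):

    import os

    from Utils import Files, Folders  # assumed import path

    init_suffix, end_suffix = Files.results_suffixes(score="f1_macro")
    for root, _, files in os.walk(Folders.results):
        for name in files:
            # keep only names like results_f1_macro_<model>_<platform>_..._...json
            if name.startswith(init_suffix) and name.endswith(end_suffix):
                ...  # assumed filtering step, not shown in the diff
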
@@ -8,6 +8,13 @@ from Experiments import Datasets, BestResults
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-m",
         "--model",
@@ -24,13 +31,13 @@ def parse_arguments():
         help="Generate Report",
     )
     args = ap.parse_args()
-    return (args.model, args.report)
+    return (args.score, args.model, args.report)
 
 
-(model, report) = parse_arguments()
+(score, model, report) = parse_arguments()
 datasets = Datasets()
-best = BestResults(model, datasets)
+best = BestResults(score, model, datasets)
 best.build()
 if report:
-    report = ReportBest(model)
+    report = ReportBest(score, model)
     report.report()
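
With the score on the command line, build_best.py now writes one best-results file per score and model (best_results_<score>_<model>.json). A sketch of its shape, following the (score, hyperparameters, file_name) entry that BestResults.build() assembles per dataset; all values are invented for illustration:

    # Illustrative content of best_results_f1_macro_STree.json
    best = {
        "balance-scale": [
            0.9712,                        # best "score" found for the dataset
            {"C": 7, "kernel": "linear"},  # hyperparameters of that run
            "results_f1_macro_STree_iMac27_2022-01-14_10:52:20.json",  # source file
        ],
    }
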
src/main.py (10 lines changed)
@@ -8,6 +8,13 @@ from Results import Report
 
 def parse_arguments():
     ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "-s",
+        "--score",
+        type=str,
+        required=True,
+        help="score name {accuracy, f1_macro, ...}",
+    )
     ap.add_argument(
         "-P",
         "--platform",
@@ -55,6 +62,7 @@ def parse_arguments():
     )
     args = ap.parse_args()
     return (
+        args.score,
         args.model,
         args.n_folds,
         args.platform,
@@ -66,6 +74,7 @@ def parse_arguments():
 
 
 (
+    score,
     model,
     folds,
     platform,
@@ -75,6 +84,7 @@ def parse_arguments():
     report,
 ) = parse_arguments()
 job = Experiment(
+    score_name=score,
     model_name=model,
     datasets=Datasets(),
     hyperparams_dict=hyperparameters,
test.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
+#!/bin/bash
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/main.py -s $a -P iMac27 -m $i -r 1
+    done
+done
+for i in STree Wodt Cart SVC ExtraTree; do
+    for a in accuracy f1_macro; do
+        python src/build_best.py -s $a -m $i -r 1
+    done
+done
+for a in accuracy f1_macro; do
+    python src/benchmark.py -s $a
+done