Mirror of https://github.com/Doctorado-ML/benchmark.git

Begin refactor Results
@@ -163,16 +163,18 @@ class Datasets:
         attr = SimpleNamespace()
         attr.dataset = name
         values, counts = np.unique(y, return_counts=True)
-        comp = ""
-        sep = ""
-        for count in counts:
-            comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
-            sep = "/ "
-        attr.balance = comp
-        attr.classes = len(np.unique(y))
+        attr.classes = len(values)
         attr.samples = X.shape[0]
         attr.features = X.shape[1]
         attr.cont_features = len(self.get_continuous_features())
+        attr.distribution = {}
+        comp = ""
+        sep = ""
+        for value, count in zip(values, counts):
+            comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
+            sep = "/ "
+            attr.distribution[value.item()] = count / sum(counts)
+        attr.balance = comp
         self.discretize = tmp
         return attr

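Note (not part of the commit): a minimal sketch of the per-class bookkeeping the refactored get_attributes now performs, using a toy label array in place of a real dataset. attr.balance keeps the human-readable summary, while the new attr.distribution maps each class label to its fraction and later feeds the ZeroR comparison in ResultsBase.

import numpy as np

# Toy labels; in the real code y comes from the loaded dataset.
y = np.array([0, 0, 0, 1, 1, 0, 1, 0])
values, counts = np.unique(y, return_counts=True)

distribution = {}
comp, sep = "", ""
for value, count in zip(values, counts):
    comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
    sep = "/ "
    distribution[value.item()] = count / sum(counts)

print(comp)  # 62.50% (5) / 37.50% (3)
# distribution maps each class label to its fraction: 0 -> 0.625, 1 -> 0.375
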
benchmark/Results.py | 1220 lines (diff suppressed because it is too large)

benchmark/ResultsBase.py | 163 lines (new file)

@@ -0,0 +1,163 @@
+import abc
+import json
+import os
+
+from .Arguments import ALL_METRICS, EnvData
+from .Datasets import Datasets
+from .Experiments import BestResults
+from .Utils import Folders, Symbols
+
+
+def get_input(message="", is_test=False):
+    return "test" if is_test else input(message)
+
+
+class BestResultsEver:
+    def __init__(self):
+        self.data = {}
+        for i in ["Tanveer", "Surcov", "Arff"]:
+            self.data[i] = {}
+            for metric in ALL_METRICS:
+                self.data[i][metric.replace("-", "_")] = ["self", 1.0]
+                self.data[i][metric] = ["self", 1.0]
+        self.data["Tanveer"]["accuracy"] = [
+            "STree_default (liblinear-ovr)",
+            40.282203,
+        ]
+        self.data["Arff"]["accuracy"] = [
+            "STree_default (linear-ovo)",
+            22.109799,
+        ]
+
+    def get_name_value(self, key, score):
+        return self.data[key][score]
+
+
+class BaseReport(abc.ABC):
+    def __init__(self, file_name, best_file=False):
+        self.file_name = file_name
+        if not os.path.isfile(file_name):
+            if not os.path.isfile(os.path.join(Folders.results, file_name)):
+                raise FileNotFoundError(f"{file_name} does not exists!")
+            else:
+                self.file_name = os.path.join(Folders.results, file_name)
+        with open(self.file_name) as f:
+            self.data = json.load(f)
+        self.best_acc_file = best_file
+        if best_file:
+            self.lines = self.data
+        else:
+            self.lines = self.data["results"]
+        self.score_name = self.data["score_name"]
+        self.__load_env_data()
+        self.__compute_best_results_ever()
+
+    def __load_env_data(self):
+        # Set the labels for nodes, leaves, depth
+        env_data = EnvData.load()
+        self.nodes_label = env_data["nodes"]
+        self.leaves_label = env_data["leaves"]
+        self.depth_label = env_data["depth"]
+        self.key = env_data["source_data"]
+        self.margin = float(env_data["margin"])
+
+    def __compute_best_results_ever(self):
+        best = BestResultsEver()
+        self.best_score_name, self.best_score_value = best.get_name_value(
+            self.key, self.score_name
+        )
+
+    def _get_accuracy(self, item):
+        return self.data[item][0] if self.best_acc_file else item["score"]
+
+    def report(self):
+        self.header()
+        accuracy_total = 0.0
+        for result in self.lines:
+            self.print_line(result)
+            accuracy_total += self._get_accuracy(result)
+        self.footer(accuracy_total)
+
+    def _load_best_results(self, score, model):
+        best = BestResults(score, model, Datasets())
+        self.best_results = best.load({})
+
+    def _compute_status(self, dataset, accuracy: float):
+        status = " "
+        if self.compare:
+            # Compare with best results
+            best = self.best_results[dataset][0]
+            if accuracy == best:
+                status = Symbols.equal_best
+            elif accuracy > best:
+                status = Symbols.better_best
+        else:
+            # compare with dataset label distribution only if its a binary one
+            # down_arrow if accuracy is less than the ZeroR
+            # black_star if accuracy is greater than the ZeroR + margin%
+            if self.score_name == "accuracy":
+                dt = Datasets()
+                attr = dt.get_attributes(dataset)
+                if attr.classes == 2:
+                    max_category = max(attr.distribution.values())
+                    max_value = max_category * (1 + self.margin)
+                    if max_value > 1:
+                        max_value = 0.9995
+                    status = (
+                        Symbols.cross
+                        if accuracy <= max_value
+                        else Symbols.upward_arrow
+                        if accuracy > max_value
+                        else " "
+                    )
+        if status != " ":
+            if status not in self._compare_totals:
+                self._compare_totals[status] = 1
+            else:
+                self._compare_totals[status] += 1
+        return status
+
+    def _status_meaning(self, status):
+        meaning = {
+            Symbols.equal_best: "Equal to best",
+            Symbols.better_best: "Better than best",
+            Symbols.cross: "Less than or equal to ZeroR",
+            Symbols.upward_arrow: f"Better than ZeroR + "
+            f"{self.margin*100:3.1f}%",
+        }
+        return meaning[status]
+
+    def _get_best_accuracy(self):
+        return self.best_score_value
+
+    def _get_message_best_accuracy(self):
+        return f"{self.score_name} compared to {self.best_score_name} .:"
+
+    @abc.abstractmethod
+    def header(self) -> None:
+        pass
+
+    @abc.abstractmethod
+    def print_line(self, result) -> None:
+        pass
+
+    @abc.abstractmethod
+    def footer(self, accuracy: float) -> None:
+        pass
+
+
+class StubReport(BaseReport):
+    def __init__(self, file_name):
+        super().__init__(file_name=file_name, best_file=False)
+
+    def print_line(self, line) -> None:
+        pass
+
+    def header(self) -> None:
+        self.title = self.data["title"]
+        self.duration = self.data["duration"]
+
+    def footer(self, accuracy: float) -> None:
+        self.accuracy = accuracy
+        self.score = accuracy / self._get_best_accuracy()

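Note (not part of the commit): a standalone sketch of the ZeroR-plus-margin check that BaseReport._compute_status applies to binary datasets, using placeholder glyphs instead of benchmark.Utils.Symbols and the margin=0.1 value set in the .env examples below.

# Placeholder glyphs; the real report uses Symbols.cross / Symbols.upward_arrow.
CROSS, UP = "\N{BALLOT X}", "\N{NORTH EAST ARROW}"


def zero_r_status(distribution, accuracy, margin=0.1):
    # ZeroR always predicts the majority class, so its accuracy is the largest
    # class fraction; the score must beat that fraction plus margin% to earn
    # the upward arrow, otherwise it gets the cross.
    max_value = max(distribution.values()) * (1 + margin)
    if max_value > 1:
        max_value = 0.9995
    return CROSS if accuracy <= max_value else UP


# Majority class at 62.5% -> threshold is 0.625 * 1.1 = 0.6875
print(zero_r_status({0: 0.625, 1: 0.375}, accuracy=0.66))  # below threshold: cross
print(zero_r_status({0: 0.625, 1: 0.375}, accuracy=0.70))  # above threshold: arrow
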
benchmark/ResultsFiles.py | 1044 lines (new file, diff suppressed because it is too large)

@@ -14,6 +14,7 @@ class Folders:
     report = os.path.join(exreport, "exreport_output")
     img = "img"
     excel = "excel"
+    sql = "sql"

     @staticmethod
     def src():

@@ -127,6 +128,9 @@ class Symbols:
     check_mark = "\N{heavy check mark}"
     exclamation = "\N{heavy exclamation mark symbol}"
     black_star = "\N{black star}"
+    cross = "\N{Ballot X}"
+    upward_arrow = "\N{Black-feathered north east arrow}"
+    down_arrow = "\N{downwards black arrow}"
     equal_best = check_mark
     better_best = black_star

@@ -16,6 +16,8 @@ def main(args_test=None):
     folders.append(os.path.join(args.project_name, Folders.report))
     folders.append(os.path.join(args.project_name, Folders.img))
     folders.append(os.path.join(args.project_name, Folders.excel))
+    folders.append(os.path.join(args.project_name, Folders.sql))
+
     try:
         for folder in folders:
             print(f"Creating folder {folder}")

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import os
-from benchmark.Results import Report, Excel, SQL, ReportBest, ReportDatasets
+from benchmark.Results import Report, ReportBest
+from benchmark.ResultsFiles import Excel, SQLFile, ReportDatasets
 from benchmark.Utils import Files, Folders
 from benchmark.Arguments import Arguments
 from pathlib import Path

@@ -67,7 +68,7 @@ def main(args_test=None):
         print(e)
         return
     if args.sql:
-        sql = SQL(args.file_name)
+        sql = SQLFile(args.file_name)
         sql.report()
     if args.excel:
         excel = Excel(

@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
 fit_features=0
+margin=0.1

@@ -9,4 +9,5 @@ discretize=1
 nodes=Nodes
 leaves=Leaves
 depth=Depth
 fit_features=1
+margin=0.1

@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
 fit_features=0
+margin=0.1

@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
 fit_features=0
+margin=0.1

@@ -4,7 +4,7 @@ from unittest.mock import patch
 from openpyxl import load_workbook
 from .TestBase import TestBase
 from ..Utils import Folders, Files, NO_RESULTS
-from ..Results import Benchmark
+from ..ResultsFiles import Benchmark
 from .._version import __version__


@@ -2,7 +2,7 @@ import os
 from openpyxl import load_workbook
 from xlsxwriter import Workbook
 from .TestBase import TestBase
-from ..Results import Excel
+from ..ResultsFiles import Excel
 from ..Utils import Folders


@@ -2,7 +2,9 @@ import os
 from io import StringIO
 from unittest.mock import patch
 from .TestBase import TestBase
-from ..Results import Report, BaseReport, ReportBest, ReportDatasets, get_input
+from ..Results import Report, ReportBest
+from ..ResultsFiles import ReportDatasets
+from ..ResultsBase import BaseReport, get_input
 from ..Utils import Symbols


@@ -1,7 +1,7 @@
 import os
 from .TestBase import TestBase
-from ..Results import SQL
-from ..Utils import Folders
+from ..ResultsFiles import SQLFile
+from ..Utils import Folders, Files


 class SQLTest(TestBase):

@@ -9,14 +9,14 @@ class SQLTest(TestBase):
         files = [
             "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql",
         ]
-        self.remove_files(files, Folders.results)
+        self.remove_files(files, Folders.sql)
         return super().tearDown()

     def test_report_SQL(self):
         file_name = "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json"
-        report = SQL(file_name)
+        report = SQLFile(file_name)
         report.report()
         file_name = os.path.join(
-            Folders.results, file_name.replace(".json", ".sql")
+            Folders.sql, file_name.replace(Files.report_ext, ".sql")
         )
         self.check_file_file(file_name, "sql")

@@ -186,6 +186,7 @@ class UtilTest(TestBase):
             "leaves": "Leaves",
             "depth": "Depth",
             "fit_features": "0",
+            "margin": "0.1",
         }
         computed = EnvData().load()
         self.assertDictEqual(computed, expected)

@@ -5,6 +5,7 @@ Creating folder test_project/exreport
 Creating folder test_project/exreport/exreport_output
 Creating folder test_project/img
 Creating folder test_project/excel
+Creating folder test_project/sql
 Done!
 Please, edit .env file with your settings and add a datasets folder
 with an all.txt file with the datasets you want to use.