mirror of https://github.com/Doctorado-ML/benchmark.git (synced 2025-08-15 15:35:52 +00:00)

Commit: Begin refactor Results
@@ -163,16 +163,18 @@ class Datasets:
         attr = SimpleNamespace()
         attr.dataset = name
         values, counts = np.unique(y, return_counts=True)
-        comp = ""
-        sep = ""
-        for count in counts:
-            comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
-            sep = "/ "
-        attr.balance = comp
-        attr.classes = len(np.unique(y))
+        attr.classes = len(values)
         attr.samples = X.shape[0]
         attr.features = X.shape[1]
         attr.cont_features = len(self.get_continuous_features())
+        attr.distribution = {}
+        comp = ""
+        sep = ""
+        for value, count in zip(values, counts):
+            comp += f"{sep}{count/sum(counts)*100:5.2f}% ({count}) "
+            sep = "/ "
+            attr.distribution[value.item()] = count / sum(counts)
+        attr.balance = comp
         self.discretize = tmp
         return attr

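For context, a minimal standalone sketch of what the refactored attribute computation produces. The toy labels and variable names below are illustrative only and are not part of the commit:

    import numpy as np
    from types import SimpleNamespace

    y = np.array([0, 0, 0, 1, 1])  # toy binary target
    values, counts = np.unique(y, return_counts=True)
    attr = SimpleNamespace()
    attr.classes = len(values)  # 2
    # per-class share of the samples, keyed by class label
    attr.distribution = {
        v.item(): c / counts.sum() for v, c in zip(values, counts)
    }  # {0: 0.6, 1: 0.4}
    # human-readable balance string, equivalent to the loop in the diff
    attr.balance = "/ ".join(
        f"{c / counts.sum() * 100:5.2f}% ({c}) " for c in counts
    )  # "60.00% (3) / 40.00% (2) "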
benchmark/Results.py (1220 changed lines): file diff suppressed because it is too large.

benchmark/ResultsBase.py (new file, 163 lines):
@@ -0,0 +1,163 @@
import abc
import json
import os

from .Arguments import ALL_METRICS, EnvData
from .Datasets import Datasets
from .Experiments import BestResults
from .Utils import Folders, Symbols


def get_input(message="", is_test=False):
    return "test" if is_test else input(message)


class BestResultsEver:
    def __init__(self):
        self.data = {}
        for i in ["Tanveer", "Surcov", "Arff"]:
            self.data[i] = {}
            for metric in ALL_METRICS:
                self.data[i][metric.replace("-", "_")] = ["self", 1.0]
                self.data[i][metric] = ["self", 1.0]
        self.data["Tanveer"]["accuracy"] = [
            "STree_default (liblinear-ovr)",
            40.282203,
        ]
        self.data["Arff"]["accuracy"] = [
            "STree_default (linear-ovo)",
            22.109799,
        ]

    def get_name_value(self, key, score):
        return self.data[key][score]


class BaseReport(abc.ABC):
    def __init__(self, file_name, best_file=False):
        self.file_name = file_name
        if not os.path.isfile(file_name):
            if not os.path.isfile(os.path.join(Folders.results, file_name)):
                raise FileNotFoundError(f"{file_name} does not exists!")
            else:
                self.file_name = os.path.join(Folders.results, file_name)
        with open(self.file_name) as f:
            self.data = json.load(f)
        self.best_acc_file = best_file
        if best_file:
            self.lines = self.data
        else:
            self.lines = self.data["results"]
        self.score_name = self.data["score_name"]
        self.__load_env_data()
        self.__compute_best_results_ever()

    def __load_env_data(self):
        # Set the labels for nodes, leaves, depth
        env_data = EnvData.load()
        self.nodes_label = env_data["nodes"]
        self.leaves_label = env_data["leaves"]
        self.depth_label = env_data["depth"]
        self.key = env_data["source_data"]
        self.margin = float(env_data["margin"])

    def __compute_best_results_ever(self):
        best = BestResultsEver()
        self.best_score_name, self.best_score_value = best.get_name_value(
            self.key, self.score_name
        )

    def _get_accuracy(self, item):
        return self.data[item][0] if self.best_acc_file else item["score"]

    def report(self):
        self.header()
        accuracy_total = 0.0
        for result in self.lines:
            self.print_line(result)
            accuracy_total += self._get_accuracy(result)
        self.footer(accuracy_total)

    def _load_best_results(self, score, model):
        best = BestResults(score, model, Datasets())
        self.best_results = best.load({})

    def _compute_status(self, dataset, accuracy: float):
        status = " "
        if self.compare:
            # Compare with best results
            best = self.best_results[dataset][0]
            if accuracy == best:
                status = Symbols.equal_best
            elif accuracy > best:
                status = Symbols.better_best
        else:
            # compare with dataset label distribution only if its a binary one
            # down_arrow if accuracy is less than the ZeroR
            # black_star if accuracy is greater than the ZeroR + margin%
            if self.score_name == "accuracy":
                dt = Datasets()
                attr = dt.get_attributes(dataset)
                if attr.classes == 2:
                    max_category = max(attr.distribution.values())
                    max_value = max_category * (1 + self.margin)
                    if max_value > 1:
                        max_value = 0.9995
                    status = (
                        Symbols.cross
                        if accuracy <= max_value
                        else Symbols.upward_arrow
                        if accuracy > max_value
                        else " "
                    )
        if status != " ":
            if status not in self._compare_totals:
                self._compare_totals[status] = 1
            else:
                self._compare_totals[status] += 1
        return status

    def _status_meaning(self, status):
        meaning = {
            Symbols.equal_best: "Equal to best",
            Symbols.better_best: "Better than best",
            Symbols.cross: "Less than or equal to ZeroR",
            Symbols.upward_arrow: f"Better than ZeroR + "
            f"{self.margin*100:3.1f}%",
        }
        return meaning[status]

    def _get_best_accuracy(self):
        return self.best_score_value

    def _get_message_best_accuracy(self):
        return f"{self.score_name} compared to {self.best_score_name} .:"

    @abc.abstractmethod
    def header(self) -> None:
        pass

    @abc.abstractmethod
    def print_line(self, result) -> None:
        pass

    @abc.abstractmethod
    def footer(self, accuracy: float) -> None:
        pass


class StubReport(BaseReport):
    def __init__(self, file_name):
        super().__init__(file_name=file_name, best_file=False)

    def print_line(self, line) -> None:
        pass

    def header(self) -> None:
        self.title = self.data["title"]
        self.duration = self.data["duration"]

    def footer(self, accuracy: float) -> None:
        self.accuracy = accuracy
        self.score = accuracy / self._get_best_accuracy()
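A brief usage sketch of the new base class, mirroring what StubReport does. The result file name below is hypothetical and only serves as an illustration:

    # Hypothetical file name; BaseReport resolves it against Folders.results.
    from benchmark.ResultsBase import StubReport

    report = StubReport("results_accuracy_Model_2022-01-01_00:00:00_0.json")
    report.report()  # header() -> print_line() per result -> footer()
    print(report.title, report.accuracy, report.score)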
benchmark/ResultsFiles.py (new file, 1044 lines): file diff suppressed because it is too large.
@@ -14,6 +14,7 @@ class Folders:
     report = os.path.join(exreport, "exreport_output")
     img = "img"
     excel = "excel"
+    sql = "sql"

     @staticmethod
     def src():
@@ -127,6 +128,9 @@ class Symbols:
     check_mark = "\N{heavy check mark}"
     exclamation = "\N{heavy exclamation mark symbol}"
     black_star = "\N{black star}"
+    cross = "\N{Ballot X}"
+    upward_arrow = "\N{Black-feathered north east arrow}"
+    down_arrow = "\N{downwards black arrow}"
     equal_best = check_mark
     better_best = black_star

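These new symbols feed the ZeroR check in BaseReport._compute_status shown earlier. A worked example with made-up numbers: with margin=0.1 and a binary dataset whose majority class covers 62% of the samples, the threshold is 0.62 * (1 + 0.1) = 0.682, so an accuracy of 0.65 is tagged with cross and 0.70 with upward_arrow.

    # Illustrative only: mirrors the threshold logic in BaseReport._compute_status
    margin = 0.1
    majority = 0.62  # share of the majority class in a binary dataset
    threshold = majority * (1 + margin)  # 0.682
    if threshold > 1:
        threshold = 0.9995
    for acc in (0.65, 0.70):
        print(acc, "cross" if acc <= threshold else "upward_arrow")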
@@ -16,6 +16,8 @@ def main(args_test=None):
     folders.append(os.path.join(args.project_name, Folders.report))
     folders.append(os.path.join(args.project_name, Folders.img))
     folders.append(os.path.join(args.project_name, Folders.excel))
+    folders.append(os.path.join(args.project_name, Folders.sql))
+
     try:
         for folder in folders:
             print(f"Creating folder {folder}")
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 import os
-from benchmark.Results import Report, Excel, SQL, ReportBest, ReportDatasets
+from benchmark.Results import Report, ReportBest
+from benchmark.ResultsFiles import Excel, SQLFile, ReportDatasets
 from benchmark.Utils import Files, Folders
 from benchmark.Arguments import Arguments
 from pathlib import Path
@@ -67,7 +68,7 @@ def main(args_test=None):
         print(e)
         return
     if args.sql:
-        sql = SQL(args.file_name)
+        sql = SQLFile(args.file_name)
         sql.report()
     if args.excel:
         excel = Excel(
@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
-fit_features=0
+fit_features=0
+margin=0.1
@@ -9,4 +9,5 @@ discretize=1
 nodes=Nodes
 leaves=Leaves
 depth=Depth
-fit_features=1
+fit_features=1
+margin=0.1
@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
-fit_features=0
+fit_features=0
+margin=0.1
@@ -10,4 +10,5 @@ discretize=0
 nodes=Nodes
 leaves=Leaves
 depth=Depth
-fit_features=0
+fit_features=0
+margin=0.1
@@ -4,7 +4,7 @@ from unittest.mock import patch
 from openpyxl import load_workbook
 from .TestBase import TestBase
 from ..Utils import Folders, Files, NO_RESULTS
-from ..Results import Benchmark
+from ..ResultsFiles import Benchmark
 from .._version import __version__


@@ -2,7 +2,7 @@ import os
 from openpyxl import load_workbook
 from xlsxwriter import Workbook
 from .TestBase import TestBase
-from ..Results import Excel
+from ..ResultsFiles import Excel
 from ..Utils import Folders


@@ -2,7 +2,9 @@ import os
 from io import StringIO
 from unittest.mock import patch
 from .TestBase import TestBase
-from ..Results import Report, BaseReport, ReportBest, ReportDatasets, get_input
+from ..Results import Report, ReportBest
+from ..ResultsFiles import ReportDatasets
+from ..ResultsBase import BaseReport, get_input
 from ..Utils import Symbols


@@ -1,7 +1,7 @@
 import os
 from .TestBase import TestBase
-from ..Results import SQL
-from ..Utils import Folders
+from ..ResultsFiles import SQLFile
+from ..Utils import Folders, Files


 class SQLTest(TestBase):
@@ -9,14 +9,14 @@ class SQLTest(TestBase):
         files = [
             "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql",
         ]
-        self.remove_files(files, Folders.results)
+        self.remove_files(files, Folders.sql)
         return super().tearDown()

     def test_report_SQL(self):
         file_name = "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json"
-        report = SQL(file_name)
+        report = SQLFile(file_name)
         report.report()
         file_name = os.path.join(
-            Folders.results, file_name.replace(".json", ".sql")
+            Folders.sql, file_name.replace(Files.report_ext, ".sql")
         )
         self.check_file_file(file_name, "sql")
@@ -186,6 +186,7 @@ class UtilTest(TestBase):
             "leaves": "Leaves",
             "depth": "Depth",
             "fit_features": "0",
+            "margin": "0.1",
         }
         computed = EnvData().load()
         self.assertDictEqual(computed, expected)
@@ -5,6 +5,7 @@ Creating folder test_project/exreport
 Creating folder test_project/exreport/exreport_output
 Creating folder test_project/img
 Creating folder test_project/excel
+Creating folder test_project/sql
 Done!
 Please, edit .env file with your settings and add a datasets folder
 with an all.txt file with the datasets you want to use.