58 Commits

Author SHA1 Message Date
9ace64832a Add sonar build github action 2022-05-18 19:07:25 +02:00
9b78c1a73e Set English, if needed, as default language for R 2022-05-12 12:10:13 +02:00
29d17a4072 Set english as default language for R 2022-05-12 11:27:49 +02:00
81e8bbfebb Update codecov badge 2022-05-11 22:58:49 +02:00
58199262c6 Change message language to R script 2022-05-11 22:33:01 +02:00
f254ea77a7 Add GITHUB_PAT env variable 2022-05-11 18:51:52 +02:00
3d12f458e7 try with remotes 2022-05-11 18:38:52 +02:00
a99f8e6916 update main.yml 2022-05-11 18:29:52 +02:00
8c4a5ebae5 Remove codeql and add R env 2022-05-11 18:14:37 +02:00
3c28fa242e Fix issue in ci 2022-05-11 17:16:53 +02:00
1cb916867c Add package to requirements 2022-05-11 17:12:53 +02:00
65a810d60e remove windows from platforms in ci 2022-05-11 16:25:41 +02:00
39c93e8957 fix python version issue in ci 2022-05-11 16:19:12 +02:00
5bf31b1304 debug github ci 2022-05-11 16:16:22 +02:00
e69c8fea59 remove uneeded requirement from tests 2022-05-11 12:56:52 +02:00
302a6d536b update readme 2022-05-11 12:32:06 +02:00
d77e9737fe update ci workflow 2022-05-11 12:24:55 +02:00
c7768ad387 Add github ci and badges
refactor setup
2022-05-11 12:21:55 +02:00
d826a65300 Fix be_print_strees_test 2022-05-10 14:16:51 +02:00
aebf301b29 Fix be_print_strees_test 2022-05-10 14:14:46 +02:00
e16dde713c Fix issue in be_print_strees_test 2022-05-10 12:42:01 +02:00
a649efde73 Fix be_print_strees issues 2022-05-09 16:27:37 +02:00
e45ef1c9fa Add file not found manage to be_report 2022-05-09 12:02:33 +02:00
7501ce7761 Enhance error msgs in be_main 2022-05-09 11:37:53 +02:00
ca96d05124 Complete be_print_strees 2022-05-09 01:34:25 +02:00
b0c94d4983 Begin print_strees_test 2022-05-09 01:00:51 +02:00
534f32b625 Begin print_strees_test 2022-05-09 00:30:33 +02:00
b3bc2fbd2f Complete be_main tests 2022-05-09 00:23:18 +02:00
09b2ede836 refactor remove iwss from results 2022-05-08 22:50:09 +02:00
4a5225d3dc refactor remove iwss from results 2022-05-08 22:49:50 +02:00
80eb9f1db7 Begin be_main tests 2022-05-08 19:59:53 +02:00
e58901a307 Complete be_benchmark tests 2022-05-08 18:14:55 +02:00
bb4769de43 Continue benchmark tests 2022-05-08 17:19:35 +02:00
1db5d8723a Add no .env exception 2022-05-08 16:51:20 +02:00
2c8646c8d8 Begin be_benchmark test 2022-05-08 16:06:14 +02:00
8457c9b531 Compte be_best & be_build_best tests 2022-05-08 02:03:22 +02:00
4fe1e10488 Complete be_grid tests 2022-05-08 01:31:03 +02:00
5c4d5cb99e Continue be_grid tests 2022-05-08 00:12:52 +02:00
986341723c Continue be_grid tests 2022-05-07 23:33:35 +02:00
af95e9c6bc Begin be_grid tests 2022-05-07 23:28:38 +02:00
b8c4e30714 Remove be_td and be_repair 2022-05-07 19:58:20 +02:00
50d0464702 Complete be_list tests 2022-05-07 19:37:36 +02:00
fe0daf6204 Refactor fake_out variable 2022-05-07 19:14:44 +02:00
fb324ad7ad Fix lint issues 2022-05-07 18:48:45 +02:00
40814c6f1f Add be_summary tests 2022-05-07 18:35:04 +02:00
31059ea117 Continue script testing 2022-05-07 02:08:11 +02:00
df757fefcd Refactor testing 2022-05-07 01:33:35 +02:00
3b214773ff Refactor scripts testing 2022-05-06 23:05:43 +02:00
bb0821c56e Begin script testing 2022-05-06 19:35:14 +02:00
3009167813 Add some tests 2022-05-06 17:15:24 +02:00
d87c7064a9 Fix test issues 2022-05-06 11:15:29 +02:00
3056bb649a Fix score names 2022-05-06 10:58:18 +02:00
47749cea94 Add color to summary and fix some issues 2022-05-05 23:37:13 +02:00
1cefc51870 Add be_build_grid and fix some scripts issues 2022-05-05 20:19:50 +02:00
5bcd4beca9 Fix be_list no data 2022-05-05 13:20:10 +02:00
4c7110214b Fix some issues 2022-05-05 13:11:39 +02:00
Ricardo Montañana Gómez
81ecec8846 Merge pull request #2 from Doctorado-ML/refactor_arguments
Refactor arguments
2022-05-05 00:03:04 +02:00
9d5d9ebd13 add nan handling to excel files 2022-05-04 11:37:57 +02:00
83 changed files with 2053 additions and 473 deletions

.github/workflows/build.yml (new file)

@@ -0,0 +1,32 @@
name: Build
on:
push:
branches:
- main
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Get project version
id: step_one
run: |
version=$(git describe --tags --abbrev=0)
echo "project_version=$version" >> $GITHUB_ENV
- uses: sonarsource/sonarqube-scan-action@master
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
with:
args: >
-Dsonar.projectVersion=${{ env.project_version }}
-Dsonar.python.version=3.10
# If you wish to fail your job when the Quality Gate is red, uncomment the
# following lines. This would typically be used to fail a deployment.
- uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

.github/workflows/main.yml (new file)

@@ -0,0 +1,58 @@
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
workflow_dispatch:
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-latest, ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
# Make dot command available in the environment
- name: Setup Graphviz
uses: ts-graphviz/setup-graphviz@v1
- uses: r-lib/actions/setup-r@v2
- name: Install R dependencies
env:
GITHUB_PAT: ${{ secrets.PAT_TOKEN }}
run: |
install.packages("remotes")
remotes::install_github("jacintoarias/exreport")
shell: Rscript {0}
# Allows install Wodt in dependencies.
- uses: webfactory/ssh-agent@v0.5.4
with:
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
# - name: Setup tmate session
# uses: mxschmitt/action-tmate@v3
- name: Install dependencies
run: |
pip install -q --upgrade pip
pip install -q -r requirements.txt
pip install -q --upgrade codecov coverage black flake8
- name: Lint
run: |
black --check --diff benchmark
flake8 --count benchmark
- name: Tests
run: |
coverage run -m unittest -v benchmark.tests
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.xml

.gitignore

@@ -134,3 +134,4 @@ Rplots.pdf
.vscode .vscode
.RData .RData
.Rhistory .Rhistory
.pre-commit-config.yaml

README.md

@@ -1,3 +1,9 @@
[![CI](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/Doctorado-ML/benchmark/branch/main/graph/badge.svg?token=ZRP937NDSG)](https://codecov.io/gh/Doctorado-ML/benchmark)
[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=alert_status&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=sqale_index&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark)
![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen)
# benchmark # benchmark
Benchmarking models Benchmarking models
@@ -6,53 +12,55 @@ Benchmarking models
```python ```python
# 5 Fold 10 seeds with STree with default hyperparameters and report # 5 Fold 10 seeds with STree with default hyperparameters and report
python src/main.py -m STree -P iMac27 -r 1 be_main -m STree -P iMac27 -r 1
# Setting number of folds, in this case 7 # Setting number of folds, in this case 7
python src/main.py -m STree -P iMac27 -n 7 be_main -m STree -P iMac27 -n 7
# 5 Fold 10 seeds with STree and best results hyperparams # 5 Fold 10 seeds with STree and best results hyperparams
python src/main.py -m STree -P iMac27 -f 1 be_main -m STree -P iMac27 -f 1
# 5 Fold 10 seeds with STree and same hyperparameters # 5 Fold 10 seeds with STree and same hyperparameters
python src/main.py -m STree -P iMac27 -p '{"kernel": "rbf", "gamma": 0.1}' be_main -m STree -P iMac27 -p '{"kernel": "rbf", "gamma": 0.1}'
``` ```
## Best Results ## Best Results
```python ```python
# Build best results of STree model and print report # Build best results of STree model and print report
python src/build_best.py -m STree -r 1 be_build_best -m STree -r 1
# Report of STree best results # Report of STree best results
python src/report.py -b STree be_report -b STree
``` ```
## Reports ## Reports
```python ```python
# Datasets list # Datasets list
python src/report.py be_report
# Report of given experiment # Report of given experiment
python src/report.py -f results/results_STree_iMac27_2021-09-22_17:13:02.json be_report -f results/results_STree_iMac27_2021-09-22_17:13:02.json
# Report of given experiment building excel file and compare with best results # Report of given experiment building excel file and compare with best results
python src/report.py -f results/results_STree_iMac27_2021-09-22_17:13:02.json -x 1 -c 1 be_report -f results/results_STree_iMac27_2021-09-22_17:13:02.json -x 1 -c 1
# Report of given experiment building sql file # Report of given experiment building sql file
python src/report.py -f results/results_STree_iMac27_2021-09-22_17:13:02.json -q 1 be_report -f results/results_STree_iMac27_2021-09-22_17:13:02.json -q 1
``` ```
## Benchmark ## Benchmark
```python ```python
# Do benchmark and print report # Do benchmark and print report
python src/benchmark.py be_benchmark
# Do benchmark, print report and build excel file with data # Do benchmark, print report and build excel file with data
python src/benchmark.py -x 1 be_benchmark -x 1
# Do benchmark, print report and build tex table with results
be_benchmark -t 1
``` ```
## List ## List
```python ```python
# List of results of given model # List of results of given model
python src/list.py -m ODTE be_list -m ODTE
# List of results of given model and score # List of results of given model and score
python src/list.py -m STree -s f1-macro be_list -m STree -s f1-macro
# List all results # List all results
python src/list.py be_list
``` ```

benchmark/Arguments.py

@@ -1,6 +1,7 @@
import sys
import argparse import argparse
from .Experiments import Models from .Experiments import Models
from .Utils import Files from .Utils import Files, NO_ENV
ALL_METRICS = ( ALL_METRICS = (
"accuracy", "accuracy",
@@ -15,21 +16,29 @@ class EnvData:
@staticmethod @staticmethod
def load(): def load():
args = {} args = {}
with open(Files.dot_env) as f: try:
for line in f.read().splitlines(): with open(Files.dot_env) as f:
if line == "" or line.startswith("#"): for line in f.read().splitlines():
continue if line == "" or line.startswith("#"):
key, value = line.split("=") continue
args[key] = value key, value = line.split("=")
return args args[key] = value
except FileNotFoundError:
print(NO_ENV, file=sys.stderr)
exit(1)
else:
return args
class EnvDefault(argparse.Action): class EnvDefault(argparse.Action):
# Thanks to https://stackoverflow.com/users/445507/russell-heilling # Thanks to https://stackoverflow.com/users/445507/russell-heilling
def __init__(self, envvar, required=True, default=None, **kwargs): def __init__(
self, envvar, required=True, default=None, mandatory=False, **kwargs
):
self._args = EnvData.load() self._args = EnvData.load()
default = self._args[envvar] if required and not mandatory:
required = False default = self._args[envvar]
required = False
super(EnvDefault, self).__init__( super(EnvDefault, self).__init__(
default=default, required=required, **kwargs default=default, required=required, **kwargs
) )
@@ -154,8 +163,6 @@ class Arguments:
"type": str, "type": str,
"required": True, "required": True,
"choices": list(models_data), "choices": list(models_data),
"action": EnvDefault,
"envvar": "model",
"help": "model name", "help": "model name",
}, },
], ],
@@ -165,8 +172,6 @@ class Arguments:
"type": str, "type": str,
"required": True, "required": True,
"choices": list(models_data), "choices": list(models_data),
"action": EnvDefault,
"envvar": "model",
"help": "model name", "help": "model name",
}, },
], ],
@@ -289,5 +294,5 @@ class Arguments:
) )
return self return self
def parse(self): def parse(self, args=None):
return self.ap.parse_args() return self.ap.parse_args(args)
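
For context, a minimal usage sketch (not part of the diff) of the new `.env` handling: `EnvData.load()` skips blank and `#` lines, splits the rest on `=`, and now exits with `NO_ENV` on stderr when the file is missing. The keys shown below are illustrative assumptions.

```python
from benchmark.Arguments import EnvData

# Hypothetical .env in the working directory (keys are illustrative):
#   model=STree
#   score=accuracy
#   # blank lines and lines starting with "#" are ignored
env = EnvData.load()  # prints NO_ENV to stderr and calls exit(1) if .env is missing
print(env["model"])   # -> "STree"
```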

benchmark/Experiments.py

@@ -13,7 +13,7 @@ from sklearn.model_selection import (
GridSearchCV, GridSearchCV,
cross_validate, cross_validate,
) )
from .Utils import Folders, Files from .Utils import Folders, Files, NO_RESULTS
from .Models import Models from .Models import Models
from .Arguments import EnvData from .Arguments import EnvData
@@ -70,7 +70,8 @@ class DatasetsSurcov:
) )
data.dropna(axis=0, how="any", inplace=True) data.dropna(axis=0, how="any", inplace=True)
self.columns = data.columns self.columns = data.columns
X = data.drop("class", axis=1).to_numpy() col_list = ["class"]
X = data.drop(col_list, axis=1).to_numpy()
y = data["class"].to_numpy() y = data["class"].to_numpy()
return X, y return X, y
@@ -91,7 +92,10 @@ class Datasets:
self.data_sets = [dataset_name] self.data_sets = [dataset_name]
def load(self, name): def load(self, name):
return self.dataset.load(name) try:
return self.dataset.load(name)
except FileNotFoundError:
raise ValueError(f"Unknown dataset: {name}")
def __iter__(self) -> Diterator: def __iter__(self) -> Diterator:
return Diterator(self.data_sets) return Diterator(self.data_sets)
@@ -144,6 +148,7 @@ class BestResults:
score=self.score_name, model=self.model score=self.score_name, model=self.model
) )
all_files = sorted(list(os.walk(Folders.results))) all_files = sorted(list(os.walk(Folders.results)))
found = False
for root, _, files in tqdm( for root, _, files in tqdm(
all_files, desc="files", disable=self.quiet all_files, desc="files", disable=self.quiet
): ):
@@ -153,6 +158,9 @@ class BestResults:
with open(file_name) as fp: with open(file_name) as fp:
data = json.load(fp) data = json.load(fp)
self._process_datafile(results, data, name) self._process_datafile(results, data, name)
found = True
if not found:
raise ValueError(NO_RESULTS)
# Build best results json file # Build best results json file
output = {} output = {}
datasets = Datasets() datasets = Datasets()
@@ -214,8 +222,11 @@ class Experiment:
grid_file = os.path.join( grid_file = os.path.join(
Folders.results, Files.grid_output(score_name, model_name) Folders.results, Files.grid_output(score_name, model_name)
) )
with open(grid_file) as f: try:
self.hyperparameters_dict = json.load(f) with open(grid_file) as f:
self.hyperparameters_dict = json.load(f)
except FileNotFoundError:
raise ValueError(f"{grid_file} does not exist")
else: else:
self.hyperparameters_dict = hyper.fill( self.hyperparameters_dict = hyper.fill(
dictionary=dictionary, dictionary=dictionary,
@@ -374,10 +385,6 @@ class GridSearch:
self.grid_file = os.path.join( self.grid_file = os.path.join(
Folders.results, Files.grid_input(score_name, model_name) Folders.results, Files.grid_input(score_name, model_name)
) )
with open(self.grid_file) as f:
self.grid = json.load(f)
self.duration = 0
self._init_data()
def get_output_file(self): def get_output_file(self):
return self.output_file return self.output_file
@@ -426,6 +433,10 @@ class GridSearch:
self.results[name] = [score, hyperparameters, message] self.results[name] = [score, hyperparameters, message]
def do_gridsearch(self): def do_gridsearch(self):
with open(self.grid_file) as f:
self.grid = json.load(f)
self.duration = 0
self._init_data()
now = time.time() now = time.time()
loop = tqdm( loop = tqdm(
list(self.datasets), list(self.datasets),
@@ -445,7 +456,7 @@ class GridSearch:
random_state=self.random_seeds[0], random_state=self.random_seeds[0],
n_splits=self.folds, n_splits=self.folds,
) )
clf = Models.get_model(self.model_name) clf = Models.get_model(self.model_name, self.random_seeds[0])
self.version = clf.version() if hasattr(clf, "version") else "-" self.version = clf.version() if hasattr(clf, "version") else "-"
self._num_warnings = 0 self._num_warnings = 0
warnings.warn = self._warn warnings.warn = self._warn
@@ -455,7 +466,7 @@ class GridSearch:
estimator=clf, estimator=clf,
cv=kfold, cv=kfold,
param_grid=self.grid, param_grid=self.grid,
scoring=self.score_name, scoring=self.score_name.replace("-", "_"),
n_jobs=-1, n_jobs=-1,
) )
grid.fit(X, y) grid.fit(X, y)
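
A short sketch (illustrative, not from the repository) of the error handling introduced above: loading a dataset whose data file does not exist now raises `ValueError` instead of leaking `FileNotFoundError`.

```python
from benchmark.Experiments import Datasets

dt = Datasets()
try:
    X, y = dt.load("unknown")
except ValueError as e:
    print(e)  # "Unknown dataset: unknown"
```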

benchmark/Results.py

@@ -8,7 +8,14 @@ import subprocess
import xlsxwriter import xlsxwriter
import numpy as np import numpy as np
from .Experiments import Datasets, BestResults from .Experiments import Datasets, BestResults
from .Utils import Folders, Files, Symbols, BEST_ACCURACY_STREE, TextColor from .Utils import (
Folders,
Files,
Symbols,
BEST_ACCURACY_STREE,
TextColor,
NO_RESULTS,
)
class BaseReport(abc.ABC): class BaseReport(abc.ABC):
@@ -269,7 +276,9 @@ class Excel(BaseReport):
self._compare_totals = {} self._compare_totals = {}
if book is None: if book is None:
self.excel_file_name = self.file_name.replace(".json", ".xlsx") self.excel_file_name = self.file_name.replace(".json", ".xlsx")
self.book = xlsxwriter.Workbook(self.excel_file_name) self.book = xlsxwriter.Workbook(
self.excel_file_name, {"nan_inf_to_errors": True}
)
self.set_book_properties() self.set_book_properties()
self.close = True self.close = True
else: else:
@@ -519,6 +528,34 @@ class Excel(BaseReport):
self.book.close() self.book.close()
class ReportDatasets:
@staticmethod
def report():
data_sets = Datasets()
color_line = TextColor.LINE1
print(color_line, end="")
print(f"{'Dataset':30s} Samp. Feat. Cls Balance")
print("=" * 30 + " ===== ===== === " + "=" * 40)
for dataset in data_sets:
X, y = data_sets.load(dataset)
color_line = (
TextColor.LINE2
if color_line == TextColor.LINE1
else TextColor.LINE1
)
values, counts = np.unique(y, return_counts=True)
comp = ""
sep = ""
for count in counts:
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
sep = "/ "
print(color_line, end="")
print(
f"{dataset:30s} {X.shape[0]:5,d} {X.shape[1]:5,d} "
f"{len(np.unique(y)):3d} {comp:40s}"
)
class SQL(BaseReport): class SQL(BaseReport):
table_name = "results" table_name = "results"
@@ -604,6 +641,8 @@ class Benchmark:
summary = Summary() summary = Summary()
summary.acquire(given_score=self._score) summary.acquire(given_score=self._score)
self._models = summary.get_models() self._models = summary.get_models()
if self._models == []:
raise ValueError(NO_RESULTS)
for model in self._models: for model in self._models:
best = summary.best_result( best = summary.best_result(
criterion="model", value=model, score=self._score criterion="model", value=model, score=self._score
@@ -782,7 +821,9 @@ class Benchmark:
) )
def excel(self): def excel(self):
book = xlsxwriter.Workbook(self.get_excel_file_name()) book = xlsxwriter.Workbook(
self.get_excel_file_name(), {"nan_inf_to_errors": True}
)
Excel.set_properties(book, "Experimentation summary") Excel.set_properties(book, "Experimentation summary")
sheet = book.add_worksheet("Benchmark") sheet = book.add_worksheet("Benchmark")
normal = book.add_format({"font_size": 14, "border": 1}) normal = book.add_format({"font_size": 14, "border": 1})
@@ -1107,6 +1148,8 @@ class Summary:
data = self.get_results_criteria( data = self.get_results_criteria(
score, model, input_data, sort_key, number score, model, input_data, sort_key, number
) )
if data == []:
raise ValueError(NO_RESULTS)
max_file = max(len(x["file"]) for x in data) max_file = max(len(x["file"]) for x in data)
max_title = max(len(x["title"]) for x in data) max_title = max(len(x["title"]) for x in data)
if self.hidden: if self.hidden:
@@ -1117,7 +1160,7 @@ class Summary:
color2 = TextColor.LINE2 color2 = TextColor.LINE2
print(color1, end="") print(color1, end="")
print( print(
f"{'Date':10s} {'File':{max_file}s} {'Score':7s} {'Time(h)':7s} " f"{'Date':10s} {'File':{max_file}s} {'Score':8s} {'Time(h)':7s} "
f"{'Title':s}" f"{'Title':s}"
) )
print( print(
@@ -1146,47 +1189,94 @@ class Summary:
def show_result(self, data: dict, title: str = "") -> None: def show_result(self, data: dict, title: str = "") -> None:
def whites(n: int) -> str: def whites(n: int) -> str:
return " " * n + "*" return " " * n + color1 + "*"
if data == {}: if data == {}:
print(f"** {title} has No data **") print(f"** {title} has No data **")
return return
color1 = TextColor.CYAN
color2 = TextColor.YELLOW
file_name = data["file"] file_name = data["file"]
metric = data["metric"] metric = data["metric"]
result = StubReport(os.path.join(Folders.results, file_name)) result = StubReport(os.path.join(Folders.results, file_name))
length = 81 length = 81
print("*" * length) print(color1 + "*" * length)
if title != "": if title != "":
print(f"*{title:^{length - 2}s}*") print(
"*"
+ color2
+ TextColor.BOLD
+ f"{title:^{length - 2}s}"
+ TextColor.ENDC
+ color1
+ "*"
)
print("*" + "-" * (length - 2) + "*") print("*" + "-" * (length - 2) + "*")
print("*" + whites(length - 2)) print("*" + whites(length - 2))
print(f"* {result.data['title']:^{length - 4}} *")
print("*" + whites(length - 2))
print( print(
f"* Model: {result.data['model']:15s} " "* "
f"Ver. {result.data['version']:10s} " + color2
f"Score: {result.data['score_name']:10s} " + f"{result.data['title']:^{length - 4}}"
f"Metric: {metric:10.7f}" + whites(length - 78) + color1
+ " *"
) )
print("*" + whites(length - 2)) print("*" + whites(length - 2))
print( print(
f"* Date : {result.data['date']:15s} Time: " "* Model: "
f"{result.data['time']:18s} Time Spent: " + color2
f"{result.data['duration']:9,.2f} secs." + whites(length - 78) + f"{result.data['model']:15s} "
+ color1
+ "Ver. "
+ color2
+ f"{result.data['version']:10s} "
+ color1
+ "Score: "
+ color2
+ f"{result.data['score_name']:10s} "
+ color1
+ "Metric: "
+ color2
+ f"{metric:10.7f}"
+ whites(length - 78)
)
print(color1 + "*" + whites(length - 2))
print(
"* Date : "
+ color2
+ f"{result.data['date']:15s}"
+ color1
+ " Time: "
+ color2
+ f"{result.data['time']:18s} "
+ color1
+ "Time Spent: "
+ color2
+ f"{result.data['duration']:9,.2f}"
+ color1
+ " secs."
+ whites(length - 78)
) )
seeds = str(result.data["seeds"]) seeds = str(result.data["seeds"])
seeds_len = len(seeds) seeds_len = len(seeds)
print( print(
f"* Seeds: {seeds:{seeds_len}s} Platform: " "* Seeds: "
f"{result.data['platform']:17s} " + whites(length - 79) + color2
+ f"{seeds:{seeds_len}s} "
+ color1
+ "Platform: "
+ color2
+ f"{result.data['platform']:17s} "
+ whites(length - 79)
) )
print( print(
f"* Stratified: {str(result.data['stratified']):15s}" "* Stratified: "
+ color2
+ f"{str(result.data['stratified']):15s}"
+ whites(length - 30) + whites(length - 30)
) )
print(f"* {file_name:60s}" + whites(length - 63)) print("* " + color2 + f"{file_name:60s}" + whites(length - 63))
print("*" + whites(length - 2)) print(color1 + "*" + whites(length - 2))
print("*" * length) print(color1 + "*" * length)
def best_results(self, criterion=None, value=None, score="accuracy", n=10): def best_results(self, criterion=None, value=None, score="accuracy", n=10):
# First filter the same score results (accuracy, f1, ...) # First filter the same score results (accuracy, f1, ...)
@@ -1196,6 +1286,8 @@ class Summary:
if criterion is None or value is None if criterion is None or value is None
else [x for x in haystack if x[criterion] == value] else [x for x in haystack if x[criterion] == value]
) )
if haystack == []:
raise ValueError(NO_RESULTS)
return ( return (
sorted( sorted(
haystack, haystack,
@@ -1231,11 +1323,14 @@ class Summary:
return best_results return best_results
def show_top(self, score="accuracy", n=10): def show_top(self, score="accuracy", n=10):
self.list_results( try:
score=score, self.list_results(
input_data=self.best_results(score=score, n=n), score=score,
sort_key="metric", input_data=self.best_results(score=score, n=n),
) sort_key="metric",
)
except ValueError as e:
print(e)
class PairCheck: class PairCheck:
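
The `nan_inf_to_errors` workbook option used above (commit 9d5d9ebd13, "add nan handling to excel files") is a standard xlsxwriter setting; a minimal sketch with a placeholder file name:

```python
import math
import xlsxwriter

# With nan_inf_to_errors, NaN/Inf values are stored as Excel errors (#NUM!, #DIV/0!)
# instead of raising an exception when a result metric is NaN.
book = xlsxwriter.Workbook("demo.xlsx", {"nan_inf_to_errors": True})
sheet = book.add_worksheet("Benchmark")
sheet.write_number(0, 0, math.nan)  # written as #NUM!
book.close()
```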

benchmark/Utils.py

@@ -2,6 +2,8 @@ import os
import subprocess import subprocess
BEST_ACCURACY_STREE = 40.282203 BEST_ACCURACY_STREE = 40.282203
NO_RESULTS = "** No results found **"
NO_ENV = "File .env not found"
class Folders: class Folders:
@@ -9,6 +11,7 @@ class Folders:
hidden_results = "hidden_results" hidden_results = "hidden_results"
exreport = "exreport" exreport = "exreport"
report = os.path.join(exreport, "exreport_output") report = os.path.join(exreport, "exreport_output")
img = "img"
@staticmethod @staticmethod
def src(): def src():

benchmark/__init__.py

@@ -1,10 +1,9 @@
from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer
from .Results import Report, Summary from .Results import Report, Summary
from .Arguments import EnvDefault
__author__ = "Ricardo Montañana Gómez" __author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez" __copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez"
__license__ = "MIT License" __license__ = "MIT License"
__author_email__ = "ricardo.montanana@alu.uclm.es" __author_email__ = "ricardo.montanana@alu.uclm.es"
__all__ = ["Experiment", "Datasets", "Report", "Summary", "EnvDefault"] __all__ = ["Experiment", "Datasets", "Report", "Summary"]

exreport R script

@@ -1,4 +1,8 @@
library(glue) library(glue)
Sys.setenv(LANG = "en")
if (Sys.getlocale("LC_MESSAGES") == "es_ES.UTF-8") {
resoutput <- capture.output(Sys.setlocale("LC_MESSAGES", 'en_GB.UTF-8'))
}
args = commandArgs(trailingOnly=TRUE) args = commandArgs(trailingOnly=TRUE)
if (length(args)!=3) { if (length(args)!=3) {
stop("Only two arguments must be supplied (score & input_file & visualize).n", call.=FALSE) stop("Only two arguments must be supplied (score & input_file & visualize).n", call.=FALSE)

be_benchmark (entry script)

@@ -4,17 +4,21 @@ from benchmark.Utils import Files
from benchmark.Arguments import Arguments from benchmark.Arguments import Arguments
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("score").xset("excel").xset("tex_output") arguments.xset("score").xset("excel").xset("tex_output").xset("quiet")
ar = arguments.parse() args = arguments.parse(args_test)
benchmark = Benchmark(score=ar.score, visualize=True) benchmark = Benchmark(score=args.score, visualize=not args.quiet)
benchmark.compile_results() try:
benchmark.save_results() benchmark.compile_results()
benchmark.report(ar.tex_output) except ValueError as e:
benchmark.exreport() print(e)
if ar.excel: else:
benchmark.excel() benchmark.save_results()
Files.open(benchmark.get_excel_file_name()) benchmark.report(args.tex_output)
if ar.tex_output: benchmark.exreport()
print(f"File {benchmark.get_tex_file()} generated") if args.excel:
benchmark.excel()
Files.open(benchmark.get_excel_file_name(), test=args.quiet)
if args.tex_output:
print(f"File {benchmark.get_tex_file()} generated")


@@ -4,12 +4,12 @@ from benchmark.Results import Summary
from benchmark.Arguments import ALL_METRICS, Arguments from benchmark.Arguments import ALL_METRICS, Arguments
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
metrics = list(ALL_METRICS) metrics = list(ALL_METRICS)
metrics.append("all") metrics.append("all")
arguments.xset("score", choices=metrics) arguments.xset("score", choices=metrics)
args = arguments.parse() args = arguments.parse(args_test)
metrics = ALL_METRICS if args.score == "all" else [args.score] metrics = ALL_METRICS if args.score == "all" else [args.score]
summary = Summary() summary = Summary()
summary.acquire() summary.acquire()

be_build_best (entry script)

@@ -7,13 +7,18 @@ from benchmark.Arguments import Arguments
""" """
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("score").xset("report").xset("model") arguments.xset("score", mandatory=True).xset("report")
args = arguments.parse() arguments.xset("model", mandatory=True)
args = arguments.parse(args_test)
datasets = Datasets() datasets = Datasets()
best = BestResults(args.score, args.model, datasets) best = BestResults(args.score, args.model, datasets)
best.build() try:
if args.report: best.build()
report = ReportBest(args.score, args.model, best=True, grid=False) except ValueError as e:
report.report() print(e)
else:
if args.report:
report = ReportBest(args.score, args.model, best=True, grid=False)
report.report()

be_build_grid (entry script)

@@ -2,9 +2,17 @@
import os import os
import json import json
from benchmark.Utils import Files, Folders from benchmark.Utils import Files, Folders
from benchmark.Arguments import Arguments
"""Build sample grid input file for the model with data taken from the
input grid used optimizing STree
"""
def main(): def main(args_test=None):
arguments = Arguments()
arguments.xset("model", mandatory=True).xset("score", mandatory=True)
args = arguments.parse(args_test)
data = [ data = [
'{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}', '{"C": 1e4, "gamma": 0.1, "kernel": "rbf"}',
'{"C": 7, "gamma": 0.14, "kernel": "rbf"}', '{"C": 7, "gamma": 0.14, "kernel": "rbf"}',
@@ -103,10 +111,9 @@ def main():
t2 = sorted([x for x in value if isinstance(x, str)]) t2 = sorted([x for x in value if isinstance(x, str)])
results_tmp[new_key] = t1 + t2 results_tmp[new_key] = t1 + t2
output.append(results_tmp) output.append(results_tmp)
# save results # save results
file_name = Files.grid_input("accuracy", "ODTE") file_name = Files.grid_input(args.score, args.model)
file_output = os.path.join(Folders.results, file_name) file_output = os.path.join(Folders.results, file_name)
with open(file_output, "w") as f: with open(file_output, "w") as f:
json.dump(output, f, indent=4) json.dump(output, f, indent=4)
print(f"Grid values saved to {file_output}") print(f"Generated grid input file to {file_output}")

be_grid (entry script)

@@ -6,11 +6,13 @@ from benchmark.Arguments import Arguments
""" """
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("score").xset("platform").xset("model").xset("n_folds") arguments.xset("score").xset("platform").xset("model", mandatory=True)
arguments.xset("quiet").xset("stratified").xset("dataset") arguments.xset("quiet").xset("stratified").xset("dataset").xset("n_folds")
args = arguments.parse() args = arguments.parse(args_test)
if not args.quiet:
print(f"Perform grid search with {args.model} model")
job = GridSearch( job = GridSearch(
score_name=args.score, score_name=args.score,
model_name=args.model, model_name=args.model,
@@ -18,6 +20,9 @@ def main():
datasets=Datasets(dataset_name=args.dataset), datasets=Datasets(dataset_name=args.dataset),
progress_bar=not args.quiet, progress_bar=not args.quiet,
platform=args.platform, platform=args.platform,
folds=args.folds, folds=args.n_folds,
) )
job.do_gridsearch() try:
job.do_gridsearch()
except FileNotFoundError:
print(f"** The grid input file [{job.grid_file}] could not be found")

be_list (entry script)

@@ -8,42 +8,46 @@ from benchmark.Arguments import Arguments
""" """
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("number").xset("model", required=False).xset("score") arguments.xset("number").xset("model", required=False).xset("key")
arguments.xset("hidden").xset("nan").xset("key") arguments.xset("hidden").xset("nan").xset("score", required=False)
args = arguments.parse() args = arguments.parse(args_test)
data = Summary(hidden=args.hidden) data = Summary(hidden=args.hidden)
data.acquire() data.acquire()
data.list_results( try:
score=args.score, data.list_results(
model=args.model,
sort_key=args.key,
number=args.number,
)
if args.nan:
results_nan = []
results = data.get_results_criteria(
score=args.score, score=args.score,
model=args.model, model=args.model,
input_data=None,
sort_key=args.key, sort_key=args.key,
number=args.number, number=args.number,
) )
for result in results: except ValueError as e:
if result["metric"] != result["metric"]: print(e)
results_nan.append(result) else:
if results_nan != []: if args.nan:
print( results_nan = []
"\n" results = data.get_results_criteria(
+ "*" * 30 score=args.score,
+ " Results with nan moved to hidden " model=args.model,
+ "*" * 30 input_data=None,
sort_key=args.key,
number=args.number,
) )
data.list_results(input_data=results_nan) for result in results:
for result in results_nan: if result["metric"] != result["metric"]:
name = result["file"] results_nan.append(result)
os.rename( if results_nan != []:
os.path.join(Folders.results, name), print(
os.path.join(Folders.hidden_results, name), "\n"
+ "*" * 30
+ " Results with nan moved to hidden "
+ "*" * 30
) )
data.list_results(input_data=results_nan)
for result in results_nan:
name = result["file"]
os.rename(
os.path.join(Folders.results, name),
os.path.join(Folders.hidden_results, name),
)

be_main (entry script)

@@ -8,36 +8,40 @@ from benchmark.Arguments import Arguments
""" """
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("stratified").xset("score").xset("model").xset("dataset") arguments.xset("stratified").xset("score").xset("model", mandatory=True)
arguments.xset("n_folds").xset("platform").xset("quiet").xset("title") arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
arguments.xset("hyperparameters").xset("paramfile").xset("report") arguments.xset("hyperparameters").xset("paramfile").xset("report")
arguments.xset("grid_paramfile") arguments.xset("grid_paramfile").xset("dataset")
args = arguments.parse() args = arguments.parse(args_test)
report = args.report or args.dataset is not None report = args.report or args.dataset is not None
if args.grid_paramfile: if args.grid_paramfile:
args.paramfile = False args.paramfile = False
job = Experiment( try:
score_name=args.score, job = Experiment(
model_name=args.model, score_name=args.score,
stratified=args.stratified, model_name=args.model,
datasets=Datasets(dataset_name=args.dataset), stratified=args.stratified,
hyperparams_dict=args.hyperparameters, datasets=Datasets(dataset_name=args.dataset),
hyperparams_file=args.paramfile, hyperparams_dict=args.hyperparameters,
grid_paramfile=args.grid_paramfile, hyperparams_file=args.paramfile,
progress_bar=not args.quiet, grid_paramfile=args.grid_paramfile,
platform=args.platform, progress_bar=not args.quiet,
title=args.title, platform=args.platform,
folds=args.n_folds, title=args.title,
) folds=args.n_folds,
job.do_experiment() )
if report: job.do_experiment()
result_file = job.get_output_file() except ValueError as e:
report = Report(result_file) print(e)
report.report()
if args.dataset is not None:
print(f"Partial result file removed: {result_file}")
os.remove(result_file)
else: else:
print(f"Results in {job.get_output_file()}") if report:
result_file = job.get_output_file()
report = Report(result_file)
report.report()
if args.dataset is not None:
print(f"Partial result file removed: {result_file}")
os.remove(result_file)
else:
print(f"Results in {job.get_output_file()}")


@@ -1,22 +1,26 @@
#!/usr/bin/env python #!/usr/bin/env python
from benchmark.Results import PairCheck from benchmark.Results import PairCheck
from Arguments import Arguments from benchmark.Arguments import Arguments
"""Check best results of two models giving scores and win-tie-loose results """Check best results of two models giving scores and win-tie-loose results
""" """
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("score").xset("win").xset("model1").xset("model2") arguments.xset("score").xset("win").xset("model1").xset("model2")
arguments.xset("lose") arguments.xset("lose")
args = arguments.parse() args = arguments.parse(args_test)
pair_check = PairCheck( pair_check = PairCheck(
args.score, args.score,
args.model1, args.model1,
args.model2, args.model2,
args.win_results, args.win,
args.lose_results, args.lose,
) )
pair_check.compute() try:
pair_check.report() pair_check.compute()
except ValueError as e:
print(str(e))
else:
pair_check.report()

be_print_strees (entry script)

@@ -1,18 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
import os import os
import subprocess
import json import json
from stree import Stree from stree import Stree
from graphviz import Source from graphviz import Source
from benchmark.Experiments import Datasets from benchmark.Experiments import Datasets
from benchmark.Utils import Files, Folders from benchmark.Utils import Files, Folders
from Arguments import Arguments from benchmark.Arguments import Arguments
def compute_stree(X, y, random_state):
clf = Stree(random_state=random_state)
clf.fit(X, y)
return clf
def load_hyperparams(score_name, model_name): def load_hyperparams(score_name, model_name):
@@ -62,7 +55,6 @@ def add_color(source):
def print_stree(clf, dataset, X, y, color, quiet): def print_stree(clf, dataset, X, y, color, quiet):
output_folder = "img"
samples, features = X.shape samples, features = X.shape
classes = max(y) + 1 classes = max(y) + 1
accuracy = clf.score(X, y) accuracy = clf.score(X, y)
@@ -72,20 +64,18 @@ def print_stree(clf, dataset, X, y, color, quiet):
if color: if color:
dot_source = add_color(dot_source) dot_source = add_color(dot_source)
grp = Source(dot_source) grp = Source(dot_source)
file_name = os.path.join(output_folder, f"stree_{dataset}") file_name = os.path.join(Folders.img, f"stree_{dataset}")
grp.render(format="png", filename=f"{file_name}") grp.render(format="png", filename=f"{file_name}")
os.remove(f"{file_name}") os.remove(f"{file_name}")
print(f"File {file_name}.png generated") file_name += ".png"
if not quiet: print(f"File {file_name} generated")
cmd_open = "/usr/bin/open" Files.open(name=file_name, test=quiet)
if os.path.isfile(cmd_open) and os.access(cmd_open, os.X_OK):
subprocess.run([cmd_open, f"{file_name}.png"])
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("color").xset("dataset", default="all").xset("quiet") arguments.xset("color").xset("dataset", default="all").xset("quiet")
args = arguments.parse() args = arguments.parse(args_test)
hyperparameters = load_hyperparams("accuracy", "ODTE") hyperparameters = load_hyperparams("accuracy", "ODTE")
random_state = 57 random_state = 57
dt = Datasets() dt = Datasets()
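
For reference, a minimal graphviz sketch (placeholder graph and file name) of the render call used above; `render()` writes both the plain DOT source and the PNG, which is why the script removes the source file afterwards.

```python
from graphviz import Source

grp = Source("digraph { a -> b }")              # placeholder DOT source
grp.render(format="png", filename="stree_demo")  # creates stree_demo and stree_demo.png
```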

be_repair (removed script)

@@ -1,23 +0,0 @@
#!/usr/bin/env python
import os
import json
from benchmark.Experiments import Files, Folders
def main():
versions = dict(SVC="-", STree="1.2.3", ODTE="0.3.2")
results = Files().get_all_results(hidden=False)
for result in results:
print(result)
file_name = os.path.join(Folders.results, result)
with open(file_name) as f:
data = json.load(f)
if "title" not in data:
print(f"Repairing title in {result}")
data["title"] = "default"
if "version" not in data:
print(f"Repairing version in {result}")
model = data["model"]
data["version"] = versions[model] if model in versions else "-"
with open(file_name, "w") as f:
json.dump(data, f, indent=4)

be_report (entry script)

@@ -1,11 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import numpy as np from benchmark.Results import Report, Excel, SQL, ReportBest, ReportDatasets
from benchmark.Experiments import Datasets from benchmark.Utils import Files
from benchmark.Results import Report, Excel, SQL, ReportBest
from benchmark.Utils import (
Files,
TextColor,
)
from benchmark.Arguments import Arguments from benchmark.Arguments import Arguments
@@ -15,55 +10,37 @@ If no argument is set, displays the datasets and its characteristics
""" """
def default_report(): def main(args_test=None):
sets = Datasets()
color_line = TextColor.LINE1
print(color_line, end="")
print(f"{'Dataset':30s} Samp. Feat Cls Balance")
print("=" * 30 + " ===== ==== === " + "=" * 40)
for line in sets:
X, y = sets.load(line)
color_line = (
TextColor.LINE2
if color_line == TextColor.LINE1
else TextColor.LINE1
)
values, counts = np.unique(y, return_counts=True)
comp = ""
sep = ""
for value, count in zip(values, counts):
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
sep = "/ "
print(color_line, end="")
print(
f"{line:30s} {X.shape[0]:5,d} {X.shape[1]:4d} "
f"{len(np.unique(y)):3d} {comp:40s}"
)
def main():
arguments = Arguments() arguments = Arguments()
arguments.xset("file").xset("excel").xset("sql").xset("compare") arguments.xset("file").xset("excel").xset("sql").xset("compare")
arguments.xset("best").xset("grid").xset("model", required=False).xset( arguments.xset("best").xset("grid").xset("model", required=False)
"score" arguments.xset("score", required=False)
) args = arguments.parse(args_test)
args = arguments.parse() if args.best:
args.grid = None
if args.grid: if args.grid:
args.best = False args.best = None
if args.file is None and args.best is None: if args.file is None and args.best is None and args.grid is None:
default_report() ReportDatasets.report()
else: else:
if args.best is not None or args.grid is not None: if args.best is not None or args.grid is not None:
report = ReportBest(args.score, args.model, args.best, args.grid) report = ReportBest(args.score, args.model, args.best, args.grid)
report.report() report.report()
else: else:
report = Report(args.file, args.compare) try:
report.report() report = Report(args.file, args.compare)
if args.excel: except FileNotFoundError as e:
excel = Excel(args.file, args.compare) print(e)
excel.report() else:
Files.open(excel.get_file_name()) report.report()
if args.sql: if args.excel:
sql = SQL(args.file) excel = Excel(
sql.report() file_name=args.file,
compare=args.compare,
)
excel.report()
is_test = args_test is not None
Files.open(excel.get_file_name(), is_test)
if args.sql:
sql = SQL(args.file)
sql.report()


@@ -3,22 +3,27 @@ from benchmark.Results import Summary
from benchmark.Arguments import ALL_METRICS, Arguments from benchmark.Arguments import ALL_METRICS, Arguments
def main(): def main(args_test=None):
arguments = Arguments() arguments = Arguments()
metrics = list(ALL_METRICS) metrics = list(ALL_METRICS)
metrics.append("all") metrics.append("all")
arguments.xset("score", choices=metrics).xset("model", required=False) arguments.xset("score", choices=metrics).xset("model")
args = arguments.parse() args = arguments.parse(args_test)
metrics = ALL_METRICS if args.score == "all" else [args.score] metrics = ALL_METRICS if args.score == "all" else [args.score]
summary = Summary() summary = Summary()
summary.acquire() summary.acquire()
for metric in metrics: for metric in metrics:
title = f"BEST RESULT of {metric} for {args.model}" title = f"BEST RESULT of {metric} for {args.model}"
best = summary.best_result( try:
criterion="model", value=args.model, score=metric best = summary.best_result(
) criterion="model", value=args.model, score=metric
summary.show_result(data=best, title=title) )
summary.show_result( except ValueError as e:
summary.best_result(score=metric), title=f"BEST RESULT of {metric}" print(e)
) else:
summary.show_top(score=metric, n=10) summary.show_result(data=best, title=title)
summary.show_result(
summary.best_result(score=metric),
title=f"BEST RESULT of {metric}",
)
summary.show_top(score=metric, n=10)


@@ -1,48 +0,0 @@
#!/usr/bin/env python
import sys
import time
from benchmark.Experiments import Datasets
from mufs import MUFS
def main():
mufs_i = MUFS()
mufs_c = MUFS()
mufs_f = MUFS()
datasets = Datasets()
iwss_t = iwss_tl = cfs_t = cfs_tl = fcbf_t = fcbf_tl = 0
for i in datasets:
X, y = datasets.load(i)
now = time.time()
mufs_i.iwss(X, y, float(sys.argv[1]))
iwss = time.time() - now
iwss_r = len(mufs_i.get_results())
now = time.time()
mufs_c.cfs(X, y)
cfs = time.time() - now
cfs_r = len(mufs_c.get_results())
now = time.time()
mufs_f.fcbf(X, y, 1e-5)
fcbf = time.time() - now
fcbf_r = len(mufs_f.get_results())
print(
f"{i:30s} {iwss:.4f}({iwss_r:2d}) {cfs:.4f}({cfs_r:2d}) {fcbf:.4f}"
f"({fcbf_r:2d})"
)
iwss_t += iwss
iwss_tl += iwss_r
cfs_t += cfs
cfs_tl += cfs_r
fcbf_t += fcbf
fcbf_tl += fcbf_r
num = len(list(datasets))
iwss_t /= num
iwss_tl /= num
cfs_t /= num
cfs_tl /= num
fcbf_t /= num
fcbf_tl /= num
print(
f"{'Average ..: ':30s} {iwss_t:.4f}({iwss_tl:.2f}) {cfs_t:.4f}"
f"({cfs_tl:.2f}) {fcbf_t:.4f}({fcbf_tl:.2f})"
)


@@ -0,0 +1,100 @@
import os
from io import StringIO
from unittest.mock import patch
from .TestBase import TestBase
from ..Arguments import Arguments, ALL_METRICS, NO_ENV
class ArgumentsTest(TestBase):
def build_args(self):
arguments = Arguments()
arguments.xset("n_folds").xset("model", mandatory=True)
arguments.xset("key", required=True)
return arguments
def test_build_hyperparams_file(self):
expected_metrics = (
"accuracy",
"f1-macro",
"f1-micro",
"f1-weighted",
"roc-auc-ovr",
)
self.assertSequenceEqual(ALL_METRICS, expected_metrics)
def test_parameters(self):
expected_parameters = {
"best": ("-b", "--best"),
"color": ("-c", "--color"),
"compare": ("-c", "--compare"),
"dataset": ("-d", "--dataset"),
"excel": ("-x", "--excel"),
"file": ("-f", "--file"),
"grid": ("-g", "--grid"),
"grid_paramfile": ("-g", "--grid_paramfile"),
"hidden": ("--hidden",),
"hyperparameters": ("-p", "--hyperparameters"),
"key": ("-k", "--key"),
"lose": ("-l", "--lose"),
"model": ("-m", "--model"),
"model1": ("-m1", "--model1"),
"model2": ("-m2", "--model2"),
"nan": ("--nan",),
"number": ("-n", "--number"),
"n_folds": ("-n", "--n_folds"),
"paramfile": ("-f", "--paramfile"),
"platform": ("-P", "--platform"),
"quiet": ("-q", "--quiet"),
"report": ("-r", "--report"),
"score": ("-s", "--score"),
"sql": ("-q", "--sql"),
"stratified": ("-t", "--stratified"),
"tex_output": ("-t", "--tex-output"),
"title": ("--title",),
"win": ("-w", "--win"),
}
arg = Arguments()
for key, value in expected_parameters.items():
self.assertSequenceEqual(arg.parameters[key][0], value, key)
def test_xset(self):
arguments = self.build_args()
test_args = ["-n", "3", "--model", "SVC", "-k", "metric"]
args = arguments.parse(test_args)
self.assertEqual(args.n_folds, 3)
self.assertEqual(args.model, "SVC")
self.assertEqual(args.key, "metric")
@patch("sys.stderr", new_callable=StringIO)
def test_xset_mandatory(self, stderr):
arguments = self.build_args()
test_args = ["-n", "3", "-k", "date"]
with self.assertRaises(SystemExit):
arguments.parse(test_args)
self.assertRegexpMatches(
stderr.getvalue(),
r"error: the following arguments are required: -m/--model",
)
@patch("sys.stderr", new_callable=StringIO)
def test_xset_required(self, stderr):
arguments = self.build_args()
test_args = ["-n", "3", "-m", "SVC"]
with self.assertRaises(SystemExit):
arguments.parse(test_args)
self.assertRegexpMatches(
stderr.getvalue(),
r"error: the following arguments are required: -k/--key",
)
@patch("sys.stderr", new_callable=StringIO)
def test_no_env(self, stderr):
path = os.getcwd()
os.chdir("..")
try:
self.build_args()
except SystemExit:
pass
finally:
os.chdir(path)
self.assertEqual(stderr.getvalue(), f"{NO_ENV}\n")


@@ -3,24 +3,20 @@ from io import StringIO
from unittest.mock import patch from unittest.mock import patch
from openpyxl import load_workbook from openpyxl import load_workbook
from .TestBase import TestBase from .TestBase import TestBase
from ..Utils import Folders, Files from ..Utils import Folders, Files, NO_RESULTS
from ..Results import Benchmark from ..Results import Benchmark
class BenchmarkTest(TestBase): class BenchmarkTest(TestBase):
def tearDown(self) -> None: def tearDown(self) -> None:
benchmark = Benchmark("accuracy", visualize=False) files = []
files = [ for score in ["accuracy", "unknown"]:
"exreport_accuracy.csv", files.append(Files.exreport(score))
"exreport_accuracy.txt", files.append(Files.exreport_output(score))
"exreport_accuracy.xlsx", files.append(Files.exreport_err(score))
"exreport_err_accuracy.txt", files.append(Files.exreport_excel(score))
"exreport_err_unknown.txt", files.append(Files.exreport_pdf)
"exreport_unknown.csv", files.append(Files.tex_output("accuracy"))
"exreport_unknown.txt",
"Rplots.pdf",
benchmark.get_tex_file(),
]
self.remove_files(files, Folders.exreport) self.remove_files(files, Folders.exreport)
self.remove_files(files, ".") self.remove_files(files, ".")
return super().tearDown() return super().tearDown()
@@ -29,27 +25,25 @@ class BenchmarkTest(TestBase):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
self.check_file_file( self.check_file_file(benchmark.get_result_file_name(), "exreport_csv")
benchmark.get_result_file_name(), "exreport_csv.test"
)
def test_exreport_report(self): def test_exreport_report(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch(self.output, new=StringIO()) as fake_out: with patch(self.output, new=StringIO()) as stdout:
benchmark.report(tex_output=False) benchmark.report(tex_output=False)
self.check_output_file(fake_out, "exreport_report.test") self.check_output_file(stdout, "exreport_report")
def test_exreport(self): def test_exreport(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch(self.output, new=StringIO()) as fake_out: with patch(self.output, new=StringIO()) as stdout:
benchmark.exreport() benchmark.exreport()
with open(os.path.join(self.test_files, "exreport.test")) as f: with open(os.path.join(self.test_files, "exreport.test")) as f:
expected_t = f.read() expected_t = f.read()
computed_t = fake_out.getvalue() computed_t = stdout.getvalue()
computed_t = computed_t.split("\n") computed_t = computed_t.split("\n")
computed_t.pop(0) computed_t.pop(0)
for computed, expected in zip(computed_t, expected_t.split("\n")): for computed, expected in zip(computed_t, expected_t.split("\n")):
@@ -70,24 +64,30 @@ class BenchmarkTest(TestBase):
self.assertFalse(os.path.exists(Folders.report)) self.assertFalse(os.path.exists(Folders.report))
def test_exreport_error(self): def test_exreport_error(self):
benchmark = Benchmark("unknown", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch(self.output, new=StringIO()) as fake_out: # Make Rscript exreport fail
benchmark._score = "unknown"
with patch(self.output, new=StringIO()) as stdout:
benchmark.exreport() benchmark.exreport()
self.check_output_file(fake_out, "exreport_error.test") self.check_output_file(stdout, "exreport_error")
def test_exreport_no_data(self):
benchmark = Benchmark("f1-weighted", visualize=False)
with self.assertRaises(ValueError) as msg:
benchmark.compile_results()
self.assertEqual(str(msg.exception), NO_RESULTS)
def test_tex_output(self): def test_tex_output(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
benchmark.save_results() benchmark.save_results()
with patch(self.output, new=StringIO()) as fake_out: with patch(self.output, new=StringIO()) as stdout:
benchmark.report(tex_output=True) benchmark.report(tex_output=True)
with open(os.path.join(self.test_files, "exreport_report.test")) as f: self.check_output_file(stdout, "exreport_report")
expected = f.read()
self.assertEqual(fake_out.getvalue(), expected)
self.assertTrue(os.path.exists(benchmark.get_tex_file())) self.assertTrue(os.path.exists(benchmark.get_tex_file()))
self.check_file_file(benchmark.get_tex_file(), "exreport_tex.test") self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
def test_excel_output(self): def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
@@ -100,7 +100,7 @@ class BenchmarkTest(TestBase):
book = load_workbook(file_name) book = load_workbook(file_name)
for sheet_name in book.sheetnames: for sheet_name in book.sheetnames:
sheet = book[sheet_name] sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}.test") self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
# ExcelTest.generate_excel_sheet( # ExcelTest.generate_excel_sheet(
# self, sheet, f"exreport_excel_{sheet_name}.test" # self, sheet, f"exreport_excel_{sheet_name}"
# ) # )


@@ -8,7 +8,7 @@ class BestResultTest(TestBase):
expected = { expected = {
"balance-scale": [ "balance-scale": [
0.98, 0.98,
{"splitter": "iwss", "max_features": "auto"}, {"splitter": "best", "max_features": "auto"},
"results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json", "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json",
], ],
"balloons": [ "balloons": [
@@ -62,3 +62,12 @@ class BestResultTest(TestBase):
best.fill({}), best.fill({}),
{"balance-scale": (0.0, {}, ""), "balloons": (0.0, {}, "")}, {"balance-scale": (0.0, {}, ""), "balloons": (0.0, {}, "")},
) )
def test_build_error(self):
dt = Datasets()
model = "SVC"
best = BestResults(
score="accuracy", model=model, datasets=dt, quiet=True
)
with self.assertRaises(ValueError):
best.build()


@@ -45,6 +45,18 @@ class DatasetTest(TestBase):
self.assertSequenceEqual(computed, value) self.assertSequenceEqual(computed, value)
self.set_env(".env.dist") self.set_env(".env.dist")
def test_load_dataset(self):
dt = Datasets()
X, y = dt.load("balance-scale")
self.assertSequenceEqual(X.shape, (625, 4))
self.assertSequenceEqual(y.shape, (625,))
def test_load_unknown_dataset(self):
dt = Datasets()
with self.assertRaises(ValueError) as msg:
dt.load("unknown")
self.assertEqual(str(msg.exception), "Unknown dataset: unknown")
def test_Datasets_subset(self): def test_Datasets_subset(self):
test = { test = {
".env.dist": "balloons", ".env.dist": "balloons",


@@ -23,7 +23,7 @@ class ExcelTest(TestBase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet(sheet, "excel_compared.test") self.check_excel_sheet(sheet, "excel_compared")
def test_report_excel(self): def test_report_excel(self):
file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
@@ -32,7 +32,7 @@ class ExcelTest(TestBase):
file_output = report.get_file_name() file_output = report.get_file_name()
book = load_workbook(file_output) book = load_workbook(file_output)
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet(sheet, "excel.test") self.check_excel_sheet(sheet, "excel")
def test_Excel_Add_sheet(self): def test_Excel_Add_sheet(self):
file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"
@@ -48,6 +48,6 @@ class ExcelTest(TestBase):
book.close() book.close()
book = load_workbook(os.path.join(Folders.results, excel_file_name)) book = load_workbook(os.path.join(Folders.results, excel_file_name))
sheet = book["STree"] sheet = book["STree"]
self.check_excel_sheet(sheet, "excel_add_STree.test") self.check_excel_sheet(sheet, "excel_add_STree")
sheet = book["ODTE"] sheet = book["ODTE"]
self.check_excel_sheet(sheet, "excel_add_ODTE.test") self.check_excel_sheet(sheet, "excel_add_ODTE")


@@ -36,7 +36,7 @@ class ExperimentTest(TestBase):
expected = { expected = {
"balance-scale": [ "balance-scale": [
0.98, 0.98,
{"splitter": "iwss", "max_features": "auto"}, {"splitter": "best", "max_features": "auto"},
"results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json", "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json",
], ],
"balloons": [ "balloons": [


@@ -37,7 +37,8 @@ class GridSearchTest(TestBase):
], ],
".", ".",
) )
_ = self.build_exp() grid = self.build_exp()
grid._init_data()
# check the output file is initialized # check the output file is initialized
with open(file_name) as f: with open(file_name) as f:
data = json.load(f) data = json.load(f)


@@ -80,7 +80,6 @@ class ModelTest(TestBase):
"GBC": ((15, 8, 3), 1.0), "GBC": ((15, 8, 3), 1.0),
} }
X, y = load_wine(return_X_y=True) X, y = load_wine(return_X_y=True)
print("")
for key, (value, score_expected) in test.items(): for key, (value, score_expected) in test.items():
clf = Models.get_model(key, random_state=1) clf = Models.get_model(key, random_state=1)
clf.fit(X, y) clf.fit(X, y)
@@ -91,5 +90,16 @@ class ModelTest(TestBase):
# score_expected, # score_expected,
# score_computed, # score_computed,
# ) # )
self.assertSequenceEqual(Models.get_complexity(key, clf), value) # Fix flaky test
if key == "AdaBoostStree":
# computed values
a_c, b_c, c_c = Models.get_complexity(key, clf)
# expected values
a_e, b_e, c_e = value
for c, e in zip((a_c, b_c, c_c), (a_e, b_e, c_e)):
self.assertAlmostEqual(c, e, delta=0.25)
else:
self.assertSequenceEqual(
Models.get_complexity(key, clf), value
)
self.assertEqual(score_computed, score_expected, key) self.assertEqual(score_computed, score_expected, key)


@@ -1,4 +1,3 @@
-import os
 from io import StringIO
 from unittest.mock import patch
 from .TestBase import TestBase
@@ -19,35 +18,32 @@ class PairCheckTest(TestBase):
     def test_pair_check(self):
         report = self.build_model(model1="ODTE", model2="STree")
         report.compute()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        computed = fake_out.getvalue()
-        with open(os.path.join(self.test_files, "paircheck.test"), "r") as f:
-            expected = f.read()
-        self.assertEqual(computed, expected)
+        self.check_output_file(stdout, "paircheck")

     def test_pair_check_win(self):
         report = self.build_model(win=True)
         report.compute()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "paircheck_win.test")
+        self.check_output_file(stdout, "paircheck_win")

     def test_pair_check_lose(self):
         report = self.build_model(
             model1="RandomForest", model2="STree", lose=True
         )
         report.compute()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "paircheck_lose.test")
+        self.check_output_file(stdout, "paircheck_lose")

     def test_pair_check_win_lose(self):
         report = self.build_model(win=True, lose=True)
         report.compute()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "paircheck_win_lose.test")
+        self.check_output_file(stdout, "paircheck_win_lose")

     def test_pair_check_store_result(self):
         report = self.build_model(win=True, lose=True)


@@ -1,16 +1,17 @@
+import os
 from io import StringIO
 from unittest.mock import patch
 from .TestBase import TestBase
-from ..Results import Report, BaseReport, ReportBest
+from ..Results import Report, BaseReport, ReportBest, ReportDatasets
 from ..Utils import Symbols


 class ReportTest(TestBase):
     def test_BaseReport(self):
         with patch.multiple(BaseReport, __abstractmethods__=set()):
-            file_name = (
-                "results/results_accuracy_STree_iMac27_2021-09-30_11:"
-                "42:07_0.json"
+            file_name = os.path.join(
+                "results",
+                "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
             )
             a = BaseReport(file_name)
             self.assertIsNone(a.header())
@@ -19,21 +20,23 @@ class ReportTest(TestBase):
     def test_report_with_folder(self):
         report = Report(
-            file_name="results/results_accuracy_STree_iMac27_2021-09-30_11:"
-            "42:07_0.json"
+            file_name=os.path.join(
+                "results",
+                "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
+            )
         )
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report.test")
+        self.check_output_file(stdout, "report")

     def test_report_without_folder(self):
         report = Report(
             file_name="results_accuracy_STree_iMac27_2021-09-30_11:42:07_0"
             ".json"
         )
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report.test")
+        self.check_output_file(stdout, "report")

     def test_report_compared(self):
         report = Report(
@@ -41,9 +44,9 @@ class ReportTest(TestBase):
             ".json",
             compare=True,
         )
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report_compared.test")
+        self.check_output_file(stdout, "report_compared")

     def test_compute_status(self):
         file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"
@@ -64,18 +67,24 @@ class ReportTest(TestBase):
     def test_report_best(self):
         report = ReportBest("accuracy", "STree", best=True, grid=False)
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report_best.test")
+        self.check_output_file(stdout, "report_best")

     def test_report_grid(self):
         report = ReportBest("accuracy", "STree", best=False, grid=True)
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report_grid.test")
+        self.check_output_file(stdout, "report_grid")

     def test_report_best_both(self):
         report = ReportBest("accuracy", "STree", best=True, grid=True)
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.report()
-        self.check_output_file(fake_out, "report_best.test")
+        self.check_output_file(stdout, "report_best")
+
+    @patch("sys.stdout", new_callable=StringIO)
+    def test_report_datasets(self, mock_output):
+        report = ReportDatasets()
+        report.report()
+        self.check_output_file(mock_output, "report_datasets")


@@ -19,4 +19,4 @@ class SQLTest(TestBase):
         file_name = os.path.join(
             Folders.results, file_name.replace(".json", ".sql")
         )
-        self.check_file_file(file_name, "sql.test")
+        self.check_file_file(file_name, "sql")


@@ -2,6 +2,7 @@ from io import StringIO
 from unittest.mock import patch
 from .TestBase import TestBase
 from ..Results import Summary
+from ..Utils import NO_RESULTS


 class SummaryTest(TestBase):
@@ -130,60 +131,60 @@ class SummaryTest(TestBase):
     def test_summary_list_results_model(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.list_results(model="STree")
-        self.check_output_file(fake_out, "summary_list_model.test")
+        self.check_output_file(stdout, "summary_list_model")

     def test_summary_list_results_score(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.list_results(score="accuracy")
-        self.check_output_file(fake_out, "summary_list_score.test")
+        self.check_output_file(stdout, "summary_list_score")

     def test_summary_list_results_n(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.list_results(score="accuracy", number=3)
-        self.check_output_file(fake_out, "summary_list_n.test")
+        self.check_output_file(stdout, "summary_list_n")

     def test_summary_list_hidden(self):
         report = Summary(hidden=True)
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.list_results(score="accuracy")
-        self.check_output_file(fake_out, "summary_list_hidden.test")
+        self.check_output_file(stdout, "summary_list_hidden")

     def test_show_result_no_title(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             title = ""
             best = report.best_result(
                 criterion="model", value="STree", score="accuracy"
             )
             report.show_result(data=best, title=title)
-        self.check_output_file(fake_out, "summary_show_results.test")
+        self.check_output_file(stdout, "summary_show_results")

     def test_show_result_title(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             title = "**Title**"
             best = report.best_result(
                 criterion="model", value="STree", score="accuracy"
             )
             report.show_result(data=best, title=title)
-        self.check_output_file(fake_out, "summary_show_results_title.test")
+        self.check_output_file(stdout, "summary_show_results_title")

     def test_show_result_no_data(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             title = "**Test**"
             report.show_result(data={}, title=title)
-        computed = fake_out.getvalue()
+        computed = stdout.getvalue()
         expected = "** **Test** has No data **\n"
         self.assertEqual(computed, expected)
@@ -212,6 +213,20 @@ class SummaryTest(TestBase):
     def test_show_top(self):
         report = Summary()
         report.acquire()
-        with patch(self.output, new=StringIO()) as fake_out:
+        with patch(self.output, new=StringIO()) as stdout:
             report.show_top()
-        self.check_output_file(fake_out, "summary_show_top.test")
+        self.check_output_file(stdout, "summary_show_top")
+
+    @patch("sys.stdout", new_callable=StringIO)
+    def test_show_top_no_data(self, stdout):
+        report = Summary()
+        report.acquire()
+        report.show_top(score="f1-macro")
+        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")
+
+    def test_no_data(self):
+        report = Summary()
+        report.acquire()
+        with self.assertRaises(ValueError) as msg:
+            report.list_results(score="f1-macro", model="STree")
+        self.assertEqual(str(msg.exception), NO_RESULTS)


@@ -1,6 +1,12 @@
 import os
+import glob
+import pathlib
+import sys
 import csv
 import unittest
+from importlib import import_module
+from io import StringIO
+from unittest.mock import patch


 class TestBase(unittest.TestCase):
@@ -25,6 +31,7 @@ class TestBase(unittest.TestCase):
                 print(f'{row};{col};"{value}"', file=f)

     def check_excel_sheet(self, sheet, file_name):
+        file_name += ".test"
         with open(os.path.join(self.test_files, file_name), "r") as f:
             expected = csv.reader(f, delimiter=";")
             for row, col, value in expected:
@@ -38,6 +45,7 @@ class TestBase(unittest.TestCase):
                 self.assertEqual(sheet.cell(int(row), int(col)).value, value)

     def check_output_file(self, output, file_name):
+        file_name += ".test"
         with open(os.path.join(self.test_files, file_name)) as f:
             expected = f.read()
         self.assertEqual(output.getvalue(), expected)
@@ -45,6 +53,39 @@ class TestBase(unittest.TestCase):
     def check_file_file(self, computed_file, expected_file):
         with open(computed_file) as f:
             computed = f.read()
+        expected_file += ".test"
         with open(os.path.join(self.test_files, expected_file)) as f:
             expected = f.read()
         self.assertEqual(computed, expected)
+
+    def check_output_lines(self, stdout, file_name, lines_to_compare):
+        with open(os.path.join(self.test_files, f"{file_name}.test")) as f:
+            expected = f.read()
+        computed_data = stdout.getvalue().splitlines()
+        n_line = 0
+        # compare only report lines without date, time, duration...
+        for expected, computed in zip(expected.splitlines(), computed_data):
+            if n_line in lines_to_compare:
+                self.assertEqual(computed, expected, n_line)
+            n_line += 1
+
+    def prepare_scripts_env(self):
+        self.scripts_folder = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)), "..", "scripts"
+        )
+        sys.path.append(self.scripts_folder)
+
+    def search_script(self, name):
+        py_files = glob.glob(os.path.join(self.scripts_folder, "*.py"))
+        for py_file in py_files:
+            module_name = pathlib.Path(py_file).stem
+            if name == module_name:
+                module = import_module(module_name)
+                return module
+
+    @patch("sys.stdout", new_callable=StringIO)
+    @patch("sys.stderr", new_callable=StringIO)
+    def execute_script(self, script, args, stderr, stdout):
+        module = self.search_script(script)
+        module.main(args)
+        return stdout, stderr
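
An aside on the new execute_script helper (illustration only, not part of the diff): the two stacked unittest.mock.patch decorators replace sys.stderr and sys.stdout with StringIO objects and inject them as the trailing stderr/stdout parameters, bottom decorator first, so a caller passes only the script name and its argument list and gets the captured streams back. A minimal sketch of the intended call pattern in a script test, reusing names that appear elsewhere in this change set (the class name BeExampleTest is hypothetical):

# Illustration only: how the patched streams reach a script test.
# unittest.mock.patch decorators inject mocks bottom-up, so the decorator
# closest to execute_script (sys.stderr) fills its first extra parameter.
from ..TestBase import TestBase


class BeExampleTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()  # put benchmark/scripts on sys.path

    def test_example(self):
        # Only the script name and argv are passed; the StringIO stdout and
        # stderr are supplied by the decorators and returned for assertions.
        stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "summary_list_model")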


@@ -129,7 +129,11 @@ class UtilTest(TestBase):
         )
         self.assertCountEqual(
             Files().get_all_results(hidden=True),
-            ["results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json"],
+            [
+                "results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json",
+                "results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_"
+                "0.json",
+            ],
         )

     def test_Files_get_results_Error(self):


@@ -10,6 +10,16 @@ from .SQL_test import SQLTest
 from .Benchmark_test import BenchmarkTest
 from .Summary_test import SummaryTest
 from .PairCheck_test import PairCheckTest
+from .Arguments_test import ArgumentsTest
+from .scripts.Be_Pair_check_test import BePairCheckTest
+from .scripts.Be_List_test import BeListTest
+from .scripts.Be_Report_test import BeReportTest
+from .scripts.Be_Summary_test import BeSummaryTest
+from .scripts.Be_Grid_test import BeGridTest
+from .scripts.Be_Best_test import BeBestTest
+from .scripts.Be_Benchmark_test import BeBenchmarkTest
+from .scripts.Be_Main_test import BeMainTest
+from .scripts.Be_Print_Strees_test import BePrintStrees

 all = [
     "UtilTest",
@@ -24,5 +34,14 @@ all = [
     "BenchmarkTest",
     "SummaryTest",
     "PairCheckTest",
-    "be_list",
+    "ArgumentsTest",
+    "BePairCheckTest",
+    "BeListTest",
+    "BeReportTest",
+    "BeSummaryTest",
+    "BeGridTest",
+    "BeBestTest",
+    "BeBenchmarkTest",
+    "BeMainTest",
+    "BePrintStrees",
 ]

File diff suppressed because one or more lines are too long

benchmark/tests/img/.gitignore

@@ -0,0 +1,2 @@
*
!.gitignore


@@ -1 +1 @@
{"balance-scale": [0.98, {"splitter": "iwss", "max_features": "auto"}, "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"], "balloons": [0.86, {"C": 7, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000.0, "multiclass_strategy": "ovr"}, "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"]} {"balance-scale": [0.98, {"splitter": "best", "max_features": "auto"}, "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"], "balloons": [0.86, {"C": 7, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000.0, "multiclass_strategy": "ovr"}, "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"]}


@@ -0,0 +1,6 @@
[
{
"C": [1.0, 5.0],
"kernel": ["linear", "rbf", "poly"]
}
]


@@ -0,0 +1,26 @@
{
"balance-scale": [
0.9743999999999999,
{
"base_estimator__C": 57,
"base_estimator__gamma": 0.1,
"base_estimator__kernel": "rbf",
"base_estimator__multiclass_strategy": "ovr",
"n_estimators": 100,
"n_jobs": -1
},
"v. 0.3.2, Computed on bart on 2022-03-10 at 22:56:53 took 12.182 min"
],
"balloons": [
0.7666666666666667,
{
"base_estimator__C": 5,
"base_estimator__gamma": 0.14,
"base_estimator__kernel": "rbf",
"base_estimator__multiclass_strategy": "ovr",
"n_estimators": 100,
"n_jobs": -1
},
"v. 0.3.2, Computed on bart on 2022-03-10 at 23:09:07 took 18.229 s"
]
}


@@ -15,7 +15,7 @@
         "features": 4,
         "classes": 3,
         "hyperparameters": {
-            "splitter": "iwss",
+            "splitter": "best",
             "max_features": "auto"
         },
         "nodes": 11.08,
@@ -32,7 +32,7 @@
         "features": 4,
         "classes": 2,
         "hyperparameters": {
-            "splitter": "iwss",
+            "splitter": "best",
             "max_features": "auto"
         },
         "nodes": 4.12,


@@ -0,0 +1,66 @@
import os
from openpyxl import load_workbook
from ...Utils import NO_RESULTS, Folders, Files
from ..TestBase import TestBase


class BeBenchmarkTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()
        self.score = "accuracy"

    def tearDown(self) -> None:
        files = []
        for score in [self.score, "unknown"]:
            files.append(Files.exreport(score))
            files.append(Files.exreport_output(score))
            files.append(Files.exreport_err(score))
        files.append(Files.exreport_excel(self.score))
        files.append(Files.exreport_pdf)
        files.append(Files.tex_output(self.score))
        self.remove_files(files, Folders.exreport)
        self.remove_files(files, ".")
        return super().tearDown()

    def test_be_benchmark_complete(self):
        stdout, stderr = self.execute_script(
            "be_benchmark", ["-s", self.score, "-q", "1", "-t", "1", "-x", "1"]
        )
        self.assertEqual(stderr.getvalue(), "")
        # Check output
        self.check_output_file(stdout, "be_benchmark_complete")
        # Check csv file
        file_name = os.path.join(Folders.exreport, Files.exreport(self.score))
        self.check_file_file(file_name, "exreport_csv")
        # Check tex file
        file_name = os.path.join(
            Folders.exreport, Files.tex_output(self.score)
        )
        self.assertTrue(os.path.exists(file_name))
        self.check_file_file(file_name, "exreport_tex")
        # Check excel file
        file_name = os.path.join(
            Folders.exreport, Files.exreport_excel(self.score)
        )
        book = load_workbook(file_name)
        for sheet_name in book.sheetnames:
            sheet = book[sheet_name]
            self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")

    def test_be_benchmark_single(self):
        stdout, stderr = self.execute_script(
            "be_benchmark", ["-s", self.score, "-q", "1"]
        )
        self.assertEqual(stderr.getvalue(), "")
        # Check output
        self.check_output_file(stdout, "be_benchmark")
        # Check csv file
        file_name = os.path.join(Folders.exreport, Files.exreport(self.score))
        self.check_file_file(file_name, "exreport_csv")

    def test_be_benchmark_no_data(self):
        stdout, stderr = self.execute_script(
            "be_benchmark", ["-s", "f1-weighted"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")


@@ -0,0 +1,108 @@
import os
import json
from ...Utils import Folders, Files, NO_RESULTS
from ..TestBase import TestBase


class BeBestTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def tearDown(self) -> None:
        self.remove_files(
            [Files.best_results("accuracy", "ODTE")],
            Folders.results,
        )
        return super().tearDown()

    def test_be_best_all(self):
        stdout, stderr = self.execute_script("be_best", ["-s", "all"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_best_all")

    def test_be_build_best_error(self):
        stdout, _ = self.execute_script(
            "be_build_best", ["-s", "accuracy", "-m", "SVC"]
        )
        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")

    def test_be_build_best(self):
        self.execute_script("be_build_best", ["-s", "accuracy", "-m", "ODTE"])
        expected_data = {
            "balance-scale": [
                0.96352,
                {
                    "base_estimator__C": 57,
                    "base_estimator__gamma": 0.1,
                    "base_estimator__kernel": "rbf",
                    "base_estimator__multiclass_strategy": "ovr",
                    "n_estimators": 100,
                    "n_jobs": -1,
                },
                "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json",
            ],
            "balloons": [
                0.785,
                {
                    "base_estimator__C": 5,
                    "base_estimator__gamma": 0.14,
                    "base_estimator__kernel": "rbf",
                    "base_estimator__multiclass_strategy": "ovr",
                    "n_estimators": 100,
                    "n_jobs": -1,
                },
                "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json",
            ],
        }
        name = Files.best_results("accuracy", "ODTE")
        file_name = os.path.join(Folders.results, name)
        with open(file_name, "r") as f:
            computed_data = json.load(f)
        for computed, expected in zip(computed_data, expected_data):
            self.assertEqual(computed, expected)
        for key, value in expected_data.items():
            self.assertIn(key, computed_data)
            self.assertEqual(computed_data[key][0], value[0])
            self.assertSequenceEqual(computed_data[key][1], value[1])

    def test_be_build_best_report(self):
        stdout, _ = self.execute_script(
            "be_build_best", ["-s", "accuracy", "-m", "ODTE", "-r", "1"]
        )
        expected_data = {
            "balance-scale": [
                0.96352,
                {
                    "base_estimator__C": 57,
                    "base_estimator__gamma": 0.1,
                    "base_estimator__kernel": "rbf",
                    "base_estimator__multiclass_strategy": "ovr",
                    "n_estimators": 100,
                    "n_jobs": -1,
                },
                "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json",
            ],
            "balloons": [
                0.785,
                {
                    "base_estimator__C": 5,
                    "base_estimator__gamma": 0.14,
                    "base_estimator__kernel": "rbf",
                    "base_estimator__multiclass_strategy": "ovr",
                    "n_estimators": 100,
                    "n_jobs": -1,
                },
                "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json",
            ],
        }
        name = Files.best_results("accuracy", "ODTE")
        file_name = os.path.join(Folders.results, name)
        with open(file_name, "r") as f:
            computed_data = json.load(f)
        for computed, expected in zip(computed_data, expected_data):
            self.assertEqual(computed, expected)
        for key, value in expected_data.items():
            self.assertIn(key, computed_data)
            self.assertEqual(computed_data[key][0], value[0])
            self.assertSequenceEqual(computed_data[key][1], value[1])
        self.check_output_file(stdout, "be_build_best_report")


@@ -0,0 +1,75 @@
import os
import json
from ...Utils import Folders, Files
from ..TestBase import TestBase


class BeGridTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def tearDown(self) -> None:
        self.remove_files(
            [
                Files.grid_input("f1-macro", "STree"),
                Files.grid_output("accuracy", "SVC"),
            ],
            Folders.results,
        )
        return super().tearDown()

    def test_be_build_grid(self):
        stdout, stderr = self.execute_script(
            "be_build_grid", ["-m", "STree", "-s", "f1-macro"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(
            stdout.getvalue(),
            "Generated grid input file to results/grid_input_f1-macro_STree."
            "json\n",
        )
        name = Files.grid_input("f1-macro", "STree")
        file_name = os.path.join(Folders.results, name)
        self.check_file_file(file_name, "be_build_grid")

    def test_be_grid_(self):
        stdout, stderr = self.execute_script(
            "be_grid",
            ["-m", "SVC", "-s", "accuracy", "--n_folds", "2"],
        )
        expected = "Perform grid search with SVC model\n"
        self.assertTrue(stdout.getvalue().startswith(expected))
        name = Files.grid_output("accuracy", "SVC")
        file_name = os.path.join(Folders.results, name)
        with open(file_name, "r") as f:
            computed_data = json.load(f)
        expected_data = {
            "balance-scale": [
                0.9167895469812403,
                {"C": 5.0, "kernel": "linear"},
                "v. -, Computed on iMac27 on 2022-05-07 at 23:55:03 took",
            ],
            "balloons": [
                0.6875,
                {"C": 5.0, "kernel": "rbf"},
                "v. -, Computed on iMac27 on 2022-05-07 at 23:55:03 took",
            ],
        }
        for computed, expected in zip(computed_data, expected_data):
            self.assertEqual(computed, expected)
        for key, value in expected_data.items():
            self.assertIn(key, computed_data)
            self.assertEqual(computed_data[key][0], value[0])
            self.assertSequenceEqual(computed_data[key][1], value[1])

    def test_be_grid_no_input(self):
        stdout, stderr = self.execute_script(
            "be_grid",
            ["-m", "ODTE", "-s", "f1-weighted", "-q", "1"],
        )
        self.assertEqual(stderr.getvalue(), "")
        grid_file = os.path.join(
            Folders.results, Files.grid_input("f1-weighted", "ODTE")
        )
        expected = f"** The grid input file [{grid_file}] could not be found\n"
        self.assertEqual(stdout.getvalue(), expected)


@@ -0,0 +1,59 @@
import os
from ...Utils import Folders, NO_RESULTS
from ..TestBase import TestBase


class BeListTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def test_be_list(self):
        stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "summary_list_model")

    def test_be_list_no_data(self):
        stdout, stderr = self.execute_script(
            "be_list", ["-m", "Wodt", "-s", "f1-macro"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")

    def test_be_list_nan(self):
        def swap_files(source_folder, target_folder, file_name):
            source = os.path.join(source_folder, file_name)
            target = os.path.join(target_folder, file_name)
            os.rename(source, target)

        file_name = (
            "results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:"
            "35_0.json"
        )
        # move nan result from hidden to results
        swap_files(Folders.hidden_results, Folders.results, file_name)
        try:
            # list and move nan result to hidden
            stdout, stderr = self.execute_script("be_list", ["--nan", "1"])
            self.assertEqual(stderr.getvalue(), "")
            self.check_output_file(stdout, "be_list_nan")
        except Exception:
            # move back nan result file if be_list couldn't
            swap_files(Folders.results, Folders.hidden_results, file_name)
            self.fail("test_be_list_nan() should not raise exception")

    def test_be_list_nan_no_nan(self):
        stdout, stderr = self.execute_script("be_list", ["--nan", "1"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_list_no_nan")

    def test_be_no_env(self):
        path = os.getcwd()
        os.chdir("..")
        stderr = None
        try:
            _, stderr = self.execute_script("be_list", [])
        except SystemExit as e:
            self.assertEqual(e.code, 1)
        finally:
            os.chdir(path)
        self.assertIsNone(stderr)


@@ -0,0 +1,161 @@
import os
from io import StringIO
from unittest.mock import patch
from ...Results import Report
from ...Utils import Files, Folders
from ..TestBase import TestBase


class BeMainTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()
        self.score = "accuracy"
        self.files = []

    def tearDown(self) -> None:
        self.remove_files(self.files, ".")
        return super().tearDown()

    def test_be_main_dataset(self):
        stdout, _ = self.execute_script(
            "be_main",
            ["-m", "STree", "-d", "balloons", "--title", "test"],
        )
        self.check_output_lines(
            stdout=stdout,
            file_name="be_main_dataset",
            lines_to_compare=[0, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13],
        )

    def test_be_main_complete(self):
        stdout, _ = self.execute_script(
            "be_main",
            ["-s", self.score, "-m", "STree", "--title", "test", "-r", "1"],
        )
        # keep the report name to delete it after
        report_name = stdout.getvalue().splitlines()[-1].split("in ")[1]
        self.files.append(report_name)
        self.check_output_lines(
            stdout, "be_main_complete", [0, 2, 3, 5, 6, 7, 8, 9, 12, 13, 14]
        )

    def test_be_main_no_report(self):
        stdout, _ = self.execute_script(
            "be_main",
            ["-s", self.score, "-m", "STree", "--title", "test"],
        )
        # keep the report name to delete it after
        report_name = stdout.getvalue().splitlines()[-1].split("in ")[1]
        self.files.append(report_name)
        report = Report(file_name=report_name)
        with patch(self.output, new=StringIO()) as stdout:
            report.report()
        self.check_output_lines(
            stdout,
            "be_main_complete",
            [0, 2, 3, 5, 6, 7, 8, 9, 12, 13, 14],
        )

    def test_be_main_best_params(self):
        stdout, _ = self.execute_script(
            "be_main",
            [
                "-s",
                self.score,
                "-m",
                "STree",
                "--title",
                "test",
                "-f",
                "1",
                "-r",
                "1",
            ],
        )
        # keep the report name to delete it after
        report_name = stdout.getvalue().splitlines()[-1].split("in ")[1]
        self.files.append(report_name)
        self.check_output_lines(
            stdout, "be_main_best", [0, 2, 3, 5, 6, 7, 8, 9, 12, 13, 14]
        )

    def test_be_main_best_params_non_existent(self):
        model = "GBC"
        stdout, stderr = self.execute_script(
            "be_main",
            [
                "-s",
                self.score,
                "-m",
                model,
                "--title",
                "test",
                "-f",
                "1",
                "-r",
                "1",
            ],
        )
        self.assertEqual(stderr.getvalue(), "")
        file_name = os.path.join(
            Folders.results, Files.best_results(self.score, model)
        )
        self.assertEqual(
            stdout.getvalue(),
            f"{file_name} does not exist\n",
        )

    def test_be_main_grid_non_existent(self):
        model = "GBC"
        stdout, stderr = self.execute_script(
            "be_main",
            [
                "-s",
                self.score,
                "-m",
                model,
                "--title",
                "test",
                "-g",
                "1",
                "-r",
                "1",
            ],
        )
        self.assertEqual(stderr.getvalue(), "")
        file_name = os.path.join(
            Folders.results, Files.grid_output(self.score, model)
        )
        self.assertEqual(
            stdout.getvalue(),
            f"{file_name} does not exist\n",
        )

    def test_be_main_grid_params(self):
        stdout, _ = self.execute_script(
            "be_main",
            [
                "-s",
                self.score,
                "-m",
                "STree",
                "--title",
                "test",
                "-g",
                "1",
                "-r",
                "1",
            ],
        )
        # keep the report name to delete it after
        report_name = stdout.getvalue().splitlines()[-1].split("in ")[1]
        self.files.append(report_name)
        self.check_output_lines(
            stdout, "be_main_grid", [0, 2, 3, 5, 6, 7, 8, 9, 12, 13, 14]
        )

    def test_be_main_no_data(self):
        stdout, _ = self.execute_script(
            "be_main", ["-m", "STree", "-d", "unknown", "--title", "test"]
        )
        self.assertEqual(stdout.getvalue(), "Unknown dataset: unknown\n")


@@ -0,0 +1,28 @@
from ..TestBase import TestBase
from ...Utils import NO_RESULTS


class BePairCheckTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def test_be_pair_check(self):
        stdout, stderr = self.execute_script(
            "be_pair_check", ["-m1", "ODTE", "-m2", "STree"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "paircheck")

    def test_be_pair_check_no_data_a(self):
        stdout, stderr = self.execute_script(
            "be_pair_check", ["-m1", "SVC", "-m2", "ODTE"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")

    def test_be_pair_check_no_data_b(self):
        stdout, stderr = self.execute_script(
            "be_pair_check", ["-m1", "STree", "-m2", "SVC"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(stdout.getvalue(), f"{NO_RESULTS}\n")


@@ -0,0 +1,44 @@
import os
from ...Utils import Folders
from ..TestBase import TestBase


class BePrintStrees(TestBase):
    def setUp(self):
        self.prepare_scripts_env()
        self.score = "accuracy"
        self.files = []
        self.datasets = ["balloons", "balance-scale"]

    def tearDown(self) -> None:
        self.remove_files(self.files, ".")
        return super().tearDown()

    def test_be_print_strees_dataset_bn(self):
        for name in self.datasets:
            stdout, _ = self.execute_script(
                "be_print_strees",
                ["-d", name, "-q", "1"],
            )
            file_name = os.path.join(Folders.img, f"stree_{name}.png")
            self.files.append(file_name)
            self.assertTrue(os.path.exists(file_name))
            self.assertEqual(
                stdout.getvalue(), f"File {file_name} generated\n"
            )
            computed_size = os.path.getsize(file_name)
            self.assertGreater(computed_size, 25000)

    def test_be_print_strees_dataset_color(self):
        for name in self.datasets:
            stdout, _ = self.execute_script(
                "be_print_strees",
                ["-d", name, "-q", "1", "-c", "1"],
            )
            file_name = os.path.join(Folders.img, f"stree_{name}.png")
            self.files.append(file_name)
            self.assertEqual(
                stdout.getvalue(), f"File {file_name} generated\n"
            )
            computed_size = os.path.getsize(file_name)
            self.assertGreater(computed_size, 30000)


@@ -0,0 +1,108 @@
import os
from openpyxl import load_workbook
from ...Utils import Folders
from ..TestBase import TestBase


class BeReportTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def tearDown(self) -> None:
        files = [
            "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql",
            "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx",
        ]
        self.remove_files(files, Folders.results)
        return super().tearDown()

    def test_be_report(self):
        file_name = os.path.join(
            "results",
            "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
        )
        stdout, stderr = self.execute_script("be_report", ["-f", file_name])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report")

    def test_be_report_not_found(self):
        stdout, stderr = self.execute_script("be_report", ["-f", "unknown"])
        self.assertEqual(stderr.getvalue(), "")
        self.assertEqual(stdout.getvalue(), "unknown does not exists!\n")

    def test_be_report_compare(self):
        file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
        stdout, stderr = self.execute_script(
            "be_report", ["-f", file_name, "-c", "1"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_compared")

    def test_be_report_datatsets(self):
        stdout, stderr = self.execute_script("be_report", [])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_datasets")

    def test_be_report_best(self):
        stdout, stderr = self.execute_script(
            "be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_best")

    def test_be_report_grid(self):
        stdout, stderr = self.execute_script(
            "be_report", ["-s", "accuracy", "-m", "STree", "-g", "1"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_grid")

    def test_be_report_best_both(self):
        stdout, stderr = self.execute_script(
            "be_report",
            ["-s", "accuracy", "-m", "STree", "-b", "1", "-g", "1"],
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_best")

    def test_be_report_excel_compared(self):
        file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
        stdout, stderr = self.execute_script(
            "be_report",
            ["-f", file_name, "-x", "1", "-c", "1"],
        )
        file_name = os.path.join(
            Folders.results, file_name.replace(".json", ".xlsx")
        )
        book = load_workbook(file_name)
        sheet = book["STree"]
        self.check_excel_sheet(sheet, "excel_compared")
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report_compared")

    def test_be_report_excel(self):
        file_name = "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"
        stdout, stderr = self.execute_script(
            "be_report",
            ["-f", file_name, "-x", "1"],
        )
        file_name = os.path.join(
            Folders.results, file_name.replace(".json", ".xlsx")
        )
        book = load_workbook(file_name)
        sheet = book["STree"]
        self.check_excel_sheet(sheet, "excel")
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "report")

    def test_be_report_sql(self):
        file_name = "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json"
        stdout, stderr = self.execute_script(
            "be_report",
            ["-f", file_name, "-q", "1"],
        )
        file_name = os.path.join(
            Folders.results, file_name.replace(".json", ".sql")
        )
        self.check_file_file(file_name, "sql")
        self.assertEqual(stderr.getvalue(), "")


@@ -0,0 +1,31 @@
from ..TestBase import TestBase


class BeSummaryTest(TestBase):
    def setUp(self):
        self.prepare_scripts_env()

    def tearDown(self) -> None:
        pass

    def test_be_summary_list_results_model(self):
        stdout, stderr = self.execute_script("be_summary", ["-m", "STree"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_summary_list_model")

    def test_be_summary_list_results_score(self):
        stdout, stderr = self.execute_script("be_summary", ["-s", "accuracy"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_summary_list_score")

    def test_be_summary_list_results_score_all(self):
        stdout, stderr = self.execute_script("be_summary", ["-s", "all"])
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_summary_list_score_all")

    def test_summary_list_results_model_score(self):
        stdout, stderr = self.execute_script(
            "be_summary", ["-s", "accuracy", "-m", "ODTE"]
        )
        self.assertEqual(stderr.getvalue(), "")
        self.check_output_file(stdout, "be_summary_list_score_model")


@@ -0,0 +1,32 @@
Dataset ODTE RandomForest STree
============================== ============= ============= =============
balance-scale 0.96352±0.025 0.83616±0.026 0.97056±0.015
balloons 0.78500±0.246 0.62500±0.250 0.86000±0.285
Model File Name Score
============================== =========================================================================== ========
ODTE results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341
RandomForest results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627
STree results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544
****************************************************************************************************
Benchmark Ok
****************************************************************************************************
---------------------------------------------------------------------
Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01
Chi squared with 2 degrees of freedom statistic: 4.0000
Test accepted: p-value: 1.3534e-01 >= 0.0500
---------------------------------------------------------------------
Control post hoc test for output accuracy
Adjust method: Holm
Control method: STree
p-values:
ODTE 0.3173
RandomForest 0.0910
---------------------------------------------------------------------
$testMultiple
classifier pvalue rank win tie loss
STree STree NA 1 NA NA NA
ODTE ODTE 0.31731051 2 2 0 0
RandomForest RandomForest 0.09100053 3 2 0 0


@@ -0,0 +1,33 @@
Dataset ODTE RandomForest STree
============================== ============= ============= =============
balance-scale 0.96352±0.025 0.83616±0.026 0.97056±0.015
balloons 0.78500±0.246 0.62500±0.250 0.86000±0.285
Model File Name Score
============================== =========================================================================== ========
ODTE results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341
RandomForest results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627
STree results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544
****************************************************************************************************
Benchmark Ok
****************************************************************************************************
---------------------------------------------------------------------
Friedman test, objetive maximize output variable accuracy. Obtained p-value: 1.3534e-01
Chi squared with 2 degrees of freedom statistic: 4.0000
Test accepted: p-value: 1.3534e-01 >= 0.0500
---------------------------------------------------------------------
Control post hoc test for output accuracy
Adjust method: Holm
Control method: STree
p-values:
ODTE 0.3173
RandomForest 0.0910
---------------------------------------------------------------------
$testMultiple
classifier pvalue rank win tie loss
STree STree NA 1 NA NA NA
ODTE ODTE 0.31731051 2 2 0 0
RandomForest RandomForest 0.09100053 3 2 0 0
File exreport/exreport_accuracy.tex generated


@@ -0,0 +1,60 @@
balance-scale results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json
----------------------------------------------------------------------------------------------------
0.8361600 {}
----------------------------------------------------------------------------------------------------
Test default paramters with RandomForest
****************************************************************************************************
balloons results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json
----------------------------------------------------------------------------------------------------
0.5566667 {"max_features": "auto", "splitter": "mutual"}
----------------------------------------------------------------------------------------------------
default B
****************************************************************************************************
balance-scale
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balloons
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balance-scale
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balloons
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balance-scale
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balloons
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balance-scale
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************
balloons
----------------------------------------------------------------------------------------------------
1.0000000 ""
----------------------------------------------------------------------------------------------------
****************************************************************************************************


@@ -0,0 +1,11 @@
******************************************************************************************************************************************************************
* Report Best accuracy Scores with ODTE in any platform *
******************************************************************************************************************************************************************
Dataset Score File/Message Hyperparameters
============================== ======== ============================================================================ =============================================
balance-scale 0.963520 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}
balloons 0.785000 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}
******************************************************************************************************************************************************************
* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0434 *
******************************************************************************************************************************************************************


@@ -0,0 +1,105 @@
[
{
"n_jobs": [
-1
],
"n_estimators": [
100
],
"base_estimator__C": [
1.0
],
"base_estimator__kernel": [
"linear"
],
"base_estimator__multiclass_strategy": [
"ovo"
]
},
{
"n_jobs": [
-1
],
"n_estimators": [
100
],
"base_estimator__C": [
0.001,
0.0275,
0.05,
0.08,
0.2,
0.25,
0.95,
1.0,
1.75,
7,
10000.0
],
"base_estimator__kernel": [
"liblinear"
],
"base_estimator__multiclass_strategy": [
"ovr"
]
},
{
"n_jobs": [
-1
],
"n_estimators": [
100
],
"base_estimator__C": [
0.05,
1.0,
1.05,
2,
2.8,
2.83,
5,
7,
57,
10000.0
],
"base_estimator__gamma": [
0.001,
0.1,
0.14,
10.0,
"auto",
"scale"
],
"base_estimator__kernel": [
"rbf"
],
"base_estimator__multiclass_strategy": [
"ovr"
]
},
{
"n_jobs": [
-1
],
"n_estimators": [
100
],
"base_estimator__C": [
0.05,
0.2,
1.0,
8.25
],
"base_estimator__gamma": [
0.1,
"scale"
],
"base_estimator__kernel": [
"poly"
],
"base_estimator__multiclass_strategy": [
"ovo",
"ovr"
]
}
]


@@ -0,0 +1,20 @@
{
"balance-scale": [
0.9119999999999999,
{
"C": 1.0,
"kernel": "liblinear",
"multiclass_strategy": "ovr"
},
"v. 1.2.4, Computed on iMac27 on 2022-05-07 at 23:29:25 took 0.962s"
],
"balloons": [
0.7,
{
"C": 1.0,
"kernel": "linear",
"multiclass_strategy": "ovr"
},
"v. 1.2.4, Computed on iMac27 on 2022-05-07 at 23:29:25 took 1.232s"
]
}


@@ -0,0 +1,13 @@
Date File Score Time(h) Title
========== ================================================================ ======== ======= ============================================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
****************************** Results with nan moved to hidden ******************************
Date File Score Time(h) Title
========== ================================================================ ======== ======= =======================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters


@@ -0,0 +1,7 @@
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters


@@ -0,0 +1,16 @@
***********************************************************************************************************************
* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.80 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
***********************************************************************************************************************
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
***********************************************************************************************************************
* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 *
***********************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json


@@ -0,0 +1,16 @@
***********************************************************************************************************************
* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.48 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
***********************************************************************************************************************
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
***********************************************************************************************************************
* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 *
***********************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json


@@ -0,0 +1,15 @@
***********************************************************************************************************************
* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.06 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
***********************************************************************************************************************
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
***********************************************************************************************************************
* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 *
***********************************************************************************************************************
Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json


@@ -0,0 +1,16 @@
***********************************************************************************************************************
* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.89 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
***********************************************************************************************************************
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
***********************************************************************************************************************
* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 *
***********************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json


@@ -0,0 +1,35 @@
*********************************************************************************
* BEST RESULT of accuracy for STree *
*-------------------------------------------------------------------------------*
* *
*  With gridsearched hyperparameters  *
* *
* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
* *
* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
* Stratified: False *
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
*********************************************************************************
* BEST RESULT of accuracy *
*-------------------------------------------------------------------------------*
* *
*  With gridsearched hyperparameters  *
* *
* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
* *
* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
* Stratified: False *
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest


@@ -0,0 +1,35 @@
*********************************************************************************
* BEST RESULT of accuracy for ODTE *
*-------------------------------------------------------------------------------*
* *
*  Gridsearched hyperparams v022.1b random_init  *
* *
* Model: ODTE Ver. 0.3.2 Score: accuracy Metric:  0.0434068 *
* *
* Date : 2022-04-20  Time: 10:52:20 Time Spent: 22,591.47 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: Galgo *
* Stratified: False *
* results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json *
* *
*********************************************************************************
*********************************************************************************
* BEST RESULT of accuracy *
*-------------------------------------------------------------------------------*
* *
*  With gridsearched hyperparameters  *
* *
* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
* *
* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
* Stratified: False *
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -0,0 +1,39 @@
*********************************************************************************
* BEST RESULT of accuracy for ODTE *
*-------------------------------------------------------------------------------*
* *
*  Gridsearched hyperparams v022.1b random_init  *
* *
* Model: ODTE Ver. 0.3.2 Score: accuracy Metric:  0.0434068 *
* *
* Date : 2022-04-20  Time: 10:52:20 Time Spent: 22,591.47 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: Galgo *
* Stratified: False *
* results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json *
* *
*********************************************************************************
*********************************************************************************
* BEST RESULT of accuracy *
*-------------------------------------------------------------------------------*
* *
*  With gridsearched hyperparameters  *
* *
* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
* *
* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
* Stratified: False *
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
** No results found **
** No results found **
** No results found **
** No results found **

View File

@@ -0,0 +1,35 @@
*********************************************************************************
* BEST RESULT of accuracy for ODTE *
*-------------------------------------------------------------------------------*
* *
*  Gridsearched hyperparams v022.1b random_init  *
* *
* Model: ODTE Ver. 0.3.2 Score: accuracy Metric:  0.0434068 *
* *
* Date : 2022-04-20  Time: 10:52:20 Time Spent: 22,591.47 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: Galgo *
* Stratified: False *
* results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json *
* *
*********************************************************************************
*********************************************************************************
* BEST RESULT of accuracy *
*-------------------------------------------------------------------------------*
* *
*  With gridsearched hyperparameters  *
* *
* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
* *
* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
* Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
* Stratified: False *
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -42,5 +42,5 @@
8;9;"0.2756860130252853" 8;9;"0.2756860130252853"
8;10;"0.02120100021362305" 8;10;"0.02120100021362305"
8;11;"0.003526023309468471" 8;11;"0.003526023309468471"
8;12;"{'splitter': 'iwss', 'max_features': 'auto'}" 8;12;"{'splitter': 'best', 'max_features': 'auto'}"
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416" 10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416"

View File

@@ -1,7 +1,9 @@
 ****************************************************************************************************
 Error computing benchmark
 ****************************************************************************************************
-Error in dim(ordered) <- ns :
-  dims [producto 1] no coincide con la longitud del objeto [0]
-Calls: testMultipleControl -> .doFriedmanTest -> <Anonymous> -> cast
-Ejecución interrumpida
+Error in file(file, "rt") : cannot open the connection
+Calls: read.csv -> read.table -> file
+In addition: Warning message:
+In file(file, "rt") :
+  cannot open file 'exreport/exreport_unknown.csv': No such file or directory
+Execution halted
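
The new fixture above fails on a missing 'exreport/exreport_unknown.csv' instead of crashing inside the Friedman test. A minimal Python sketch of the kind of guard that surfaces this earlier, assuming the benchmark driver writes the CSV and then shells out to Rscript (the file path default and the R script name here are hypothetical, for illustration only):

import os
import subprocess


def run_exreport(csv_path="exreport/exreport_unknown.csv"):
    # Fail fast in Python instead of letting R report
    # "cannot open file ...: No such file or directory".
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"exreport input not found: {csv_path}")
    # Hypothetical R script name; the real invocation may differ.
    subprocess.run(["Rscript", "benchmark_exreport.R", csv_path], check=True)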

View File

@@ -4,7 +4,7 @@
 Dataset Score File/Message Hyperparameters
 ============================== ======== ============================================================================ =============================================
-balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'iwss', 'max_features': 'auto'}
+balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'}
 balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
 ******************************************************************************************************************************************************************
 * Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0457 *

View File

@@ -0,0 +1,4 @@
Dataset Samp. Feat. Cls Balance
============================== ===== ===== === ========================================
balance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
balloons 16 4 2 56.25%/ 43.75%

View File

@@ -1,3 +1,4 @@
-Date File Score Time(h) Title
-========== ======================================================== ======== ======= =======
-2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default
+Date File Score Time(h) Title
+========== ================================================================ ======== ======= =======================
+2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
+2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default

View File

@@ -1,4 +1,4 @@
 Date File Score Time(h) Title
 ========== ============================================================= ======== ======= =================================
 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A

View File

@@ -1,4 +1,4 @@
 Date File Score Time(h) Title
 ========== =============================================================== ======== ======= ============================================
 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -1,4 +1,4 @@
 Date File Score Time(h) Title
 ========== =============================================================== ======== ======= ============================================
 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -1,12 +1,12 @@
 *********************************************************************************
 * *
-* With gridsearched hyperparameters *
+*  With gridsearched hyperparameters  *
 * *
-* Model: STree Ver. 1.2.3 Score: accuracy Metric: 0.0454434 *
+* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
 * *
-* Date : 2021-09-30 Time: 11:42:07 Time Spent: 624.25 secs. *
+* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
 * Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
 * Stratified: False *
 * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
 * *
 *********************************************************************************

View File

@@ -1,14 +1,14 @@
 *********************************************************************************
 * **Title** *
 *-------------------------------------------------------------------------------*
 * *
-* With gridsearched hyperparameters *
+*  With gridsearched hyperparameters  *
 * *
-* Model: STree Ver. 1.2.3 Score: accuracy Metric: 0.0454434 *
+* Model: STree Ver. 1.2.3 Score: accuracy Metric:  0.0454434 *
 * *
-* Date : 2021-09-30 Time: 11:42:07 Time Spent: 624.25 secs. *
+* Date : 2021-09-30  Time: 11:42:07 Time Spent:  624.25 secs. *
 * Seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Platform: iMac27 *
 * Stratified: False *
 * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
 * *
 *********************************************************************************

View File

@@ -1,4 +1,4 @@
 Date File Score Time(h) Title
 ========== =============================================================== ======== ======= ============================================
 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init

View File

@@ -6,3 +6,5 @@ xlsxwriter
 openpyxl
 tqdm
 xgboost
+graphviz
+Wodt @ git+ssh://git@github.com/doctorado-ml/Wodt.git#egg=Wodt

View File

@@ -7,9 +7,8 @@ def readme():
         return f.read()
 
 
-def get_data(field):
+def get_data(field, file_name="__init__.py"):
     item = ""
-    file_name = "_version.py" if field == "version" else "__init__.py"
     with open(os.path.join("benchmark", file_name)) as f:
         for line in f.readlines():
             if line.startswith(f"__{field}__"):
@@ -21,17 +20,34 @@ def get_data(field):
     return item
 
 
-def import_scripts():
+def get_requirements():
+    with open("requirements.txt") as f:
+        return f.read().splitlines()
+
+
+def script_names():
+    scripts = [
+        "benchmark",
+        "best",
+        "build_best",
+        "build_grid",
+        "grid",
+        "list",
+        "main",
+        "pair_check",
+        "print_strees",
+        "report",
+        "summary",
+    ]
     result = []
-    names = os.listdir(os.path.join("benchmark", "scripts"))
-    for name in names:
-        result.append(os.path.join("benchmark", "scripts", name))
+    for script in scripts:
+        result.append(f"be_{script}=benchmark.scripts.be_{script}:main")
     return result
 
 
 setuptools.setup(
     name="benchmark",
-    version=get_data("version"),
+    version=get_data("version", "_version.py"),
     license=get_data("license"),
     description="Oblique decision tree with svm nodes",
     long_description=readme(),
@@ -46,32 +62,15 @@ setuptools.setup(
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
"License :: OSI Approved :: " + get_data("license"), "License :: OSI Approved :: " + get_data("license"),
"Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Natural Language :: English", "Natural Language :: English",
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
], ],
install_requires=[ install_requires=get_requirements(),
"scikit-learn",
"odte",
"pandas",
"mufs",
"xlsxwriter",
"tqdm",
],
zip_safe=False, zip_safe=False,
entry_points={ entry_points={
"console_scripts": [ "console_scripts": script_names(),
"be_list=benchmark.scripts.be_list:main",
"be_report=benchmark.scripts.be_report:main",
"be_main=benchmark.scripts.be_main:main",
"be_benchmark=benchmark.scripts.be_benchmark:main",
"be_best=benchmark.scripts.be_best:main",
"be_build_best=benchmark.scripts.be_build_best:main",
"be_grid=benchmark.scripts.be_grid:main",
"be_pair_check=benchmark.scripts.be_pair_check:main",
"be_print_strees=benchmark.scripts.be_print_strees:main",
"be_repara=benchmark.scripts.be_repara:main",
"be_summary=benchmark.scripts.be_summary:main",
],
}, },
) )
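
For reference, the refactored helper can be exercised on its own to see exactly which console scripts setuptools will register. A minimal sketch (not part of the repository) that reproduces the strings built by script_names() in the diff above:

# Reproduces the entry-point strings generated for console_scripts.
scripts = [
    "benchmark", "best", "build_best", "build_grid", "grid", "list",
    "main", "pair_check", "print_strees", "report", "summary",
]
for script in scripts:
    print(f"be_{script}=benchmark.scripts.be_{script}:main")
# e.g. prints: be_benchmark=benchmark.scripts.be_benchmark:main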

sonar-project.properties Normal file
View File

@@ -0,0 +1,3 @@
sonar.projectKey=benchmark
sonar.sourceEncoding=UTF-8
sonar.python.version="3.8, 3.9, 3.10"