25 Commits

Author SHA1 Message Date
c8124be119 Update version info 2022-11-18 23:36:43 +01:00
58c52849d8 Add AODE to models 2022-11-18 23:33:41 +01:00
d68fb47688 Remove extra space in report header 2022-11-17 13:42:27 +01:00
38667d61f7 Refactor be_list 2022-11-17 12:09:02 +01:00
dfd4f8179b Complete tests adding excel to be_list 2022-11-17 12:00:30 +01:00
8a9342c97b Add space to time column in report 2022-11-17 09:41:17 +01:00
974227166c Add excel to be_list 2022-11-17 01:36:19 +01:00
feea9c542a Add KDB model 2022-11-15 22:06:04 +01:00
a53e957c00 fix stochastic error in discretization 2022-11-14 21:51:53 +01:00
a2db4f1f6d Fix lint error in test 2022-11-14 17:27:18 +01:00
5a3ae6f440 Update version info and tests 2022-11-14 00:54:18 +01:00
Ricardo Montañana Gómez
8d06a2c5f6 Merge pull request #6 from Doctorado-ML/language_version
Add Discretizer to Datasets
Add excel to report datasets
Add report datasets sheet to benchmark excel
2022-11-13 22:51:50 +01:00
9039a634cf Exclude macos-latest with python 3.11 (no torch) 2022-11-13 22:14:01 +01:00
5b5d385b4c Fix uppercase mistake in filename 2022-11-13 20:04:26 +01:00
6ebcc31c36 Add bayesclass to requirements 2022-11-13 18:34:54 +01:00
cd2d803ff5 Update requirements 2022-11-13 18:10:42 +01:00
6aec5b2a97 Add tests to excel in report datasets 2022-11-13 17:44:45 +01:00
f1b9dc1fef Add excel to report dataset 2022-11-13 14:46:41 +01:00
2e6f49de8e Add discretize key to .env.dist 2022-11-12 19:38:14 +01:00
2d61cd11c2 refactor Discretization in datasets 2022-11-12 19:37:46 +01:00
4b442a46f2 Add Discretizer to Datasets 2022-11-10 11:47:01 +01:00
feaf85d0b8 Add Dataset load return a pandas dataframe 2022-11-04 18:40:50 +01:00
c62b06f263 Update Readme 2022-11-01 22:30:42 +01:00
Ricardo Montañana Gómez
b9eaa534bc Merge pull request #5 from Doctorado-ML/language_version
Disable sonar quality gate in CI
2022-11-01 21:24:12 +01:00
0d87e670f7 Disable sonar quality gate in CI
Update base score for Arff STree
2022-11-01 16:53:22 +01:00
55 changed files with 976 additions and 267 deletions

View File

@@ -5,3 +5,4 @@ model=ODTE
stratified=0 stratified=0
source_data=Tanveer source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -8,7 +8,7 @@ jobs:
name: Build name: Build
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
- run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV - run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
@@ -22,7 +22,8 @@ jobs:
-Dsonar.python.version=3.10 -Dsonar.python.version=3.10
# If you wish to fail your job when the Quality Gate is red, uncomment the # If you wish to fail your job when the Quality Gate is red, uncomment the
# following lines. This would typically be used to fail a deployment. # following lines. This would typically be used to fail a deployment.
- uses: sonarsource/sonarqube-quality-gate-action@master #- uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5 # timeout-minutes: 5
env: # env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} # SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
# SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}

View File

@@ -13,10 +13,13 @@ jobs:
strategy: strategy:
matrix: matrix:
os: [macos-latest, ubuntu-latest] os: [macos-latest, ubuntu-latest]
python: ["3.10"] python: ["3.10", "3.11"]
exclude:
- os: macos-latest
python: "3.11"
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python }} - name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v2 uses: actions/setup-python@v2
with: with:

View File

@@ -1,7 +1,7 @@
[![CI](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml) [![CI](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/Doctorado-ML/benchmark/branch/main/graph/badge.svg?token=ZRP937NDSG)](https://codecov.io/gh/Doctorado-ML/benchmark) [![codecov](https://codecov.io/gh/Doctorado-ML/benchmark/branch/main/graph/badge.svg?token=ZRP937NDSG)](https://codecov.io/gh/Doctorado-ML/benchmark)
[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=alert_status&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark) [![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=alert_status&token=336a6e501988888543c3153baa91bad4b9914dd2)](https://haystack.rmontanana.es:25000/dashboard?id=benchmark)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=sqale_index&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark) [![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=sqale_index&token=336a6e501988888543c3153baa91bad4b9914dd2)](https://haystack.rmontanana.es:25000/dashboard?id=benchmark)
![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen) ![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen)
# benchmark # benchmark

View File

@@ -1,8 +1,10 @@
import os import os
import pandas as pd import pandas as pd
import numpy as np
from scipy.io import arff from scipy.io import arff
from .Utils import Files from .Utils import Files
from .Arguments import EnvData from .Arguments import EnvData
from mdlp.discretization import MDLP
class Diterator: class Diterator:
@@ -28,9 +30,12 @@ class DatasetsArff:
file_name = os.path.join(self.folder(), self.dataset_names(name)) file_name = os.path.join(self.folder(), self.dataset_names(name))
data = arff.loadarff(file_name) data = arff.loadarff(file_name)
df = pd.DataFrame(data[0]) df = pd.DataFrame(data[0])
df = df.dropna() df.dropna(axis=0, how="any", inplace=True)
X = df.drop(class_name, axis=1).to_numpy() X = df.drop(class_name, axis=1)
self.features = X.columns
self.class_name = class_name
y, _ = pd.factorize(df[class_name]) y, _ = pd.factorize(df[class_name])
X = X.to_numpy()
return X, y return X, y
@@ -43,7 +48,7 @@ class DatasetsTanveer:
def folder(): def folder():
return "data" return "data"
def load(self, name, _): def load(self, name, *args):
file_name = os.path.join(self.folder(), self.dataset_names(name)) file_name = os.path.join(self.folder(), self.dataset_names(name))
data = pd.read_csv( data = pd.read_csv(
file_name, file_name,
@@ -64,7 +69,7 @@ class DatasetsSurcov:
def folder(): def folder():
return "datasets" return "datasets"
def load(self, name, _): def load(self, name, *args):
file_name = os.path.join(self.folder(), self.dataset_names(name)) file_name = os.path.join(self.folder(), self.dataset_names(name))
data = pd.read_csv( data = pd.read_csv(
file_name, file_name,
@@ -80,15 +85,19 @@ class DatasetsSurcov:
class Datasets: class Datasets:
def __init__(self, dataset_name=None): def __init__(self, dataset_name=None):
envData = EnvData.load() envData = EnvData.load()
class_name = getattr( class_name = getattr(
__import__(__name__), __import__(__name__),
f"Datasets{envData['source_data']}", f"Datasets{envData['source_data']}",
) )
self.load = (
self.load_discretized
if envData["discretize"] == "1"
else self.load_continuous
)
self.dataset = class_name() self.dataset = class_name()
self.class_names = [] self.class_names = []
self.load_names() self._load_names()
if dataset_name is not None: if dataset_name is not None:
try: try:
class_name = self.class_names[ class_name = self.class_names[
@@ -99,7 +108,7 @@ class Datasets:
raise ValueError(f"Unknown dataset: {dataset_name}") raise ValueError(f"Unknown dataset: {dataset_name}")
self.data_sets = [dataset_name] self.data_sets = [dataset_name]
def load_names(self): def _load_names(self):
file_name = os.path.join(self.dataset.folder(), Files.index) file_name = os.path.join(self.dataset.folder(), Files.index)
default_class = "class" default_class = "class"
with open(file_name) as f: with open(file_name) as f:
@@ -115,12 +124,63 @@ class Datasets:
self.data_sets = result self.data_sets = result
self.class_names = class_names self.class_names = class_names
def load(self, name): def get_attributes(self, name):
class Attributes:
pass
X, y = self.load_continuous(name)
attr = Attributes()
values, counts = np.unique(y, return_counts=True)
comp = ""
sep = ""
for count in counts:
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
sep = "/ "
attr.balance = comp
attr.classes = len(np.unique(y))
attr.samples = X.shape[0]
attr.features = X.shape[1]
return attr
def get_features(self):
return self.dataset.features
def get_class_name(self):
return self.dataset.class_name
def load_continuous(self, name):
try: try:
class_name = self.class_names[self.data_sets.index(name)] class_name = self.class_names[self.data_sets.index(name)]
return self.dataset.load(name, class_name) return self.dataset.load(name, class_name)
except (ValueError, FileNotFoundError): except (ValueError, FileNotFoundError):
raise ValueError(f"Unknown dataset: {name}") raise ValueError(f"Unknown dataset: {name}")
def discretize(self, X, y):
"""Supervised discretization with Fayyad and Irani's MDLP algorithm.
Parameters
----------
X : np.ndarray
array (n_samples, n_features) of features
y : np.ndarray
array (n_samples,) of labels
Returns
-------
tuple (X, y) of numpy.ndarray
"""
discretiz = MDLP(random_state=17, dtype=np.int32)
Xdisc = discretiz.fit_transform(X, y)
return Xdisc
def load_discretized(self, name, dataframe=False):
X, yd = self.load_continuous(name)
Xd = self.discretize(X, yd)
dataset = pd.DataFrame(Xd, columns=self.get_features())
dataset[self.get_class_name()] = yd
if dataframe:
return dataset
return Xd, yd
def __iter__(self) -> Diterator: def __iter__(self) -> Diterator:
return Diterator(self.data_sets) return Diterator(self.data_sets)
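The Datasets changes above route loading through `load_discretized` when `discretize=1` is set in the environment, applying Fayyad and Irani's MDLP algorithm before handing data back. As a rough idea of what that path does, here is a minimal sketch assuming the `mdlp-discretization` package and scikit-learn's iris data, both used purely for illustration:

```python
# Minimal sketch of the load_discretized path added above; iris and the
# synthetic feature names are illustrative, not benchmark datasets.
import pandas as pd
from mdlp.discretization import MDLP
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

# Supervised MDLP discretization, as in Datasets.discretize
discretizer = MDLP(random_state=17)
Xd = discretizer.fit_transform(X, y)

# load_discretized(name, dataframe=True) wraps the result in a DataFrame
features = [f"feature_{i}" for i in range(X.shape[1])]
dataset = pd.DataFrame(Xd, columns=features)
dataset["class"] = y
print(dataset.head())
```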

View File

@@ -8,6 +8,7 @@ from sklearn.ensemble import (
) )
from sklearn.svm import SVC from sklearn.svm import SVC
from stree import Stree from stree import Stree
from bayesclass import TAN, KDB, AODE
from wodt import Wodt from wodt import Wodt
from odte import Odte from odte import Odte
from xgboost import XGBClassifier from xgboost import XGBClassifier
@@ -20,6 +21,9 @@ class Models:
def define_models(random_state): def define_models(random_state):
return { return {
"STree": Stree(random_state=random_state), "STree": Stree(random_state=random_state),
"TAN": TAN(random_state=random_state),
"KDB": KDB(k=3),
"AODE": AODE(random_state=random_state),
"Cart": DecisionTreeClassifier(random_state=random_state), "Cart": DecisionTreeClassifier(random_state=random_state),
"ExtraTree": ExtraTreeClassifier(random_state=random_state), "ExtraTree": ExtraTreeClassifier(random_state=random_state),
"Wodt": Wodt(random_state=random_state), "Wodt": Wodt(random_state=random_state),

View File

@@ -1,4 +1,5 @@
import os import os
import sys
from operator import itemgetter from operator import itemgetter
import math import math
import json import json
@@ -6,6 +7,7 @@ import abc
import shutil import shutil
import subprocess import subprocess
import xlsxwriter import xlsxwriter
from xlsxwriter.exceptions import DuplicateWorksheetName
import numpy as np import numpy as np
from .Experiments import BestResults from .Experiments import BestResults
from .Datasets import Datasets from .Datasets import Datasets
@@ -17,6 +19,11 @@ from .Utils import (
TextColor, TextColor,
NO_RESULTS, NO_RESULTS,
) )
from ._version import __version__
def get_input(is_test):
return "test" if is_test else input()
class BestResultsEver: class BestResultsEver:
@@ -33,7 +40,7 @@ class BestResultsEver:
] ]
self.data["Arff"]["accuracy"] = [ self.data["Arff"]["accuracy"] = [
"STree_default (linear-ovo)", "STree_default (linear-ovo)",
21.9765, 22.109799,
] ]
def get_name_value(self, key, score): def get_name_value(self, key, score):
@@ -123,7 +130,7 @@ class BaseReport(abc.ABC):
class Report(BaseReport): class Report(BaseReport):
header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 16, 15] header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 17, 15]
header_cols = [ header_cols = [
"Dataset", "Dataset",
"Sampl.", "Sampl.",
@@ -182,7 +189,7 @@ class Report(BaseReport):
) )
i += 1 i += 1
print( print(
f"{result['time']:9.6f}±{result['time_std']:6.4f} ", f"{result['time']:10.6f}±{result['time_std']:6.4f} ",
end="", end="",
) )
i += 1 i += 1
@@ -326,7 +333,17 @@ class Excel(BaseReport):
else: else:
self.book = book self.book = book
self.close = False self.close = False
self.sheet = self.book.add_worksheet(self.data["model"]) suffix = ""
num = 1
while True:
try:
self.sheet = self.book.add_worksheet(
self.data["model"] + suffix
)
break
except DuplicateWorksheetName:
num += 1
suffix = str(num)
self.max_hyper_width = 0 self.max_hyper_width = 0
self.col_hyperparams = 0 self.col_hyperparams = 0
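The loop above retries `add_worksheet` whenever xlsxwriter raises `DuplicateWorksheetName`, so reporting the same model twice into one workbook produces sheets named, e.g., STree and STree2. A minimal standalone sketch of that pattern (the file name and helper are illustrative):

```python
# Retry-with-suffix pattern for duplicate sheet names, as in Excel.__init__.
import xlsxwriter
from xlsxwriter.exceptions import DuplicateWorksheetName


def add_unique_sheet(book, base_name):
    suffix = ""
    num = 1
    while True:
        try:
            return book.add_worksheet(base_name + suffix)
        except DuplicateWorksheetName:
            num += 1
            suffix = str(num)


book = xlsxwriter.Workbook("demo.xlsx")
add_unique_sheet(book, "STree")   # creates "STree"
add_unique_sheet(book, "STree")   # creates "STree2"
book.close()
```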
@@ -566,37 +583,251 @@ class Excel(BaseReport):
self.sheet.set_row(c, 20) self.sheet.set_row(c, 20)
self.sheet.set_row(0, 25) self.sheet.set_row(0, 25)
self.sheet.freeze_panes(6, 1) self.sheet.freeze_panes(6, 1)
self.sheet.hide_gridlines() self.sheet.hide_gridlines(2)
if self.close: if self.close:
self.book.close() self.book.close()
class ReportDatasets: class ReportDatasets:
row = 6
# alternate lines colors
color1 = "#DCE6F1"
color2 = "#FDE9D9"
color3 = "#B1A0C7"
def __init__(self, excel=False, book=None):
self.excel = excel
self.env = EnvData().load()
self.close = False
self.output = True
self.header_text = f"Datasets used in benchmark ver. {__version__}"
if excel:
self.max_length = 0
if book is None:
self.excel_file_name = Files.datasets_report_excel
self.book = xlsxwriter.Workbook(
self.excel_file_name, {"nan_inf_to_errors": True}
)
self.set_properties(self.get_title())
self.close = True
else:
self.book = book
self.output = False
self.sheet = self.book.add_worksheet("Datasets")
def set_properties(self, title):
self.book.set_properties(
{
"title": title,
"subject": "Machine learning results",
"author": "Ricardo Montañana Gómez",
"manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
"company": "UCLM",
"comments": "Created with Python and XlsxWriter",
}
)
@staticmethod @staticmethod
def report(): def get_python_version():
return "{}.{}".format(sys.version_info.major, sys.version_info.minor)
def get_title(self):
return (
f" Benchmark ver. {__version__} - "
f" Python ver. {self.get_python_version()}"
f" with {self.env['n_folds']} Folds cross validation "
f" Discretization: {self.env['discretize']} "
f"Stratification: {self.env['stratified']}"
)
def get_file_name(self):
return self.excel_file_name
def header(self):
merge_format = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 18,
"bg_color": self.color3,
}
)
merge_format_subheader = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
merge_format_subheader_right = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "right",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
merge_format_subheader_left = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "left",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
self.sheet.merge_range(0, 0, 0, 4, self.header_text, merge_format)
self.sheet.merge_range(
1,
0,
4,
0,
f" Default score {self.env['score']}",
merge_format_subheader,
)
self.sheet.merge_range(
1,
1,
1,
3,
"Cross validation",
merge_format_subheader_right,
)
self.sheet.write(
1, 4, f"{self.env['n_folds']} Folds", merge_format_subheader_left
)
self.sheet.merge_range(
2,
1,
2,
3,
"Stratified",
merge_format_subheader_right,
)
self.sheet.write(
2,
4,
f"{'True' if self.env['stratified']=='1' else 'False'}",
merge_format_subheader_left,
)
self.sheet.merge_range(
3,
1,
3,
3,
"Discretized",
merge_format_subheader_right,
)
self.sheet.write(
3,
4,
f"{'True' if self.env['discretize']=='1' else 'False'}",
merge_format_subheader_left,
)
self.sheet.merge_range(
4,
1,
4,
3,
"Seeds",
merge_format_subheader_right,
)
self.sheet.write(
4, 4, f"{self.env['seeds']}", merge_format_subheader_left
)
self.update_max_length(len(self.env["seeds"]) + 1)
header_cols = [
("Dataset", 30),
("Samples", 10),
("Features", 10),
("Classes", 10),
("Balance", 50),
]
bold = self.book.add_format(
{
"bold": True,
"font_size": 14,
"bg_color": self.color3,
"border": 1,
}
)
i = 0
for item, length in header_cols:
self.sheet.write(5, i, item, bold)
self.sheet.set_column(i, i, length)
i += 1
def footer(self):
# set Balance column width to max length
self.sheet.set_column(4, 4, self.max_length)
self.sheet.freeze_panes(6, 1)
self.sheet.hide_gridlines(2)
if self.close:
self.book.close()
def print_line(self, result):
size_n = 14
integer = self.book.add_format(
{"num_format": "#,###", "font_size": size_n, "border": 1}
)
normal = self.book.add_format({"font_size": size_n, "border": 1})
col = 0
if self.row % 2 == 0:
normal.set_bg_color(self.color1)
integer.set_bg_color(self.color1)
else:
normal.set_bg_color(self.color2)
integer.set_bg_color(self.color2)
self.sheet.write(self.row, col, result.dataset, normal)
self.sheet.write(self.row, col + 1, result.samples, integer)
self.sheet.write(self.row, col + 2, result.features, integer)
self.sheet.write(self.row, col + 3, result.classes, normal)
self.sheet.write(self.row, col + 4, result.balance, normal)
self.update_max_length(len(result.balance))
self.row += 1
def update_max_length(self, value):
if value > self.max_length:
self.max_length = value
def report(self):
data_sets = Datasets() data_sets = Datasets()
color_line = TextColor.LINE1 color_line = TextColor.LINE1
print(color_line, end="") if self.excel:
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance") self.header()
print("=" * 30 + " ===== ====== === " + "=" * 40) if self.output:
print(color_line, end="")
print(self.header_text)
print("")
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
print("=" * 30 + " ====== ===== === " + "=" * 60)
for dataset in data_sets: for dataset in data_sets:
X, y = data_sets.load(dataset) attributes = data_sets.get_attributes(dataset)
attributes.dataset = dataset
if self.excel:
self.print_line(attributes)
color_line = ( color_line = (
TextColor.LINE2 TextColor.LINE2
if color_line == TextColor.LINE1 if color_line == TextColor.LINE1
else TextColor.LINE1 else TextColor.LINE1
) )
values, counts = np.unique(y, return_counts=True) if self.output:
comp = "" print(color_line, end="")
sep = "" print(
for count in counts: f"{dataset:30s} {attributes.samples:6,d} "
comp += f"{sep}{count/sum(counts)*100:5.2f}%" f"{attributes.features:5,d} {attributes.classes:3d} "
sep = "/ " f"{attributes.balance:40s}"
print(color_line, end="") )
print( if self.excel:
f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} " self.footer()
f"{len(np.unique(y)):3d} {comp:40s}"
)
class SQL(BaseReport): class SQL(BaseReport):
@@ -1068,7 +1299,12 @@ class Benchmark:
k = Excel(file_name=file_name, book=book) k = Excel(file_name=file_name, book=book)
k.report() k.report()
sheet.freeze_panes(6, 1) sheet.freeze_panes(6, 1)
sheet.hide_gridlines() sheet.hide_gridlines(2)
def add_datasets_sheet():
# Add datasets sheet
re = ReportDatasets(excel=True, book=book)
re.report()
def exreport_output(): def exreport_output():
file_name = os.path.join( file_name = os.path.join(
@@ -1096,6 +1332,7 @@ class Benchmark:
footer() footer()
models_files() models_files()
exreport_output() exreport_output()
add_datasets_sheet()
book.close() book.close()
@@ -1119,6 +1356,7 @@ class Summary:
def __init__(self, hidden=False) -> None: def __init__(self, hidden=False) -> None:
self.results = Files().get_all_results(hidden=hidden) self.results = Files().get_all_results(hidden=hidden)
self.data = [] self.data = []
self.data_filtered = []
self.datasets = {} self.datasets = {}
self.models = set() self.models = set()
self.hidden = hidden self.hidden = hidden
@@ -1195,13 +1433,14 @@ class Summary:
number=0, number=0,
) -> None: ) -> None:
"""Print the list of results""" """Print the list of results"""
data = self.get_results_criteria( if self.data_filtered == []:
score, model, input_data, sort_key, number self.data_filtered = self.get_results_criteria(
) score, model, input_data, sort_key, number
if data == []: )
if self.data_filtered == []:
raise ValueError(NO_RESULTS) raise ValueError(NO_RESULTS)
max_file = max(len(x["file"]) for x in data) max_file = max(len(x["file"]) for x in self.data_filtered)
max_title = max(len(x["title"]) for x in data) max_title = max(len(x["title"]) for x in self.data_filtered)
if self.hidden: if self.hidden:
color1 = TextColor.GREEN color1 = TextColor.GREEN
color2 = TextColor.YELLOW color2 = TextColor.YELLOW
@@ -1210,10 +1449,11 @@ class Summary:
color2 = TextColor.LINE2 color2 = TextColor.LINE2
print(color1, end="") print(color1, end="")
print( print(
f"{'Date':10s} {'File':{max_file}s} {'Score':8s} {'Time(h)':7s} " f" # {'Date':10s} {'File':{max_file}s} {'Score':8s} "
f"{'Title':s}" f"{'Time(h)':7s} {'Title':s}"
) )
print( print(
"===",
"=" * 10 "=" * 10
+ " " + " "
+ "=" * max_file + "=" * max_file
@@ -1222,21 +1462,60 @@ class Summary:
+ " " + " "
+ "=" * 7 + "=" * 7
+ " " + " "
+ "=" * max_title + "=" * max_title,
) )
print( print(
"\n".join( "\n".join(
[ [
(color2 if n % 2 == 0 else color1) (color2 if n % 2 == 0 else color1) + f"{n:3d} "
+ f"{x['date']} {x['file']:{max_file}s} " f"{x['date']} {x['file']:{max_file}s} "
f"{x['metric']:8.5f} " f"{x['metric']:8.5f} "
f"{x['duration']/3600:7.3f} " f"{x['duration']/3600:7.3f} "
f"{x['title']}" f"{x['title']}"
for n, x in enumerate(data) for n, x in enumerate(self.data_filtered)
] ]
) )
) )
def manage_results(self, excel, is_test):
"""Manage results showed in the summary
return True if excel file is created False otherwise
"""
num = ""
book = None
while True:
print(
"Which result do you want to report? (q to quit, r to list "
"again, number to report): ",
end="",
)
num = get_input(is_test)
if num == "r":
self.list_results()
if num == "q":
if excel:
if book is not None:
book.close()
return True
return False
if num.isdigit() and int(num) < len(self.data) and int(num) >= 0:
rep = Report(self.data_filtered[int(num)]["file"], self.hidden)
rep.report()
if excel and not self.hidden:
if book is None:
file_name = Files.be_list_excel
book = xlsxwriter.Workbook(
file_name, {"nan_inf_to_errors": True}
)
excel = Excel(
file_name=self.data_filtered[int(num)]["file"],
book=book,
)
excel.report()
else:
if num not in ("r", "q"):
print(f"Invalid option {num}. Try again!")
def show_result(self, data: dict, title: str = "") -> None: def show_result(self, data: dict, title: str = "") -> None:
def whites(n: int) -> str: def whites(n: int) -> str:
return " " * n + color1 + "*" return " " * n + color1 + "*"

View File

@@ -27,6 +27,8 @@ class Files:
exreport_pdf = "Rplots.pdf" exreport_pdf = "Rplots.pdf"
benchmark_r = "benchmark.r" benchmark_r = "benchmark.r"
dot_env = ".env" dot_env = ".env"
datasets_report_excel = "ReportDatasets.xlsx"
be_list_excel = "some_results.xlsx"
@staticmethod @staticmethod
def exreport_output(score): def exreport_output(score):

View File

@@ -1,10 +1,16 @@
from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer, DatasetsArff from .Datasets import (
Datasets,
DatasetsSurcov,
DatasetsTanveer,
DatasetsArff,
)
from .Experiments import Experiment from .Experiments import Experiment
from .Results import Report, Summary from .Results import Report, Summary
from ._version import __version__
__author__ = "Ricardo Montañana Gómez" __author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez" __copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
__license__ = "MIT License" __license__ = "MIT License"
__author_email__ = "ricardo.montanana@alu.uclm.es" __author_email__ = "ricardo.montanana@alu.uclm.es"
__all__ = ["Experiment", "Datasets", "Report", "Summary"] __all__ = ["Experiment", "Datasets", "Report", "Summary", __version__]

View File

@@ -1 +1 @@
__version__ = "0.2.0" __version__ = "0.4.0"

View File

@@ -1,7 +1,7 @@
#! /usr/bin/env python #! /usr/bin/env python
import os import os
from benchmark.Results import Summary from benchmark.Results import Summary
from benchmark.Utils import Folders from benchmark.Utils import Folders, Files
from benchmark.Arguments import Arguments from benchmark.Arguments import Arguments
"""List experiments of a model """List experiments of a model
@@ -12,6 +12,7 @@ def main(args_test=None):
arguments = Arguments() arguments = Arguments()
arguments.xset("number").xset("model", required=False).xset("key") arguments.xset("number").xset("model", required=False).xset("key")
arguments.xset("hidden").xset("nan").xset("score", required=False) arguments.xset("hidden").xset("nan").xset("score", required=False)
arguments.xset("excel")
args = arguments.parse(args_test) args = arguments.parse(args_test)
data = Summary(hidden=args.hidden) data = Summary(hidden=args.hidden)
data.acquire() data.acquire()
@@ -22,32 +23,39 @@ def main(args_test=None):
sort_key=args.key, sort_key=args.key,
number=args.number, number=args.number,
) )
is_test = args_test is not None
if not args.nan:
excel_generated = data.manage_results(args.excel, is_test)
if args.excel and excel_generated:
print(f"Generated file: {Files.be_list_excel}")
Files.open(Files.be_list_excel, is_test)
except ValueError as e: except ValueError as e:
print(e) print(e)
else: return
if args.nan: if args.nan:
results_nan = [] results_nan = []
results = data.get_results_criteria( results = data.get_results_criteria(
score=args.score, score=args.score,
model=args.model, model=args.model,
input_data=None, input_data=None,
sort_key=args.key, sort_key=args.key,
number=args.number, number=args.number,
)
for result in results:
if result["metric"] != result["metric"]:
results_nan.append(result)
if results_nan != []:
print(
"\n"
+ "*" * 30
+ " Results with nan moved to hidden "
+ "*" * 30
) )
for result in results: data.data_filtered = []
if result["metric"] != result["metric"]: data.list_results(input_data=results_nan)
results_nan.append(result) for result in results_nan:
if results_nan != []: name = result["file"]
print( os.rename(
"\n" os.path.join(Folders.results, name),
+ "*" * 30 os.path.join(Folders.hidden_results, name),
+ " Results with nan moved to hidden "
+ "*" * 30
) )
data.list_results(input_data=results_nan)
for result in results_nan:
name = result["file"]
os.rename(
os.path.join(Folders.results, name),
os.path.join(Folders.hidden_results, name),
)

View File

@@ -21,7 +21,11 @@ def main(args_test=None):
if args.grid: if args.grid:
args.best = None args.best = None
if args.file is None and args.best is None and args.grid is None: if args.file is None and args.best is None and args.grid is None:
ReportDatasets.report() report = ReportDatasets(args.excel)
report.report()
if args.excel:
is_test = args_test is not None
Files.open(report.get_file_name(), is_test)
else: else:
if args.best is not None or args.grid is not None: if args.best is not None or args.grid is not None:
report = ReportBest(args.score, args.model, args.best, args.grid) report = ReportBest(args.score, args.model, args.best, args.grid)

View File

@@ -6,3 +6,4 @@ stratified=0
# Source of data Tanveer/Surcov # Source of data Tanveer/Surcov
source_data=Tanveer source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -4,4 +4,5 @@ n_folds=5
model=ODTE model=ODTE
stratified=0 stratified=0
source_data=Arff source_data=Arff
seeds=[271, 314, 171] seeds=[271, 314, 171]
discretize=1

View File

@@ -6,3 +6,4 @@ stratified=0
# Source of data Tanveer/Surcov # Source of data Tanveer/Surcov
source_data=Tanveer source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -5,4 +5,5 @@ model=ODTE
stratified=0 stratified=0
# Source of data Tanveer/Surcov # Source of data Tanveer/Surcov
source_data=Surcov source_data=Surcov
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

benchmark/tests/.gitignore (new file)
View File

@@ -0,0 +1,2 @@
ReportDatasets.xlsx
some_results.xlsx

View File

@@ -5,6 +5,7 @@ from openpyxl import load_workbook
from .TestBase import TestBase from .TestBase import TestBase
from ..Utils import Folders, Files, NO_RESULTS from ..Utils import Folders, Files, NO_RESULTS
from ..Results import Benchmark from ..Results import Benchmark
from .._version import __version__
class BenchmarkTest(TestBase): class BenchmarkTest(TestBase):
@@ -89,6 +90,15 @@ class BenchmarkTest(TestBase):
self.assertTrue(os.path.exists(benchmark.get_tex_file())) self.assertTrue(os.path.exists(benchmark.get_tex_file()))
self.check_file_file(benchmark.get_tex_file(), "exreport_tex") self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
@staticmethod
def generate_excel_sheet(test, sheet, file_name):
with open(os.path.join("test_files", file_name), "w") as f:
for row in range(1, sheet.max_row + 1):
for col in range(1, sheet.max_column + 1):
value = sheet.cell(row=row, column=col).value
if value is not None:
print(f'{row};{col};"{value}"', file=f)
def test_excel_output(self): def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False) benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results() benchmark.compile_results()
@@ -98,9 +108,16 @@ class BenchmarkTest(TestBase):
benchmark.excel() benchmark.excel()
file_name = benchmark.get_excel_file_name() file_name = benchmark.get_excel_file_name()
book = load_workbook(file_name) book = load_workbook(file_name)
replace = None
with_this = None
for sheet_name in book.sheetnames: for sheet_name in book.sheetnames:
sheet = book[sheet_name] sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}") if sheet_name == "Datasets":
# ExcelTest.generate_excel_sheet( replace = self.benchmark_version
# self, sheet, f"exreport_excel_{sheet_name}" with_this = __version__
# ) self.check_excel_sheet(
sheet,
f"exreport_excel_{sheet_name}",
replace=replace,
with_this=with_this,
)
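The `generate_excel_sheet` helper above records a worksheet as `row;col;"value"` lines, the same fixture format that `check_excel_sheet` later compares against. A self-contained sketch of that round trip, with a throwaway workbook and file names that are purely illustrative:

```python
# Build a tiny workbook, then dump it in the tests' row;col;"value" format.
import xlsxwriter
from openpyxl import load_workbook

book = xlsxwriter.Workbook("demo.xlsx")
sheet = book.add_worksheet("Datasets")
sheet.write(0, 0, "Dataset")
sheet.write(0, 1, "Samples")
sheet.write(1, 0, "balloons")
sheet.write(1, 1, 16)
book.close()

ws = load_workbook("demo.xlsx")["Datasets"]
with open("demo_fixture.test", "w") as f:
    for row in range(1, ws.max_row + 1):
        for col in range(1, ws.max_column + 1):
            value = ws.cell(row=row, column=col).value
            if value is not None:
                print(f'{row};{col};"{value}"', file=f)
```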

View File

@@ -30,6 +30,19 @@ class DatasetTest(TestBase):
expected = [271, 314, 171] expected = [271, 314, 171]
self.assertSequenceEqual(Randomized.seeds(), expected) self.assertSequenceEqual(Randomized.seeds(), expected)
def test_load_dataframe(self):
self.set_env(".env.arff")
dt = Datasets()
X, y = dt.load_discretized("iris", dataframe=False)
dataset = dt.load_discretized("iris", dataframe=True)
class_name = dt.get_class_name()
features = dt.get_features()
self.assertListEqual(y.tolist(), dataset[class_name].tolist())
for i in range(len(features)):
self.assertListEqual(
X[:, i].tolist(), dataset[features[i]].tolist()
)
def test_Datasets_iterator(self): def test_Datasets_iterator(self):
test = { test = {
".env.dist": ["balance-scale", "balloons"], ".env.dist": ["balance-scale", "balloons"],

View File

@@ -2,11 +2,14 @@ import os
from io import StringIO from io import StringIO
from unittest.mock import patch from unittest.mock import patch
from .TestBase import TestBase from .TestBase import TestBase
from ..Results import Report, BaseReport, ReportBest, ReportDatasets from ..Results import Report, BaseReport, ReportBest, ReportDatasets, get_input
from ..Utils import Symbols from ..Utils import Symbols
class ReportTest(TestBase): class ReportTest(TestBase):
def test_get_input(self):
self.assertEqual(get_input(is_test=True), "test")
def test_BaseReport(self): def test_BaseReport(self):
with patch.multiple(BaseReport, __abstractmethods__=set()): with patch.multiple(BaseReport, __abstractmethods__=set()):
file_name = os.path.join( file_name = os.path.join(
@@ -81,7 +84,7 @@ class ReportTest(TestBase):
output_text = stdout.getvalue().splitlines() output_text = stdout.getvalue().splitlines()
# Compare replacing STree version # Compare replacing STree version
for line, index in zip(expected, range(len(expected))): for line, index in zip(expected, range(len(expected))):
if "1.2.4" in line: if self.stree_version in line:
# replace STree version # replace STree version
line = self.replace_STree_version(line, output_text, index) line = self.replace_STree_version(line, output_text, index)
@@ -97,4 +100,12 @@ class ReportTest(TestBase):
def test_report_datasets(self, mock_output): def test_report_datasets(self, mock_output):
report = ReportDatasets() report = ReportDatasets()
report.report() report.report()
self.check_output_file(mock_output, "report_datasets") file_name = f"report_datasets{self.ext}"
with open(os.path.join(self.test_files, file_name)) as f:
expected = f.read()
output_text = mock_output.getvalue().splitlines()
for line, index in zip(expected.splitlines(), range(len(expected))):
if self.benchmark_version in line:
# replace benchmark version
line = self.replace_benchmark_version(line, output_text, index)
self.assertEqual(line, output_text[index])

View File

@@ -15,6 +15,8 @@ class TestBase(unittest.TestCase):
self.test_files = "test_files" self.test_files = "test_files"
self.output = "sys.stdout" self.output = "sys.stdout"
self.ext = ".test" self.ext = ".test"
self.benchmark_version = "0.2.0"
self.stree_version = "1.2.4"
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def remove_files(self, files, folder): def remove_files(self, files, folder):
@@ -31,7 +33,9 @@ class TestBase(unittest.TestCase):
if value is not None: if value is not None:
print(f'{row};{col};"{value}"', file=f) print(f'{row};{col};"{value}"', file=f)
def check_excel_sheet(self, sheet, file_name): def check_excel_sheet(
self, sheet, file_name, replace=None, with_this=None
):
file_name += self.ext file_name += self.ext
with open(os.path.join(self.test_files, file_name), "r") as f: with open(os.path.join(self.test_files, file_name), "r") as f:
expected = csv.reader(f, delimiter=";") expected = csv.reader(f, delimiter=";")
@@ -43,6 +47,9 @@ class TestBase(unittest.TestCase):
value = float(value) value = float(value)
except ValueError: except ValueError:
pass pass
if replace is not None and isinstance(value, str):
if replace in value:
value = value.replace(replace, with_this)
self.assertEqual(sheet.cell(int(row), int(col)).value, value) self.assertEqual(sheet.cell(int(row), int(col)).value, value)
def check_output_file(self, output, file_name): def check_output_file(self, output, file_name):
@@ -51,10 +58,15 @@ class TestBase(unittest.TestCase):
expected = f.read() expected = f.read()
self.assertEqual(output.getvalue(), expected) self.assertEqual(output.getvalue(), expected)
@staticmethod def replace_STree_version(self, line, output, index):
def replace_STree_version(line, output, index): idx = line.find(self.stree_version)
idx = line.find("1.2.4") return line.replace(self.stree_version, output[index][idx : idx + 5])
return line.replace("1.2.4", output[index][idx : idx + 5])
def replace_benchmark_version(self, line, output, index):
idx = line.find(self.benchmark_version)
return line.replace(
self.benchmark_version, output[index][idx : idx + 5]
)
def check_file_file(self, computed_file, expected_file): def check_file_file(self, computed_file, expected_file):
with open(computed_file) as f: with open(computed_file) as f:
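The fixture files were recorded with an older benchmark version string, so `replace_benchmark_version` swaps the recorded version for whatever appears at the same position in the live output before asserting. A minimal sketch of that substitution, using illustrative values:

```python
# Expected fixtures carry "0.2.0"; live output carries the current version.
recorded = "0.2.0"

expected_line = "Datasets used in benchmark ver. 0.2.0"
actual_line = "Datasets used in benchmark ver. 0.4.0"


def replace_benchmark_version(line, actual, recorded_version):
    idx = line.find(recorded_version)
    if idx == -1:
        return line
    # take the version found at the same offset in the actual output
    return line.replace(recorded_version, actual[idx:idx + 5])


assert replace_benchmark_version(expected_line, actual_line, recorded) == actual_line
```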

View File

@@ -179,6 +179,7 @@ class UtilTest(TestBase):
"stratified": "0", "stratified": "0",
"source_data": "Tanveer", "source_data": "Tanveer",
"seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]", "seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
"discretize": "0",
} }
computed = EnvData().load() computed = EnvData().load()
self.assertDictEqual(computed, expected) self.assertDictEqual(computed, expected)

View File

@@ -2,6 +2,7 @@ import os
from openpyxl import load_workbook from openpyxl import load_workbook
from ...Utils import NO_RESULTS, Folders, Files from ...Utils import NO_RESULTS, Folders, Files
from ..TestBase import TestBase from ..TestBase import TestBase
from ..._version import __version__
class BeBenchmarkTest(TestBase): class BeBenchmarkTest(TestBase):
@@ -43,9 +44,19 @@ class BeBenchmarkTest(TestBase):
Folders.exreport, Files.exreport_excel(self.score) Folders.exreport, Files.exreport_excel(self.score)
) )
book = load_workbook(file_name) book = load_workbook(file_name)
replace = None
with_this = None
for sheet_name in book.sheetnames: for sheet_name in book.sheetnames:
sheet = book[sheet_name] sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}") if sheet_name == "Datasets":
replace = self.benchmark_version
with_this = __version__
self.check_excel_sheet(
sheet,
f"exreport_excel_{sheet_name}",
replace=replace,
with_this=with_this,
)
def test_be_benchmark_single(self): def test_be_benchmark_single(self):
stdout, stderr = self.execute_script( stdout, stderr = self.execute_script(

View File

@@ -4,6 +4,10 @@ from ...Utils import Folders, Files
from ..TestBase import TestBase from ..TestBase import TestBase
def get_test():
return "hola"
class BeGridTest(TestBase): class BeGridTest(TestBase):
def setUp(self): def setUp(self):
self.prepare_scripts_env() self.prepare_scripts_env()

View File

@@ -1,5 +1,7 @@
import os import os
from ...Utils import Folders, NO_RESULTS from unittest.mock import patch
from openpyxl import load_workbook
from ...Utils import Folders, Files, NO_RESULTS
from ..TestBase import TestBase from ..TestBase import TestBase
@@ -7,12 +9,64 @@ class BeListTest(TestBase):
def setUp(self): def setUp(self):
self.prepare_scripts_env() self.prepare_scripts_env()
def test_be_list(self): @patch("benchmark.Results.get_input", return_value="q")
def test_be_list(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"]) stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "") self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "summary_list_model") self.check_output_file(stdout, "be_list_model")
def test_be_list_no_data(self): @patch("benchmark.Results.get_input", side_effect=iter(["x", "q"]))
def test_be_list_invalid_option(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model_invalid")
@patch("benchmark.Results.get_input", side_effect=iter(["0", "q"]))
def test_be_list_report(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report")
@patch("benchmark.Results.get_input", side_effect=iter(["q"]))
def test_be_list_report_excel_none(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model")
@patch("benchmark.Results.get_input", side_effect=iter(["r", "q"]))
def test_be_list_twice(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model_2")
@patch("benchmark.Results.get_input", side_effect=iter(["2", "q"]))
def test_be_list_report_excel(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report_excel")
book = load_workbook(Files.be_list_excel)
sheet = book["STree"]
self.check_excel_sheet(sheet, "excel")
@patch("benchmark.Results.get_input", side_effect=iter(["2", "1", "q"]))
def test_be_list_report_excel_twice(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report_excel_2")
book = load_workbook(Files.be_list_excel)
sheet = book["STree"]
self.check_excel_sheet(sheet, "excel")
sheet = book["STree2"]
self.check_excel_sheet(sheet, "excel2")
@patch("benchmark.Results.get_input", return_value="q")
def test_be_list_no_data(self, input_data):
stdout, stderr = self.execute_script( stdout, stderr = self.execute_script(
"be_list", ["-m", "Wodt", "-s", "f1-macro"] "be_list", ["-m", "Wodt", "-s", "f1-macro"]
) )
@@ -41,7 +95,8 @@ class BeListTest(TestBase):
swap_files(Folders.results, Folders.hidden_results, file_name) swap_files(Folders.results, Folders.hidden_results, file_name)
self.fail("test_be_list_nan() should not raise exception") self.fail("test_be_list_nan() should not raise exception")
def test_be_list_nan_no_nan(self): @patch("benchmark.Results.get_input", return_value="q")
def test_be_list_nan_no_nan(self, input_data):
stdout, stderr = self.execute_script("be_list", ["--nan", "1"]) stdout, stderr = self.execute_script("be_list", ["--nan", "1"])
self.assertEqual(stderr.getvalue(), "") self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_no_nan") self.check_output_file(stdout, "be_list_no_nan")

View File

@@ -1,7 +1,8 @@
import os import os
from openpyxl import load_workbook from openpyxl import load_workbook
from ...Utils import Folders from ...Utils import Folders, Files
from ..TestBase import TestBase from ..TestBase import TestBase
from ..._version import __version__
class BeReportTest(TestBase): class BeReportTest(TestBase):
@@ -14,6 +15,7 @@ class BeReportTest(TestBase):
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx", "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx",
] ]
self.remove_files(files, Folders.results) self.remove_files(files, Folders.results)
self.remove_files([Files.datasets_report_excel], os.getcwd())
return super().tearDown() return super().tearDown()
def test_be_report(self): def test_be_report(self):
@@ -41,7 +43,37 @@ class BeReportTest(TestBase):
def test_be_report_datatsets(self): def test_be_report_datatsets(self):
stdout, stderr = self.execute_script("be_report", []) stdout, stderr = self.execute_script("be_report", [])
self.assertEqual(stderr.getvalue(), "") self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "report_datasets") file_name = f"report_datasets{self.ext}"
with open(os.path.join(self.test_files, file_name)) as f:
expected = f.read()
output_text = stdout.getvalue().splitlines()
for line, index in zip(expected.splitlines(), range(len(expected))):
if self.benchmark_version in line:
# replace benchmark version
line = self.replace_benchmark_version(line, output_text, index)
self.assertEqual(line, output_text[index])
def test_be_report_datasets_excel(self):
stdout, stderr = self.execute_script("be_report", ["-x", "1"])
self.assertEqual(stderr.getvalue(), "")
file_name = f"report_datasets{self.ext}"
with open(os.path.join(self.test_files, file_name)) as f:
expected = f.read()
output_text = stdout.getvalue().splitlines()
for line, index in zip(expected.splitlines(), range(len(expected))):
if self.benchmark_version in line:
# replace benchmark version
line = self.replace_benchmark_version(line, output_text, index)
self.assertEqual(line, output_text[index])
file_name = os.path.join(os.getcwd(), Files.datasets_report_excel)
book = load_workbook(file_name)
sheet = book["Datasets"]
self.check_excel_sheet(
sheet,
"exreport_excel_Datasets",
replace=self.benchmark_version,
with_this=__version__,
)
def test_be_report_best(self): def test_be_report_best(self):
stdout, stderr = self.execute_script( stdout, stderr = self.execute_script(

View File

@@ -0,0 +1,6 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,11 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):  # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,7 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): Invalid option x. Try again!
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -1,13 +1,13 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== ================================================================ ======== ======= ============================================ === ========== ================================================================ ======== ======= ============================================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters  0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  2 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  4 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  5 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
****************************** Results with nan moved to hidden ****************************** ****************************** Results with nan moved to hidden ******************************
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== ================================================================ ======== ======= ======================= === ========== ================================================================ ======== ======= =======================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters  0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters

View File

@@ -1,7 +1,7 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  3 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  4 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters

View File

@@ -0,0 +1,21 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-11-01 19:17:07 *
* default B *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 4115.04 seconds, 1.14 hours, on macbook-pro *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 18.78 9.88 5.90 0.970000±0.0020 0.233304±0.0481 {'max_features': 'auto', 'splitter': 'mutual'}
balloons 16 4 2 4.72 2.86 2.78 0.556667±0.2941 0.021352±0.0058 {'max_features': 'auto', 'splitter': 'mutual'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0379 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,21 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): Generated file: some_results.xlsx

View File

@@ -0,0 +1,36 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40 *
* default A *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 3395.01 seconds, 0.94 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 11.08 5.90 5.90 0.980000±0.0010 0.285207±0.0603 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 4.12 2.56 2.56 0.695000±0.2757 0.021201±0.0035 {'splitter': 'best', 'max_features': 'auto'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0416 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): Generated file: some_results.xlsx
View File
@@ -1,16 +1,16 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 * * STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
* test * * test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.80 seconds, 0.00 hours, on iMac27 * * Execution took 0.80 seconds, 0.00 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'} balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************ *************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
************************************************************************************************************************ *************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json
View File
@@ -1,16 +1,16 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 * * STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
* test * * test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.48 seconds, 0.00 hours, on iMac27 * * Execution took 0.48 seconds, 0.00 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {} balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
************************************************************************************************************************ *************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0390 *
************************************************************************************************************************ *************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json
View File
@@ -1,15 +1,15 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 * * STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
* test * * test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.06 seconds, 0.00 hours, on iMac27 * * Execution took 0.06 seconds, 0.00 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
************************************************************************************************************************ *************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0165 *
************************************************************************************************************************ *************************************************************************************************************************
Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json
View File
@@ -1,16 +1,16 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 * * STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
* test * * test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.89 seconds, 0.00 hours, on iMac27 * * Execution took 0.89 seconds, 0.00 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
************************************************************************************************************************ *************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0391 *
************************************************************************************************************************ *************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json
View File
@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json * * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* * * *
********************************************************************************* *********************************************************************************
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
View File
@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json * * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* * * *
********************************************************************************* *********************************************************************************
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
View File
@@ -26,13 +26,13 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json * * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* * * *
********************************************************************************* *********************************************************************************
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
** No results found ** ** No results found **
** No results found ** ** No results found **
** No results found ** ** No results found **
View File
@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json * * results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* * * *
********************************************************************************* *********************************************************************************
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
View File
@@ -0,0 +1,48 @@
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40"
2;1;" default A"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;"3,395.01 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.94 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"11.08"
7;6;"5.9"
7;7;"5.9"
7;8;"0.98"
7;9;"0.001"
7;10;"0.2852065515518188"
7;11;"0.06031593282605064"
7;12;"{'splitter': 'best', 'max_features': 'auto'}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"4.12"
8;6;"2.56"
8;7;"2.56"
8;8;"0.695"
8;9;"0.2756860130252853"
8;10;"0.02120100021362305"
8;11;"0.003526023309468471"
8;12;"{'splitter': 'best', 'max_features': 'auto'}"
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0416"
View File
@@ -0,0 +1,25 @@
1;1;"Datasets used in benchmark ver. 0.2.0"
2;1;" Default score accuracy"
2;2;"Cross validation"
2;5;"5 Folds"
3;2;"Stratified"
3;5;"False"
4;2;"Discretized"
4;5;"False"
5;2;"Seeds"
5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Balance"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;" 7.84%/ 46.08%/ 46.08%"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"56.25%/ 43.75%"
View File
@@ -1,15 +1,15 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * * STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters * * With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 * * Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************ *************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
************************************************************************************************************************ *************************************************************************************************************************
View File
@@ -1,16 +1,16 @@
************************************************************************************************************************ *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * * STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters * * With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * * Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 * * Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy * * Score is accuracy *
************************************************************************************************************************ *************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ =============== ============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************ *************************************************************************************************************************
* ✔ Equal to best .....: 1 * * ✔ Equal to best .....: 1 *
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * * accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
************************************************************************************************************************ *************************************************************************************************************************
View File
@@ -1,4 +1,6 @@
Dataset Sampl. Feat. Cls Balance Datasets used in benchmark ver. 0.2.0
============================== ===== ====== === ========================================
Dataset Sampl. Feat. Cls Balance
============================== ====== ===== === ============================================================
balance-scale 625 4 3 7.84%/ 46.08%/ 46.08% balance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
balloons 16 4 2 56.25%/ 43.75% balloons 16 4 2 56.25%/ 43.75%
View File
@@ -1,4 +1,4 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== ================================================================ ======== ======= ======================= === ========== ================================================================ ======== ======= =======================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters  0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default  1 2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default
View File
@@ -1,5 +1,5 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== ============================================================= ======== ======= ================================= === ========== ============================================================= ======== ======= =================================
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
View File
@@ -1,5 +1,5 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
View File
@@ -1,7 +1,7 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  3 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  4 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
View File
@@ -1,7 +1,7 @@
Date File Score Time(h) Title  # Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================ === ========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters  0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init  1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A  2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B  3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest  4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
View File
@@ -2,7 +2,10 @@ pandas
 scikit-learn
 scipy
 odte
+cython
+mdlp-discretization
 mufs
+bayesclass @ git+ssh://git@github.com/doctorado-ml/bayesclass.git
 xlsxwriter
 openpyxl
 tqdm
View File
@@ -49,15 +49,14 @@ setuptools.setup(
     name="benchmark",
     version=get_data("version", "_version.py"),
     license=get_data("license"),
-    description="Oblique decision tree with svm nodes",
+    description="Benchmark of models with different datasets",
     long_description=readme(),
     long_description_content_type="text/markdown",
     packages=setuptools.find_packages(),
     url="https://github.com/Doctorado-ML/benchmark",
     author=get_data("author"),
     author_email=get_data("author_email"),
-    keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
-tree svm svc",
+    keywords="scikit-learn benchmark",
     classifiers=[
         "Development Status :: 4 - Beta",
         "License :: OSI Approved :: " + get_data("license"),