25 Commits

Author SHA1 Message Date
c8124be119 Update version info 2022-11-18 23:36:43 +01:00
58c52849d8 Add AODE to models 2022-11-18 23:33:41 +01:00
d68fb47688 Remove extra space in report header 2022-11-17 13:42:27 +01:00
38667d61f7 Refactor be_list 2022-11-17 12:09:02 +01:00
dfd4f8179b Complete tests adding excel to be_list 2022-11-17 12:00:30 +01:00
8a9342c97b Add space to time column in report 2022-11-17 09:41:17 +01:00
974227166c Add excel to be_list 2022-11-17 01:36:19 +01:00
feea9c542a Add KDB model 2022-11-15 22:06:04 +01:00
a53e957c00 fix stochastic error in discretization 2022-11-14 21:51:53 +01:00
a2db4f1f6d Fix lint error in test 2022-11-14 17:27:18 +01:00
5a3ae6f440 Update version info and tests 2022-11-14 00:54:18 +01:00
Ricardo Montañana Gómez
8d06a2c5f6 Merge pull request #6 from Doctorado-ML/language_version
Add Discretizer to Datasets
Add excel to report datasets
Add report datasets sheet to benchmark excel
2022-11-13 22:51:50 +01:00
9039a634cf Exclude macos-latest with python 3.11 (no torch) 2022-11-13 22:14:01 +01:00
5b5d385b4c Fix uppercase mistake in filename 2022-11-13 20:04:26 +01:00
6ebcc31c36 Add bayesclass to requirements 2022-11-13 18:34:54 +01:00
cd2d803ff5 Update requirements 2022-11-13 18:10:42 +01:00
6aec5b2a97 Add tests to excel in report datasets 2022-11-13 17:44:45 +01:00
f1b9dc1fef Add excel to report dataset 2022-11-13 14:46:41 +01:00
2e6f49de8e Add discretize key to .env.dist 2022-11-12 19:38:14 +01:00
2d61cd11c2 refactor Discretization in datasets 2022-11-12 19:37:46 +01:00
4b442a46f2 Add Discretizer to Datasets 2022-11-10 11:47:01 +01:00
feaf85d0b8 Add Dataset load return a pandas dataframe 2022-11-04 18:40:50 +01:00
c62b06f263 Update Readme 2022-11-01 22:30:42 +01:00
Ricardo Montañana Gómez
b9eaa534bc Merge pull request #5 from Doctorado-ML/language_version
Disable sonar quality gate in CI
2022-11-01 21:24:12 +01:00
0d87e670f7 Disable sonar quality gate in CI
Update base score for Arff STree
2022-11-01 16:53:22 +01:00
55 changed files with 976 additions and 267 deletions

View File

@@ -5,3 +5,4 @@ model=ODTE
stratified=0
source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -8,7 +8,7 @@ jobs:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
fetch-depth: 0
- run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
@@ -22,7 +22,8 @@ jobs:
-Dsonar.python.version=3.10
# If you wish to fail your job when the Quality Gate is red, uncomment the
# following lines. This would typically be used to fail a deployment.
- uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
#- uses: sonarsource/sonarqube-quality-gate-action@master
# timeout-minutes: 5
# env:
# SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
# SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}

View File

@@ -13,10 +13,13 @@ jobs:
strategy:
matrix:
os: [macos-latest, ubuntu-latest]
python: ["3.10"]
python: ["3.10", "3.11"]
exclude:
- os: macos-latest
python: "3.11"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:

View File

@@ -1,7 +1,7 @@
[![CI](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/Doctorado-ML/benchmark/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/Doctorado-ML/benchmark/branch/main/graph/badge.svg?token=ZRP937NDSG)](https://codecov.io/gh/Doctorado-ML/benchmark)
[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=alert_status&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=sqale_index&token=336a6e501988888543c3153baa91bad4b9914dd2)](http://haystack.local:25000/dashboard?id=benchmark)
[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=alert_status&token=336a6e501988888543c3153baa91bad4b9914dd2)](https://haystack.rmontanana.es:25000/dashboard?id=benchmark)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=benchmark&metric=sqale_index&token=336a6e501988888543c3153baa91bad4b9914dd2)](https://haystack.rmontanana.es:25000/dashboard?id=benchmark)
![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen)
# benchmark

View File

@@ -1,8 +1,10 @@
import os
import pandas as pd
import numpy as np
from scipy.io import arff
from .Utils import Files
from .Arguments import EnvData
from mdlp.discretization import MDLP
class Diterator:
@@ -28,9 +30,12 @@ class DatasetsArff:
file_name = os.path.join(self.folder(), self.dataset_names(name))
data = arff.loadarff(file_name)
df = pd.DataFrame(data[0])
df = df.dropna()
X = df.drop(class_name, axis=1).to_numpy()
df.dropna(axis=0, how="any", inplace=True)
X = df.drop(class_name, axis=1)
self.features = X.columns
self.class_name = class_name
y, _ = pd.factorize(df[class_name])
X = X.to_numpy()
return X, y
@@ -43,7 +48,7 @@ class DatasetsTanveer:
def folder():
return "data"
def load(self, name, _):
def load(self, name, *args):
file_name = os.path.join(self.folder(), self.dataset_names(name))
data = pd.read_csv(
file_name,
@@ -64,7 +69,7 @@ class DatasetsSurcov:
def folder():
return "datasets"
def load(self, name, _):
def load(self, name, *args):
file_name = os.path.join(self.folder(), self.dataset_names(name))
data = pd.read_csv(
file_name,
@@ -80,15 +85,19 @@ class DatasetsSurcov:
class Datasets:
def __init__(self, dataset_name=None):
envData = EnvData.load()
class_name = getattr(
__import__(__name__),
f"Datasets{envData['source_data']}",
)
self.load = (
self.load_discretized
if envData["discretize"] == "1"
else self.load_continuous
)
self.dataset = class_name()
self.class_names = []
self.load_names()
self._load_names()
if dataset_name is not None:
try:
class_name = self.class_names[
@@ -99,7 +108,7 @@ class Datasets:
raise ValueError(f"Unknown dataset: {dataset_name}")
self.data_sets = [dataset_name]
def load_names(self):
def _load_names(self):
file_name = os.path.join(self.dataset.folder(), Files.index)
default_class = "class"
with open(file_name) as f:
@@ -115,12 +124,63 @@ class Datasets:
self.data_sets = result
self.class_names = class_names
def load(self, name):
def get_attributes(self, name):
    """Summarize the size and class balance of a dataset.

    The data is always read through ``load_continuous``, so the figures
    are computed on the non-discretized features.

    Parameters
    ----------
    name : str
        dataset name, passed unchanged to ``load_continuous``

    Returns
    -------
    Attributes
        plain attribute holder with the fields ``balance`` (percentage of
        samples of every class, joined with "/ "), ``classes``,
        ``samples`` and ``features``
    """

    class Attributes:
        pass

    X, y = self.load_continuous(name)
    # one np.unique call gives both the number of classes and their sizes
    _, counts = np.unique(y, return_counts=True)
    total = counts.sum()
    attr = Attributes()
    attr.balance = "/ ".join(f"{count/total*100:5.2f}%" for count in counts)
    attr.classes = len(counts)
    attr.samples = X.shape[0]
    attr.features = X.shape[1]
    return attr
def get_features(self):
# Return the feature names stored on the underlying dataset object.
# NOTE(review): in the code visible here only DatasetsArff.load assigns
# `features`, and it does so while loading — confirm the other sources
# set it too and that a load has happened before this is called.
return self.dataset.features
def get_class_name(self):
# Return the name of the class column stored on the underlying dataset
# object.
# NOTE(review): in the code visible here only DatasetsArff.load assigns
# `class_name` — confirm the other sources set it as well.
return self.dataset.class_name
def load_continuous(self, name):
    """Load a dataset without discretizing it.

    Parameters
    ----------
    name : str
        dataset name; it has to be one of ``self.data_sets``

    Returns
    -------
    whatever the configured source's ``load(name, class_name)`` returns

    Raises
    ------
    ValueError
        if the name is not in ``self.data_sets`` or the source cannot find
        the file; the original exception is kept as ``__cause__``
    """
    try:
        class_name = self.class_names[self.data_sets.index(name)]
        return self.dataset.load(name, class_name)
    except (ValueError, FileNotFoundError) as err:
        # chain explicitly so the traceback shows what actually failed
        raise ValueError(f"Unknown dataset: {name}") from err
def discretize(self, X, y):
"""Supervised discretization with Fayyad and Irani's MDLP algorithm.

Parameters
----------
X : np.ndarray
array (n_samples, n_features) of features
y : np.ndarray
array (n_samples,) of labels

Returns
-------
the discretized features, exactly as returned by ``MDLP.fit_transform``;
the labels are only used for fitting and are not returned
"""
# fixed random_state — presumably to keep the discretization reproducible
# between runs (TODO confirm)
discretiz = MDLP(random_state=17, dtype=np.int32)
Xdisc = discretiz.fit_transform(X, y)
return Xdisc
def load_discretized(self, name, dataframe=False):
    """Load a dataset and discretize its features.

    Parameters
    ----------
    name : str
        dataset name, passed unchanged to ``load_continuous``
    dataframe : bool, optional
        if True return one pandas DataFrame whose columns are the features
        followed by the class column; by default return the arrays

    Returns
    -------
    tuple (Xd, yd) with the discretized features and the labels, or a
    pandas.DataFrame when ``dataframe`` is True
    """
    X, yd = self.load_continuous(name)
    Xd = self.discretize(X, yd)
    if not dataframe:
        # the feature names are only needed for the DataFrame, so the
        # array path neither builds one nor asks for them
        return Xd, yd
    dataset = pd.DataFrame(Xd, columns=self.get_features())
    dataset[self.get_class_name()] = yd
    return dataset
def __iter__(self) -> Diterator:
return Diterator(self.data_sets)

View File

@@ -8,6 +8,7 @@ from sklearn.ensemble import (
)
from sklearn.svm import SVC
from stree import Stree
from bayesclass import TAN, KDB, AODE
from wodt import Wodt
from odte import Odte
from xgboost import XGBClassifier
@@ -20,6 +21,9 @@ class Models:
def define_models(random_state):
return {
"STree": Stree(random_state=random_state),
"TAN": TAN(random_state=random_state),
"KDB": KDB(k=3),
"AODE": AODE(random_state=random_state),
"Cart": DecisionTreeClassifier(random_state=random_state),
"ExtraTree": ExtraTreeClassifier(random_state=random_state),
"Wodt": Wodt(random_state=random_state),

View File

@@ -1,4 +1,5 @@
import os
import sys
from operator import itemgetter
import math
import json
@@ -6,6 +7,7 @@ import abc
import shutil
import subprocess
import xlsxwriter
from xlsxwriter.exceptions import DuplicateWorksheetName
import numpy as np
from .Experiments import BestResults
from .Datasets import Datasets
@@ -17,6 +19,11 @@ from .Utils import (
TextColor,
NO_RESULTS,
)
from ._version import __version__
def get_input(is_test):
    """Return the user's answer, or the fixed string "test" under test.

    Reading stdin is skipped entirely when ``is_test`` is truthy so that
    automated runs never block waiting for a keyboard.
    """
    if is_test:
        return "test"
    return input()
class BestResultsEver:
@@ -33,7 +40,7 @@ class BestResultsEver:
]
self.data["Arff"]["accuracy"] = [
"STree_default (linear-ovo)",
21.9765,
22.109799,
]
def get_name_value(self, key, score):
@@ -123,7 +130,7 @@ class BaseReport(abc.ABC):
class Report(BaseReport):
header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 16, 15]
header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 17, 15]
header_cols = [
"Dataset",
"Sampl.",
@@ -182,7 +189,7 @@ class Report(BaseReport):
)
i += 1
print(
f"{result['time']:9.6f}±{result['time_std']:6.4f} ",
f"{result['time']:10.6f}±{result['time_std']:6.4f} ",
end="",
)
i += 1
@@ -326,7 +333,17 @@ class Excel(BaseReport):
else:
self.book = book
self.close = False
self.sheet = self.book.add_worksheet(self.data["model"])
suffix = ""
num = 1
while True:
try:
self.sheet = self.book.add_worksheet(
self.data["model"] + suffix
)
break
except DuplicateWorksheetName:
num += 1
suffix = str(num)
self.max_hyper_width = 0
self.col_hyperparams = 0
@@ -566,37 +583,251 @@ class Excel(BaseReport):
self.sheet.set_row(c, 20)
self.sheet.set_row(0, 25)
self.sheet.freeze_panes(6, 1)
self.sheet.hide_gridlines()
self.sheet.hide_gridlines(2)
if self.close:
self.book.close()
class ReportDatasets:
row = 6
# alternate lines colors
color1 = "#DCE6F1"
color2 = "#FDE9D9"
color3 = "#B1A0C7"
def __init__(self, excel=False, book=None):
self.excel = excel
self.env = EnvData().load()
self.close = False
self.output = True
self.header_text = f"Datasets used in benchmark ver. {__version__}"
if excel:
self.max_length = 0
if book is None:
self.excel_file_name = Files.datasets_report_excel
self.book = xlsxwriter.Workbook(
self.excel_file_name, {"nan_inf_to_errors": True}
)
self.set_properties(self.get_title())
self.close = True
else:
self.book = book
self.output = False
self.sheet = self.book.add_worksheet("Datasets")
def set_properties(self, title):
self.book.set_properties(
{
"title": title,
"subject": "Machine learning results",
"author": "Ricardo Montañana Gómez",
"manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
"company": "UCLM",
"comments": "Created with Python and XlsxWriter",
}
)
@staticmethod
def report():
def get_python_version():
return "{}.{}".format(sys.version_info.major, sys.version_info.minor)
def get_title(self):
return (
f" Benchmark ver. {__version__} - "
f" Python ver. {self.get_python_version()}"
f" with {self.env['n_folds']} Folds cross validation "
f" Discretization: {self.env['discretize']} "
f"Stratification: {self.env['stratified']}"
)
def get_file_name(self):
# Return the name of the workbook created by this report.
# NOTE(review): in the constructor visible here `excel_file_name` is only
# assigned when excel output is requested and no external book is passed;
# on any other path this attribute lookup would fail — confirm callers
# only use it after a stand-alone excel report.
return self.excel_file_name
def header(self):
merge_format = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 18,
"bg_color": self.color3,
}
)
merge_format_subheader = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "center",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
merge_format_subheader_right = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "right",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
merge_format_subheader_left = self.book.add_format(
{
"border": 1,
"bold": 1,
"align": "left",
"valign": "vcenter",
"font_size": 16,
"bg_color": self.color1,
}
)
self.sheet.merge_range(0, 0, 0, 4, self.header_text, merge_format)
self.sheet.merge_range(
1,
0,
4,
0,
f" Default score {self.env['score']}",
merge_format_subheader,
)
self.sheet.merge_range(
1,
1,
1,
3,
"Cross validation",
merge_format_subheader_right,
)
self.sheet.write(
1, 4, f"{self.env['n_folds']} Folds", merge_format_subheader_left
)
self.sheet.merge_range(
2,
1,
2,
3,
"Stratified",
merge_format_subheader_right,
)
self.sheet.write(
2,
4,
f"{'True' if self.env['stratified']=='1' else 'False'}",
merge_format_subheader_left,
)
self.sheet.merge_range(
3,
1,
3,
3,
"Discretized",
merge_format_subheader_right,
)
self.sheet.write(
3,
4,
f"{'True' if self.env['discretize']=='1' else 'False'}",
merge_format_subheader_left,
)
self.sheet.merge_range(
4,
1,
4,
3,
"Seeds",
merge_format_subheader_right,
)
self.sheet.write(
4, 4, f"{self.env['seeds']}", merge_format_subheader_left
)
self.update_max_length(len(self.env["seeds"]) + 1)
header_cols = [
("Dataset", 30),
("Samples", 10),
("Features", 10),
("Classes", 10),
("Balance", 50),
]
bold = self.book.add_format(
{
"bold": True,
"font_size": 14,
"bg_color": self.color3,
"border": 1,
}
)
i = 0
for item, length in header_cols:
self.sheet.write(5, i, item, bold)
self.sheet.set_column(i, i, length)
i += 1
def footer(self):
# set Balance column width to max length
self.sheet.set_column(4, 4, self.max_length)
self.sheet.freeze_panes(6, 1)
self.sheet.hide_gridlines(2)
if self.close:
self.book.close()
def print_line(self, result):
size_n = 14
integer = self.book.add_format(
{"num_format": "#,###", "font_size": size_n, "border": 1}
)
normal = self.book.add_format({"font_size": size_n, "border": 1})
col = 0
if self.row % 2 == 0:
normal.set_bg_color(self.color1)
integer.set_bg_color(self.color1)
else:
normal.set_bg_color(self.color2)
integer.set_bg_color(self.color2)
self.sheet.write(self.row, col, result.dataset, normal)
self.sheet.write(self.row, col + 1, result.samples, integer)
self.sheet.write(self.row, col + 2, result.features, integer)
self.sheet.write(self.row, col + 3, result.classes, normal)
self.sheet.write(self.row, col + 4, result.balance, normal)
self.update_max_length(len(result.balance))
self.row += 1
def update_max_length(self, value):
    """Remember ``value`` if it is the longest length seen so far."""
    self.max_length = max(self.max_length, value)
def report(self):
data_sets = Datasets()
color_line = TextColor.LINE1
print(color_line, end="")
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
print("=" * 30 + " ===== ====== === " + "=" * 40)
if self.excel:
self.header()
if self.output:
print(color_line, end="")
print(self.header_text)
print("")
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
print("=" * 30 + " ====== ===== === " + "=" * 60)
for dataset in data_sets:
X, y = data_sets.load(dataset)
attributes = data_sets.get_attributes(dataset)
attributes.dataset = dataset
if self.excel:
self.print_line(attributes)
color_line = (
TextColor.LINE2
if color_line == TextColor.LINE1
else TextColor.LINE1
)
values, counts = np.unique(y, return_counts=True)
comp = ""
sep = ""
for count in counts:
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
sep = "/ "
print(color_line, end="")
print(
f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} "
f"{len(np.unique(y)):3d} {comp:40s}"
)
if self.output:
print(color_line, end="")
print(
f"{dataset:30s} {attributes.samples:6,d} "
f"{attributes.features:5,d} {attributes.classes:3d} "
f"{attributes.balance:40s}"
)
if self.excel:
self.footer()
class SQL(BaseReport):
@@ -1068,7 +1299,12 @@ class Benchmark:
k = Excel(file_name=file_name, book=book)
k.report()
sheet.freeze_panes(6, 1)
sheet.hide_gridlines()
sheet.hide_gridlines(2)
def add_datasets_sheet():
# Add datasets sheet
re = ReportDatasets(excel=True, book=book)
re.report()
def exreport_output():
file_name = os.path.join(
@@ -1096,6 +1332,7 @@ class Benchmark:
footer()
models_files()
exreport_output()
add_datasets_sheet()
book.close()
@@ -1119,6 +1356,7 @@ class Summary:
def __init__(self, hidden=False) -> None:
self.results = Files().get_all_results(hidden=hidden)
self.data = []
self.data_filtered = []
self.datasets = {}
self.models = set()
self.hidden = hidden
@@ -1195,13 +1433,14 @@ class Summary:
number=0,
) -> None:
"""Print the list of results"""
data = self.get_results_criteria(
score, model, input_data, sort_key, number
)
if data == []:
if self.data_filtered == []:
self.data_filtered = self.get_results_criteria(
score, model, input_data, sort_key, number
)
if self.data_filtered == []:
raise ValueError(NO_RESULTS)
max_file = max(len(x["file"]) for x in data)
max_title = max(len(x["title"]) for x in data)
max_file = max(len(x["file"]) for x in self.data_filtered)
max_title = max(len(x["title"]) for x in self.data_filtered)
if self.hidden:
color1 = TextColor.GREEN
color2 = TextColor.YELLOW
@@ -1210,10 +1449,11 @@ class Summary:
color2 = TextColor.LINE2
print(color1, end="")
print(
f"{'Date':10s} {'File':{max_file}s} {'Score':8s} {'Time(h)':7s} "
f"{'Title':s}"
f" # {'Date':10s} {'File':{max_file}s} {'Score':8s} "
f"{'Time(h)':7s} {'Title':s}"
)
print(
"===",
"=" * 10
+ " "
+ "=" * max_file
@@ -1222,21 +1462,60 @@ class Summary:
+ " "
+ "=" * 7
+ " "
+ "=" * max_title
+ "=" * max_title,
)
print(
"\n".join(
[
(color2 if n % 2 == 0 else color1)
+ f"{x['date']} {x['file']:{max_file}s} "
(color2 if n % 2 == 0 else color1) + f"{n:3d} "
f"{x['date']} {x['file']:{max_file}s} "
f"{x['metric']:8.5f} "
f"{x['duration']/3600:7.3f} "
f"{x['title']}"
for n, x in enumerate(data)
for n, x in enumerate(self.data_filtered)
]
)
)
def manage_results(self, excel, is_test):
    """Interactively report results chosen from the list last shown.

    Keeps prompting until "q" is entered. "r" prints the list again. A
    valid index prints the report of that result and, when ``excel`` is
    set and the results are not the hidden ones, also adds it as a new
    sheet of the ``Files.be_list_excel`` workbook.

    Parameters
    ----------
    excel : bool
        whether the chosen results also have to be written to excel
    is_test : bool
        passed to ``get_input`` so tests do not read from stdin

    Returns
    -------
    bool
        True if an excel file was created, False otherwise
    """
    book = None
    while True:
        print(
            "Which result do you want to report? (q to quit, r to list "
            "again, number to report): ",
            end="",
        )
        num = get_input(is_test)
        if num == "q":
            # a workbook only exists if at least one result was exported
            if excel and book is not None:
                book.close()
                return True
            return False
        if num == "r":
            self.list_results()
            continue
        # The number typed refers to the list that was printed, i.e. to
        # data_filtered, so that is the list the bound is checked against.
        # isdecimal() only accepts what int() can convert.
        if num.isdecimal() and int(num) < len(self.data_filtered):
            file_name = self.data_filtered[int(num)]["file"]
            Report(file_name, self.hidden).report()
            if excel and not self.hidden:
                if book is None:
                    book = xlsxwriter.Workbook(
                        Files.be_list_excel, {"nan_inf_to_errors": True}
                    )
                Excel(file_name=file_name, book=book).report()
        else:
            print(f"Invalid option {num}. Try again!")
def show_result(self, data: dict, title: str = "") -> None:
def whites(n: int) -> str:
return " " * n + color1 + "*"

View File

@@ -27,6 +27,8 @@ class Files:
exreport_pdf = "Rplots.pdf"
benchmark_r = "benchmark.r"
dot_env = ".env"
datasets_report_excel = "ReportDatasets.xlsx"
be_list_excel = "some_results.xlsx"
@staticmethod
def exreport_output(score):

View File

@@ -1,10 +1,16 @@
from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer, DatasetsArff
from .Datasets import (
Datasets,
DatasetsSurcov,
DatasetsTanveer,
DatasetsArff,
)
from .Experiments import Experiment
from .Results import Report, Summary
from ._version import __version__
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
__license__ = "MIT License"
__author_email__ = "ricardo.montanana@alu.uclm.es"
__all__ = ["Experiment", "Datasets", "Report", "Summary"]
# __all__ holds the *names* of the public objects as strings. Listing the
# value of __version__ instead would export a name that is not an attribute
# of the module and make star-imports fail.
__all__ = ["Experiment", "Datasets", "Report", "Summary", "__version__"]

View File

@@ -1 +1 @@
__version__ = "0.2.0"
__version__ = "0.4.0"

View File

@@ -1,7 +1,7 @@
#! /usr/bin/env python
import os
from benchmark.Results import Summary
from benchmark.Utils import Folders
from benchmark.Utils import Folders, Files
from benchmark.Arguments import Arguments
"""List experiments of a model
@@ -12,6 +12,7 @@ def main(args_test=None):
arguments = Arguments()
arguments.xset("number").xset("model", required=False).xset("key")
arguments.xset("hidden").xset("nan").xset("score", required=False)
arguments.xset("excel")
args = arguments.parse(args_test)
data = Summary(hidden=args.hidden)
data.acquire()
@@ -22,32 +23,39 @@ def main(args_test=None):
sort_key=args.key,
number=args.number,
)
is_test = args_test is not None
if not args.nan:
excel_generated = data.manage_results(args.excel, is_test)
if args.excel and excel_generated:
print(f"Generated file: {Files.be_list_excel}")
Files.open(Files.be_list_excel, is_test)
except ValueError as e:
print(e)
else:
if args.nan:
results_nan = []
results = data.get_results_criteria(
score=args.score,
model=args.model,
input_data=None,
sort_key=args.key,
number=args.number,
return
if args.nan:
results_nan = []
results = data.get_results_criteria(
score=args.score,
model=args.model,
input_data=None,
sort_key=args.key,
number=args.number,
)
for result in results:
if result["metric"] != result["metric"]:
results_nan.append(result)
if results_nan != []:
print(
"\n"
+ "*" * 30
+ " Results with nan moved to hidden "
+ "*" * 30
)
for result in results:
if result["metric"] != result["metric"]:
results_nan.append(result)
if results_nan != []:
print(
"\n"
+ "*" * 30
+ " Results with nan moved to hidden "
+ "*" * 30
data.data_filtered = []
data.list_results(input_data=results_nan)
for result in results_nan:
name = result["file"]
os.rename(
os.path.join(Folders.results, name),
os.path.join(Folders.hidden_results, name),
)
data.list_results(input_data=results_nan)
for result in results_nan:
name = result["file"]
os.rename(
os.path.join(Folders.results, name),
os.path.join(Folders.hidden_results, name),
)

View File

@@ -21,7 +21,11 @@ def main(args_test=None):
if args.grid:
args.best = None
if args.file is None and args.best is None and args.grid is None:
ReportDatasets.report()
report = ReportDatasets(args.excel)
report.report()
if args.excel:
is_test = args_test is not None
Files.open(report.get_file_name(), is_test)
else:
if args.best is not None or args.grid is not None:
report = ReportBest(args.score, args.model, args.best, args.grid)

View File

@@ -6,3 +6,4 @@ stratified=0
# Source of data Tanveer/Surcov
source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -4,4 +4,5 @@ n_folds=5
model=ODTE
stratified=0
source_data=Arff
seeds=[271, 314, 171]
seeds=[271, 314, 171]
discretize=1

View File

@@ -6,3 +6,4 @@ stratified=0
# Source of data Tanveer/Surcov
source_data=Tanveer
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

View File

@@ -5,4 +5,5 @@ model=ODTE
stratified=0
# Source of data Tanveer/Surcov
source_data=Surcov
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
discretize=0

2
benchmark/tests/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
ReportDatasets.xlsx
some_results.xlsx

View File

@@ -5,6 +5,7 @@ from openpyxl import load_workbook
from .TestBase import TestBase
from ..Utils import Folders, Files, NO_RESULTS
from ..Results import Benchmark
from .._version import __version__
class BenchmarkTest(TestBase):
@@ -89,6 +90,15 @@ class BenchmarkTest(TestBase):
self.assertTrue(os.path.exists(benchmark.get_tex_file()))
self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
@staticmethod
def generate_excel_sheet(test, sheet, file_name):
# Write every non-empty cell of `sheet` to test_files/<file_name>, one
# line per cell in the form row;col;"value", scanning rows and columns
# from 1 up to the sheet's max_row / max_column.
# `test` is not used in the body.
with open(os.path.join("test_files", file_name), "w") as f:
for row in range(1, sheet.max_row + 1):
for col in range(1, sheet.max_column + 1):
value = sheet.cell(row=row, column=col).value
if value is not None:
print(f'{row};{col};"{value}"', file=f)
def test_excel_output(self):
benchmark = Benchmark("accuracy", visualize=False)
benchmark.compile_results()
@@ -98,9 +108,16 @@ class BenchmarkTest(TestBase):
benchmark.excel()
file_name = benchmark.get_excel_file_name()
book = load_workbook(file_name)
replace = None
with_this = None
for sheet_name in book.sheetnames:
sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
# ExcelTest.generate_excel_sheet(
# self, sheet, f"exreport_excel_{sheet_name}"
# )
if sheet_name == "Datasets":
replace = self.benchmark_version
with_this = __version__
self.check_excel_sheet(
sheet,
f"exreport_excel_{sheet_name}",
replace=replace,
with_this=with_this,
)

View File

@@ -30,6 +30,19 @@ class DatasetTest(TestBase):
expected = [271, 314, 171]
self.assertSequenceEqual(Randomized.seeds(), expected)
def test_load_dataframe(self):
    """The DataFrame form of a discretized dataset matches the arrays."""
    self.set_env(".env.arff")
    datasets = Datasets()
    X, y = datasets.load_discretized("iris", dataframe=False)
    frame = datasets.load_discretized("iris", dataframe=True)
    class_column = datasets.get_class_name()
    feature_names = datasets.get_features()
    # labels first, then every feature column against its array column
    self.assertListEqual(y.tolist(), frame[class_column].tolist())
    for column, feature in enumerate(feature_names):
        self.assertListEqual(X[:, column].tolist(), frame[feature].tolist())
def test_Datasets_iterator(self):
test = {
".env.dist": ["balance-scale", "balloons"],

View File

@@ -2,11 +2,14 @@ import os
from io import StringIO
from unittest.mock import patch
from .TestBase import TestBase
from ..Results import Report, BaseReport, ReportBest, ReportDatasets
from ..Results import Report, BaseReport, ReportBest, ReportDatasets, get_input
from ..Utils import Symbols
class ReportTest(TestBase):
def test_get_input(self):
self.assertEqual(get_input(is_test=True), "test")
def test_BaseReport(self):
with patch.multiple(BaseReport, __abstractmethods__=set()):
file_name = os.path.join(
@@ -81,7 +84,7 @@ class ReportTest(TestBase):
output_text = stdout.getvalue().splitlines()
# Compare replacing STree version
for line, index in zip(expected, range(len(expected))):
if "1.2.4" in line:
if self.stree_version in line:
# replace STree version
line = self.replace_STree_version(line, output_text, index)
@@ -97,4 +100,12 @@ class ReportTest(TestBase):
def test_report_datasets(self, mock_output):
report = ReportDatasets()
report.report()
self.check_output_file(mock_output, "report_datasets")
file_name = f"report_datasets{self.ext}"
with open(os.path.join(self.test_files, file_name)) as f:
expected = f.read()
output_text = mock_output.getvalue().splitlines()
for line, index in zip(expected.splitlines(), range(len(expected))):
if self.benchmark_version in line:
# replace benchmark version
line = self.replace_benchmark_version(line, output_text, index)
self.assertEqual(line, output_text[index])

View File

@@ -15,6 +15,8 @@ class TestBase(unittest.TestCase):
self.test_files = "test_files"
self.output = "sys.stdout"
self.ext = ".test"
self.benchmark_version = "0.2.0"
self.stree_version = "1.2.4"
super().__init__(*args, **kwargs)
def remove_files(self, files, folder):
@@ -31,7 +33,9 @@ class TestBase(unittest.TestCase):
if value is not None:
print(f'{row};{col};"{value}"', file=f)
def check_excel_sheet(self, sheet, file_name):
def check_excel_sheet(
self, sheet, file_name, replace=None, with_this=None
):
file_name += self.ext
with open(os.path.join(self.test_files, file_name), "r") as f:
expected = csv.reader(f, delimiter=";")
@@ -43,6 +47,9 @@ class TestBase(unittest.TestCase):
value = float(value)
except ValueError:
pass
if replace is not None and isinstance(value, str):
if replace in value:
value = value.replace(replace, with_this)
self.assertEqual(sheet.cell(int(row), int(col)).value, value)
def check_output_file(self, output, file_name):
@@ -51,10 +58,15 @@ class TestBase(unittest.TestCase):
expected = f.read()
self.assertEqual(output.getvalue(), expected)
@staticmethod
def replace_STree_version(line, output, index):
idx = line.find("1.2.4")
return line.replace("1.2.4", output[index][idx : idx + 5])
def replace_STree_version(self, line, output, index):
    """Put the STree version found in the output into an expected line.

    The text of ``output[index]`` located where ``self.stree_version``
    starts in ``line``, and as long as that version string, replaces the
    recorded version in ``line``.

    Returns the patched line; ``line`` comes back unchanged if it does not
    contain ``self.stree_version``.
    """
    idx = line.find(self.stree_version)
    # slice as many characters as the recorded version has, not a fixed 5
    width = len(self.stree_version)
    return line.replace(self.stree_version, output[index][idx : idx + width])
def replace_benchmark_version(self, line, output, index):
    """Put the benchmark version found in the output into an expected line.

    The text of ``output[index]`` located where ``self.benchmark_version``
    starts in ``line``, and as long as that version string, replaces the
    recorded version in ``line``.

    Returns the patched line; ``line`` comes back unchanged if it does not
    contain ``self.benchmark_version``.
    """
    idx = line.find(self.benchmark_version)
    # slice as many characters as the recorded version has, not a fixed 5
    width = len(self.benchmark_version)
    return line.replace(
        self.benchmark_version, output[index][idx : idx + width]
    )
def check_file_file(self, computed_file, expected_file):
with open(computed_file) as f:

View File

@@ -179,6 +179,7 @@ class UtilTest(TestBase):
"stratified": "0",
"source_data": "Tanveer",
"seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
"discretize": "0",
}
computed = EnvData().load()
self.assertDictEqual(computed, expected)

View File

@@ -2,6 +2,7 @@ import os
from openpyxl import load_workbook
from ...Utils import NO_RESULTS, Folders, Files
from ..TestBase import TestBase
from ..._version import __version__
class BeBenchmarkTest(TestBase):
@@ -43,9 +44,19 @@ class BeBenchmarkTest(TestBase):
Folders.exreport, Files.exreport_excel(self.score)
)
book = load_workbook(file_name)
replace = None
with_this = None
for sheet_name in book.sheetnames:
sheet = book[sheet_name]
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
if sheet_name == "Datasets":
replace = self.benchmark_version
with_this = __version__
self.check_excel_sheet(
sheet,
f"exreport_excel_{sheet_name}",
replace=replace,
with_this=with_this,
)
def test_be_benchmark_single(self):
stdout, stderr = self.execute_script(

View File

@@ -4,6 +4,10 @@ from ...Utils import Folders, Files
from ..TestBase import TestBase
def get_test():
return "hola"
class BeGridTest(TestBase):
def setUp(self):
self.prepare_scripts_env()

View File

@@ -1,5 +1,7 @@
import os
from ...Utils import Folders, NO_RESULTS
from unittest.mock import patch
from openpyxl import load_workbook
from ...Utils import Folders, Files, NO_RESULTS
from ..TestBase import TestBase
@@ -7,12 +9,64 @@ class BeListTest(TestBase):
def setUp(self):
self.prepare_scripts_env()
def test_be_list(self):
@patch("benchmark.Results.get_input", return_value="q")
def test_be_list(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "summary_list_model")
self.check_output_file(stdout, "be_list_model")
def test_be_list_no_data(self):
@patch("benchmark.Results.get_input", side_effect=iter(["x", "q"]))
def test_be_list_invalid_option(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model_invalid")
@patch("benchmark.Results.get_input", side_effect=iter(["0", "q"]))
def test_be_list_report(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report")
@patch("benchmark.Results.get_input", side_effect=iter(["q"]))
def test_be_list_report_excel_none(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model")
@patch("benchmark.Results.get_input", side_effect=iter(["r", "q"]))
def test_be_list_twice(self, input_data):
stdout, stderr = self.execute_script("be_list", ["-m", "STree"])
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_model_2")
@patch("benchmark.Results.get_input", side_effect=iter(["2", "q"]))
def test_be_list_report_excel(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report_excel")
book = load_workbook(Files.be_list_excel)
sheet = book["STree"]
self.check_excel_sheet(sheet, "excel")
@patch("benchmark.Results.get_input", side_effect=iter(["2", "1", "q"]))
def test_be_list_report_excel_twice(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "STree", "-x", "1"]
)
self.assertEqual(stderr.getvalue(), "")
self.check_output_file(stdout, "be_list_report_excel_2")
book = load_workbook(Files.be_list_excel)
sheet = book["STree"]
self.check_excel_sheet(sheet, "excel")
sheet = book["STree2"]
self.check_excel_sheet(sheet, "excel2")
@patch("benchmark.Results.get_input", return_value="q")
def test_be_list_no_data(self, input_data):
stdout, stderr = self.execute_script(
"be_list", ["-m", "Wodt", "-s", "f1-macro"]
)
@@ -41,7 +95,8 @@ class BeListTest(TestBase):
swap_files(Folders.results, Folders.hidden_results, file_name)
self.fail("test_be_list_nan() should not raise exception")
def test_be_list_nan_no_nan(self):
@patch("benchmark.Results.get_input", return_value="q")
def test_be_list_nan_no_nan(self, input_data):
    """Run ``be_list --nan 1`` with every prompt answered by "q".

    ``benchmark.Results.get_input`` is patched to always return "q".
    The script must write nothing to stderr and its stdout must match
    the stored ``be_list_no_nan`` output file.
    """
    script_args = ["--nan", "1"]
    out, err = self.execute_script("be_list", script_args)
    # Nothing may be written to stderr
    self.assertEqual(err.getvalue(), "")
    self.check_output_file(out, "be_list_no_nan")

View File

@@ -1,7 +1,8 @@
import os
from openpyxl import load_workbook
from ...Utils import Folders
from ...Utils import Folders, Files
from ..TestBase import TestBase
from ..._version import __version__
class BeReportTest(TestBase):
@@ -14,6 +15,7 @@ class BeReportTest(TestBase):
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx",
]
self.remove_files(files, Folders.results)
self.remove_files([Files.datasets_report_excel], os.getcwd())
return super().tearDown()
def test_be_report(self):
@@ -41,7 +43,37 @@ class BeReportTest(TestBase):
def test_be_report_datatsets(self):
    """Run ``be_report`` without arguments and compare stdout line by line.

    The expected text is read from ``report_datasets{self.ext}`` inside
    ``self.test_files``. Expected lines containing
    ``self.benchmark_version`` are passed through
    ``self.replace_benchmark_version`` before being compared with the
    output line at the same position.
    """
    stdout, stderr = self.execute_script("be_report", [])
    self.assertEqual(stderr.getvalue(), "")
    # NOTE(review): this call precedes a line-by-line comparison of the
    # same expected file; it looks like a leftover of the pre-change
    # version of this test -- confirm whether it should stay.
    self.check_output_file(stdout, "report_datasets")
    file_name = f"report_datasets{self.ext}"
    with open(os.path.join(self.test_files, file_name)) as f:
        expected = f.read()
    output_text = stdout.getvalue().splitlines()
    # enumerate() numbers the expected *lines*; the previous
    # zip(..., range(len(expected))) bounded the index by the number of
    # characters in the file and only worked because zip() truncates.
    for index, line in enumerate(expected.splitlines()):
        if self.benchmark_version in line:
            # replace benchmark version
            line = self.replace_benchmark_version(line, output_text, index)
        self.assertEqual(line, output_text[index])
def test_be_report_datasets_excel(self):
    """Run ``be_report -x 1`` and check both stdout and the Excel file.

    The stdout text is compared line by line with
    ``report_datasets{self.ext}`` from ``self.test_files``; expected lines
    containing ``self.benchmark_version`` are passed through
    ``self.replace_benchmark_version`` first. Afterwards the workbook
    named ``Files.datasets_report_excel`` in the current working directory
    is loaded and its ``Datasets`` sheet is checked against
    ``exreport_excel_Datasets``, replacing ``self.benchmark_version`` with
    the package ``__version__``.
    """
    stdout, stderr = self.execute_script("be_report", ["-x", "1"])
    self.assertEqual(stderr.getvalue(), "")
    file_name = f"report_datasets{self.ext}"
    with open(os.path.join(self.test_files, file_name)) as f:
        expected = f.read()
    output_text = stdout.getvalue().splitlines()
    # enumerate() numbers the expected *lines*; the previous
    # zip(..., range(len(expected))) bounded the index by the number of
    # characters in the file and only worked because zip() truncates.
    for index, line in enumerate(expected.splitlines()):
        if self.benchmark_version in line:
            # replace benchmark version
            line = self.replace_benchmark_version(line, output_text, index)
        self.assertEqual(line, output_text[index])
    # The Excel report is looked up in the current working directory
    file_name = os.path.join(os.getcwd(), Files.datasets_report_excel)
    book = load_workbook(file_name)
    sheet = book["Datasets"]
    self.check_excel_sheet(
        sheet,
        "exreport_excel_Datasets",
        replace=self.benchmark_version,
        with_this=__version__,
    )
def test_be_report_best(self):
stdout, stderr = self.execute_script(

View File

@@ -0,0 +1,6 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,11 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):  # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,7 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): Invalid option x. Try again!
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -1,13 +1,13 @@
Date File Score Time(h) Title
========== ================================================================ ======== ======= ============================================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 # Date File Score Time(h) Title
=== ========== ================================================================ ======== ======= ============================================
 0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 5 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
****************************** Results with nan moved to hidden ******************************
Date File Score Time(h) Title
========== ================================================================ ======== ======= =======================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
 # Date File Score Time(h) Title
=== ========== ================================================================ ======== ======= =======================
 0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters

View File

@@ -1,7 +1,7 @@
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 3 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 4 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters

View File

@@ -0,0 +1,21 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-11-01 19:17:07 *
* default B *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 4115.04 seconds, 1.14 hours, on macbook-pro *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 18.78 9.88 5.90 0.970000±0.0020 0.233304±0.0481 {'max_features': 'auto', 'splitter': 'mutual'}
balloons 16 4 2 4.72 2.86 2.78 0.556667±0.2941 0.021352±0.0058 {'max_features': 'auto', 'splitter': 'mutual'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0379 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report):

View File

@@ -0,0 +1,21 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): Generated file: some_results.xlsx

View File

@@ -0,0 +1,36 @@
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): *************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40 *
* default A *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 3395.01 seconds, 0.94 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 11.08 5.90 5.90 0.980000±0.0010 0.285207±0.0603 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 4.12 2.56 2.56 0.695000±0.2757 0.021201±0.0035 {'splitter': 'best', 'max_features': 'auto'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0416 *
*************************************************************************************************************************
Which result do you want to report? (q to quit, r to list again, number to report): Generated file: some_results.xlsx

View File

@@ -1,16 +1,16 @@
************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.80 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.80 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
*************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json

View File

@@ -1,16 +1,16 @@
************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.48 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.48 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 *
*************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json

View File

@@ -1,15 +1,15 @@
************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.06 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.06 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 *
*************************************************************************************************************************
Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json

View File

@@ -1,16 +1,16 @@
************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.89 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
* test *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 0.89 seconds, 0.00 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 *
*************************************************************************************************************************
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json

View File

@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -26,13 +26,13 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
** No results found **
** No results found **
** No results found **

View File

@@ -26,10 +26,10 @@
* results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json *
* *
*********************************************************************************
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -0,0 +1,48 @@
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40"
2;1;" default A"
3;1;" Score is accuracy"
3;2;" Execution time"
3;5;"3,395.01 s"
3;7;" "
3;8;"Platform"
3;9;"iMac27"
3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
4;5;" 0.94 h"
4;10;"Stratified: False"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Nodes"
6;6;"Leaves"
6;7;"Depth"
6;8;"Score"
6;9;"Score Std."
6;10;"Time"
6;11;"Time Std."
6;12;"Hyperparameters"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;"11.08"
7;6;"5.9"
7;7;"5.9"
7;8;"0.98"
7;9;"0.001"
7;10;"0.2852065515518188"
7;11;"0.06031593282605064"
7;12;"{'splitter': 'best', 'max_features': 'auto'}"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"4.12"
8;6;"2.56"
8;7;"2.56"
8;8;"0.695"
8;9;"0.2756860130252853"
8;10;"0.02120100021362305"
8;11;"0.003526023309468471"
8;12;"{'splitter': 'best', 'max_features': 'auto'}"
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0416"

View File

@@ -0,0 +1,25 @@
1;1;"Datasets used in benchmark ver. 0.2.0"
2;1;" Default score accuracy"
2;2;"Cross validation"
2;5;"5 Folds"
3;2;"Stratified"
3;5;"False"
4;2;"Discretized"
4;5;"False"
5;2;"Seeds"
5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
6;1;"Dataset"
6;2;"Samples"
6;3;"Features"
6;4;"Classes"
6;5;"Balance"
7;1;"balance-scale"
7;2;"625"
7;3;"4"
7;4;"3"
7;5;" 7.84%/ 46.08%/ 46.08%"
8;1;"balloons"
8;2;"16"
8;3;"4"
8;4;"2"
8;5;"56.25%/ 43.75%"

View File

@@ -1,15 +1,15 @@
************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************

View File

@@ -1,16 +1,16 @@
************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
************************************************************************************************************************
*************************************************************************************************************************
* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
* With gridsearched hyperparameters *
* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
* Score is accuracy *
*************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
************************************************************************************************************************
* ✔ Equal to best .....: 1 *
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
************************************************************************************************************************
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
*************************************************************************************************************************
* ✔ Equal to best .....: 1 *
* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
*************************************************************************************************************************

View File

@@ -1,4 +1,6 @@
Dataset Sampl. Feat. Cls Balance
============================== ===== ====== === ========================================
Datasets used in benchmark ver. 0.2.0
Dataset Sampl. Feat. Cls Balance
============================== ====== ===== === ============================================================
balance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
balloons 16 4 2 56.25%/ 43.75%

View File

@@ -1,4 +1,4 @@
Date File Score Time(h) Title
========== ================================================================ ======== ======= =======================
2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default
 # Date File Score Time(h) Title
=== ========== ================================================================ ======== ======= =======================
 0 2022-05-04 results_accuracy_XGBoost_MacBookpro16_2022-05-04_11:00:35_0.json nan 3.091 Default hyperparameters
 1 2021-11-01 results_accuracy_STree_iMac27_2021-11-01_23:55:16_0.json 0.97446 0.098 default

View File

@@ -1,5 +1,5 @@
Date File Score Time(h) Title
========== ============================================================= ======== ======= =================================
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 # Date File Score Time(h) Title
=== ========== ============================================================= ======== ======= =================================
 0 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 1 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 2 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters

View File

@@ -1,5 +1,5 @@
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B

View File

@@ -1,7 +1,7 @@
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 1 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 2 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 3 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 4 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters

View File

@@ -1,7 +1,7 @@
Date File Score Time(h) Title
========== =============================================================== ======== ======= ============================================
2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest
 # Date File Score Time(h) Title
=== ========== =============================================================== ======== ======= ============================================
 0 2021-09-30 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json 0.04544 0.173 With gridsearched hyperparameters
 1 2022-04-20 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json 0.04341 6.275 Gridsearched hyperparams v022.1b random_init
 2 2021-10-27 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json 0.04158 0.943 default A
 3 2021-11-01 results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json 0.03790 1.143 default B
 4 2022-01-14 results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json 0.03627 0.076 Test default paramters with RandomForest

View File

@@ -2,7 +2,10 @@ pandas
scikit-learn
scipy
odte
cython
mdlp-discretization
mufs
bayesclass @ git+ssh://git@github.com/doctorado-ml/bayesclass.git
xlsxwriter
openpyxl
tqdm

View File

@@ -49,15 +49,14 @@ setuptools.setup(
name="benchmark",
version=get_data("version", "_version.py"),
license=get_data("license"),
description="Oblique decision tree with svm nodes",
description="Benchmark of models with different datasets",
long_description=readme(),
long_description_content_type="text/markdown",
packages=setuptools.find_packages(),
url="https://github.com/Doctorado-ML/benchmark",
author=get_data("author"),
author_email=get_data("author_email"),
keywords="scikit-learn oblique-classifier oblique-decision-tree decision-\
tree svm svc",
keywords="scikit-learn benchmark",
classifiers=[
"Development Status :: 4 - Beta",
"License :: OSI Approved :: " + get_data("license"),