mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-15 23:45:54 +00:00
Merge pull request #6 from Doctorado-ML/language_version
Add Discretizer to Datasets Add excel to report datasets Add report datasets sheet to benchmark excel
This commit is contained in:
@@ -5,3 +5,4 @@ model=ODTE
|
||||
stratified=0
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
discretize=0
|
||||
|
3
.github/workflows/main.yml
vendored
3
.github/workflows/main.yml
vendored
@@ -14,6 +14,9 @@ jobs:
|
||||
matrix:
|
||||
os: [macos-latest, ubuntu-latest]
|
||||
python: ["3.10", "3.11"]
|
||||
exclude:
|
||||
- os: macos-latest
|
||||
python: "3.11"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
@@ -1,8 +1,10 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.io import arff
|
||||
from .Utils import Files
|
||||
from .Arguments import EnvData
|
||||
from mdlp.discretization import MDLP
|
||||
|
||||
|
||||
class Diterator:
|
||||
@@ -24,14 +26,18 @@ class DatasetsArff:
|
||||
def folder():
|
||||
return "datasets"
|
||||
|
||||
def load(self, name, class_name):
|
||||
def load(self, name, class_name, dataframe):
|
||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||
data = arff.loadarff(file_name)
|
||||
df = pd.DataFrame(data[0])
|
||||
df = df.dropna()
|
||||
X = df.drop(class_name, axis=1).to_numpy()
|
||||
df.dropna(axis=0, how="any", inplace=True)
|
||||
X = df.drop(class_name, axis=1)
|
||||
self.features = X.columns
|
||||
self.class_name = class_name
|
||||
y, _ = pd.factorize(df[class_name])
|
||||
return X, y
|
||||
df[class_name] = y
|
||||
X = X.to_numpy()
|
||||
return df if dataframe else (X, y)
|
||||
|
||||
|
||||
class DatasetsTanveer:
|
||||
@@ -43,7 +49,7 @@ class DatasetsTanveer:
|
||||
def folder():
|
||||
return "data"
|
||||
|
||||
def load(self, name, _):
|
||||
def load(self, name, *args):
|
||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||
data = pd.read_csv(
|
||||
file_name,
|
||||
@@ -64,7 +70,7 @@ class DatasetsSurcov:
|
||||
def folder():
|
||||
return "datasets"
|
||||
|
||||
def load(self, name, _):
|
||||
def load(self, name, *args):
|
||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||
data = pd.read_csv(
|
||||
file_name,
|
||||
@@ -80,15 +86,19 @@ class DatasetsSurcov:
|
||||
|
||||
class Datasets:
|
||||
def __init__(self, dataset_name=None):
|
||||
|
||||
envData = EnvData.load()
|
||||
class_name = getattr(
|
||||
__import__(__name__),
|
||||
f"Datasets{envData['source_data']}",
|
||||
)
|
||||
self.load = (
|
||||
self.load_discretized
|
||||
if envData["discretize"] == "1"
|
||||
else self.load_continuous
|
||||
)
|
||||
self.dataset = class_name()
|
||||
self.class_names = []
|
||||
self.load_names()
|
||||
self._load_names()
|
||||
if dataset_name is not None:
|
||||
try:
|
||||
class_name = self.class_names[
|
||||
@@ -99,7 +109,7 @@ class Datasets:
|
||||
raise ValueError(f"Unknown dataset: {dataset_name}")
|
||||
self.data_sets = [dataset_name]
|
||||
|
||||
def load_names(self):
|
||||
def _load_names(self):
|
||||
file_name = os.path.join(self.dataset.folder(), Files.index)
|
||||
default_class = "class"
|
||||
with open(file_name) as f:
|
||||
@@ -115,12 +125,61 @@ class Datasets:
|
||||
self.data_sets = result
|
||||
self.class_names = class_names
|
||||
|
||||
def load(self, name):
|
||||
def get_attributes(self, name):
    """Summarize the size and class balance of a dataset.

    Parameters
    ----------
    name : str
        dataset name, passed unchanged to ``load_continuous``

    Returns
    -------
    object
        plain attribute holder with ``balance`` (per-class percentages
        joined with "/ "), ``classes`` (number of distinct labels),
        ``samples`` (``X.shape[0]``) and ``features`` (``X.shape[1]``)
    """

    class Attributes:
        pass

    X, y = self.load_continuous(name)
    # One np.unique call provides both the distinct labels and their counts
    values, counts = np.unique(y, return_counts=True)
    # Loop invariant: total number of labelled samples
    total = sum(counts)
    attr = Attributes()
    attr.balance = "/ ".join(
        f"{count / total * 100:5.2f}%" for count in counts
    )
    attr.classes = len(values)
    attr.samples = X.shape[0]
    attr.features = X.shape[1]
    return attr
|
||||
|
||||
def get_features(self):
|
||||
return self.dataset.features
|
||||
|
||||
def get_class_name(self):
|
||||
return self.dataset.class_name
|
||||
|
||||
def load_continuous(self, name, dataframe=False):
|
||||
try:
|
||||
class_name = self.class_names[self.data_sets.index(name)]
|
||||
return self.dataset.load(name, class_name)
|
||||
return self.dataset.load(name, class_name, dataframe)
|
||||
except (ValueError, FileNotFoundError):
|
||||
raise ValueError(f"Unknown dataset: {name}")
|
||||
|
||||
def discretize(self, X, y):
    """Supervised discretization with Fayyad and Irani's MDLP algorithm.

    A new ``MDLP`` transformer is created on every call and fitted with
    both the features and the labels.

    Parameters
    ----------
    X : np.ndarray
        array (n_samples, n_features) of features
    y : np.ndarray
        array (n_samples,) of labels

    Returns
    -------
    tuple (X, y) of numpy.ndarray
        the transformed features and the labels, both cast to ``int``
    """
    transformer = MDLP()
    discretized = transformer.fit_transform(X, y)
    return discretized.astype(int), y.astype(int)
|
||||
|
||||
def load_discretized(self, name, dataframe=False):
    """Load a dataset and discretize its features.

    Parameters
    ----------
    name : str
        dataset name, passed unchanged to ``load_continuous``
    dataframe : bool, default False
        when True return a single ``pandas.DataFrame`` holding the
        discretized features plus the class column; otherwise return
        the ``(X, y)`` pair

    Returns
    -------
    pandas.DataFrame or tuple (X, y)
    """
    X, y = self.load_continuous(name)
    X, y = self.discretize(X, y)
    # The original ``return dataset if dataframe else X, y`` was parsed as
    # ``((dataset if dataframe else X), y)`` and therefore always returned
    # a tuple; branch explicitly so each mode returns its own shape.
    if not dataframe:
        return X, y
    dataset = pd.DataFrame(X, columns=self.get_features())
    dataset[self.get_class_name()] = y
    return dataset
|
||||
|
||||
def __iter__(self) -> Diterator:
|
||||
return Diterator(self.data_sets)
|
||||
|
@@ -8,6 +8,7 @@ from sklearn.ensemble import (
|
||||
)
|
||||
from sklearn.svm import SVC
|
||||
from stree import Stree
|
||||
from bayesclass import TAN
|
||||
from wodt import Wodt
|
||||
from odte import Odte
|
||||
from xgboost import XGBClassifier
|
||||
@@ -20,6 +21,7 @@ class Models:
|
||||
def define_models(random_state):
|
||||
return {
|
||||
"STree": Stree(random_state=random_state),
|
||||
"TAN": TAN(random_state=random_state),
|
||||
"Cart": DecisionTreeClassifier(random_state=random_state),
|
||||
"ExtraTree": ExtraTreeClassifier(random_state=random_state),
|
||||
"Wodt": Wodt(random_state=random_state),
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import sys
|
||||
from operator import itemgetter
|
||||
import math
|
||||
import json
|
||||
@@ -17,6 +18,7 @@ from .Utils import (
|
||||
TextColor,
|
||||
NO_RESULTS,
|
||||
)
|
||||
from ._version import __version__
|
||||
|
||||
|
||||
class BestResultsEver:
|
||||
@@ -566,37 +568,251 @@ class Excel(BaseReport):
|
||||
self.sheet.set_row(c, 20)
|
||||
self.sheet.set_row(0, 25)
|
||||
self.sheet.freeze_panes(6, 1)
|
||||
self.sheet.hide_gridlines()
|
||||
self.sheet.hide_gridlines(2)
|
||||
if self.close:
|
||||
self.book.close()
|
||||
|
||||
|
||||
class ReportDatasets:
|
||||
row = 6
|
||||
# alternate lines colors
|
||||
color1 = "#DCE6F1"
|
||||
color2 = "#FDE9D9"
|
||||
color3 = "#B1A0C7"
|
||||
|
||||
def __init__(self, excel=False, book=None):
|
||||
self.excel = excel
|
||||
self.env = EnvData().load()
|
||||
self.close = False
|
||||
self.output = True
|
||||
self.header_text = f"Datasets used in benchmark ver. {__version__}"
|
||||
if excel:
|
||||
self.max_length = 0
|
||||
if book is None:
|
||||
self.excel_file_name = Files.datasets_report_excel
|
||||
self.book = xlsxwriter.Workbook(
|
||||
self.excel_file_name, {"nan_inf_to_errors": True}
|
||||
)
|
||||
self.set_properties(self.get_title())
|
||||
self.close = True
|
||||
else:
|
||||
self.book = book
|
||||
self.output = False
|
||||
self.sheet = self.book.add_worksheet("Datasets")
|
||||
|
||||
def set_properties(self, title):
|
||||
self.book.set_properties(
|
||||
{
|
||||
"title": title,
|
||||
"subject": "Machine learning results",
|
||||
"author": "Ricardo Montañana Gómez",
|
||||
"manager": "Dr. J. A. Gámez, Dr. J. M. Puerta",
|
||||
"company": "UCLM",
|
||||
"comments": "Created with Python and XlsxWriter",
|
||||
}
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def report():
|
||||
def get_python_version():
    """Return the running interpreter's version as ``"major.minor"``."""
    major, minor = sys.version_info[:2]
    return f"{major}.{minor}"
|
||||
|
||||
def get_title(self):
|
||||
return (
|
||||
f" Benchmark ver. {__version__} - "
|
||||
f" Python ver. {self.get_python_version()}"
|
||||
f" with {self.env['n_folds']} Folds cross validation "
|
||||
f" Discretization: {self.env['discretize']} "
|
||||
f"Stratification: {self.env['stratified']}"
|
||||
)
|
||||
|
||||
def get_file_name(self):
|
||||
return self.excel_file_name
|
||||
|
||||
def header(self):
|
||||
merge_format = self.book.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"bold": 1,
|
||||
"align": "center",
|
||||
"valign": "vcenter",
|
||||
"font_size": 18,
|
||||
"bg_color": self.color3,
|
||||
}
|
||||
)
|
||||
merge_format_subheader = self.book.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"bold": 1,
|
||||
"align": "center",
|
||||
"valign": "vcenter",
|
||||
"font_size": 16,
|
||||
"bg_color": self.color1,
|
||||
}
|
||||
)
|
||||
merge_format_subheader_right = self.book.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"bold": 1,
|
||||
"align": "right",
|
||||
"valign": "vcenter",
|
||||
"font_size": 16,
|
||||
"bg_color": self.color1,
|
||||
}
|
||||
)
|
||||
merge_format_subheader_left = self.book.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"bold": 1,
|
||||
"align": "left",
|
||||
"valign": "vcenter",
|
||||
"font_size": 16,
|
||||
"bg_color": self.color1,
|
||||
}
|
||||
)
|
||||
self.sheet.merge_range(0, 0, 0, 4, self.header_text, merge_format)
|
||||
self.sheet.merge_range(
|
||||
1,
|
||||
0,
|
||||
4,
|
||||
0,
|
||||
f" Default score {self.env['score']}",
|
||||
merge_format_subheader,
|
||||
)
|
||||
self.sheet.merge_range(
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
3,
|
||||
"Cross validation",
|
||||
merge_format_subheader_right,
|
||||
)
|
||||
self.sheet.write(
|
||||
1, 4, f"{self.env['n_folds']} Folds", merge_format_subheader_left
|
||||
)
|
||||
self.sheet.merge_range(
|
||||
2,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
"Stratified",
|
||||
merge_format_subheader_right,
|
||||
)
|
||||
self.sheet.write(
|
||||
2,
|
||||
4,
|
||||
f"{'True' if self.env['stratified']=='1' else 'False'}",
|
||||
merge_format_subheader_left,
|
||||
)
|
||||
self.sheet.merge_range(
|
||||
3,
|
||||
1,
|
||||
3,
|
||||
3,
|
||||
"Discretized",
|
||||
merge_format_subheader_right,
|
||||
)
|
||||
self.sheet.write(
|
||||
3,
|
||||
4,
|
||||
f"{'True' if self.env['discretize']=='1' else 'False'}",
|
||||
merge_format_subheader_left,
|
||||
)
|
||||
self.sheet.merge_range(
|
||||
4,
|
||||
1,
|
||||
4,
|
||||
3,
|
||||
"Seeds",
|
||||
merge_format_subheader_right,
|
||||
)
|
||||
self.sheet.write(
|
||||
4, 4, f"{self.env['seeds']}", merge_format_subheader_left
|
||||
)
|
||||
self.update_max_length(len(self.env["seeds"]) + 1)
|
||||
header_cols = [
|
||||
("Dataset", 30),
|
||||
("Samples", 10),
|
||||
("Features", 10),
|
||||
("Classes", 10),
|
||||
("Balance", 50),
|
||||
]
|
||||
bold = self.book.add_format(
|
||||
{
|
||||
"bold": True,
|
||||
"font_size": 14,
|
||||
"bg_color": self.color3,
|
||||
"border": 1,
|
||||
}
|
||||
)
|
||||
i = 0
|
||||
for item, length in header_cols:
|
||||
self.sheet.write(5, i, item, bold)
|
||||
self.sheet.set_column(i, i, length)
|
||||
i += 1
|
||||
|
||||
def footer(self):
|
||||
# set Balance column width to max length
|
||||
self.sheet.set_column(4, 4, self.max_length)
|
||||
self.sheet.freeze_panes(6, 1)
|
||||
self.sheet.hide_gridlines(2)
|
||||
if self.close:
|
||||
self.book.close()
|
||||
|
||||
def print_line(self, result):
    """Write one dataset row to the sheet and advance the row cursor.

    Parameters
    ----------
    result : object
        must expose ``dataset``, ``samples``, ``features``, ``classes``
        and ``balance``; they are written to columns 0-4 of the current
        row in that order
    """
    font_size = 14
    integer = self.book.add_format(
        {"num_format": "#,###", "font_size": font_size, "border": 1}
    )
    normal = self.book.add_format({"font_size": font_size, "border": 1})
    # Alternate the background: color1 on even rows, color2 on odd rows
    background = self.color1 if self.row % 2 == 0 else self.color2
    normal.set_bg_color(background)
    integer.set_bg_color(background)
    columns = (
        ("dataset", normal),
        ("samples", integer),
        ("features", integer),
        ("classes", normal),
        ("balance", normal),
    )
    for col, (field, cell_format) in enumerate(columns):
        self.sheet.write(self.row, col, getattr(result, field), cell_format)
    # Track the longest balance text seen so far
    self.update_max_length(len(result.balance))
    self.row += 1
|
||||
|
||||
def update_max_length(self, value):
    """Keep ``self.max_length`` at the largest value seen so far."""
    self.max_length = max(self.max_length, value)
|
||||
|
||||
def report(self):
|
||||
data_sets = Datasets()
|
||||
color_line = TextColor.LINE1
|
||||
print(color_line, end="")
|
||||
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
||||
print("=" * 30 + " ===== ====== === " + "=" * 40)
|
||||
if self.excel:
|
||||
self.header()
|
||||
if self.output:
|
||||
print(color_line, end="")
|
||||
print(self.header_text)
|
||||
print("")
|
||||
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
||||
print("=" * 30 + " ====== ===== === " + "=" * 60)
|
||||
for dataset in data_sets:
|
||||
X, y = data_sets.load(dataset)
|
||||
attributes = data_sets.get_attributes(dataset)
|
||||
attributes.dataset = dataset
|
||||
if self.excel:
|
||||
self.print_line(attributes)
|
||||
color_line = (
|
||||
TextColor.LINE2
|
||||
if color_line == TextColor.LINE1
|
||||
else TextColor.LINE1
|
||||
)
|
||||
values, counts = np.unique(y, return_counts=True)
|
||||
comp = ""
|
||||
sep = ""
|
||||
for count in counts:
|
||||
comp += f"{sep}{count/sum(counts)*100:5.2f}%"
|
||||
sep = "/ "
|
||||
print(color_line, end="")
|
||||
print(
|
||||
f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} "
|
||||
f"{len(np.unique(y)):3d} {comp:40s}"
|
||||
)
|
||||
if self.output:
|
||||
print(color_line, end="")
|
||||
print(
|
||||
f"{dataset:30s} {attributes.samples:6,d} "
|
||||
f"{attributes.features:5,d} {attributes.classes:3d} "
|
||||
f"{attributes.balance:40s}"
|
||||
)
|
||||
if self.excel:
|
||||
self.footer()
|
||||
|
||||
|
||||
class SQL(BaseReport):
|
||||
@@ -1068,7 +1284,12 @@ class Benchmark:
|
||||
k = Excel(file_name=file_name, book=book)
|
||||
k.report()
|
||||
sheet.freeze_panes(6, 1)
|
||||
sheet.hide_gridlines()
|
||||
sheet.hide_gridlines(2)
|
||||
|
||||
def add_datasets_sheet():
|
||||
# Add datasets sheet
|
||||
re = ReportDatasets(excel=True, book=book)
|
||||
re.report()
|
||||
|
||||
def exreport_output():
|
||||
file_name = os.path.join(
|
||||
@@ -1096,6 +1317,7 @@ class Benchmark:
|
||||
footer()
|
||||
models_files()
|
||||
exreport_output()
|
||||
add_datasets_sheet()
|
||||
book.close()
|
||||
|
||||
|
||||
|
@@ -27,6 +27,7 @@ class Files:
|
||||
exreport_pdf = "Rplots.pdf"
|
||||
benchmark_r = "benchmark.r"
|
||||
dot_env = ".env"
|
||||
datasets_report_excel = "ReportDatasets.xlsx"
|
||||
|
||||
@staticmethod
|
||||
def exreport_output(score):
|
||||
|
@@ -1,10 +1,16 @@
|
||||
from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer, DatasetsArff
|
||||
from .Datasets import (
|
||||
Datasets,
|
||||
DatasetsSurcov,
|
||||
DatasetsTanveer,
|
||||
DatasetsArff,
|
||||
)
|
||||
from .Experiments import Experiment
|
||||
from .Results import Report, Summary
|
||||
from ._version import __version__
|
||||
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020-2022, Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020-2023, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT License"
|
||||
__author_email__ = "ricardo.montanana@alu.uclm.es"
|
||||
|
||||
__all__ = ["Experiment", "Datasets", "Report", "Summary"]
|
||||
# __all__ must list *names* as strings. Listing the value of __version__
# (e.g. "0.7.1") makes a star-import look up an attribute with that name
# and fail, so export the name "__version__" instead.
__all__ = ["Experiment", "Datasets", "Report", "Summary", "__version__"]
|
||||
|
1
benchmark/_version
Normal file
1
benchmark/_version
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.7.1"
|
@@ -21,7 +21,11 @@ def main(args_test=None):
|
||||
if args.grid:
|
||||
args.best = None
|
||||
if args.file is None and args.best is None and args.grid is None:
|
||||
ReportDatasets.report()
|
||||
report = ReportDatasets(args.excel)
|
||||
report.report()
|
||||
if args.excel:
|
||||
is_test = args_test is not None
|
||||
Files.open(report.get_file_name(), is_test)
|
||||
else:
|
||||
if args.best is not None or args.grid is not None:
|
||||
report = ReportBest(args.score, args.model, args.best, args.grid)
|
||||
|
@@ -6,3 +6,4 @@ stratified=0
|
||||
# Source of data Tanveer/Surcov
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
discretize=0
|
||||
|
@@ -4,4 +4,5 @@ n_folds=5
|
||||
model=ODTE
|
||||
stratified=0
|
||||
source_data=Arff
|
||||
seeds=[271, 314, 171]
|
||||
seeds=[271, 314, 171]
|
||||
discretize=1
|
@@ -6,3 +6,4 @@ stratified=0
|
||||
# Source of data Tanveer/Surcov
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
discretize=0
|
||||
|
@@ -5,4 +5,5 @@ model=ODTE
|
||||
stratified=0
|
||||
# Source of data Tanveer/Surcov
|
||||
source_data=Surcov
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
discretize=0
|
1
benchmark/tests/.gitignore
vendored
Normal file
1
benchmark/tests/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
ReportDatasets.xlsx
|
@@ -89,6 +89,15 @@ class BenchmarkTest(TestBase):
|
||||
self.assertTrue(os.path.exists(benchmark.get_tex_file()))
|
||||
self.check_file_file(benchmark.get_tex_file(), "exreport_tex")
|
||||
|
||||
@staticmethod
|
||||
def generate_excel_sheet(test, sheet, file_name):
|
||||
with open(os.path.join("test_files", file_name), "w") as f:
|
||||
for row in range(1, sheet.max_row + 1):
|
||||
for col in range(1, sheet.max_column + 1):
|
||||
value = sheet.cell(row=row, column=col).value
|
||||
if value is not None:
|
||||
print(f'{row};{col};"{value}"', file=f)
|
||||
|
||||
def test_excel_output(self):
|
||||
benchmark = Benchmark("accuracy", visualize=False)
|
||||
benchmark.compile_results()
|
||||
@@ -101,6 +110,3 @@ class BenchmarkTest(TestBase):
|
||||
for sheet_name in book.sheetnames:
|
||||
sheet = book[sheet_name]
|
||||
self.check_excel_sheet(sheet, f"exreport_excel_{sheet_name}")
|
||||
# ExcelTest.generate_excel_sheet(
|
||||
# self, sheet, f"exreport_excel_{sheet_name}"
|
||||
# )
|
||||
|
@@ -179,6 +179,7 @@ class UtilTest(TestBase):
|
||||
"stratified": "0",
|
||||
"source_data": "Tanveer",
|
||||
"seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
|
||||
"discretize": "0",
|
||||
}
|
||||
computed = EnvData().load()
|
||||
self.assertDictEqual(computed, expected)
|
||||
|
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
from openpyxl import load_workbook
|
||||
from ...Utils import Folders
|
||||
from ...Utils import Folders, Files
|
||||
from ..TestBase import TestBase
|
||||
|
||||
|
||||
@@ -43,6 +43,15 @@ class BeReportTest(TestBase):
|
||||
self.assertEqual(stderr.getvalue(), "")
|
||||
self.check_output_file(stdout, "report_datasets")
|
||||
|
||||
def test_be_report_datasets_excel(self):
|
||||
stdout, stderr = self.execute_script("be_report", ["-x", "1"])
|
||||
self.assertEqual(stderr.getvalue(), "")
|
||||
self.check_output_file(stdout, "report_datasets")
|
||||
file_name = os.path.join(os.getcwd(), Files.datasets_report_excel)
|
||||
book = load_workbook(file_name)
|
||||
sheet = book["Datasets"]
|
||||
self.check_excel_sheet(sheet, "exreport_excel_Datasets")
|
||||
|
||||
def test_be_report_best(self):
|
||||
stdout, stderr = self.execute_script(
|
||||
"be_report", ["-s", "accuracy", "-m", "STree", "-b", "1"]
|
||||
|
25
benchmark/tests/test_files/exreport_excel_Datasets.test
Normal file
25
benchmark/tests/test_files/exreport_excel_Datasets.test
Normal file
@@ -0,0 +1,25 @@
|
||||
1;1;"Datasets used in benchmark ver. 0.2.0"
|
||||
2;1;" Default score accuracy"
|
||||
2;2;"Cross validation"
|
||||
2;5;"5 Folds"
|
||||
3;2;"Stratified"
|
||||
3;5;"False"
|
||||
4;2;"Discretized"
|
||||
4;5;"False"
|
||||
5;2;"Seeds"
|
||||
5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
|
||||
6;1;"Dataset"
|
||||
6;2;"Samples"
|
||||
6;3;"Features"
|
||||
6;4;"Classes"
|
||||
6;5;"Balance"
|
||||
7;1;"balance-scale"
|
||||
7;2;"625"
|
||||
7;3;"4"
|
||||
7;4;"3"
|
||||
7;5;" 7.84%/ 46.08%/ 46.08%"
|
||||
8;1;"balloons"
|
||||
8;2;"16"
|
||||
8;3;"4"
|
||||
8;4;"2"
|
||||
8;5;"56.25%/ 43.75%"
|
@@ -1,4 +1,6 @@
|
||||
[94mDataset Sampl. Feat. Cls Balance
|
||||
============================== ===== ====== === ========================================
|
||||
[94mDatasets used in benchmark ver. 0.2.0
|
||||
|
||||
Dataset Sampl. Feat. Cls Balance
|
||||
============================== ====== ===== === ============================================================
|
||||
[96mbalance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
|
||||
[94mballoons 16 4 2 56.25%/ 43.75%
|
||||
|
@@ -2,7 +2,10 @@ pandas
|
||||
scikit-learn
|
||||
scipy
|
||||
odte
|
||||
cython
|
||||
mdlp-discretization
|
||||
mufs
|
||||
bayesclass @ git+ssh://git@github.com/doctorado-ml/bayesclass.git
|
||||
xlsxwriter
|
||||
openpyxl
|
||||
tqdm
|
||||
|
Reference in New Issue
Block a user