mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-18 17:05:54 +00:00
Merge pull request #9 from Doctorado-ML/continuous_features
Continuous features
This commit is contained in:
4
.github/workflows/main.yml
vendored
4
.github/workflows/main.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python ${{ matrix.python }}
|
- name: Set up Python ${{ matrix.python }}
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python }}
|
python-version: ${{ matrix.python }}
|
||||||
# Make dot command available in the environment
|
# Make dot command available in the environment
|
||||||
@@ -53,7 +53,7 @@ jobs:
|
|||||||
coverage run -m unittest -v benchmark.tests
|
coverage run -m unittest -v benchmark.tests
|
||||||
coverage xml
|
coverage xml
|
||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v1
|
uses: codecov/codecov-action@v3
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.CODECOV_TOKEN }}
|
token: ${{ secrets.CODECOV_TOKEN }}
|
||||||
files: ./coverage.xml
|
files: ./coverage.xml
|
||||||
|
@@ -123,6 +123,15 @@ class Arguments(argparse.ArgumentParser):
|
|||||||
("-p", "--hyperparameters"),
|
("-p", "--hyperparameters"),
|
||||||
{"type": str, "required": False, "default": "{}"},
|
{"type": str, "required": False, "default": "{}"},
|
||||||
],
|
],
|
||||||
|
"ignore_nan": [
|
||||||
|
("--ignore-nan",),
|
||||||
|
{
|
||||||
|
"default": False,
|
||||||
|
"action": "store_true",
|
||||||
|
"required": False,
|
||||||
|
"help": "Ignore nan results",
|
||||||
|
},
|
||||||
|
],
|
||||||
"key": [
|
"key": [
|
||||||
("-k", "--key"),
|
("-k", "--key"),
|
||||||
{
|
{
|
||||||
|
@@ -2,10 +2,11 @@ import os
|
|||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import json
|
||||||
from scipy.io import arff
|
from scipy.io import arff
|
||||||
from .Utils import Files
|
from .Utils import Files
|
||||||
from .Arguments import EnvData
|
from .Arguments import EnvData
|
||||||
from mdlp.discretization import MDLP
|
from fimdlp.mdlp import FImdlp
|
||||||
|
|
||||||
|
|
||||||
class Diterator:
|
class Diterator:
|
||||||
@@ -27,6 +28,12 @@ class DatasetsArff:
|
|||||||
def folder():
|
def folder():
|
||||||
return "datasets"
|
return "datasets"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_range_features(X, c_features):
|
||||||
|
if c_features.strip() == "all":
|
||||||
|
return list(range(X.shape[1]))
|
||||||
|
return json.loads(c_features)
|
||||||
|
|
||||||
def load(self, name, class_name):
|
def load(self, name, class_name):
|
||||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||||
data = arff.loadarff(file_name)
|
data = arff.loadarff(file_name)
|
||||||
@@ -34,7 +41,7 @@ class DatasetsArff:
|
|||||||
df.dropna(axis=0, how="any", inplace=True)
|
df.dropna(axis=0, how="any", inplace=True)
|
||||||
self.dataset = df
|
self.dataset = df
|
||||||
X = df.drop(class_name, axis=1)
|
X = df.drop(class_name, axis=1)
|
||||||
self.features = X.columns
|
self.features = X.columns.to_list()
|
||||||
self.class_name = class_name
|
self.class_name = class_name
|
||||||
y, _ = pd.factorize(df[class_name])
|
y, _ = pd.factorize(df[class_name])
|
||||||
X = X.to_numpy()
|
X = X.to_numpy()
|
||||||
@@ -50,6 +57,10 @@ class DatasetsTanveer:
|
|||||||
def folder():
|
def folder():
|
||||||
return "data"
|
return "data"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_range_features(X, name):
|
||||||
|
return []
|
||||||
|
|
||||||
def load(self, name, *args):
|
def load(self, name, *args):
|
||||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||||
data = pd.read_csv(
|
data = pd.read_csv(
|
||||||
@@ -75,6 +86,10 @@ class DatasetsSurcov:
|
|||||||
def folder():
|
def folder():
|
||||||
return "datasets"
|
return "datasets"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_range_features(X, name):
|
||||||
|
return []
|
||||||
|
|
||||||
def load(self, name, *args):
|
def load(self, name, *args):
|
||||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||||
data = pd.read_csv(
|
data = pd.read_csv(
|
||||||
@@ -102,16 +117,16 @@ class Datasets:
|
|||||||
)
|
)
|
||||||
self.discretize = envData["discretize"] == "1"
|
self.discretize = envData["discretize"] == "1"
|
||||||
self.dataset = source_name()
|
self.dataset = source_name()
|
||||||
self.class_names = []
|
|
||||||
self.data_sets = []
|
|
||||||
# initialize self.class_names & self.data_sets
|
# initialize self.class_names & self.data_sets
|
||||||
class_names, sets = self._init_names(dataset_name)
|
class_names, sets = self._init_names(dataset_name)
|
||||||
self.class_names = class_names
|
self.class_names = class_names
|
||||||
self.data_sets = sets
|
self.data_sets = sets
|
||||||
|
self.states = {} # states of discretized variables
|
||||||
|
|
||||||
def _init_names(self, dataset_name):
|
def _init_names(self, dataset_name):
|
||||||
file_name = os.path.join(self.dataset.folder(), Files.index)
|
file_name = os.path.join(self.dataset.folder(), Files.index)
|
||||||
default_class = "class"
|
default_class = "class"
|
||||||
|
self.continuous_features = {}
|
||||||
with open(file_name) as f:
|
with open(file_name) as f:
|
||||||
sets = f.read().splitlines()
|
sets = f.read().splitlines()
|
||||||
class_names = [default_class] * len(sets)
|
class_names = [default_class] * len(sets)
|
||||||
@@ -119,10 +134,14 @@ class Datasets:
|
|||||||
result = []
|
result = []
|
||||||
class_names = []
|
class_names = []
|
||||||
for data in sets:
|
for data in sets:
|
||||||
name, class_name = data.split(",")
|
name, class_name, features = data.split(",", 2)
|
||||||
result.append(name)
|
result.append(name)
|
||||||
class_names.append(class_name)
|
class_names.append(class_name)
|
||||||
|
self.continuous_features[name] = features
|
||||||
sets = result
|
sets = result
|
||||||
|
else:
|
||||||
|
for name in sets:
|
||||||
|
self.continuous_features[name] = None
|
||||||
# Set as dataset list the dataset passed as argument
|
# Set as dataset list the dataset passed as argument
|
||||||
if dataset_name is None:
|
if dataset_name is None:
|
||||||
return class_names, sets
|
return class_names, sets
|
||||||
@@ -137,6 +156,7 @@ class Datasets:
|
|||||||
self.discretize = False
|
self.discretize = False
|
||||||
X, y = self.load(name)
|
X, y = self.load(name)
|
||||||
attr = SimpleNamespace()
|
attr = SimpleNamespace()
|
||||||
|
attr.dataset = name
|
||||||
values, counts = np.unique(y, return_counts=True)
|
values, counts = np.unique(y, return_counts=True)
|
||||||
comp = ""
|
comp = ""
|
||||||
sep = ""
|
sep = ""
|
||||||
@@ -147,24 +167,41 @@ class Datasets:
|
|||||||
attr.classes = len(np.unique(y))
|
attr.classes = len(np.unique(y))
|
||||||
attr.samples = X.shape[0]
|
attr.samples = X.shape[0]
|
||||||
attr.features = X.shape[1]
|
attr.features = X.shape[1]
|
||||||
|
attr.cont_features = len(self.get_continuous_features())
|
||||||
self.discretize = tmp
|
self.discretize = tmp
|
||||||
return attr
|
return attr
|
||||||
|
|
||||||
def get_features(self):
|
def get_features(self):
|
||||||
return self.dataset.features
|
return self.dataset.features
|
||||||
|
|
||||||
|
def get_states(self, name):
|
||||||
|
return self.states[name] if name in self.states else None
|
||||||
|
|
||||||
|
def get_continuous_features(self):
|
||||||
|
return self.continuous_features_dataset
|
||||||
|
|
||||||
def get_class_name(self):
|
def get_class_name(self):
|
||||||
return self.dataset.class_name
|
return self.dataset.class_name
|
||||||
|
|
||||||
def get_dataset(self):
|
def get_dataset(self):
|
||||||
return self.dataset.dataset
|
return self.dataset.dataset
|
||||||
|
|
||||||
|
def build_states(self, name, X):
|
||||||
|
features = self.get_features()
|
||||||
|
self.states[name] = {
|
||||||
|
features[i]: np.unique(X[:, i]).tolist() for i in range(X.shape[1])
|
||||||
|
}
|
||||||
|
|
||||||
def load(self, name, dataframe=False):
|
def load(self, name, dataframe=False):
|
||||||
try:
|
try:
|
||||||
class_name = self.class_names[self.data_sets.index(name)]
|
class_name = self.class_names[self.data_sets.index(name)]
|
||||||
X, y = self.dataset.load(name, class_name)
|
X, y = self.dataset.load(name, class_name)
|
||||||
|
self.continuous_features_dataset = self.dataset.get_range_features(
|
||||||
|
X, self.continuous_features[name]
|
||||||
|
)
|
||||||
if self.discretize:
|
if self.discretize:
|
||||||
X = self.discretize_dataset(X, y)
|
X = self.discretize_dataset(X, y)
|
||||||
|
self.build_states(name, X)
|
||||||
dataset = pd.DataFrame(X, columns=self.get_features())
|
dataset = pd.DataFrame(X, columns=self.get_features())
|
||||||
dataset[self.get_class_name()] = y
|
dataset[self.get_class_name()] = y
|
||||||
self.dataset.dataset = dataset
|
self.dataset.dataset = dataset
|
||||||
@@ -188,9 +225,8 @@ class Datasets:
|
|||||||
-------
|
-------
|
||||||
tuple (X, y) of numpy.ndarray
|
tuple (X, y) of numpy.ndarray
|
||||||
"""
|
"""
|
||||||
discretiz = MDLP(random_state=17, dtype=np.int32)
|
discretiz = FImdlp(algorithm=0)
|
||||||
Xdisc = discretiz.fit_transform(X, y)
|
return discretiz.fit_transform(X, y)
|
||||||
return Xdisc
|
|
||||||
|
|
||||||
def __iter__(self) -> Diterator:
|
def __iter__(self) -> Diterator:
|
||||||
return Diterator(self.data_sets)
|
return Diterator(self.data_sets)
|
||||||
|
@@ -112,6 +112,7 @@ class Experiment:
|
|||||||
platform,
|
platform,
|
||||||
title,
|
title,
|
||||||
progress_bar=True,
|
progress_bar=True,
|
||||||
|
ignore_nan=True,
|
||||||
folds=5,
|
folds=5,
|
||||||
):
|
):
|
||||||
today = datetime.now()
|
today = datetime.now()
|
||||||
@@ -131,6 +132,7 @@ class Experiment:
|
|||||||
self.score_name = score_name
|
self.score_name = score_name
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
self.title = title
|
self.title = title
|
||||||
|
self.ignore_nan = ignore_nan
|
||||||
self.stratified = stratified == "1"
|
self.stratified = stratified == "1"
|
||||||
self.stratified_class = StratifiedKFold if self.stratified else KFold
|
self.stratified_class = StratifiedKFold if self.stratified else KFold
|
||||||
self.datasets = datasets
|
self.datasets = datasets
|
||||||
@@ -184,7 +186,14 @@ class Experiment:
|
|||||||
self.leaves = []
|
self.leaves = []
|
||||||
self.depths = []
|
self.depths = []
|
||||||
|
|
||||||
def _n_fold_crossval(self, X, y, hyperparameters):
|
def _build_fit_params(self, name):
|
||||||
|
states = self.datasets.get_states(name)
|
||||||
|
if states is None:
|
||||||
|
return None
|
||||||
|
features = self.datasets.get_features()
|
||||||
|
return {"state_names": states, "features": features}
|
||||||
|
|
||||||
|
def _n_fold_crossval(self, name, X, y, hyperparameters):
|
||||||
if self.scores != []:
|
if self.scores != []:
|
||||||
raise ValueError("Must init experiment before!")
|
raise ValueError("Must init experiment before!")
|
||||||
loop = tqdm(
|
loop = tqdm(
|
||||||
@@ -201,6 +210,7 @@ class Experiment:
|
|||||||
shuffle=True, random_state=random_state, n_splits=self.folds
|
shuffle=True, random_state=random_state, n_splits=self.folds
|
||||||
)
|
)
|
||||||
clf = self._build_classifier(random_state, hyperparameters)
|
clf = self._build_classifier(random_state, hyperparameters)
|
||||||
|
fit_params = self._build_fit_params(name)
|
||||||
self.version = Models.get_version(self.model_name, clf)
|
self.version = Models.get_version(self.model_name, clf)
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
@@ -209,11 +219,19 @@ class Experiment:
|
|||||||
X,
|
X,
|
||||||
y,
|
y,
|
||||||
cv=kfold,
|
cv=kfold,
|
||||||
|
fit_params=fit_params,
|
||||||
return_estimator=True,
|
return_estimator=True,
|
||||||
scoring=self.score_name,
|
scoring=self.score_name,
|
||||||
)
|
)
|
||||||
self.scores.append(res["test_score"])
|
if np.isnan(res["test_score"]).any():
|
||||||
self.times.append(res["fit_time"])
|
if not self.ignore_nan:
|
||||||
|
print(res["test_score"])
|
||||||
|
raise ValueError("NaN in results")
|
||||||
|
results = res["test_score"][~np.isnan(res["test_score"])]
|
||||||
|
else:
|
||||||
|
results = res["test_score"]
|
||||||
|
self.scores.extend(results)
|
||||||
|
self.times.extend(res["fit_time"])
|
||||||
for result_item in res["estimator"]:
|
for result_item in res["estimator"]:
|
||||||
nodes_item, leaves_item, depth_item = Models.get_complexity(
|
nodes_item, leaves_item, depth_item = Models.get_complexity(
|
||||||
self.model_name, result_item
|
self.model_name, result_item
|
||||||
@@ -273,7 +291,7 @@ class Experiment:
|
|||||||
n_classes = len(np.unique(y))
|
n_classes = len(np.unique(y))
|
||||||
hyperparameters = self.hyperparameters_dict[name][1]
|
hyperparameters = self.hyperparameters_dict[name][1]
|
||||||
self._init_experiment()
|
self._init_experiment()
|
||||||
self._n_fold_crossval(X, y, hyperparameters)
|
self._n_fold_crossval(name, X, y, hyperparameters)
|
||||||
self._add_results(name, hyperparameters, samp, feat, n_classes)
|
self._add_results(name, hyperparameters, samp, feat, n_classes)
|
||||||
self._output_results()
|
self._output_results()
|
||||||
self.duration = time.time() - now
|
self.duration = time.time() - now
|
||||||
|
@@ -15,6 +15,24 @@ from xgboost import XGBClassifier
|
|||||||
import sklearn
|
import sklearn
|
||||||
import xgboost
|
import xgboost
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
class MockModel(SVC):
|
||||||
|
# Only used for testing
|
||||||
|
def predict(self, X):
|
||||||
|
if random.random() < 0.1:
|
||||||
|
return [float("NaN")] * len(X)
|
||||||
|
return super().predict(X)
|
||||||
|
|
||||||
|
def nodes_leaves(self):
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
def fit(self, X, y, **kwargs):
|
||||||
|
kwargs.pop("state_names", None)
|
||||||
|
kwargs.pop("features", None)
|
||||||
|
return super().fit(X, y, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class Models:
|
class Models:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -22,27 +40,27 @@ class Models:
|
|||||||
return {
|
return {
|
||||||
"STree": Stree(random_state=random_state),
|
"STree": Stree(random_state=random_state),
|
||||||
"TAN": TAN(random_state=random_state),
|
"TAN": TAN(random_state=random_state),
|
||||||
"KDB": KDB(k=3),
|
"KDB": KDB(k=2),
|
||||||
"AODE": AODE(random_state=random_state),
|
"AODE": AODE(random_state=random_state),
|
||||||
"Cart": DecisionTreeClassifier(random_state=random_state),
|
"Cart": DecisionTreeClassifier(random_state=random_state),
|
||||||
"ExtraTree": ExtraTreeClassifier(random_state=random_state),
|
"ExtraTree": ExtraTreeClassifier(random_state=random_state),
|
||||||
"Wodt": Wodt(random_state=random_state),
|
"Wodt": Wodt(random_state=random_state),
|
||||||
"SVC": SVC(random_state=random_state),
|
"SVC": SVC(random_state=random_state),
|
||||||
"ODTE": Odte(
|
"ODTE": Odte(
|
||||||
base_estimator=Stree(random_state=random_state),
|
estimator=Stree(random_state=random_state),
|
||||||
random_state=random_state,
|
random_state=random_state,
|
||||||
),
|
),
|
||||||
"BaggingStree": BaggingClassifier(
|
"BaggingStree": BaggingClassifier(
|
||||||
base_estimator=Stree(random_state=random_state),
|
estimator=Stree(random_state=random_state),
|
||||||
random_state=random_state,
|
random_state=random_state,
|
||||||
),
|
),
|
||||||
"BaggingWodt": BaggingClassifier(
|
"BaggingWodt": BaggingClassifier(
|
||||||
base_estimator=Wodt(random_state=random_state),
|
estimator=Wodt(random_state=random_state),
|
||||||
random_state=random_state,
|
random_state=random_state,
|
||||||
),
|
),
|
||||||
"XGBoost": XGBClassifier(random_state=random_state),
|
"XGBoost": XGBClassifier(random_state=random_state),
|
||||||
"AdaBoostStree": AdaBoostClassifier(
|
"AdaBoostStree": AdaBoostClassifier(
|
||||||
base_estimator=Stree(
|
estimator=Stree(
|
||||||
random_state=random_state,
|
random_state=random_state,
|
||||||
),
|
),
|
||||||
algorithm="SAMME",
|
algorithm="SAMME",
|
||||||
@@ -50,6 +68,7 @@ class Models:
|
|||||||
),
|
),
|
||||||
"GBC": GradientBoostingClassifier(random_state=random_state),
|
"GBC": GradientBoostingClassifier(random_state=random_state),
|
||||||
"RandomForest": RandomForestClassifier(random_state=random_state),
|
"RandomForest": RandomForestClassifier(random_state=random_state),
|
||||||
|
"Mock": MockModel(random_state=random_state),
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@@ -684,7 +684,7 @@ class ReportDatasets:
|
|||||||
"bg_color": self.color1,
|
"bg_color": self.color1,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
self.sheet.merge_range(0, 0, 0, 4, self.header_text, merge_format)
|
self.sheet.merge_range(0, 0, 0, 5, self.header_text, merge_format)
|
||||||
self.sheet.merge_range(
|
self.sheet.merge_range(
|
||||||
1,
|
1,
|
||||||
0,
|
0,
|
||||||
@@ -697,24 +697,24 @@ class ReportDatasets:
|
|||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
3,
|
4,
|
||||||
"Cross validation",
|
"Cross validation",
|
||||||
merge_format_subheader_right,
|
merge_format_subheader_right,
|
||||||
)
|
)
|
||||||
self.sheet.write(
|
self.sheet.write(
|
||||||
1, 4, f"{self.env['n_folds']} Folds", merge_format_subheader_left
|
1, 5, f"{self.env['n_folds']} Folds", merge_format_subheader_left
|
||||||
)
|
)
|
||||||
self.sheet.merge_range(
|
self.sheet.merge_range(
|
||||||
2,
|
2,
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
4,
|
||||||
"Stratified",
|
"Stratified",
|
||||||
merge_format_subheader_right,
|
merge_format_subheader_right,
|
||||||
)
|
)
|
||||||
self.sheet.write(
|
self.sheet.write(
|
||||||
2,
|
2,
|
||||||
4,
|
5,
|
||||||
f"{'True' if self.env['stratified']=='1' else 'False'}",
|
f"{'True' if self.env['stratified']=='1' else 'False'}",
|
||||||
merge_format_subheader_left,
|
merge_format_subheader_left,
|
||||||
)
|
)
|
||||||
@@ -722,13 +722,13 @@ class ReportDatasets:
|
|||||||
3,
|
3,
|
||||||
1,
|
1,
|
||||||
3,
|
3,
|
||||||
3,
|
4,
|
||||||
"Discretized",
|
"Discretized",
|
||||||
merge_format_subheader_right,
|
merge_format_subheader_right,
|
||||||
)
|
)
|
||||||
self.sheet.write(
|
self.sheet.write(
|
||||||
3,
|
3,
|
||||||
4,
|
5,
|
||||||
f"{'True' if self.env['discretize']=='1' else 'False'}",
|
f"{'True' if self.env['discretize']=='1' else 'False'}",
|
||||||
merge_format_subheader_left,
|
merge_format_subheader_left,
|
||||||
)
|
)
|
||||||
@@ -736,18 +736,19 @@ class ReportDatasets:
|
|||||||
4,
|
4,
|
||||||
1,
|
1,
|
||||||
4,
|
4,
|
||||||
3,
|
4,
|
||||||
"Seeds",
|
"Seeds",
|
||||||
merge_format_subheader_right,
|
merge_format_subheader_right,
|
||||||
)
|
)
|
||||||
self.sheet.write(
|
self.sheet.write(
|
||||||
4, 4, f"{self.env['seeds']}", merge_format_subheader_left
|
4, 5, f"{self.env['seeds']}", merge_format_subheader_left
|
||||||
)
|
)
|
||||||
self.update_max_length(len(self.env["seeds"]) + 1)
|
self.update_max_length(len(self.env["seeds"]) + 1)
|
||||||
header_cols = [
|
header_cols = [
|
||||||
("Dataset", 30),
|
("Dataset", 30),
|
||||||
("Samples", 10),
|
("Samples", 10),
|
||||||
("Features", 10),
|
("Features", 10),
|
||||||
|
("Continuous", 10),
|
||||||
("Classes", 10),
|
("Classes", 10),
|
||||||
("Balance", 50),
|
("Balance", 50),
|
||||||
]
|
]
|
||||||
@@ -767,7 +768,7 @@ class ReportDatasets:
|
|||||||
|
|
||||||
def footer(self):
|
def footer(self):
|
||||||
# set Balance column width to max length
|
# set Balance column width to max length
|
||||||
self.sheet.set_column(4, 4, self.max_length)
|
self.sheet.set_column(5, 5, self.max_length)
|
||||||
self.sheet.freeze_panes(6, 1)
|
self.sheet.freeze_panes(6, 1)
|
||||||
self.sheet.hide_gridlines(2)
|
self.sheet.hide_gridlines(2)
|
||||||
if self.close:
|
if self.close:
|
||||||
@@ -789,8 +790,9 @@ class ReportDatasets:
|
|||||||
self.sheet.write(self.row, col, result.dataset, normal)
|
self.sheet.write(self.row, col, result.dataset, normal)
|
||||||
self.sheet.write(self.row, col + 1, result.samples, integer)
|
self.sheet.write(self.row, col + 1, result.samples, integer)
|
||||||
self.sheet.write(self.row, col + 2, result.features, integer)
|
self.sheet.write(self.row, col + 2, result.features, integer)
|
||||||
self.sheet.write(self.row, col + 3, result.classes, normal)
|
self.sheet.write(self.row, col + 3, result.cont_features, integer)
|
||||||
self.sheet.write(self.row, col + 4, result.balance, normal)
|
self.sheet.write(self.row, col + 4, result.classes, normal)
|
||||||
|
self.sheet.write(self.row, col + 5, result.balance, normal)
|
||||||
self.update_max_length(len(result.balance))
|
self.update_max_length(len(result.balance))
|
||||||
self.row += 1
|
self.row += 1
|
||||||
|
|
||||||
@@ -807,11 +809,11 @@ class ReportDatasets:
|
|||||||
print(color_line, end="")
|
print(color_line, end="")
|
||||||
print(self.header_text)
|
print(self.header_text)
|
||||||
print("")
|
print("")
|
||||||
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
print(f"{'Dataset':30s} Sampl. Feat. Cont Cls Balance")
|
||||||
print("=" * 30 + " ====== ===== === " + "=" * 60)
|
print("=" * 30 + " ====== ===== ==== === " + "=" * 60)
|
||||||
for dataset in data_sets:
|
for dataset in data_sets:
|
||||||
attributes = data_sets.get_attributes(dataset)
|
attributes = data_sets.get_attributes(dataset)
|
||||||
attributes.dataset = dataset
|
|
||||||
if self.excel:
|
if self.excel:
|
||||||
self.print_line(attributes)
|
self.print_line(attributes)
|
||||||
color_line = (
|
color_line = (
|
||||||
@@ -823,8 +825,8 @@ class ReportDatasets:
|
|||||||
print(color_line, end="")
|
print(color_line, end="")
|
||||||
print(
|
print(
|
||||||
f"{dataset:30s} {attributes.samples:6,d} "
|
f"{dataset:30s} {attributes.samples:6,d} "
|
||||||
f"{attributes.features:5,d} {attributes.classes:3d} "
|
f"{attributes.features:5,d} {attributes.cont_features:4,d}"
|
||||||
f"{attributes.balance:40s}"
|
f" {attributes.classes:3d} {attributes.balance:40s}"
|
||||||
)
|
)
|
||||||
if self.excel:
|
if self.excel:
|
||||||
self.footer()
|
self.footer()
|
||||||
|
@@ -46,7 +46,7 @@ def main(args_test=None):
|
|||||||
'{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": '
|
'{"C": 7, "gamma": 0.1, "kernel": "rbf", "multiclass_strategy": '
|
||||||
'"ovr"}',
|
'"ovr"}',
|
||||||
'{"C": 5, "kernel": "rbf", "gamma": "auto"}',
|
'{"C": 5, "kernel": "rbf", "gamma": "auto"}',
|
||||||
'{"C": 0.05, "max_iter": 10000.0, "kernel": "liblinear", '
|
'{"C": 0.05, "max_iter": 10000, "kernel": "liblinear", '
|
||||||
'"multiclass_strategy": "ovr"}',
|
'"multiclass_strategy": "ovr"}',
|
||||||
'{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
|
'{"C":0.0275, "kernel": "liblinear", "multiclass_strategy": "ovr"}',
|
||||||
'{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": '
|
'{"C": 7, "gamma": 10.0, "kernel": "rbf", "multiclass_strategy": '
|
||||||
@@ -97,7 +97,7 @@ def main(args_test=None):
|
|||||||
for item in results:
|
for item in results:
|
||||||
results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
|
results_tmp = {"n_jobs": [-1], "n_estimators": [100]}
|
||||||
for key, value in results[item].items():
|
for key, value in results[item].items():
|
||||||
new_key = f"base_estimator__{key}"
|
new_key = f"estimator__{key}"
|
||||||
try:
|
try:
|
||||||
results_tmp[new_key] = sorted(value)
|
results_tmp[new_key] = sorted(value)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@@ -111,6 +111,7 @@ def main(args_test=None):
|
|||||||
t2 = sorted([x for x in value if isinstance(x, str)])
|
t2 = sorted([x for x in value if isinstance(x, str)])
|
||||||
results_tmp[new_key] = t1 + t2
|
results_tmp[new_key] = t1 + t2
|
||||||
output.append(results_tmp)
|
output.append(results_tmp)
|
||||||
|
|
||||||
# save results
|
# save results
|
||||||
file_name = Files.grid_input(args.score, args.model)
|
file_name = Files.grid_input(args.score, args.model)
|
||||||
file_output = os.path.join(Folders.results, file_name)
|
file_output = os.path.join(Folders.results, file_name)
|
||||||
|
@@ -13,7 +13,7 @@ def main(args_test=None):
|
|||||||
arguments = Arguments(prog="be_main")
|
arguments = Arguments(prog="be_main")
|
||||||
arguments.xset("stratified").xset("score").xset("model", mandatory=True)
|
arguments.xset("stratified").xset("score").xset("model", mandatory=True)
|
||||||
arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
|
arguments.xset("n_folds").xset("platform").xset("quiet").xset("title")
|
||||||
arguments.xset("report")
|
arguments.xset("report").xset("ignore_nan")
|
||||||
arguments.add_exclusive(
|
arguments.add_exclusive(
|
||||||
["grid_paramfile", "best_paramfile", "hyperparameters"]
|
["grid_paramfile", "best_paramfile", "hyperparameters"]
|
||||||
)
|
)
|
||||||
@@ -35,6 +35,7 @@ def main(args_test=None):
|
|||||||
grid_paramfile=args.grid_paramfile,
|
grid_paramfile=args.grid_paramfile,
|
||||||
progress_bar=not args.quiet,
|
progress_bar=not args.quiet,
|
||||||
platform=args.platform,
|
platform=args.platform,
|
||||||
|
ignore_nan=args.ignore_nan,
|
||||||
title=args.title,
|
title=args.title,
|
||||||
folds=args.n_folds,
|
folds=args.n_folds,
|
||||||
)
|
)
|
||||||
|
@@ -6,4 +6,4 @@ stratified=0
|
|||||||
# Source of data Tanveer/Surcov
|
# Source of data Tanveer/Surcov
|
||||||
source_data=Tanveer
|
source_data=Tanveer
|
||||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||||
discretize=0
|
discretize=0
|
@@ -6,4 +6,4 @@ stratified=0
|
|||||||
# Source of data Tanveer/Surcov
|
# Source of data Tanveer/Surcov
|
||||||
source_data=Tanveer
|
source_data=Tanveer
|
||||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||||
discretize=0
|
discretize=0
|
@@ -18,7 +18,7 @@ class BestResultTest(TestBase):
|
|||||||
"C": 7,
|
"C": 7,
|
||||||
"gamma": 0.1,
|
"gamma": 0.1,
|
||||||
"kernel": "rbf",
|
"kernel": "rbf",
|
||||||
"max_iter": 10000.0,
|
"max_iter": 10000,
|
||||||
"multiclass_strategy": "ovr",
|
"multiclass_strategy": "ovr",
|
||||||
},
|
},
|
||||||
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
|
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
|
||||||
|
@@ -1,4 +1,3 @@
|
|||||||
import shutil
|
|
||||||
from .TestBase import TestBase
|
from .TestBase import TestBase
|
||||||
from ..Experiments import Randomized
|
from ..Experiments import Randomized
|
||||||
from ..Datasets import Datasets
|
from ..Datasets import Datasets
|
||||||
@@ -17,10 +16,6 @@ class DatasetTest(TestBase):
|
|||||||
self.set_env(".env.dist")
|
self.set_env(".env.dist")
|
||||||
return super().tearDown()
|
return super().tearDown()
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def set_env(env):
|
|
||||||
shutil.copy(env, ".env")
|
|
||||||
|
|
||||||
def test_Randomized(self):
|
def test_Randomized(self):
|
||||||
expected = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
expected = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||||
self.assertSequenceEqual(Randomized.seeds(), expected)
|
self.assertSequenceEqual(Randomized.seeds(), expected)
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
|
from io import StringIO
|
||||||
|
from unittest.mock import patch
|
||||||
from .TestBase import TestBase
|
from .TestBase import TestBase
|
||||||
from ..Experiments import Experiment
|
from ..Experiments import Experiment
|
||||||
from ..Datasets import Datasets
|
from ..Datasets import Datasets
|
||||||
@@ -8,10 +10,12 @@ class ExperimentTest(TestBase):
|
|||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.exp = self.build_exp()
|
self.exp = self.build_exp()
|
||||||
|
|
||||||
def build_exp(self, hyperparams=False, grid=False):
|
def build_exp(
|
||||||
|
self, hyperparams=False, grid=False, model="STree", ignore_nan=False
|
||||||
|
):
|
||||||
params = {
|
params = {
|
||||||
"score_name": "accuracy",
|
"score_name": "accuracy",
|
||||||
"model_name": "STree",
|
"model_name": model,
|
||||||
"stratified": "0",
|
"stratified": "0",
|
||||||
"datasets": Datasets(),
|
"datasets": Datasets(),
|
||||||
"hyperparams_dict": "{}",
|
"hyperparams_dict": "{}",
|
||||||
@@ -21,6 +25,7 @@ class ExperimentTest(TestBase):
|
|||||||
"title": "Test",
|
"title": "Test",
|
||||||
"progress_bar": False,
|
"progress_bar": False,
|
||||||
"folds": 2,
|
"folds": 2,
|
||||||
|
"ignore_nan": ignore_nan,
|
||||||
}
|
}
|
||||||
return Experiment(**params)
|
return Experiment(**params)
|
||||||
|
|
||||||
@@ -31,6 +36,7 @@ class ExperimentTest(TestBase):
|
|||||||
],
|
],
|
||||||
".",
|
".",
|
||||||
)
|
)
|
||||||
|
self.set_env(".env.dist")
|
||||||
return super().tearDown()
|
return super().tearDown()
|
||||||
|
|
||||||
def test_build_hyperparams_file(self):
|
def test_build_hyperparams_file(self):
|
||||||
@@ -46,7 +52,7 @@ class ExperimentTest(TestBase):
|
|||||||
"C": 7,
|
"C": 7,
|
||||||
"gamma": 0.1,
|
"gamma": 0.1,
|
||||||
"kernel": "rbf",
|
"kernel": "rbf",
|
||||||
"max_iter": 10000.0,
|
"max_iter": 10000,
|
||||||
"multiclass_strategy": "ovr",
|
"multiclass_strategy": "ovr",
|
||||||
},
|
},
|
||||||
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
|
"results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json",
|
||||||
@@ -89,7 +95,7 @@ class ExperimentTest(TestBase):
|
|||||||
def test_exception_n_fold_crossval(self):
|
def test_exception_n_fold_crossval(self):
|
||||||
self.exp.do_experiment()
|
self.exp.do_experiment()
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
self.exp._n_fold_crossval([], [], {})
|
self.exp._n_fold_crossval("", [], [], {})
|
||||||
|
|
||||||
def test_do_experiment(self):
|
def test_do_experiment(self):
|
||||||
self.exp.do_experiment()
|
self.exp.do_experiment()
|
||||||
@@ -131,3 +137,39 @@ class ExperimentTest(TestBase):
|
|||||||
):
|
):
|
||||||
for key, value in expected_result.items():
|
for key, value in expected_result.items():
|
||||||
self.assertEqual(computed_result[key], value)
|
self.assertEqual(computed_result[key], value)
|
||||||
|
|
||||||
|
def test_build_fit_parameters(self):
|
||||||
|
self.set_env(".env.arff")
|
||||||
|
expected = {
|
||||||
|
"state_names": {
|
||||||
|
"sepallength": [0, 1, 2],
|
||||||
|
"sepalwidth": [0, 1, 3, 4],
|
||||||
|
"petallength": [0, 1, 2, 3],
|
||||||
|
"petalwidth": [0, 1, 2, 3],
|
||||||
|
},
|
||||||
|
"features": [
|
||||||
|
"sepallength",
|
||||||
|
"sepalwidth",
|
||||||
|
"petallength",
|
||||||
|
"petalwidth",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
exp = self.build_exp(model="TAN")
|
||||||
|
X, y = exp.datasets.load("iris")
|
||||||
|
computed = exp._build_fit_params("iris")
|
||||||
|
for key, value in expected["state_names"].items():
|
||||||
|
self.assertEqual(computed["state_names"][key], value)
|
||||||
|
for feature in expected["features"]:
|
||||||
|
self.assertIn(feature, computed["features"])
|
||||||
|
|
||||||
|
@patch("sys.stdout", new_callable=StringIO)
|
||||||
|
def test_experiment_with_nan_not_ignored(self, mock_output):
|
||||||
|
exp = self.build_exp(model="Mock")
|
||||||
|
self.assertRaises(ValueError, exp.do_experiment)
|
||||||
|
output_text = mock_output.getvalue().splitlines()
|
||||||
|
expected = "[ nan 0.8974359]"
|
||||||
|
self.assertEqual(expected, output_text[0])
|
||||||
|
|
||||||
|
def test_experiment_with_nan_ignored(self):
|
||||||
|
self.exp = self.build_exp(model="Mock", ignore_nan=True)
|
||||||
|
self.exp.do_experiment()
|
||||||
|
@@ -70,19 +70,19 @@ class ModelTest(TestBase):
|
|||||||
def test_BaggingStree(self):
|
def test_BaggingStree(self):
|
||||||
clf = Models.get_model("BaggingStree")
|
clf = Models.get_model("BaggingStree")
|
||||||
self.assertIsInstance(clf, BaggingClassifier)
|
self.assertIsInstance(clf, BaggingClassifier)
|
||||||
clf_base = clf.base_estimator
|
clf_base = clf.estimator
|
||||||
self.assertIsInstance(clf_base, Stree)
|
self.assertIsInstance(clf_base, Stree)
|
||||||
|
|
||||||
def test_BaggingWodt(self):
|
def test_BaggingWodt(self):
|
||||||
clf = Models.get_model("BaggingWodt")
|
clf = Models.get_model("BaggingWodt")
|
||||||
self.assertIsInstance(clf, BaggingClassifier)
|
self.assertIsInstance(clf, BaggingClassifier)
|
||||||
clf_base = clf.base_estimator
|
clf_base = clf.estimator
|
||||||
self.assertIsInstance(clf_base, Wodt)
|
self.assertIsInstance(clf_base, Wodt)
|
||||||
|
|
||||||
def test_AdaBoostStree(self):
|
def test_AdaBoostStree(self):
|
||||||
clf = Models.get_model("AdaBoostStree")
|
clf = Models.get_model("AdaBoostStree")
|
||||||
self.assertIsInstance(clf, AdaBoostClassifier)
|
self.assertIsInstance(clf, AdaBoostClassifier)
|
||||||
clf_base = clf.base_estimator
|
clf_base = clf.estimator
|
||||||
self.assertIsInstance(clf_base, Stree)
|
self.assertIsInstance(clf_base, Stree)
|
||||||
|
|
||||||
def test_unknown_classifier(self):
|
def test_unknown_classifier(self):
|
||||||
|
@@ -4,6 +4,7 @@ import pathlib
|
|||||||
import sys
|
import sys
|
||||||
import csv
|
import csv
|
||||||
import unittest
|
import unittest
|
||||||
|
import shutil
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
@@ -19,6 +20,10 @@ class TestBase(unittest.TestCase):
|
|||||||
self.stree_version = "1.2.4"
|
self.stree_version = "1.2.4"
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def set_env(env):
|
||||||
|
shutil.copy(env, ".env")
|
||||||
|
|
||||||
def remove_files(self, files, folder):
|
def remove_files(self, files, folder):
|
||||||
for file_name in files:
|
for file_name in files:
|
||||||
file_name = os.path.join(folder, file_name)
|
file_name = os.path.join(folder, file_name)
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
iris,class
|
iris,class,all
|
||||||
wine,class
|
wine,class,[0, 1]
|
||||||
|
@@ -1 +1 @@
|
|||||||
{"balance-scale": [0.98, {"splitter": "best", "max_features": "auto"}, "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"], "balloons": [0.86, {"C": 7, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000.0, "multiclass_strategy": "ovr"}, "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"]}
|
{"balance-scale": [0.98, {"splitter": "best", "max_features": "auto"}, "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json"], "balloons": [0.86, {"C": 7, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000, "multiclass_strategy": "ovr"}, "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json"]}
|
@@ -17,10 +17,10 @@
|
|||||||
"features": 4,
|
"features": 4,
|
||||||
"classes": 3,
|
"classes": 3,
|
||||||
"hyperparameters": {
|
"hyperparameters": {
|
||||||
"C": 10000.0,
|
"C": 10000,
|
||||||
"gamma": 0.1,
|
"gamma": 0.1,
|
||||||
"kernel": "rbf",
|
"kernel": "rbf",
|
||||||
"max_iter": 10000.0,
|
"max_iter": 10000,
|
||||||
"multiclass_strategy": "ovr"
|
"multiclass_strategy": "ovr"
|
||||||
},
|
},
|
||||||
"nodes": 7.0,
|
"nodes": 7.0,
|
||||||
@@ -40,7 +40,7 @@
|
|||||||
"C": 7,
|
"C": 7,
|
||||||
"gamma": 0.1,
|
"gamma": 0.1,
|
||||||
"kernel": "rbf",
|
"kernel": "rbf",
|
||||||
"max_iter": 10000.0,
|
"max_iter": 10000,
|
||||||
"multiclass_strategy": "ovr"
|
"multiclass_strategy": "ovr"
|
||||||
},
|
},
|
||||||
"nodes": 3.0,
|
"nodes": 3.0,
|
||||||
|
@@ -27,7 +27,7 @@ class BePrintStrees(TestBase):
|
|||||||
stdout.getvalue(), f"File {file_name} generated\n"
|
stdout.getvalue(), f"File {file_name} generated\n"
|
||||||
)
|
)
|
||||||
computed_size = os.path.getsize(file_name)
|
computed_size = os.path.getsize(file_name)
|
||||||
self.assertGreater(computed_size, 25000)
|
self.assertGreater(computed_size, 24500)
|
||||||
|
|
||||||
def test_be_print_strees_dataset_color(self):
|
def test_be_print_strees_dataset_color(self):
|
||||||
for name in self.datasets:
|
for name in self.datasets:
|
||||||
|
@@ -6,13 +6,13 @@
|
|||||||
"n_estimators": [
|
"n_estimators": [
|
||||||
100
|
100
|
||||||
],
|
],
|
||||||
"base_estimator__C": [
|
"estimator__C": [
|
||||||
1.0
|
1.0
|
||||||
],
|
],
|
||||||
"base_estimator__kernel": [
|
"estimator__kernel": [
|
||||||
"linear"
|
"linear"
|
||||||
],
|
],
|
||||||
"base_estimator__multiclass_strategy": [
|
"estimator__multiclass_strategy": [
|
||||||
"ovo"
|
"ovo"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -23,7 +23,7 @@
|
|||||||
"n_estimators": [
|
"n_estimators": [
|
||||||
100
|
100
|
||||||
],
|
],
|
||||||
"base_estimator__C": [
|
"estimator__C": [
|
||||||
0.001,
|
0.001,
|
||||||
0.0275,
|
0.0275,
|
||||||
0.05,
|
0.05,
|
||||||
@@ -36,10 +36,10 @@
|
|||||||
7,
|
7,
|
||||||
10000.0
|
10000.0
|
||||||
],
|
],
|
||||||
"base_estimator__kernel": [
|
"estimator__kernel": [
|
||||||
"liblinear"
|
"liblinear"
|
||||||
],
|
],
|
||||||
"base_estimator__multiclass_strategy": [
|
"estimator__multiclass_strategy": [
|
||||||
"ovr"
|
"ovr"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -50,7 +50,7 @@
|
|||||||
"n_estimators": [
|
"n_estimators": [
|
||||||
100
|
100
|
||||||
],
|
],
|
||||||
"base_estimator__C": [
|
"estimator__C": [
|
||||||
0.05,
|
0.05,
|
||||||
1.0,
|
1.0,
|
||||||
1.05,
|
1.05,
|
||||||
@@ -62,7 +62,7 @@
|
|||||||
57,
|
57,
|
||||||
10000.0
|
10000.0
|
||||||
],
|
],
|
||||||
"base_estimator__gamma": [
|
"estimator__gamma": [
|
||||||
0.001,
|
0.001,
|
||||||
0.1,
|
0.1,
|
||||||
0.14,
|
0.14,
|
||||||
@@ -70,10 +70,10 @@
|
|||||||
"auto",
|
"auto",
|
||||||
"scale"
|
"scale"
|
||||||
],
|
],
|
||||||
"base_estimator__kernel": [
|
"estimator__kernel": [
|
||||||
"rbf"
|
"rbf"
|
||||||
],
|
],
|
||||||
"base_estimator__multiclass_strategy": [
|
"estimator__multiclass_strategy": [
|
||||||
"ovr"
|
"ovr"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -84,20 +84,20 @@
|
|||||||
"n_estimators": [
|
"n_estimators": [
|
||||||
100
|
100
|
||||||
],
|
],
|
||||||
"base_estimator__C": [
|
"estimator__C": [
|
||||||
0.05,
|
0.05,
|
||||||
0.2,
|
0.2,
|
||||||
1.0,
|
1.0,
|
||||||
8.25
|
8.25
|
||||||
],
|
],
|
||||||
"base_estimator__gamma": [
|
"estimator__gamma": [
|
||||||
0.1,
|
0.1,
|
||||||
"scale"
|
"scale"
|
||||||
],
|
],
|
||||||
"base_estimator__kernel": [
|
"estimator__kernel": [
|
||||||
"poly"
|
"poly"
|
||||||
],
|
],
|
||||||
"base_estimator__multiclass_strategy": [
|
"estimator__multiclass_strategy": [
|
||||||
"ovo",
|
"ovo",
|
||||||
"ovr"
|
"ovr"
|
||||||
]
|
]
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||||
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
||||||
[96mbalance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
|
[96mbalance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
|
||||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
[94m*************************************************************************************************************************
|
[94m*************************************************************************************************************************
|
||||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
|
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
|
||||||
[94m*************************************************************************************************************************
|
[94m*************************************************************************************************************************
|
||||||
|
@@ -32,7 +32,7 @@
|
|||||||
7;9;"0.0150468069702512"
|
7;9;"0.0150468069702512"
|
||||||
7;10;"0.01404867172241211"
|
7;10;"0.01404867172241211"
|
||||||
7;11;"0.002026269126958884"
|
7;11;"0.002026269126958884"
|
||||||
7;12;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
7;12;"{'C': 10000, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
8;1;"balloons"
|
8;1;"balloons"
|
||||||
8;2;"16"
|
8;2;"16"
|
||||||
8;3;"4"
|
8;3;"4"
|
||||||
@@ -44,5 +44,5 @@
|
|||||||
8;9;"0.2850146195080759"
|
8;9;"0.2850146195080759"
|
||||||
8;10;"0.0008541679382324218"
|
8;10;"0.0008541679382324218"
|
||||||
8;11;"3.629469326417878e-05"
|
8;11;"3.629469326417878e-05"
|
||||||
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
@@ -32,7 +32,7 @@
|
|||||||
7;10;0.0150468069702512
|
7;10;0.0150468069702512
|
||||||
7;11;0.01404867172241211
|
7;11;0.01404867172241211
|
||||||
7;12;0.002026269126958884
|
7;12;0.002026269126958884
|
||||||
7;13;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
7;13;"{'C': 10000, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
8;1;"balloons"
|
8;1;"balloons"
|
||||||
8;2;16
|
8;2;16
|
||||||
8;3;4
|
8;3;4
|
||||||
@@ -45,7 +45,7 @@
|
|||||||
8;10;0.2850146195080759
|
8;10;0.2850146195080759
|
||||||
8;11;0.0008541679382324218
|
8;11;0.0008541679382324218
|
||||||
8;12;3.629469326417878e-05
|
8;12;3.629469326417878e-05
|
||||||
8;13;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
8;13;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
11;2;"✔"
|
11;2;"✔"
|
||||||
11;3;1
|
11;3;1
|
||||||
11;4;"Equal to best"
|
11;4;"Equal to best"
|
||||||
|
@@ -1,25 +1,28 @@
|
|||||||
1;1;"Datasets used in benchmark ver. 0.2.0"
|
1;1;"Datasets used in benchmark ver. 0.4.0"
|
||||||
2;1;" Default score accuracy"
|
2;1;" Default score accuracy"
|
||||||
2;2;"Cross validation"
|
2;2;"Cross validation"
|
||||||
2;5;"5 Folds"
|
2;6;"5 Folds"
|
||||||
3;2;"Stratified"
|
3;2;"Stratified"
|
||||||
3;5;"False"
|
3;6;"False"
|
||||||
4;2;"Discretized"
|
4;2;"Discretized"
|
||||||
4;5;"False"
|
4;6;"False"
|
||||||
5;2;"Seeds"
|
5;2;"Seeds"
|
||||||
5;5;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
|
5;6;"[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]"
|
||||||
6;1;"Dataset"
|
6;1;"Dataset"
|
||||||
6;2;"Samples"
|
6;2;"Samples"
|
||||||
6;3;"Features"
|
6;3;"Features"
|
||||||
6;4;"Classes"
|
6;4;"Continuous"
|
||||||
6;5;"Balance"
|
6;5;"Classes"
|
||||||
|
6;6;"Balance"
|
||||||
7;1;"balance-scale"
|
7;1;"balance-scale"
|
||||||
7;2;"625"
|
7;2;"625"
|
||||||
7;3;"4"
|
7;3;"4"
|
||||||
7;4;"3"
|
7;4;"0"
|
||||||
7;5;" 7.84%/ 46.08%/ 46.08%"
|
7;5;"3"
|
||||||
|
7;6;" 7.84%/ 46.08%/ 46.08%"
|
||||||
8;1;"balloons"
|
8;1;"balloons"
|
||||||
8;2;"16"
|
8;2;"16"
|
||||||
8;3;"4"
|
8;3;"4"
|
||||||
8;4;"2"
|
8;4;"0"
|
||||||
8;5;"56.25%/ 43.75%"
|
8;5;"2"
|
||||||
|
8;6;"56.25%/ 43.75%"
|
||||||
|
@@ -32,7 +32,7 @@
|
|||||||
7;9;"0.0150468069702512"
|
7;9;"0.0150468069702512"
|
||||||
7;10;"0.01404867172241211"
|
7;10;"0.01404867172241211"
|
||||||
7;11;"0.002026269126958884"
|
7;11;"0.002026269126958884"
|
||||||
7;12;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
7;12;"{'C': 10000, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
8;1;"balloons"
|
8;1;"balloons"
|
||||||
8;2;"16"
|
8;2;"16"
|
||||||
8;3;"4"
|
8;3;"4"
|
||||||
@@ -44,5 +44,5 @@
|
|||||||
8;9;"0.2850146195080759"
|
8;9;"0.2850146195080759"
|
||||||
8;10;"0.0008541679382324218"
|
8;10;"0.0008541679382324218"
|
||||||
8;11;"3.629469326417878e-05"
|
8;11;"3.629469326417878e-05"
|
||||||
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}"
|
||||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
||||||
|
@@ -8,8 +8,8 @@
|
|||||||
|
|
||||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||||
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
||||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
[94m*************************************************************************************************************************
|
[94m*************************************************************************************************************************
|
||||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
||||||
[94m*************************************************************************************************************************
|
[94m*************************************************************************************************************************
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
Dataset Score File/Message Hyperparameters
|
Dataset Score File/Message Hyperparameters
|
||||||
============================== ======== ============================================================================ =============================================
|
============================== ======== ============================================================================ =============================================
|
||||||
balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'}
|
balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'}
|
||||||
balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
******************************************************************************************************************************************************************
|
******************************************************************************************************************************************************************
|
||||||
* accuracy compared to STree_default (liblinear-ovr) .: 0.0457 *
|
* accuracy compared to STree_default (liblinear-ovr) .: 0.0457 *
|
||||||
******************************************************************************************************************************************************************
|
******************************************************************************************************************************************************************
|
||||||
|
@@ -8,8 +8,8 @@
|
|||||||
|
|
||||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||||
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
============================== ====== ===== === ======= ======= ======= =============== ================= ===============
|
||||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000, 'multiclass_strategy': 'ovr'}
|
||||||
[94m*************************************************************************************************************************
|
[94m*************************************************************************************************************************
|
||||||
[94m* ✔ Equal to best .....: 1 *
|
[94m* ✔ Equal to best .....: 1 *
|
||||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[94mDatasets used in benchmark ver. 0.2.0
|
[94mDatasets used in benchmark ver. 0.2.0
|
||||||
|
|
||||||
Dataset Sampl. Feat. Cls Balance
|
Dataset Sampl. Feat. Cont Cls Balance
|
||||||
============================== ====== ===== === ============================================================
|
============================== ====== ===== ==== === ============================================================
|
||||||
[96mbalance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
|
[96mbalance-scale 625 4 0 3 7.84%/ 46.08%/ 46.08%
|
||||||
[94mballoons 16 4 2 56.25%/ 43.75%
|
[94mballoons 16 4 0 2 56.25%/ 43.75%
|
||||||
|
@@ -3,7 +3,7 @@ scikit-learn
|
|||||||
scipy
|
scipy
|
||||||
odte
|
odte
|
||||||
cython
|
cython
|
||||||
mdlp-discretization
|
fimdlp
|
||||||
mufs
|
mufs
|
||||||
bayesclass @ git+ssh://git@github.com/doctorado-ml/bayesclass.git
|
bayesclass @ git+ssh://git@github.com/doctorado-ml/bayesclass.git
|
||||||
xlsxwriter
|
xlsxwriter
|
||||||
|
Reference in New Issue
Block a user