From e15ab3dcab46da8bbeb89bb4d2b0e3668ee7f3bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 24 Oct 2022 18:21:08 +0200 Subject: [PATCH 1/8] Split Datasets class from Experiments --- benchmark/Arguments.py | 2 +- benchmark/Datasets.py | 103 ++++++++++++++++++ benchmark/Experiments.py | 82 +------------- benchmark/Results.py | 3 +- benchmark/__init__.py | 3 +- benchmark/scripts/be_build_best.py | 3 +- benchmark/scripts/be_grid.py | 3 +- benchmark/scripts/be_main.py | 3 +- benchmark/scripts/be_print_strees.py | 2 +- benchmark/tests/BestResults_test.py | 3 +- benchmark/tests/Dataset_test.py | 3 +- benchmark/tests/Experiment_test.py | 3 +- benchmark/tests/GridSearch_test.py | 3 +- .../results/grid_output_accuracy_STree.json | 4 +- requirements.txt | 1 + 15 files changed, 127 insertions(+), 94 deletions(-) create mode 100644 benchmark/Datasets.py diff --git a/benchmark/Arguments.py b/benchmark/Arguments.py index a034934..67a2515 100644 --- a/benchmark/Arguments.py +++ b/benchmark/Arguments.py @@ -1,6 +1,6 @@ import sys import argparse -from .Experiments import Models +from .Models import Models from .Utils import Files, NO_ENV ALL_METRICS = ( diff --git a/benchmark/Datasets.py b/benchmark/Datasets.py new file mode 100644 index 0000000..20a4894 --- /dev/null +++ b/benchmark/Datasets.py @@ -0,0 +1,103 @@ +import os +import pandas as pd +from scipy.io import arff +from .Utils import Files +from .Arguments import EnvData + + +class Diterator: + def __init__(self, data): + self._stack = data.copy() + + def __next__(self): + if len(self._stack) == 0: + raise StopIteration() + return self._stack.pop(0) + + +class DatasetsArff: + @staticmethod + def dataset_names(name): + return f"{name}.arff" + + @staticmethod + def folder(): + return "datasets" + + def load(self, name, class_name="class"): + file_name = os.path.join(self.folder(), self.dataset_names(name)) + data = arff.loadarff(file_name) + df = pd.DataFrame(data[0]) + y = df[class_name] + X = data.drop(class_name, axis=1).to_numpy() + y = data[class_name].to_numpy() + return X, y + + +class DatasetsTanveer: + @staticmethod + def dataset_names(name): + return f"{name}_R.dat" + + @staticmethod + def folder(): + return "data" + + def load(self, name): + file_name = os.path.join(self.folder(), self.dataset_names(name)) + data = pd.read_csv( + file_name, + sep="\t", + index_col=0, + ) + X = data.drop("clase", axis=1).to_numpy() + y = data["clase"].to_numpy() + return X, y + + +class DatasetsSurcov: + @staticmethod + def dataset_names(name): + return f"{name}.csv" + + @staticmethod + def folder(): + return "datasets" + + def load(self, name): + file_name = os.path.join(self.folder(), self.dataset_names(name)) + data = pd.read_csv( + file_name, + index_col=0, + ) + data.dropna(axis=0, how="any", inplace=True) + self.columns = data.columns + col_list = ["class"] + X = data.drop(col_list, axis=1).to_numpy() + y = data["class"].to_numpy() + return X, y + + +class Datasets: + def __init__(self, dataset_name=None): + envData = EnvData.load() + class_name = getattr( + __import__(__name__), + f"Datasets{envData['source_data']}", + ) + self.dataset = class_name() + if dataset_name is None: + file_name = os.path.join(self.dataset.folder(), Files.index) + with open(file_name) as f: + self.data_sets = f.read().splitlines() + else: + self.data_sets = [dataset_name] + + def load(self, name): + try: + return self.dataset.load(name) + except FileNotFoundError: + raise ValueError(f"Unknown dataset: {name}") + + def __iter__(self) -> Diterator: + return Diterator(self.data_sets) diff --git a/benchmark/Experiments.py b/benchmark/Experiments.py index ab2063a..658805a 100644 --- a/benchmark/Experiments.py +++ b/benchmark/Experiments.py @@ -6,7 +6,6 @@ import time from datetime import datetime from tqdm import tqdm import numpy as np -import pandas as pd from sklearn.model_selection import ( StratifiedKFold, KFold, @@ -14,93 +13,14 @@ from sklearn.model_selection import ( cross_validate, ) from .Utils import Folders, Files, NO_RESULTS +from .Datasets import Datasets from .Models import Models -from .Arguments import EnvData class Randomized: seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] -class Diterator: - def __init__(self, data): - self._stack = data.copy() - - def __next__(self): - if len(self._stack) == 0: - raise StopIteration() - return self._stack.pop(0) - - -class DatasetsTanveer: - @staticmethod - def dataset_names(name): - return f"{name}_R.dat" - - @staticmethod - def folder(): - return "data" - - def load(self, name): - file_name = os.path.join(self.folder(), self.dataset_names(name)) - data = pd.read_csv( - file_name, - sep="\t", - index_col=0, - ) - X = data.drop("clase", axis=1).to_numpy() - y = data["clase"].to_numpy() - return X, y - - -class DatasetsSurcov: - @staticmethod - def dataset_names(name): - return f"{name}.csv" - - @staticmethod - def folder(): - return "datasets" - - def load(self, name): - file_name = os.path.join(self.folder(), self.dataset_names(name)) - data = pd.read_csv( - file_name, - index_col=0, - ) - data.dropna(axis=0, how="any", inplace=True) - self.columns = data.columns - col_list = ["class"] - X = data.drop(col_list, axis=1).to_numpy() - y = data["class"].to_numpy() - return X, y - - -class Datasets: - def __init__(self, dataset_name=None): - envData = EnvData.load() - class_name = getattr( - __import__(__name__), - f"Datasets{envData['source_data']}", - ) - self.dataset = class_name() - if dataset_name is None: - file_name = os.path.join(self.dataset.folder(), Files.index) - with open(file_name) as f: - self.data_sets = f.read().splitlines() - else: - self.data_sets = [dataset_name] - - def load(self, name): - try: - return self.dataset.load(name) - except FileNotFoundError: - raise ValueError(f"Unknown dataset: {name}") - - def __iter__(self) -> Diterator: - return Diterator(self.data_sets) - - class BestResults: def __init__(self, score, model, datasets, quiet=False): self.score_name = score diff --git a/benchmark/Results.py b/benchmark/Results.py index f4ff7ca..c5940ab 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -7,7 +7,8 @@ import shutil import subprocess import xlsxwriter import numpy as np -from .Experiments import Datasets, BestResults +from .Experiments import BestResults +from .Datasets import Datasets from .Utils import ( Folders, Files, diff --git a/benchmark/__init__.py b/benchmark/__init__.py index bafc822..cac5b02 100644 --- a/benchmark/__init__.py +++ b/benchmark/__init__.py @@ -1,4 +1,5 @@ -from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer +from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer +from .Experiments import Experiment from .Results import Report, Summary __author__ = "Ricardo Montañana Gómez" diff --git a/benchmark/scripts/be_build_best.py b/benchmark/scripts/be_build_best.py index 08d5f96..233bf22 100755 --- a/benchmark/scripts/be_build_best.py +++ b/benchmark/scripts/be_build_best.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from benchmark.Results import ReportBest -from benchmark.Experiments import Datasets, BestResults +from benchmark.Experiments import BestResults +from benchmark.Datasets import Datasets from benchmark.Arguments import Arguments """Build a json file with the best results of a model and its hyperparameters diff --git a/benchmark/scripts/be_grid.py b/benchmark/scripts/be_grid.py index ec2f8ae..8f10e48 100755 --- a/benchmark/scripts/be_grid.py +++ b/benchmark/scripts/be_grid.py @@ -1,5 +1,6 @@ #!/usr/bin/env python -from benchmark.Experiments import GridSearch, Datasets +from benchmark.Experiments import GridSearch +from benchmark.Datasets import Datasets from benchmark.Arguments import Arguments """Do experiment and build result file, optionally print report with results diff --git a/benchmark/scripts/be_main.py b/benchmark/scripts/be_main.py index 971598f..dcd8b0e 100755 --- a/benchmark/scripts/be_main.py +++ b/benchmark/scripts/be_main.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os -from benchmark.Experiments import Experiment, Datasets +from benchmark.Experiments import Experiment +from benchmark.Datasets import Datasets from benchmark.Results import Report from benchmark.Arguments import Arguments diff --git a/benchmark/scripts/be_print_strees.py b/benchmark/scripts/be_print_strees.py index 0a55a19..1c41e50 100755 --- a/benchmark/scripts/be_print_strees.py +++ b/benchmark/scripts/be_print_strees.py @@ -3,7 +3,7 @@ import os import json from stree import Stree from graphviz import Source -from benchmark.Experiments import Datasets +from benchmark.Datasets import Datasets from benchmark.Utils import Files, Folders from benchmark.Arguments import Arguments diff --git a/benchmark/tests/BestResults_test.py b/benchmark/tests/BestResults_test.py index f6a4b32..76a5ea8 100644 --- a/benchmark/tests/BestResults_test.py +++ b/benchmark/tests/BestResults_test.py @@ -1,6 +1,7 @@ import os from .TestBase import TestBase -from ..Experiments import BestResults, Datasets +from ..Experiments import BestResults +from ..Datasets import Datasets class BestResultTest(TestBase): diff --git a/benchmark/tests/Dataset_test.py b/benchmark/tests/Dataset_test.py index 63ffc9c..4669922 100644 --- a/benchmark/tests/Dataset_test.py +++ b/benchmark/tests/Dataset_test.py @@ -1,6 +1,7 @@ import shutil from .TestBase import TestBase -from ..Experiments import Randomized, Datasets +from ..Experiments import Randomized +from ..Datasets import Datasets class DatasetTest(TestBase): diff --git a/benchmark/tests/Experiment_test.py b/benchmark/tests/Experiment_test.py index ca5b37f..0f8ffad 100644 --- a/benchmark/tests/Experiment_test.py +++ b/benchmark/tests/Experiment_test.py @@ -1,6 +1,7 @@ import json from .TestBase import TestBase -from ..Experiments import Experiment, Datasets +from ..Experiments import Experiment +from ..Datasets import Datasets class ExperimentTest(TestBase): diff --git a/benchmark/tests/GridSearch_test.py b/benchmark/tests/GridSearch_test.py index 4cfb0f6..b8db074 100644 --- a/benchmark/tests/GridSearch_test.py +++ b/benchmark/tests/GridSearch_test.py @@ -1,6 +1,7 @@ import json from .TestBase import TestBase -from ..Experiments import GridSearch, Datasets +from ..Experiments import GridSearch +from ..Datasets import Datasets class GridSearchTest(TestBase): diff --git a/benchmark/tests/results/grid_output_accuracy_STree.json b/benchmark/tests/results/grid_output_accuracy_STree.json index 7f197d6..731e0b7 100644 --- a/benchmark/tests/results/grid_output_accuracy_STree.json +++ b/benchmark/tests/results/grid_output_accuracy_STree.json @@ -6,7 +6,7 @@ "kernel": "liblinear", "multiclass_strategy": "ovr" }, - "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s" + "v. 1.3.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s" ], "balloons": [ 0.625, @@ -15,6 +15,6 @@ "kernel": "linear", "multiclass_strategy": "ovr" }, - "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s" + "v. 1.3.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s" ] } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 154ab58..02446a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ pandas scikit-learn +scipy odte mufs xlsxwriter From 34b3bd94de083992d677e446d73b269a9c4582df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 24 Oct 2022 21:04:07 +0200 Subject: [PATCH 2/8] Add Arff as source_data for datasets --- benchmark/Datasets.py | 30 ++++++++++++++++++++++-------- benchmark/__init__.py | 2 +- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/benchmark/Datasets.py b/benchmark/Datasets.py index 20a4894..0150623 100644 --- a/benchmark/Datasets.py +++ b/benchmark/Datasets.py @@ -24,13 +24,12 @@ class DatasetsArff: def folder(): return "datasets" - def load(self, name, class_name="class"): + def load(self, name, class_name): file_name = os.path.join(self.folder(), self.dataset_names(name)) data = arff.loadarff(file_name) df = pd.DataFrame(data[0]) - y = df[class_name] - X = data.drop(class_name, axis=1).to_numpy() - y = data[class_name].to_numpy() + X = df.drop(class_name, axis=1).to_numpy() + y = df[class_name].to_numpy() return X, y @@ -43,7 +42,7 @@ class DatasetsTanveer: def folder(): return "data" - def load(self, name): + def load(self, name, _): file_name = os.path.join(self.folder(), self.dataset_names(name)) data = pd.read_csv( file_name, @@ -64,7 +63,7 @@ class DatasetsSurcov: def folder(): return "datasets" - def load(self, name): + def load(self, name, _): file_name = os.path.join(self.folder(), self.dataset_names(name)) data = pd.read_csv( file_name, @@ -80,23 +79,38 @@ class DatasetsSurcov: class Datasets: def __init__(self, dataset_name=None): + default_class = "class" envData = EnvData.load() class_name = getattr( __import__(__name__), f"Datasets{envData['source_data']}", ) self.dataset = class_name() + self.class_names = [] if dataset_name is None: file_name = os.path.join(self.dataset.folder(), Files.index) with open(file_name) as f: self.data_sets = f.read().splitlines() + self.class_names = [default_class] * len(self.data_sets) + if "," in self.data_sets[0]: + result = [] + class_names = [] + for data in self.data_sets: + name, class_name = data.split(",") + result.append(name) + class_names.append(class_name) + self.data_sets = result + self.class_names = class_names + else: self.data_sets = [dataset_name] + self.class_names = [default_class] def load(self, name): try: - return self.dataset.load(name) - except FileNotFoundError: + class_name = self.class_names[self.data_sets.index(name)] + return self.dataset.load(name, class_name) + except (ValueError, FileNotFoundError): raise ValueError(f"Unknown dataset: {name}") def __iter__(self) -> Diterator: diff --git a/benchmark/__init__.py b/benchmark/__init__.py index cac5b02..e26a7cf 100644 --- a/benchmark/__init__.py +++ b/benchmark/__init__.py @@ -1,4 +1,4 @@ -from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer +from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer, DatasetsArff from .Experiments import Experiment from .Results import Report, Summary From 47bf6eeda6416975a5486af78931d405b24b159d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 24 Oct 2022 21:30:56 +0200 Subject: [PATCH 3/8] Add a space to #Samples in dataset report --- benchmark/Results.py | 6 +++--- benchmark/tests/test_files/report_datasets.test | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/benchmark/Results.py b/benchmark/Results.py index c5940ab..53af23f 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -535,8 +535,8 @@ class ReportDatasets: data_sets = Datasets() color_line = TextColor.LINE1 print(color_line, end="") - print(f"{'Dataset':30s} Samp. Feat. Cls Balance") - print("=" * 30 + " ===== ===== === " + "=" * 40) + print(f"{'Dataset':30s} Sampl. Feat. Cls Balance") + print("=" * 30 + " ===== ====== === " + "=" * 40) for dataset in data_sets: X, y = data_sets.load(dataset) color_line = ( @@ -552,7 +552,7 @@ class ReportDatasets: sep = "/ " print(color_line, end="") print( - f"{dataset:30s} {X.shape[0]:5,d} {X.shape[1]:5,d} " + f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} " f"{len(np.unique(y)):3d} {comp:40s}" ) diff --git a/benchmark/tests/test_files/report_datasets.test b/benchmark/tests/test_files/report_datasets.test index d9581c0..8f5b0f6 100644 --- a/benchmark/tests/test_files/report_datasets.test +++ b/benchmark/tests/test_files/report_datasets.test @@ -1,4 +1,4 @@ -Dataset Samp. Feat. Cls Balance -============================== ===== ===== === ======================================== -balance-scale 625 4 3 7.84%/ 46.08%/ 46.08% -balloons 16 4 2 56.25%/ 43.75% +Dataset Sampl. Feat. Cls Balance +============================== ===== ====== === ======================================== +balance-scale 625 4 3 7.84%/ 46.08%/ 46.08% +balloons 16 4 2 56.25%/ 43.75% From 8001c7f2ebf2be1be2804e64e0640eb589e28c7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 24 Oct 2022 22:43:46 +0200 Subject: [PATCH 4/8] Add a space to #Samples in every report --- benchmark/Results.py | 4 +-- benchmark/tests/test_files/be_main_best.test | 28 ++++++++--------- .../tests/test_files/be_main_complete.test | 28 ++++++++--------- .../tests/test_files/be_main_dataset.test | 26 ++++++++-------- benchmark/tests/test_files/be_main_grid.test | 28 ++++++++--------- benchmark/tests/test_files/report.test | 28 ++++++++--------- .../tests/test_files/report_compared.test | 30 +++++++++---------- 7 files changed, 86 insertions(+), 86 deletions(-) diff --git a/benchmark/Results.py b/benchmark/Results.py index 53af23f..9c90dc8 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -83,10 +83,10 @@ class BaseReport(abc.ABC): class Report(BaseReport): - header_lengths = [30, 5, 5, 3, 7, 7, 7, 15, 16, 15] + header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 16, 15] header_cols = [ "Dataset", - "Samp", + "Sampl.", "Feat.", "Cls", "Nodes", diff --git a/benchmark/tests/test_files/be_main_best.test b/benchmark/tests/test_files/be_main_best.test index 8de58b0..f90f45a 100644 --- a/benchmark/tests/test_files/be_main_best.test +++ b/benchmark/tests/test_files/be_main_best.test @@ -1,16 +1,16 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 * -* test * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 0.80 seconds, 0.00 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 * +* test * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 0.80 seconds, 0.00 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'} -balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} -*********************************************************************************************************************** -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'} +balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +************************************************************************************************************************ +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 * +************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json diff --git a/benchmark/tests/test_files/be_main_complete.test b/benchmark/tests/test_files/be_main_complete.test index 3217515..d70eb26 100644 --- a/benchmark/tests/test_files/be_main_complete.test +++ b/benchmark/tests/test_files/be_main_complete.test @@ -1,16 +1,16 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 * -* test * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 0.48 seconds, 0.00 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 * +* test * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 0.48 seconds, 0.00 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {} -balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {} -*********************************************************************************************************************** -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {} +balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {} +************************************************************************************************************************ +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 * +************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json diff --git a/benchmark/tests/test_files/be_main_dataset.test b/benchmark/tests/test_files/be_main_dataset.test index d8553d7..10d3eea 100644 --- a/benchmark/tests/test_files/be_main_dataset.test +++ b/benchmark/tests/test_files/be_main_dataset.test @@ -1,15 +1,15 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 * -* test * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 0.06 seconds, 0.00 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 * +* test * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 0.06 seconds, 0.00 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {} -*********************************************************************************************************************** -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {} +************************************************************************************************************************ +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 * +************************************************************************************************************************ Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json diff --git a/benchmark/tests/test_files/be_main_grid.test b/benchmark/tests/test_files/be_main_grid.test index a2e41d2..10e1373 100644 --- a/benchmark/tests/test_files/be_main_grid.test +++ b/benchmark/tests/test_files/be_main_grid.test @@ -1,16 +1,16 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 * -* test * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 0.89 seconds, 0.00 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 * +* test * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 0.89 seconds, 0.00 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} -balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} -*********************************************************************************************************************** -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} +balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} +************************************************************************************************************************ +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 * +************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json diff --git a/benchmark/tests/test_files/report.test b/benchmark/tests/test_files/report.test index 2e5b77e..1dcac2e 100644 --- a/benchmark/tests/test_files/report.test +++ b/benchmark/tests/test_files/report.test @@ -1,15 +1,15 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * -* With gridsearched hyperparameters * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 624.25 seconds, 0.17 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * +* With gridsearched hyperparameters * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 624.25 seconds, 0.17 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} -balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} -*********************************************************************************************************************** -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +************************************************************************************************************************ +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +************************************************************************************************************************ diff --git a/benchmark/tests/test_files/report_compared.test b/benchmark/tests/test_files/report_compared.test index 2b675eb..000f4dd 100644 --- a/benchmark/tests/test_files/report_compared.test +++ b/benchmark/tests/test_files/report_compared.test @@ -1,16 +1,16 @@ -*********************************************************************************************************************** -* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * -* With gridsearched hyperparameters * -* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * -* Execution took 624.25 seconds, 0.17 hours, on iMac27 * -* Score is accuracy * -*********************************************************************************************************************** +************************************************************************************************************************ +* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * +* With gridsearched hyperparameters * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 624.25 seconds, 0.17 hours, on iMac27 * +* Score is accuracy * +************************************************************************************************************************ -Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters -============================== ===== ===== === ======= ======= ======= =============== ================ =============== -balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} -balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} -*********************************************************************************************************************** -* ✔ Equal to best .....: 1 * -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * -*********************************************************************************************************************** +Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ====== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +************************************************************************************************************************ +* ✔ Equal to best .....: 1 * +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +************************************************************************************************************************ From 2362f66c7aaa056d6d30001e6cfaa2c8f1d36790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 25 Oct 2022 00:56:37 +0200 Subject: [PATCH 5/8] Add nan manage to arff datasets --- benchmark/Datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark/Datasets.py b/benchmark/Datasets.py index 0150623..8fb77b3 100644 --- a/benchmark/Datasets.py +++ b/benchmark/Datasets.py @@ -28,8 +28,9 @@ class DatasetsArff: file_name = os.path.join(self.folder(), self.dataset_names(name)) data = arff.loadarff(file_name) df = pd.DataFrame(data[0]) + df = df.dropna() X = df.drop(class_name, axis=1).to_numpy() - y = df[class_name].to_numpy() + y, _ = pd.factorize(df[class_name]) return X, y From 29c4b4ceefaf461c451a0b4baf290ee9f39587f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 25 Oct 2022 11:36:04 +0200 Subject: [PATCH 6/8] Update E203 in main.yml Create tests --- .github/workflows/main.yml | 2 +- benchmark/Datasets.py | 44 ++-- benchmark/tests/.env.arff | 6 + benchmark/tests/Dataset_test.py | 7 + benchmark/tests/TestBase.py | 7 +- benchmark/tests/datasets/all.txt | 4 +- benchmark/tests/datasets/hayes-roth.arff | 305 +++++++++++++++++++++++ benchmark/tests/datasets/iris.arff | 225 +++++++++++++++++ benchmark/tests/datasets/wine.arff | 302 ++++++++++++++++++++++ 9 files changed, 878 insertions(+), 24 deletions(-) create mode 100644 benchmark/tests/.env.arff create mode 100755 benchmark/tests/datasets/hayes-roth.arff create mode 100755 benchmark/tests/datasets/iris.arff create mode 100755 benchmark/tests/datasets/wine.arff diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6052c8f..91f8e1d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -46,7 +46,7 @@ jobs: - name: Lint run: | black --check --diff benchmark - flake8 --count benchmark + flake8 --count benchmark --ignore=E203 - name: Tests run: | coverage run -m unittest -v benchmark.tests diff --git a/benchmark/Datasets.py b/benchmark/Datasets.py index 8fb77b3..e3735e7 100644 --- a/benchmark/Datasets.py +++ b/benchmark/Datasets.py @@ -80,7 +80,7 @@ class DatasetsSurcov: class Datasets: def __init__(self, dataset_name=None): - default_class = "class" + envData = EnvData.load() class_name = getattr( __import__(__name__), @@ -88,24 +88,32 @@ class Datasets: ) self.dataset = class_name() self.class_names = [] - if dataset_name is None: - file_name = os.path.join(self.dataset.folder(), Files.index) - with open(file_name) as f: - self.data_sets = f.read().splitlines() - self.class_names = [default_class] * len(self.data_sets) - if "," in self.data_sets[0]: - result = [] - class_names = [] - for data in self.data_sets: - name, class_name = data.split(",") - result.append(name) - class_names.append(class_name) - self.data_sets = result - self.class_names = class_names - - else: + self.load_names() + if dataset_name is not None: + try: + class_name = self.class_names[ + self.data_sets.index(dataset_name) + ] + self.class_names = [class_name] + except ValueError: + raise ValueError(f"Unknown dataset: {dataset_name}") self.data_sets = [dataset_name] - self.class_names = [default_class] + + def load_names(self): + file_name = os.path.join(self.dataset.folder(), Files.index) + default_class = "class" + with open(file_name) as f: + self.data_sets = f.read().splitlines() + self.class_names = [default_class] * len(self.data_sets) + if "," in self.data_sets[0]: + result = [] + class_names = [] + for data in self.data_sets: + name, class_name = data.split(",") + result.append(name) + class_names.append(class_name) + self.data_sets = result + self.class_names = class_names def load(self, name): try: diff --git a/benchmark/tests/.env.arff b/benchmark/tests/.env.arff new file mode 100644 index 0000000..3cff1df --- /dev/null +++ b/benchmark/tests/.env.arff @@ -0,0 +1,6 @@ +score=accuracy +platform=MacBookpro16 +n_folds=5 +model=ODTE +stratified=0 +source_data=Arff diff --git a/benchmark/tests/Dataset_test.py b/benchmark/tests/Dataset_test.py index 4669922..ca28453 100644 --- a/benchmark/tests/Dataset_test.py +++ b/benchmark/tests/Dataset_test.py @@ -29,6 +29,7 @@ class DatasetTest(TestBase): test = { ".env.dist": ["balance-scale", "balloons"], ".env.surcov": ["iris", "wine"], + ".env.arff": ["iris", "wine"], } for key, value in test.items(): self.set_env(key) @@ -52,6 +53,11 @@ class DatasetTest(TestBase): self.assertSequenceEqual(X.shape, (625, 4)) self.assertSequenceEqual(y.shape, (625,)) + def test_create_with_unknown_dataset(self): + with self.assertRaises(ValueError) as msg: + Datasets("unknown") + self.assertEqual(str(msg.exception), "Unknown dataset: unknown") + def test_load_unknown_dataset(self): dt = Datasets() with self.assertRaises(ValueError) as msg: @@ -62,6 +68,7 @@ class DatasetTest(TestBase): test = { ".env.dist": "balloons", ".env.surcov": "wine", + ".env.arff": "iris", } for key, value in test.items(): self.set_env(key) diff --git a/benchmark/tests/TestBase.py b/benchmark/tests/TestBase.py index af33d8a..e6b2de0 100644 --- a/benchmark/tests/TestBase.py +++ b/benchmark/tests/TestBase.py @@ -14,6 +14,7 @@ class TestBase(unittest.TestCase): os.chdir(os.path.dirname(os.path.abspath(__file__))) self.test_files = "test_files" self.output = "sys.stdout" + self.ext = ".test" super().__init__(*args, **kwargs) def remove_files(self, files, folder): @@ -31,7 +32,7 @@ class TestBase(unittest.TestCase): print(f'{row};{col};"{value}"', file=f) def check_excel_sheet(self, sheet, file_name): - file_name += ".test" + file_name += self.ext with open(os.path.join(self.test_files, file_name), "r") as f: expected = csv.reader(f, delimiter=";") for row, col, value in expected: @@ -45,7 +46,7 @@ class TestBase(unittest.TestCase): self.assertEqual(sheet.cell(int(row), int(col)).value, value) def check_output_file(self, output, file_name): - file_name += ".test" + file_name += self.ext with open(os.path.join(self.test_files, file_name)) as f: expected = f.read() self.assertEqual(output.getvalue(), expected) @@ -58,7 +59,7 @@ class TestBase(unittest.TestCase): def check_file_file(self, computed_file, expected_file): with open(computed_file) as f: computed = f.read() - expected_file += ".test" + expected_file += self.ext with open(os.path.join(self.test_files, expected_file)) as f: expected = f.read() self.assertEqual(computed, expected) diff --git a/benchmark/tests/datasets/all.txt b/benchmark/tests/datasets/all.txt index 16d4d76..ddf732a 100644 --- a/benchmark/tests/datasets/all.txt +++ b/benchmark/tests/datasets/all.txt @@ -1,2 +1,2 @@ -iris -wine +iris,class +wine,class diff --git a/benchmark/tests/datasets/hayes-roth.arff b/benchmark/tests/datasets/hayes-roth.arff new file mode 100755 index 0000000..4f0bd17 --- /dev/null +++ b/benchmark/tests/datasets/hayes-roth.arff @@ -0,0 +1,305 @@ +% 1. Title: Hayes-Roth & Hayes-Roth (1977) Database +% +% 2. Source Information: +% (a) Creators: Barbara and Frederick Hayes-Roth +% (b) Donor: David W. Aha (aha@ics.uci.edu) (714) 856-8779 +% (c) Date: March, 1989 +% +% 3. Past Usage: +% 1. Hayes-Roth, B., & Hayes-Roth, F. (1977). Concept learning and the +% recognition and classification of exemplars. Journal of Verbal Learning +% and Verbal Behavior, 16, 321-338. +% -- Results: +% -- Human subjects classification and recognition performance: +% 1. decreases with distance from the prototype, +% 2. is better on unseen prototypes than old instances, and +% 3. improves with presentation frequency during learning. +% 2. Anderson, J.R., & Kline, P.J. (1979). A learning system and its +% psychological implications. In Proceedings of the Sixth International +% Joint Conference on Artificial Intelligence (pp. 16-21). Tokyo, Japan: +% Morgan Kaufmann. +% -- Partitioned the results into 4 classes: +% 1. prototypes +% 2. near-prototypes with high presentation frequency during learning +% 3. near-prototypes with low presentation frequency during learning +% 4. instances that are far from protoypes +% -- Described evidence that ACT's classification confidence and +% recognition behaviors closely simulated human subjects' behaviors. +% 3. Aha, D.W. (1989). Incremental learning of independent, overlapping, and +% graded concept descriptions with an instance-based process framework. +% Manuscript submitted for publication. +% -- Used same partition as Anderson & Kline +% -- Described evidence that Bloom's classification confidence behavior +% is similar to the human subjects' behavior. Bloom fitted the data +% more closely than did ACT. +% +% 4. Relevant Information: +% This database contains 5 numeric-valued attributes. Only a subset of +% 3 are used during testing (the latter 3). Furthermore, only 2 of the +% 3 concepts are "used" during testing (i.e., those with the prototypes +% 000 and 111). I've mapped all values to their zero-indexing equivalents. +% +% Some instances could be placed in either category 0 or 1. I've followed +% the authors' suggestion, placing them in each category with equal +% probability. +% +% I've replaced the actual values of the attributes (i.e., hobby has values +% chess, sports and stamps) with numeric values. I think this is how +% the authors' did this when testing the categorization models described +% in the paper. I find this unfair. While the subjects were able to bring +% background knowledge to bear on the attribute values and their +% relationships, the algorithms were provided with no such knowledge. I'm +% uncertain whether the 2 distractor attributes (name and hobby) are +% presented to the authors' algorithms during testing. However, it is clear +% that only the age, educational status, and marital status attributes are +% given during the human subjects' transfer tests. +% +% 5. Number of Instances: 132 training instances, 28 test instances +% +% 6. Number of Attributes: 5 plus the class membership attribute. 3 concepts. +% +% 7. Attribute Information: +% -- 1. name: distinct for each instance and represented numerically +% -- 2. hobby: nominal values ranging between 1 and 3 +% -- 3. age: nominal values ranging between 1 and 4 +% -- 4. educational level: nominal values ranging between 1 and 4 +% -- 5. marital status: nominal values ranging between 1 and 4 +% -- 6. class: nominal value between 1 and 3 +% +% 9. Missing Attribute Values: none +% +% 10. Class Distribution: see below +% +% 11. Detailed description of the experiment: +% 1. 3 categories (1, 2, and neither -- which I call 3) +% -- some of the instances could be classified in either class 1 or 2, and +% they have been evenly distributed between the two classes +% 2. 5 Attributes +% -- A. name (a randomly-generated number between 1 and 132) +% -- B. hobby (a randomly-generated number between 1 and 3) +% -- C. age (a number between 1 and 4) +% -- D. education level (a number between 1 and 4) +% -- E. marital status (a number between 1 and 4) +% 3. Classification: +% -- only attributes C-E are diagnostic; values for A and B are ignored +% -- Class Neither: if a 4 occurs for any attribute C-E +% -- Class 1: Otherwise, if (# of 1's)>(# of 2's) for attributes C-E +% -- Class 2: Otherwise, if (# of 2's)>(# of 1's) for attributes C-E +% -- Either 1 or 2: Otherwise, if (# of 2's)=(# of 1's) for attributes C-E +% 4. Prototypes: +% -- Class 1: 111 +% -- Class 2: 222 +% -- Class Either: 333 +% -- Class Neither: 444 +% 5. Number of training instances: 132 +% -- Each instance presented 0, 1, or 10 times +% -- None of the prototypes seen during training +% -- 3 instances from each of categories 1, 2, and either are repeated +% 10 times each +% -- 3 additional instances from the Either category are shown during +% learning +% 5. Number of test instances: 28 +% -- All 9 class 1 +% -- All 9 class 2 +% -- All 6 class Either +% -- All 4 prototypes +% -------------------- +% -- 28 total +% +% Observations of interest: +% 1. Relative classification confidence of +% -- prototypes for classes 1 and 2 (2 instances) +% (Anderson calls these Class 1 instances) +% -- instances of class 1 with frequency 10 during training and +% instances of class 2 with frequency 10 during training that +% are 1 value away from their respective prototypes (6 instances) +% (Anderson calls these Class 2 instances) +% -- instances of class 1 with frequency 1 during training and +% instances of class 2 with frequency 1 during training that +% are 1 value away from their respective prototypes (6 instances) +% (Anderson calls these Class 3 instances) +% -- instances of class 1 with frequency 1 during training and +% instances of class 2 with frequency 1 during training that +% are 2 values away from their respective prototypes (6 instances) +% (Anderson calls these Class 4 instances) +% 2. Relative classification recognition of them also +% +% Some Expected results: +% Both frequency and distance from prototype will effect the classification +% accuracy of instances. Greater the frequency, higher the classification +% confidence. Closer to prototype, higher the classification confidence. +% +% Information about the dataset +% CLASSTYPE: nominal +% CLASSINDEX: last +% + +@relation hayes-roth + +@attribute hobby INTEGER +@attribute age INTEGER +@attribute educational_level INTEGER +@attribute marital_status INTEGER +@attribute class {1,2,3,4} + +@data +2,1,1,2,1 +2,1,3,2,2 +3,1,4,1,3 +2,4,2,2,3 +1,1,3,4,3 +1,1,3,2,2 +3,1,3,2,2 +3,4,2,4,3 +2,2,1,1,1 +3,2,1,1,1 +1,2,1,1,1 +2,2,3,4,3 +1,1,2,1,1 +2,1,2,2,2 +2,4,1,4,3 +1,1,3,3,1 +3,2,1,2,2 +1,2,1,1,1 +3,3,2,1,1 +3,1,3,2,1 +1,2,2,1,2 +3,2,1,3,1 +2,1,2,1,1 +3,2,1,3,1 +2,3,2,1,1 +3,2,2,1,2 +3,2,1,3,2 +2,1,2,2,2 +1,1,3,2,1 +3,2,1,1,1 +1,4,1,1,3 +2,2,1,3,1 +1,2,1,3,2 +1,1,1,2,1 +2,4,3,1,3 +3,1,2,2,2 +1,1,2,2,2 +3,2,2,1,2 +1,2,1,2,2 +3,4,3,2,3 +2,2,2,1,2 +2,2,1,2,2 +3,2,1,3,2 +3,2,1,1,1 +3,1,2,1,1 +1,2,1,3,2 +2,1,1,2,1 +1,1,1,2,1 +1,2,2,3,2 +3,3,1,1,1 +3,3,3,1,1 +3,2,1,2,2 +3,2,1,2,2 +3,1,2,1,1 +1,1,1,2,1 +2,1,3,2,1 +2,2,2,1,2 +2,1,2,1,1 +2,2,1,3,1 +2,1,2,2,2 +1,2,4,2,3 +2,2,1,2,2 +1,1,2,4,3 +1,3,2,1,1 +2,4,4,2,3 +2,3,2,1,1 +3,1,2,2,2 +1,1,2,2,2 +1,3,2,4,3 +1,1,2,2,2 +3,1,4,2,3 +2,1,3,2,2 +1,1,3,2,2 +3,1,3,2,1 +1,2,4,4,3 +1,4,2,1,3 +2,1,2,1,1 +3,4,1,2,3 +2,2,1,1,1 +1,1,2,1,1 +2,2,4,3,3 +3,1,2,2,2 +1,1,3,2,1 +1,2,1,3,1 +1,4,4,1,3 +3,3,3,2,2 +2,2,1,3,2 +3,3,2,1,2 +1,1,1,3,1 +2,2,1,2,2 +2,2,2,1,2 +2,3,2,3,2 +1,3,2,1,2 +2,2,1,2,2 +1,1,1,2,1 +3,2,2,1,2 +3,2,1,1,1 +1,1,2,1,1 +3,1,4,4,3 +3,3,2,1,2 +2,3,2,1,2 +2,1,3,1,1 +1,2,1,2,2 +3,1,1,2,1 +2,2,4,1,3 +1,2,2,1,2 +2,3,2,1,2 +2,2,1,4,3 +1,4,2,3,3 +2,2,1,1,1 +1,2,1,1,1 +2,2,3,2,2 +1,3,2,1,1 +3,1,2,1,1 +3,1,1,2,1 +3,3,1,4,3 +2,3,4,1,3 +1,2,3,3,2 +3,3,2,2,2 +3,3,4,2,3 +1,2,2,1,2 +2,1,1,4,3 +3,1,2,2,2 +3,2,2,4,3 +2,3,1,3,1 +2,1,1,2,1 +3,4,1,3,3 +1,1,4,3,3 +2,1,2,1,1 +1,2,1,2,2 +1,2,2,1,2 +3,1,1,2,1 +1,1,1,2,1 +1,1,2,1,1 +1,2,1,1,1 +1,1,1,3,1 +1,1,3,1,1 +1,3,1,1,1 +1,1,3,3,1 +1,3,1,3,1 +1,3,3,1,1 +1,2,2,1,2 +1,2,1,2,2 +1,1,2,2,2 +1,2,2,3,2 +1,2,3,2,2 +1,3,2,2,2 +1,2,3,3,2 +1,3,2,3,2 +1,3,3,2,2 +1,1,3,2,1 +1,3,2,1,2 +1,2,1,3,1 +1,2,3,1,2 +1,1,2,3,1 +1,3,1,2,2 +1,1,1,1,1 +1,2,2,2,2 +1,3,3,3,1 +1,4,4,4,3 \ No newline at end of file diff --git a/benchmark/tests/datasets/iris.arff b/benchmark/tests/datasets/iris.arff new file mode 100755 index 0000000..780480c --- /dev/null +++ b/benchmark/tests/datasets/iris.arff @@ -0,0 +1,225 @@ +% 1. Title: Iris Plants Database +% +% 2. Sources: +% (a) Creator: R.A. Fisher +% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) +% (c) Date: July, 1988 +% +% 3. Past Usage: +% - Publications: too many to mention!!! Here are a few. +% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" +% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions +% to Mathematical Statistics" (John Wiley, NY, 1950). +% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. +% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. +% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System +% Structure and Classification Rule for Recognition in Partially Exposed +% Environments". IEEE Transactions on Pattern Analysis and Machine +% Intelligence, Vol. PAMI-2, No. 1, 67-71. +% -- Results: +% -- very low misclassification rates (0% for the setosa class) +% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE +% Transactions on Information Theory, May 1972, 431-433. +% -- Results: +% -- very low misclassification rates again +% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II +% conceptual clustering system finds 3 classes in the data. +% +% 4. Relevant Information: +% --- This is perhaps the best known database to be found in the pattern +% recognition literature. Fisher's paper is a classic in the field +% and is referenced frequently to this day. (See Duda & Hart, for +% example.) The data set contains 3 classes of 50 instances each, +% where each class refers to a type of iris plant. One class is +% linearly separable from the other 2; the latter are NOT linearly +% separable from each other. +% --- Predicted attribute: class of iris plant. +% --- This is an exceedingly simple domain. +% +% 5. Number of Instances: 150 (50 in each of three classes) +% +% 6. Number of Attributes: 4 numeric, predictive attributes and the class +% +% 7. Attribute Information: +% 1. sepal length in cm +% 2. sepal width in cm +% 3. petal length in cm +% 4. petal width in cm +% 5. class: +% -- Iris Setosa +% -- Iris Versicolour +% -- Iris Virginica +% +% 8. Missing Attribute Values: None +% +% Summary Statistics: +% Min Max Mean SD Class Correlation +% sepal length: 4.3 7.9 5.84 0.83 0.7826 +% sepal width: 2.0 4.4 3.05 0.43 -0.4194 +% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) +% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) +% +% 9. Class Distribution: 33.3% for each of 3 classes. + +@RELATION iris + +@ATTRIBUTE sepallength REAL +@ATTRIBUTE sepalwidth REAL +@ATTRIBUTE petallength REAL +@ATTRIBUTE petalwidth REAL +@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica} + +@DATA +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica +% +% +% diff --git a/benchmark/tests/datasets/wine.arff b/benchmark/tests/datasets/wine.arff new file mode 100755 index 0000000..7d61c79 --- /dev/null +++ b/benchmark/tests/datasets/wine.arff @@ -0,0 +1,302 @@ +% 1. Title of Database: Wine recognition data +% Updated Sept 21, 1998 by C.Blake : Added attribute information +% +% 2. Sources: +% (a) Forina, M. et al, PARVUS - An Extendible Package for Data +% Exploration, Classification and Correlation. Institute of Pharmaceutical +% and Food Analysis and Technologies, Via Brigata Salerno, +% 16147 Genoa, Italy. +% +% (b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au +% (c) July 1991 +% 3. Past Usage: +% +% (1) +% S. Aeberhard, D. Coomans and O. de Vel, +% Comparison of Classifiers in High Dimensional Settings, +% Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of +% Mathematics and Statistics, James Cook University of North Queensland. +% (Also submitted to Technometrics). +% +% The data was used with many others for comparing various +% classifiers. The classes are separable, though only RDA +% has achieved 100% correct classification. +% (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) +% (All results using the leave-one-out technique) +% +% In a classification context, this is a well posed problem +% with "well behaved" class structures. A good data set +% for first testing of a new classifier, but not very +% challenging. +% +% (2) +% S. Aeberhard, D. Coomans and O. de Vel, +% "THE CLASSIFICATION PERFORMANCE OF RDA" +% Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of +% Mathematics and Statistics, James Cook University of North Queensland. +% (Also submitted to Journal of Chemometrics). +% +% Here, the data was used to illustrate the superior performance of +% the use of a new appreciation function with RDA. +% +% 4. Relevant Information: +% +% -- These data are the results of a chemical analysis of +% wines grown in the same region in Italy but derived from three +% different cultivars. +% The analysis determined the quantities of 13 constituents +% found in each of the three types of wines. +% +% -- I think that the initial data set had around 30 variables, but +% for some reason I only have the 13 dimensional version. +% I had a list of what the 30 or so variables were, but a.) +% I lost it, and b.), I would not know which 13 variables +% are included in the set. +% +% -- The attributes are (dontated by Riccardo Leardi, +% riclea@anchem.unige.it ) +% 1) Alcohol +% 2) Malic acid +% 3) Ash +% 4) Alcalinity of ash +% 5) Magnesium +% 6) Total phenols +% 7) Flavanoids +% 8) Nonflavanoid phenols +% 9) Proanthocyanins +% 10)Color intensity +% 11)Hue +% 12)OD280/OD315 of diluted wines +% 13)Proline +% +% 5. Number of Instances +% +% class 1 59 +% class 2 71 +% class 3 48 +% +% 6. Number of Attributes +% +% 13 +% +% 7. For Each Attribute: +% +% All attributes are continuous +% +% No statistics available, but suggest to standardise +% variables for certain uses (e.g. for us with classifiers +% which are NOT scale invariant) +% +% NOTE: 1st attribute is class identifier (1-3) +% +% 8. Missing Attribute Values: +% +% None +% +% 9. Class Distribution: number of instances per class +% +% class 1 59 +% class 2 71 +% class 3 48 +% +% Information about the dataset +% CLASSTYPE: nominal +% CLASSINDEX: first +% + +@relation wine + +@attribute class {1,2,3} +@attribute Alcohol REAL +@attribute Malic_acid REAL +@attribute Ash REAL +@attribute Alcalinity_of_ash REAL +@attribute Magnesium INTEGER +@attribute Total_phenols REAL +@attribute Flavanoids REAL +@attribute Nonflavanoid_phenols REAL +@attribute Proanthocyanins REAL +@attribute Color_intensity REAL +@attribute Hue REAL +@attribute OD280/OD315_of_diluted_wines REAL +@attribute Proline INTEGER + +@data +1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 +1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 +1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 +1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 +1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 +1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 +1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 +1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 +1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 +1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 +1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 +1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 +1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 +1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 +1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 +1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 +1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 +1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 +1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 +1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 +1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 +1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 +1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 +1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 +1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 +1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 +1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 +1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 +1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 +1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 +1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 +1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 +1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 +1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 +1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 +1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 +1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 +1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 +1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 +1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 +1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 +1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 +1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 +1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 +1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 +1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 +1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 +1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 +1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 +1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 +1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 +1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 +1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 +1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 +1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 +1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 +1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 +1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 +1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 +2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 +2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 +2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 +2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 +2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 +2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 +2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 +2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 +2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 +2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 +2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 +2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 +2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 +2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 +2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 +2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 +2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 +2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 +2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 +2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 +2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 +2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 +2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 +2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 +2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 +2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 +2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 +2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 +2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 +2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 +2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 +2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 +2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 +2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 +2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 +2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 +2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 +2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 +2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 +2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 +2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 +2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 +2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 +2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 +2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 +2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 +2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 +2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 +2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 +2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 +2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 +2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 +2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 +2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 +2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 +2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 +2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 +2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 +2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 +2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 +2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 +2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 +2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 +2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 +2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 +2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 +2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 +2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 +2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 +2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 +2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 +3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 +3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 +3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 +3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 +3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 +3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 +3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 +3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 +3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 +3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 +3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 +3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 +3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 +3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 +3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 +3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 +3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 +3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 +3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 +3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 +3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 +3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 +3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 +3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 +3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 +3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 +3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 +3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 +3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 +3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 +3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 +3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 +3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 +3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 +3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 +3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 +3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 +3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 +3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 +3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 +3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 +3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 +3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 +3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 +3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 +3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 +3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 +3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 From b24a508d1ccad3c8003494ee4afb8607d4acc079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 25 Oct 2022 15:00:37 +0200 Subject: [PATCH 7/8] Add consistent comparative results to reports --- benchmark/Results.py | 71 +++++++++++++++---- benchmark/Utils.py | 1 - .../test_files/be_build_best_report.test | 2 +- benchmark/tests/test_files/be_main_best.test | 2 +- .../tests/test_files/be_main_complete.test | 2 +- .../tests/test_files/be_main_dataset.test | 2 +- benchmark/tests/test_files/be_main_grid.test | 2 +- benchmark/tests/test_files/excel.test | 2 +- .../tests/test_files/excel_add_ODTE.test | 2 +- .../tests/test_files/excel_add_STree.test | 2 +- .../tests/test_files/excel_compared.test | 2 +- .../tests/test_files/exreport_excel_ODTE.test | 2 +- .../exreport_excel_RandomForest.test | 2 +- .../test_files/exreport_excel_STree.test | 2 +- benchmark/tests/test_files/report.test | 2 +- benchmark/tests/test_files/report_best.test | 2 +- .../tests/test_files/report_compared.test | 2 +- benchmark/tests/test_files/report_grid.test | 2 +- 18 files changed, 75 insertions(+), 29 deletions(-) diff --git a/benchmark/Results.py b/benchmark/Results.py index 9c90dc8..bf0c8e6 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -9,16 +9,37 @@ import xlsxwriter import numpy as np from .Experiments import BestResults from .Datasets import Datasets +from .Arguments import EnvData, ALL_METRICS from .Utils import ( Folders, Files, Symbols, - BEST_ACCURACY_STREE, TextColor, NO_RESULTS, ) +class BestResultsEver: + def __init__(self): + self.data = {} + for i in ["Tanveer", "Surcov", "Arff"]: + self.data[i] = {} + for metric in ALL_METRICS: + self.data[i][metric.replace("-", "_")] = ["self", 1.0] + self.data[i][metric] = ["self", 1.0] + self.data["Tanveer"]["accuracy"] = [ + "STree_default (liblinear-ovr)", + 40.282203, + ] + self.data["Arff"]["accuracy"] = [ + "STree_default (linear-ovo)", + 21.9765, + ] + + def get_name_value(self, key, score): + return self.data[key][score] + + class BaseReport(abc.ABC): def __init__(self, file_name, best_file=False): self.file_name = file_name @@ -30,7 +51,20 @@ class BaseReport(abc.ABC): with open(self.file_name) as f: self.data = json.load(f) self.best_acc_file = best_file - self.lines = self.data if best_file else self.data["results"] + if best_file: + self.lines = self.data + else: + self.lines = self.data["results"] + self.score_name = self.data["score_name"] + self.__compute_best_results_ever() + + def __compute_best_results_ever(self): + args = EnvData.load() + key = args["source_data"] + best = BestResultsEver() + self.best_score_name, self.best_score_value = best.get_name_value( + key, self.score_name + ) def _get_accuracy(self, item): return self.data[item][0] if self.best_acc_file else item["score"] @@ -69,6 +103,12 @@ class BaseReport(abc.ABC): } return meaning[status] + def _get_best_accuracy(self): + return self.best_score_value + + def _get_message_best_accuracy(self): + return f"{self.score_name} compared to {self.best_score_name} .:" + @abc.abstractmethod def header(self) -> None: pass @@ -188,8 +228,8 @@ class Report(BaseReport): f" {key} {self._status_meaning(key)} .....: {value:2d}" ) self.header_line( - f" Accuracy compared to stree_default (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f" {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") @@ -209,12 +249,12 @@ class ReportBest(BaseReport): if best else Files.grid_output(score, model) ) + file_name = os.path.join(Folders.results, name) self.best = best self.grid = grid - file_name = os.path.join(Folders.results, name) - super().__init__(file_name, best_file=True) self.score_name = score self.model = model + super().__init__(file_name, best_file=True) def header_line(self, text: str) -> None: length = sum(self.header_lengths) + len(self.header_lengths) - 3 @@ -254,8 +294,8 @@ class ReportBest(BaseReport): def footer(self, accuracy): self.header_line("*") self.header_line( - f" Scores compared to stree_default accuracy (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f" {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) self.header_line("*") @@ -509,8 +549,8 @@ class Excel(BaseReport): self.sheet.write(self.row, 3, self._status_meaning(key), bold) self.row += 1 message = ( - f"** Accuracy compared to stree_default (liblinear-ovr) .: " - f"{accuracy/BEST_ACCURACY_STREE:7.4f}" + f"** {self._get_message_best_accuracy()} " + f"{accuracy/self._get_best_accuracy():7.4f}" ) bold = self.book.add_format({"bold": True, "font_size": 14}) # set width of the hyperparams column with the maximum width @@ -634,6 +674,13 @@ class Benchmark: self._report = {} self._datasets = set() self.visualize = visualize + self.__compute_best_results_ever() + + def __compute_best_results_ever(self): + args = EnvData.load() + key = args["source_data"] + best = BestResultsEver() + _, self.best_score_value = best.get_name_value(key, self._score) def get_result_file_name(self): return os.path.join(Folders.exreport, Files.exreport(self._score)) @@ -971,7 +1018,7 @@ class Benchmark: sheet.write_formula( row, col + 1, - f"=sum({range_metric})/{BEST_ACCURACY_STREE}", + f"=sum({range_metric})/{self.best_score_value}", decimal_total, ) range_rank = ( @@ -1063,7 +1110,7 @@ class StubReport(BaseReport): def footer(self, accuracy: float) -> None: self.accuracy = accuracy - self.score = accuracy / BEST_ACCURACY_STREE + self.score = accuracy / self._get_best_accuracy() class Summary: diff --git a/benchmark/Utils.py b/benchmark/Utils.py index 176352a..d470959 100644 --- a/benchmark/Utils.py +++ b/benchmark/Utils.py @@ -1,7 +1,6 @@ import os import subprocess -BEST_ACCURACY_STREE = 40.282203 NO_RESULTS = "** No results found **" NO_ENV = "File .env not found" diff --git a/benchmark/tests/test_files/be_build_best_report.test b/benchmark/tests/test_files/be_build_best_report.test index 603f363..ea19b87 100644 --- a/benchmark/tests/test_files/be_build_best_report.test +++ b/benchmark/tests/test_files/be_build_best_report.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.963520 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1} balloons 0.785000 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0434 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0434 * ****************************************************************************************************************************************************************** diff --git a/benchmark/tests/test_files/be_main_best.test b/benchmark/tests/test_files/be_main_best.test index f90f45a..8b21255 100644 --- a/benchmark/tests/test_files/be_main_best.test +++ b/benchmark/tests/test_files/be_main_best.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json diff --git a/benchmark/tests/test_files/be_main_complete.test b/benchmark/tests/test_files/be_main_complete.test index d70eb26..793d267 100644 --- a/benchmark/tests/test_files/be_main_complete.test +++ b/benchmark/tests/test_files/be_main_complete.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json diff --git a/benchmark/tests/test_files/be_main_dataset.test b/benchmark/tests/test_files/be_main_dataset.test index 10d3eea..abfcc76 100644 --- a/benchmark/tests/test_files/be_main_dataset.test +++ b/benchmark/tests/test_files/be_main_dataset.test @@ -10,6 +10,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score ============================== ====== ===== === ======= ======= ======= =============== ================ =============== balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 * ************************************************************************************************************************ Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json diff --git a/benchmark/tests/test_files/be_main_grid.test b/benchmark/tests/test_files/be_main_grid.test index 10e1373..a4bec6e 100644 --- a/benchmark/tests/test_files/be_main_grid.test +++ b/benchmark/tests/test_files/be_main_grid.test @@ -11,6 +11,6 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} balloons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 * ************************************************************************************************************************ Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json diff --git a/benchmark/tests/test_files/excel.test b/benchmark/tests/test_files/excel.test index 8cf2ef2..373c803 100644 --- a/benchmark/tests/test_files/excel.test +++ b/benchmark/tests/test_files/excel.test @@ -45,4 +45,4 @@ 8;10;"0.0008541679382324218" 8;11;"3.629469326417878e-05" 8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_ODTE.test b/benchmark/tests/test_files/excel_add_ODTE.test index f97bd71..34f226f 100644 --- a/benchmark/tests/test_files/excel_add_ODTE.test +++ b/benchmark/tests/test_files/excel_add_ODTE.test @@ -45,4 +45,4 @@ 8;10;"0.1156062078475952" 8;11;"0.0127842418285999" 8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_STree.test b/benchmark/tests/test_files/excel_add_STree.test index 36937eb..3a864e4 100644 --- a/benchmark/tests/test_files/excel_add_STree.test +++ b/benchmark/tests/test_files/excel_add_STree.test @@ -43,4 +43,4 @@ 8;10;"0.02120100021362305" 8;11;"0.003526023309468471" 8;12;"{'splitter': 'best', 'max_features': 'auto'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416" \ No newline at end of file +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0416" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_compared.test b/benchmark/tests/test_files/excel_compared.test index eb7239d..16b415a 100644 --- a/benchmark/tests/test_files/excel_compared.test +++ b/benchmark/tests/test_files/excel_compared.test @@ -49,4 +49,4 @@ 11;2;"✔" 11;3;1 11;4;"Equal to best" -13;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file +13;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/exreport_excel_ODTE.test b/benchmark/tests/test_files/exreport_excel_ODTE.test index 23f3dc7..46188f4 100644 --- a/benchmark/tests/test_files/exreport_excel_ODTE.test +++ b/benchmark/tests/test_files/exreport_excel_ODTE.test @@ -45,4 +45,4 @@ 8;10;"0.1156062078475952" 8;11;"0.0127842418285999" 8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434" diff --git a/benchmark/tests/test_files/exreport_excel_RandomForest.test b/benchmark/tests/test_files/exreport_excel_RandomForest.test index 424c0c7..7e7a395 100644 --- a/benchmark/tests/test_files/exreport_excel_RandomForest.test +++ b/benchmark/tests/test_files/exreport_excel_RandomForest.test @@ -45,4 +45,4 @@ 8;10;"0.07016648769378662" 8;11;"0.002460508923990468" 8;12;"{}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0363" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0363" diff --git a/benchmark/tests/test_files/exreport_excel_STree.test b/benchmark/tests/test_files/exreport_excel_STree.test index 07f44c0..18b7aa4 100644 --- a/benchmark/tests/test_files/exreport_excel_STree.test +++ b/benchmark/tests/test_files/exreport_excel_STree.test @@ -45,4 +45,4 @@ 8;10;"0.0008541679382324218" 8;11;"3.629469326417878e-05" 8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" -10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" +10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454" diff --git a/benchmark/tests/test_files/report.test b/benchmark/tests/test_files/report.test index 1dcac2e..94498b7 100644 --- a/benchmark/tests/test_files/report.test +++ b/benchmark/tests/test_files/report.test @@ -11,5 +11,5 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * ************************************************************************************************************************ diff --git a/benchmark/tests/test_files/report_best.test b/benchmark/tests/test_files/report_best.test index 735ed01..03ffc30 100644 --- a/benchmark/tests/test_files/report_best.test +++ b/benchmark/tests/test_files/report_best.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'} balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0457 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0457 * ****************************************************************************************************************************************************************** diff --git a/benchmark/tests/test_files/report_compared.test b/benchmark/tests/test_files/report_compared.test index 000f4dd..46c6f6c 100644 --- a/benchmark/tests/test_files/report_compared.test +++ b/benchmark/tests/test_files/report_compared.test @@ -12,5 +12,5 @@ Dataset Sampl. Feat. Cls Nodes Leaves Depth Score balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} ************************************************************************************************************************ * ✔ Equal to best .....: 1 * -* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 * ************************************************************************************************************************ diff --git a/benchmark/tests/test_files/report_grid.test b/benchmark/tests/test_files/report_grid.test index 7aa394f..4ad130f 100644 --- a/benchmark/tests/test_files/report_grid.test +++ b/benchmark/tests/test_files/report_grid.test @@ -7,5 +7,5 @@ Dataset Score File/Message balance-scale 0.919995 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} balloons 0.625000 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} ****************************************************************************************************************************************************************** -* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0384 * +* accuracy compared to STree_default (liblinear-ovr) .: 0.0384 * ****************************************************************************************************************************************************************** From db61911ca619da8aa3d46a5b656906cb89069ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 25 Oct 2022 15:20:12 +0200 Subject: [PATCH 8/8] Fix CI error --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 91f8e1d..2e44aa8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -46,7 +46,7 @@ jobs: - name: Lint run: | black --check --diff benchmark - flake8 --count benchmark --ignore=E203 + flake8 --count benchmark --ignore=E203,W503 - name: Tests run: | coverage run -m unittest -v benchmark.tests