From a3b4b59b484088d34e91678ec2073ceb50f0af7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Mon, 25 Apr 2022 00:46:18 +0200 Subject: [PATCH] Add Excel tests --- .flake8 | 2 + benchmark/Experiments.py | 8 +- benchmark/Results.py | 10 +- benchmark/tests/Excel_test.py | 81 +++++++++++++++ benchmark/tests/GridSearch_test.py | 10 +- benchmark/tests/Report_test.py | 99 +++++++++++++++++++ benchmark/tests/SQL_test.py | 81 +++++++++++++++ benchmark/tests/Util_test.py | 1 + benchmark/tests/__init__.py | 6 ++ .../results/grid_output_accuracy_STree.json | 4 +- ...racy_ODTE_Galgo_2022-04-20_10:52:20_0.json | 57 +++++++++++ benchmark/tests/test_files/excel.test | 48 +++++++++ .../tests/test_files/excel_add_ODTE.test | 48 +++++++++ .../tests/test_files/excel_add_STree.test | 46 +++++++++ .../tests/test_files/excel_compared.test | 52 ++++++++++ benchmark/tests/test_files/report.test | 15 +++ benchmark/tests/test_files/report_best.test | 11 +++ .../tests/test_files/report_compared.test | 16 +++ benchmark/tests/test_files/report_grid.test | 11 +++ requirements.txt | 1 + 20 files changed, 588 insertions(+), 19 deletions(-) create mode 100644 .flake8 create mode 100644 benchmark/tests/Excel_test.py create mode 100644 benchmark/tests/Report_test.py create mode 100644 benchmark/tests/SQL_test.py create mode 100644 benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json create mode 100644 benchmark/tests/test_files/excel.test create mode 100644 benchmark/tests/test_files/excel_add_ODTE.test create mode 100644 benchmark/tests/test_files/excel_add_STree.test create mode 100644 benchmark/tests/test_files/excel_compared.test create mode 100644 benchmark/tests/test_files/report.test create mode 100644 benchmark/tests/test_files/report_best.test create mode 100644 benchmark/tests/test_files/report_compared.test create mode 100644 benchmark/tests/test_files/report_grid.test diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..07e3dc9 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +exclude = .git,__init__.py diff --git a/benchmark/Experiments.py b/benchmark/Experiments.py index 9a5e26d..90d3f30 100644 --- a/benchmark/Experiments.py +++ b/benchmark/Experiments.py @@ -348,10 +348,12 @@ class GridSearch: platform, progress_bar=True, folds=5, + test=False, ): today = datetime.now() - self.time = today.strftime("%H:%M:%S") - self.date = today.strftime("%Y-%m-%d") + self.test = test + self.time = "12:00:00" if test else today.strftime("%H:%M:%S") + self.date = "2022-02-22" if test else today.strftime("%Y-%m-%d") self.output_file = os.path.join( Folders.results, Files.grid_output( @@ -412,7 +414,7 @@ class GridSearch: return f"{duration/3600:.3f}h" def _store_result(self, name, grid, duration): - d_message = self._duration_message(duration) + d_message = "1s" if self.test else self._duration_message(duration) message = ( f"v. {self.version}, Computed on {self.platform} on " f"{self.date} at {self.time} " diff --git a/benchmark/Results.py b/benchmark/Results.py index 648e0b3..b04513c 100644 --- a/benchmark/Results.py +++ b/benchmark/Results.py @@ -18,8 +18,8 @@ class BaseReport(abc.ABC): if not os.path.isfile(os.path.join(Folders.results, file_name)): raise FileNotFoundError(f"{file_name} does not exists!") else: - file_name = os.path.join(Folders.results, file_name) - with open(file_name) as f: + self.file_name = os.path.join(Folders.results, file_name) + with open(self.file_name) as f: self.data = json.load(f) self.best_acc_file = best_file self.lines = self.data if best_file else self.data["results"] @@ -205,7 +205,6 @@ class ReportBest(BaseReport): self.grid = grid file_name = os.path.join(Folders.results, name) super().__init__(file_name, best_file=True) - self.compare = False self.score_name = score self.model = model @@ -246,11 +245,6 @@ class ReportBest(BaseReport): def footer(self, accuracy): self.header_line("*") - if self.compare: - for key, value in self._compare_totals.items(): - self.header_line( - f" {key} {self._status_meaning(key)} .....: {value:2d}" - ) self.header_line( f" Scores compared to stree_default accuracy (liblinear-ovr) .: " f"{accuracy/BEST_ACCURACY_STREE:7.4f}" diff --git a/benchmark/tests/Excel_test.py b/benchmark/tests/Excel_test.py new file mode 100644 index 0000000..447a47b --- /dev/null +++ b/benchmark/tests/Excel_test.py @@ -0,0 +1,81 @@ +import os +import csv +import unittest +from openpyxl import load_workbook +from xlsxwriter import Workbook +from ..Results import Excel +from ..Utils import Folders + + +class ExcelTest(unittest.TestCase): + def __init__(self, *args, **kwargs): + os.chdir(os.path.dirname(os.path.abspath(__file__))) + super().__init__(*args, **kwargs) + + def tearDown(self) -> None: + files = [ + "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx", + "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.xlsx", + "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.xlsx", + ] + for file_name in files: + file_name = os.path.join(Folders.results, file_name) + if os.path.exists(file_name): + os.remove(file_name) + return super().tearDown() + + def check_excel_sheet(self, sheet, file_name): + with open(file_name, "r") as f: + expected = csv.reader(f, delimiter=";") + for row, col, value in expected: + if value.isdigit(): + value = int(value) + else: + try: + value = float(value) + except ValueError: + pass + self.assertEqual(sheet.cell(int(row), int(col)).value, value) + + def test_report_excel_compared(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" + ) + report = Excel(file_name, compare=True) + report.report() + file_output = report.get_file_name() + book = load_workbook(file_output) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel_compared.test") + + def test_report_excel(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" + ) + report = Excel(file_name, compare=False) + report.report() + file_output = report.get_file_name() + book = load_workbook(file_output) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel.test") + + def test_Excel_Add_sheet(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" + ) + excel_file_name = file_name.replace(".json", ".xlsx") + book = Workbook(excel_file_name) + excel = Excel(file_name=file_name, book=book) + excel.report() + report = Excel( + file_name="results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20" + "_0.json", + book=book, + ) + report.report() + book.close() + book = load_workbook(excel_file_name) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel_add_STree.test") + sheet = book["ODTE"] + self.check_excel_sheet(sheet, "test_files/excel_add_ODTE.test") diff --git a/benchmark/tests/GridSearch_test.py b/benchmark/tests/GridSearch_test.py index 225fb12..659f51f 100644 --- a/benchmark/tests/GridSearch_test.py +++ b/benchmark/tests/GridSearch_test.py @@ -24,6 +24,7 @@ class GridSearchTest(unittest.TestCase): "progress_bar": False, "platform": "Test", "folds": 2, + "test": True, } return GridSearch(**params) @@ -66,18 +67,15 @@ class GridSearchTest(unittest.TestCase): "kernel": "liblinear", "multiclass_strategy": "ovr", }, - "", + "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s", ], "balloons": [ 0.625, {"C": 1.0, "kernel": "linear", "multiclass_strategy": "ovr"}, - "", + "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s", ], } - dt = Datasets() - for dataset in dt: - self.assertEqual(data[dataset][0], expected[dataset][0]) - self.assertSequenceEqual(data[dataset][1], expected[dataset][1]) + self.assertSequenceEqual(data, expected) def test_duration_message(self): expected = ["47.234s", "5.421m", "1.177h"] diff --git a/benchmark/tests/Report_test.py b/benchmark/tests/Report_test.py new file mode 100644 index 0000000..f6c8302 --- /dev/null +++ b/benchmark/tests/Report_test.py @@ -0,0 +1,99 @@ +import os +import unittest +from io import StringIO +from unittest.mock import patch +from ..Results import Report, BaseReport, ReportBest +from ..Utils import Symbols + + +class ReportTest(unittest.TestCase): + def __init__(self, *args, **kwargs): + os.chdir(os.path.dirname(os.path.abspath(__file__))) + super().__init__(*args, **kwargs) + + def test_BaseReport(self): + with patch.multiple(BaseReport, __abstractmethods__=set()): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-09-30_11:" + "42:07_0.json" + ) + a = BaseReport(file_name) + self.assertIsNone(a.header()) + self.assertIsNone(a.print_line(None)) + self.assertIsNone(a.footer(accuracy=1.0)) + + def test_report_with_folder(self): + report = Report( + file_name="results/results_accuracy_STree_iMac27_2021-09-30_11:" + "42:07_0.json" + ) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report.test", "r") as f: + expected = f.read() + self.assertEqual(fake_out.getvalue(), expected) + + def test_report_without_folder(self): + report = Report( + file_name="results_accuracy_STree_iMac27_2021-09-30_11:42:07_0" + ".json" + ) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report.test", "r") as f: + expected = f.read() + self.assertEqual(fake_out.getvalue(), expected) + + def test_report_compared(self): + report = Report( + file_name="results_accuracy_STree_iMac27_2021-09-30_11:42:07_0" + ".json", + compare=True, + ) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report_compared.test", "r") as f: + expected = f.read() + self.assertEqual(fake_out.getvalue(), expected) + + def test_compute_status(self): + file_name = "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" + report = Report( + file_name=file_name, + compare=True, + ) + with patch("sys.stdout", new=StringIO()): + report.report() + res = report._compute_status("balloons", 0.99) + self.assertEqual(res, Symbols.better_best) + res = report._compute_status("balloons", 1.0) + self.assertEqual(res, Symbols.better_best) + + def test_report_file_not_found(self): + with self.assertRaises(FileNotFoundError): + _ = Report("unknown_file") + + def test_report_best(self): + report = ReportBest("accuracy", "STree", best=True, grid=False) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report_best.test", "r") as f: + expected = f.read() + self.assertEqual(fake_out.getvalue(), expected) + + def test_report_grid(self): + report = ReportBest("accuracy", "STree", best=False, grid=True) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report_grid.test", "r") as f: + expected = f.read() + self.assertEqual(fake_out.getvalue(), expected) + + def test_report_best_both(self): + report = ReportBest("accuracy", "STree", best=True, grid=True) + with patch("sys.stdout", new=StringIO()) as fake_out: + report.report() + with open("test_files/report_best.test", "r") as f: + expected = f.read() + + self.assertEqual(fake_out.getvalue(), expected) diff --git a/benchmark/tests/SQL_test.py b/benchmark/tests/SQL_test.py new file mode 100644 index 0000000..1f34fe3 --- /dev/null +++ b/benchmark/tests/SQL_test.py @@ -0,0 +1,81 @@ +import os +import csv +import unittest +from openpyxl import load_workbook +from xlsxwriter import Workbook +from ..Results import Excel +from ..Utils import Folders + + +class SQLTest(unittest.TestCase): + def __init__(self, *args, **kwargs): + os.chdir(os.path.dirname(os.path.abspath(__file__))) + super().__init__(*args, **kwargs) + + def tearDown(self) -> None: + files = [ + "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.xlsx", + "results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.xlsx", + "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.sql", + ] + for file_name in files: + file_name = os.path.join(Folders.results, file_name) + if os.path.exists(file_name): + os.remove(file_name) + return super().tearDown() + + def check_excel_sheet(self, sheet, file_name): + with open(file_name, "r") as f: + expected = csv.reader(f, delimiter=";") + for row, col, value in expected: + if value.isdigit(): + value = int(value) + else: + try: + value = float(value) + except ValueError: + pass + self.assertEqual(sheet.cell(int(row), int(col)).value, value) + + def test_report_excel_compared(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" + ) + report = Excel(file_name, compare=True) + report.report() + file_output = report.get_file_name() + book = load_workbook(file_output) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel_compared.test") + + def test_report_excel(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json" + ) + report = Excel(file_name, compare=False) + report.report() + file_output = report.get_file_name() + book = load_workbook(file_output) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel.test") + + def test_Excel_Add_sheet(self): + file_name = ( + "results/results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json" + ) + excel_file_name = file_name.replace(".json", ".xlsx") + book = Workbook(excel_file_name) + excel = Excel(file_name=file_name, book=book) + excel.report() + report = Excel( + file_name="results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20" + "_0.json", + book=book, + ) + report.report() + book.close() + book = load_workbook(excel_file_name) + sheet = book["STree"] + self.check_excel_sheet(sheet, "test_files/excel_add_STree.test") + sheet = book["ODTE"] + self.check_excel_sheet(sheet, "test_files/excel_add_ODTE.test") diff --git a/benchmark/tests/Util_test.py b/benchmark/tests/Util_test.py index b4c341a..5c36935 100644 --- a/benchmark/tests/Util_test.py +++ b/benchmark/tests/Util_test.py @@ -121,6 +121,7 @@ class UtilTest(unittest.TestCase): "results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json", "results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0." "json", + "results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json", ], ) self.assertCountEqual( diff --git a/benchmark/tests/__init__.py b/benchmark/tests/__init__.py index a935e84..2d9851b 100644 --- a/benchmark/tests/__init__.py +++ b/benchmark/tests/__init__.py @@ -4,6 +4,9 @@ from .Dataset_test import DatasetTest from .BestResults_test import BestResultTest from .Experiment_test import ExperimentTest from .GridSearch_test import GridSearchTest +from .Report_test import ReportTest +from .Excel_test import ExcelTest +from .SQL_test import SQLTest all = [ "UtilTest", @@ -12,4 +15,7 @@ all = [ "BestResultTest", "ExperimentTest", "GridSearchTest", + "ReportTest", + "ExcelTest", + "SQLTest", ] diff --git a/benchmark/tests/results/grid_output_accuracy_STree.json b/benchmark/tests/results/grid_output_accuracy_STree.json index 0cde0e8..7f197d6 100644 --- a/benchmark/tests/results/grid_output_accuracy_STree.json +++ b/benchmark/tests/results/grid_output_accuracy_STree.json @@ -6,7 +6,7 @@ "kernel": "liblinear", "multiclass_strategy": "ovr" }, - "v. 1.2.4, Computed on Test on 2022-04-24 at 00:18:17 took 0.261s" + "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s" ], "balloons": [ 0.625, @@ -15,6 +15,6 @@ "kernel": "linear", "multiclass_strategy": "ovr" }, - "v. 1.2.4, Computed on Test on 2022-04-24 at 00:18:17 took 0.478s" + "v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s" ] } \ No newline at end of file diff --git a/benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json b/benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json new file mode 100644 index 0000000..b2485f7 --- /dev/null +++ b/benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json @@ -0,0 +1,57 @@ +{ + "score_name": "accuracy", + "title": "Gridsearched hyperparams v022.1b random_init", + "model": "ODTE", + "version": "0.3.2", + "stratified": false, + "folds": 5, + "date": "2022-04-20", + "time": "10:52:20", + "duration": 22591.471411943436, + "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], + "platform": "Galgo", + "results": [ + { + "dataset": "balance-scale", + "samples": 625, + "features": 4, + "classes": 3, + "hyperparameters": { + "base_estimator__C": 57, + "base_estimator__gamma": 0.1, + "base_estimator__kernel": "rbf", + "base_estimator__multiclass_strategy": "ovr", + "n_estimators": 100, + "n_jobs": -1 + }, + "nodes": 7.361199999999999, + "leaves": 4.180599999999999, + "depth": 3.536, + "score": 0.96352, + "score_std": 0.024949741481626608, + "time": 0.31663217544555666, + "time_std": 0.19918813895255585 + }, + { + "dataset": "balloons", + "samples": 16, + "features": 4, + "classes": 2, + "hyperparameters": { + "base_estimator__C": 5, + "base_estimator__gamma": 0.14, + "base_estimator__kernel": "rbf", + "base_estimator__multiclass_strategy": "ovr", + "n_estimators": 100, + "n_jobs": -1 + }, + "nodes": 2.9951999999999996, + "leaves": 1.9975999999999998, + "depth": 1.9975999999999998, + "score": 0.785, + "score_std": 0.2461311755051675, + "time": 0.11560620784759522, + "time_std": 0.012784241828599895 + } + ] +} diff --git a/benchmark/tests/test_files/excel.test b/benchmark/tests/test_files/excel.test new file mode 100644 index 0000000..8cf2ef2 --- /dev/null +++ b/benchmark/tests/test_files/excel.test @@ -0,0 +1,48 @@ +1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07" +2;1;" With gridsearched hyperparameters" +3;1;" Score is accuracy" +3;2;" Execution time" +3;5;" 624.25 s" +3;7;" " +3;8;"Platform" +3;9;"iMac27" +3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]" +4;5;" 0.17 h" +4;10;"Stratified: False" +6;1;"Dataset" +6;2;"Samples" +6;3;"Features" +6;4;"Classes" +6;5;"Nodes" +6;6;"Leaves" +6;7;"Depth" +6;8;"Score" +6;9;"Score Std." +6;10;"Time" +6;11;"Time Std." +6;12;"Hyperparameters" +7;1;"balance-scale" +7;2;"625" +7;3;"4" +7;4;"3" +7;5;"7" +7;6;"4" +7;7;"3" +7;8;"0.97056" +7;9;"0.0150468069702512" +7;10;"0.01404867172241211" +7;11;"0.002026269126958884" +7;12;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" +8;1;"balloons" +8;2;"16" +8;3;"4" +8;4;"2" +8;5;"3" +8;6;"2" +8;7;"2" +8;8;"0.86" +8;9;"0.2850146195080759" +8;10;"0.0008541679382324218" +8;11;"3.629469326417878e-05" +8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" +10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_ODTE.test b/benchmark/tests/test_files/excel_add_ODTE.test new file mode 100644 index 0000000..f97bd71 --- /dev/null +++ b/benchmark/tests/test_files/excel_add_ODTE.test @@ -0,0 +1,48 @@ +1;1;" Report ODTE ver. 0.3.2 with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20" +2;1;" Gridsearched hyperparams v022.1b random_init" +3;1;" Score is accuracy" +3;2;" Execution time" +3;5;"22,591.47 s" +3;7;" " +3;8;"Platform" +3;9;"Galgo" +3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]" +4;5;" 6.28 h" +4;10;"Stratified: False" +6;1;"Dataset" +6;2;"Samples" +6;3;"Features" +6;4;"Classes" +6;5;"Nodes" +6;6;"Leaves" +6;7;"Depth" +6;8;"Score" +6;9;"Score Std." +6;10;"Time" +6;11;"Time Std." +6;12;"Hyperparameters" +7;1;"balance-scale" +7;2;"625" +7;3;"4" +7;4;"3" +7;5;"7.361199999999999" +7;6;"4.180599999999999" +7;7;"3.536" +7;8;"0.96352" +7;9;"0.02494974148162661" +7;10;"0.3166321754455567" +7;11;"0.1991881389525559" +7;12;"{'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" +8;1;"balloons" +8;2;"16" +8;3;"4" +8;4;"2" +8;5;"2.9952" +8;6;"1.9976" +8;7;"1.9976" +8;8;"0.785" +8;9;"0.2461311755051675" +8;10;"0.1156062078475952" +8;11;"0.0127842418285999" +8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}" +10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_add_STree.test b/benchmark/tests/test_files/excel_add_STree.test new file mode 100644 index 0000000..f2d56ca --- /dev/null +++ b/benchmark/tests/test_files/excel_add_STree.test @@ -0,0 +1,46 @@ +1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40" +2;1;" default A" +3;1;" Score is accuracy" +3;2;" Execution time" +3;5;"3,395.01 s" +3;7;" " +3;8;"Platform" +3;9;"iMac27" +3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]" +4;5;" 0.94 h" +4;10;"Stratified: False" +6;1;"Dataset" +6;2;"Samples" +6;3;"Features" +6;4;"Classes" +6;5;"Nodes" +6;6;"Leaves" +6;7;"Depth" +6;8;"Score" +6;9;"Score Std." +6;10;"Time" +6;11;"Time Std." +7;1;"balance-scale" +7;2;"625" +7;3;"4" +7;4;"3" +7;5;"11.08" +7;6;"5.9" +7;7;"5.9" +7;8;"0.98" +7;9;"0.001" +7;10;"0.2852065515518188" +7;11;"0.06031593282605064" +8;1;"balloons" +8;2;"16" +8;3;"4" +8;4;"2" +8;5;"4.12" +8;6;"2.56" +8;7;"2.56" +8;8;"0.695" +8;9;"0.2756860130252853" +8;10;"0.02120100021362305" +8;11;"0.003526023309468471" +8;12;"{'splitter': 'iwss', 'max_features': 'auto'}" +10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416" \ No newline at end of file diff --git a/benchmark/tests/test_files/excel_compared.test b/benchmark/tests/test_files/excel_compared.test new file mode 100644 index 0000000..eb7239d --- /dev/null +++ b/benchmark/tests/test_files/excel_compared.test @@ -0,0 +1,52 @@ +1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07" +2;1;" With gridsearched hyperparameters" +3;1;" Score is accuracy" +3;2;" Execution time" +3;5;" 624.25 s" +3;8;"Platform" +3;9;"iMac27" +3;10;"Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]" +4;10;"Stratified: False" +6;1;"Dataset" +6;2;"Samples" +6;3;"Features" +6;4;"Classes" +6;5;"Nodes" +6;6;"Leaves" +6;7;"Depth" +6;8;"Score" +6;9;"Stat" +6;10;"Score Std." +6;11;"Time" +6;12;"Time Std." +6;13;"Hyperparameters" +7;1;"balance-scale" +7;2;625 +7;3;4 +7;4;3 +7;5;7 +7;6;4 +7;7;3 +7;8;0.97056 +7;9;" " +7;10;0.0150468069702512 +7;11;0.01404867172241211 +7;12;0.002026269126958884 +7;13;"{'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" +8;1;"balloons" +8;2;16 +8;3;4 +8;4;2 +8;5;3 +8;6;2 +8;7;2 +8;8;0.86 +8;9;"✔" +8;10;0.2850146195080759 +8;11;0.0008541679382324218 +8;12;3.629469326417878e-05 +8;13;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}" +11;2;"✔" +11;3;1 +11;4;"Equal to best" +13;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454" \ No newline at end of file diff --git a/benchmark/tests/test_files/report.test b/benchmark/tests/test_files/report.test new file mode 100644 index 0000000..2e5b77e --- /dev/null +++ b/benchmark/tests/test_files/report.test @@ -0,0 +1,15 @@ +*********************************************************************************************************************** +* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * +* With gridsearched hyperparameters * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 624.25 seconds, 0.17 hours, on iMac27 * +* Score is accuracy * +*********************************************************************************************************************** + +Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ===== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +*********************************************************************************************************************** +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +*********************************************************************************************************************** diff --git a/benchmark/tests/test_files/report_best.test b/benchmark/tests/test_files/report_best.test new file mode 100644 index 0000000..f166f28 --- /dev/null +++ b/benchmark/tests/test_files/report_best.test @@ -0,0 +1,11 @@ +****************************************************************************************************************************************************************** +* Report Best accuracy Scores with STree in any platform * +****************************************************************************************************************************************************************** + +Dataset Score File/Message Hyperparameters +============================== ======== ============================================================================ ============================================= +balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'iwss', 'max_features': 'auto'} +balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +****************************************************************************************************************************************************************** +* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0457 * +****************************************************************************************************************************************************************** diff --git a/benchmark/tests/test_files/report_compared.test b/benchmark/tests/test_files/report_compared.test new file mode 100644 index 0000000..2b675eb --- /dev/null +++ b/benchmark/tests/test_files/report_compared.test @@ -0,0 +1,16 @@ +*********************************************************************************************************************** +* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 * +* With gridsearched hyperparameters * +* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False * +* Execution took 624.25 seconds, 0.17 hours, on iMac27 * +* Score is accuracy * +*********************************************************************************************************************** + +Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters +============================== ===== ===== === ======= ======= ======= =============== ================ =============== +balance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +balloons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'} +*********************************************************************************************************************** +* ✔ Equal to best .....: 1 * +* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 * +*********************************************************************************************************************** diff --git a/benchmark/tests/test_files/report_grid.test b/benchmark/tests/test_files/report_grid.test new file mode 100644 index 0000000..7aa394f --- /dev/null +++ b/benchmark/tests/test_files/report_grid.test @@ -0,0 +1,11 @@ +****************************************************************************************************************************************************************** +* Report Grid accuracy Scores with STree in any platform * +****************************************************************************************************************************************************************** + +Dataset Score File/Message Hyperparameters +============================== ======== ============================================================================ ============================================= +balance-scale 0.919995 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'} +balloons 0.625000 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'} +****************************************************************************************************************************************************************** +* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0384 * +****************************************************************************************************************************************************************** diff --git a/requirements.txt b/requirements.txt index 370cf2c..9dedbe8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ scikit-learn odte mufs xlsxwriter +openpyxl tqdm