Add new models and repair tests

2025-08-17 16:35:54 +00:00 · 2023-02-21 17:08:50 +01:00
parent 27bf414db9
commit e2504c7ae9
21 changed files with 29 additions and 279 deletions
--- a/benchmark/Datasets.py
+++ b/benchmark/Datasets.py
@@ -229,7 +229,7 @@ class Datasets:
        -------
        tuple (X, y) of numpy.ndarray
        """
-        discretiz = FImdlp(algorithm=0)
+        discretiz = FImdlp()
        return discretiz.fit_transform(X, y)
    def __iter__(self) -> Diterator:
--- a/benchmark/Experiments.py
+++ b/benchmark/Experiments.py
@@ -240,7 +240,7 @@ class Experiment:
                    cv=kfold,
                    fit_params=fit_params,
                    return_estimator=True,
-                    scoring=self.score_name,
+                    scoring=self.score_name.replace("-", "_"),
                )
            if np.isnan(res["test_score"]).any():
                if not self.ignore_nan:
--- a/benchmark/Models.py
+++ b/benchmark/Models.py
@@ -8,7 +8,7 @@ from sklearn.ensemble import (
 )
 from sklearn.svm import SVC
 from stree import Stree
-from bayesclass.clfs import TAN, KDB, AODE, KDBNew, TANNew
+from bayesclass.clfs import TAN, KDB, AODE, KDBNew, TANNew, AODENew
 from wodt import Wodt
 from odte import Odte
 from xgboost import XGBClassifier
--- a/benchmark/tests/.env
+++ b/benchmark/tests/.env
@@ -9,4 +9,5 @@ seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
 discretize=0
 nodes=Nodes
 leaves=Leaves
-depth=Depth
+depth=Depth
 fit_features=0
--- a/benchmark/tests/.env.arff
+++ b/benchmark/tests/.env.arff
@@ -8,4 +8,5 @@ seeds=[271, 314, 171]
 discretize=1
 nodes=Nodes
 leaves=Leaves
-depth=Depth
+depth=Depth
 fit_features=1
--- a/benchmark/tests/.env.dist
+++ b/benchmark/tests/.env.dist
@@ -9,4 +9,5 @@ seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
 discretize=0
 nodes=Nodes
 leaves=Leaves
-depth=Depth
+depth=Depth
 fit_features=0
--- a/benchmark/tests/.env.surcov
+++ b/benchmark/tests/.env.surcov
@@ -9,4 +9,5 @@ seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
 discretize=0
 nodes=Nodes
 leaves=Leaves
-depth=Depth
+depth=Depth
 fit_features=0
--- a/benchmark/tests/Experiment_test.py
+++ b/benchmark/tests/Experiment_test.py
@@ -143,9 +143,9 @@ class ExperimentTest(TestBase):
        expected = {
            "state_names": {
                "sepallength": [0, 1, 2],
-                "sepalwidth": [0, 1, 3, 4],
+                "sepalwidth": [0, 1, 2, 3, 4, 5],
                "petallength": [0, 1, 2, 3],
-                "petalwidth": [0, 1, 2, 3],
+                "petalwidth": [0, 1, 2],
            },
            "features": [
                "sepallength",
@@ -161,6 +161,9 @@ class ExperimentTest(TestBase):
            self.assertEqual(computed["state_names"][key], value)
        for feature in expected["features"]:
            self.assertIn(feature, computed["features"])
        # Ask for states of a dataset that does not exist
        computed = exp._build_fit_params("not_existing")
        self.assertTrue("states" not in computed)
    @patch("sys.stdout", new_callable=StringIO)
    def test_experiment_with_nan_not_ignored(self, mock_output):
--- a/benchmark/tests/Util_test.py
+++ b/benchmark/tests/Util_test.py
@@ -183,6 +183,7 @@ class UtilTest(TestBase):
            "nodes": "Nodes",
            "leaves": "Leaves",
            "depth": "Depth",
            "fit_features": "0",
        }
        computed = EnvData().load()
        self.assertDictEqual(computed, expected)
--- a/benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json
+++ b/benchmark/tests/results/results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json
@@ -1,59 +1 @@
-{
+{"score_name": "accuracy", "title": "Gridsearched hyperparams v022.1b random_init", "model": "ODTE", "version": "0.3.2", "language_version": "3.11x", "language": "Python", "stratified": false, "folds": 5, "date": "2022-04-20", "time": "10:52:20", "duration": 22591.471411943436, "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], "platform": "Galgo", "results": [{"dataset": "balance-scale", "samples": 625, "features": 4, "classes": 3, "hyperparameters": {"base_estimator__C": 57, "base_estimator__gamma": 0.1, "base_estimator__kernel": "rbf", "base_estimator__multiclass_strategy": "ovr", "n_estimators": 100, "n_jobs": -1}, "nodes": 7.361199999999999, "leaves": 4.180599999999999, "depth": 3.536, "score": 0.96352, "score_std": 0.024949741481626608, "time": 0.31663217544555666, "time_std": 0.19918813895255585}, {"dataset": "balloons", "samples": 16, "features": 4, "classes": 2, "hyperparameters": {"base_estimator__C": 5, "base_estimator__gamma": 0.14, "base_estimator__kernel": "rbf", "base_estimator__multiclass_strategy": "ovr", "n_estimators": 100, "n_jobs": -1}, "nodes": 2.9951999999999996, "leaves": 1.9975999999999998, "depth": 1.9975999999999998, "score": 0.785, "score_std": 0.2461311755051675, "time": 0.11560620784759522, "time_std": 0.012784241828599895}], "discretized": false}
  "score_name": "accuracy",
  "title": "Gridsearched hyperparams v022.1b random_init",
  "model": "ODTE",
  "version": "0.3.2",
  "language_version": "3.11x",
  "language": "Python",
  "stratified": false,
  "folds": 5,
  "date": "2022-04-20",
  "time": "10:52:20",
  "duration": 22591.471411943436,
  "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1],
  "platform": "Galgo",
  "results": [
    {
      "dataset": "balance-scale",
      "samples": 625,
      "features": 4,
      "classes": 3,
      "hyperparameters": {
        "base_estimator__C": 57,
        "base_estimator__gamma": 0.1,
        "base_estimator__kernel": "rbf",
        "base_estimator__multiclass_strategy": "ovr",
        "n_estimators": 100,
        "n_jobs": -1
      },
      "nodes": 7.361199999999999,
      "leaves": 4.180599999999999,
      "depth": 3.536,
      "score": 0.96352,
      "score_std": 0.024949741481626608,
      "time": 0.31663217544555666,
      "time_std": 0.19918813895255585
    },
    {
      "dataset": "balloons",
      "samples": 16,
      "features": 4,
      "classes": 2,
      "hyperparameters": {
        "base_estimator__C": 5,
        "base_estimator__gamma": 0.14,
        "base_estimator__kernel": "rbf",
        "base_estimator__multiclass_strategy": "ovr",
        "n_estimators": 100,
        "n_jobs": -1
      },
      "nodes": 2.9951999999999996,
      "leaves": 1.9975999999999998,
      "depth": 1.9975999999999998,
      "score": 0.785,
      "score_std": 0.2461311755051675,
      "time": 0.11560620784759522,
      "time_std": 0.012784241828599895
    }
  ]
 }
--- a/benchmark/tests/results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json
+++ b/benchmark/tests/results/results_accuracy_RandomForest_iMac27_2022-01-14_12:39:30_0.json
@@ -1,45 +1 @@
-{
+{"score_name": "accuracy", "title": "Test default paramters with RandomForest", "model": "RandomForest", "version": "-", "language_version": "3.11x", "language": "Python", "stratified": false, "folds": 5, "date": "2022-01-14", "time": "12:39:30", "duration": 272.7363500595093, "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], "platform": "iMac27", "results": [{"dataset": "balance-scale", "samples": 625, "features": 4, "classes": 3, "hyperparameters": {}, "nodes": 196.91440000000003, "leaves": 98.42, "depth": 10.681399999999998, "score": 0.83616, "score_std": 0.02649630917694009, "time": 0.08222018241882324, "time_std": 0.0013026326815120633}, {"dataset": "balloons", "samples": 16, "features": 4, "classes": 2, "hyperparameters": {}, "nodes": 9.110800000000001, "leaves": 4.58, "depth": 3.0982, "score": 0.625, "score_std": 0.24958298553119898, "time": 0.07016648769378662, "time_std": 0.002460508923990468}], "discretized": false}
  "score_name": "accuracy",
  "title": "Test default paramters with RandomForest",
  "model": "RandomForest",
  "version": "-",
  "language_version": "3.11x",
  "language": "Python",
  "stratified": false,
  "folds": 5,
  "date": "2022-01-14",
  "time": "12:39:30",
  "duration": 272.7363500595093,
  "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1],
  "platform": "iMac27",
  "results": [
    {
      "dataset": "balance-scale",
      "samples": 625,
      "features": 4,
      "classes": 3,
      "hyperparameters": {},
      "nodes": 196.91440000000003,
      "leaves": 98.42,
      "depth": 10.681399999999998,
      "score": 0.83616,
      "score_std": 0.02649630917694009,
      "time": 0.08222018241882324,
      "time_std": 0.0013026326815120633
    },
    {
      "dataset": "balloons",
      "samples": 16,
      "features": 4,
      "classes": 2,
      "hyperparameters": {},
      "nodes": 9.110800000000001,
      "leaves": 4.58,
      "depth": 3.0982,
      "score": 0.625,
      "score_std": 0.24958298553119898,
      "time": 0.07016648769378662,
      "time_std": 0.002460508923990468
    }
  ]
 }
--- a/benchmark/tests/results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json
+++ b/benchmark/tests/results/results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json
@@ -1,57 +1 @@
-{
+{"score_name": "accuracy", "model": "STree", "stratified": false, "folds": 5, "language_version": "3.11x", "language": "Python", "date": "2021-09-30", "time": "11:42:07", "duration": 624.2505249977112, "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], "platform": "iMac27", "results": [{"dataset": "balance-scale", "samples": 625, "features": 4, "classes": 3, "hyperparameters": {"C": 10000, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000, "multiclass_strategy": "ovr"}, "nodes": 7.0, "leaves": 4.0, "depth": 3.0, "score": 0.97056, "score_std": 0.015046806970251203, "time": 0.01404867172241211, "time_std": 0.002026269126958884}, {"dataset": "balloons", "samples": 16, "features": 4, "classes": 2, "hyperparameters": {"C": 7, "gamma": 0.1, "kernel": "rbf", "max_iter": 10000, "multiclass_strategy": "ovr"}, "nodes": 3.0, "leaves": 2.0, "depth": 2.0, "score": 0.86, "score_std": 0.28501461950807594, "time": 0.0008541679382324218, "time_std": 3.629469326417878e-05}], "title": "With gridsearched hyperparameters", "version": "1.2.3", "discretized": false}
  "score_name": "accuracy",
  "model": "STree",
  "stratified": false,
  "folds": 5,
  "language_version": "3.11x",
  "language": "Python",
  "date": "2021-09-30",
  "time": "11:42:07",
  "duration": 624.2505249977112,
  "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1],
  "platform": "iMac27",
  "results": [
    {
      "dataset": "balance-scale",
      "samples": 625,
      "features": 4,
      "classes": 3,
      "hyperparameters": {
        "C": 10000,
        "gamma": 0.1,
        "kernel": "rbf",
        "max_iter": 10000,
        "multiclass_strategy": "ovr"
      },
      "nodes": 7.0,
      "leaves": 4.0,
      "depth": 3.0,
      "score": 0.97056,
      "score_std": 0.015046806970251203,
      "time": 0.01404867172241211,
      "time_std": 0.002026269126958884
    },
    {
      "dataset": "balloons",
      "samples": 16,
      "features": 4,
      "classes": 2,
      "hyperparameters": {
        "C": 7,
        "gamma": 0.1,
        "kernel": "rbf",
        "max_iter": 10000,
        "multiclass_strategy": "ovr"
      },
      "nodes": 3.0,
      "leaves": 2.0,
      "depth": 2.0,
      "score": 0.86,
      "score_std": 0.28501461950807594,
      "time": 0.0008541679382324218,
      "time_std": 3.629469326417878e-5
    }
  ],
  "title": "With gridsearched hyperparameters",
  "version": "1.2.3"
 }
--- a/benchmark/tests/results/results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json
+++ b/benchmark/tests/results/results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json
@@ -1,51 +1 @@
-{
+{"score_name": "accuracy", "model": "STree", "language": "Python", "language_version": "3.11x", "stratified": false, "folds": 5, "date": "2021-10-27", "time": "09:40:40", "duration": 3395.009148836136, "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], "platform": "iMac27", "results": [{"dataset": "balance-scale", "samples": 625, "features": 4, "classes": 3, "hyperparameters": {"splitter": "best", "max_features": "auto"}, "nodes": 11.08, "leaves": 5.9, "depth": 5.9, "score": 0.98, "score_std": 0.001, "time": 0.28520655155181884, "time_std": 0.06031593282605064}, {"dataset": "balloons", "samples": 16, "features": 4, "classes": 2, "hyperparameters": {"splitter": "best", "max_features": "auto"}, "nodes": 4.12, "leaves": 2.56, "depth": 2.56, "score": 0.695, "score_std": 0.2756860130252853, "time": 0.021201000213623047, "time_std": 0.003526023309468471}], "title": "default A", "version": "1.2.3", "discretized": false}
  "score_name": "accuracy",
  "model": "STree",
  "language": "Python",
  "language_version": "3.11x",
  "stratified": false,
  "folds": 5,
  "date": "2021-10-27",
  "time": "09:40:40",
  "duration": 3395.009148836136,
  "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1],
  "platform": "iMac27",
  "results": [
    {
      "dataset": "balance-scale",
      "samples": 625,
      "features": 4,
      "classes": 3,
      "hyperparameters": {
        "splitter": "best",
        "max_features": "auto"
      },
      "nodes": 11.08,
      "leaves": 5.9,
      "depth": 5.9,
      "score": 0.98,
      "score_std": 0.001,
      "time": 0.28520655155181884,
      "time_std": 0.06031593282605064
    },
    {
      "dataset": "balloons",
      "samples": 16,
      "features": 4,
      "classes": 2,
      "hyperparameters": {
        "splitter": "best",
        "max_features": "auto"
      },
      "nodes": 4.12,
      "leaves": 2.56,
      "depth": 2.56,
      "score": 0.695,
      "score_std": 0.2756860130252853,
      "time": 0.021201000213623047,
      "time_std": 0.003526023309468471
    }
  ],
  "title": "default A",
  "version": "1.2.3"
 }
--- a/benchmark/tests/results/results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json
+++ b/benchmark/tests/results/results_accuracy_STree_macbook-pro_2021-11-01_19:17:07_0.json
@@ -1,51 +1 @@
-{
+{"score_name": "accuracy", "model": "STree", "language_version": "3.11x", "language": "Python", "stratified": false, "folds": 5, "date": "2021-11-01", "time": "19:17:07", "duration": 4115.042420864105, "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1], "platform": "macbook-pro", "results": [{"dataset": "balance-scale", "samples": 625, "features": 4, "classes": 3, "hyperparameters": {"max_features": "auto", "splitter": "mutual"}, "nodes": 18.78, "leaves": 9.88, "depth": 5.9, "score": 0.97, "score_std": 0.002, "time": 0.23330417156219482, "time_std": 0.048087665954193885}, {"dataset": "balloons", "samples": 16, "features": 4, "classes": 2, "hyperparameters": {"max_features": "auto", "splitter": "mutual"}, "nodes": 4.72, "leaves": 2.86, "depth": 2.78, "score": 0.5566666666666668, "score_std": 0.2941277122460771, "time": 0.021352062225341795, "time_std": 0.005808742398555902}], "title": "default B", "version": "1.2.3", "discretized": false}
  "score_name": "accuracy",
  "model": "STree",
  "language_version": "3.11x",
  "language": "Python",
  "stratified": false,
  "folds": 5,
  "date": "2021-11-01",
  "time": "19:17:07",
  "duration": 4115.042420864105,
  "seeds": [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1],
  "platform": "macbook-pro",
  "results": [
    {
      "dataset": "balance-scale",
      "samples": 625,
      "features": 4,
      "classes": 3,
      "hyperparameters": {
        "max_features": "auto",
        "splitter": "mutual"
      },
      "nodes": 18.78,
      "leaves": 9.88,
      "depth": 5.9,
      "score": 0.97,
      "score_std": 0.002,
      "time": 0.23330417156219482,
      "time_std": 0.048087665954193885
    },
    {
      "dataset": "balloons",
      "samples": 16,
      "features": 4,
      "classes": 2,
      "hyperparameters": {
        "max_features": "auto",
        "splitter": "mutual"
      },
      "nodes": 4.72,
      "leaves": 2.86,
      "depth": 2.78,
      "score": 0.5566666666666668,
      "score_std": 0.2941277122460771,
      "time": 0.021352062225341795,
      "time_std": 0.005808742398555902
    }
  ],
  "title": "default B",
  "version": "1.2.3"
 }
--- a/benchmark/tests/test_files/be_list_report.test
+++ b/benchmark/tests/test_files/be_list_report.test
@@ -6,7 +6,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-11-01 19:17:07             *
 [94m* default B                                                                                                             *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took 4115.04 seconds,   1.14 hours, on macbook-pro                                                          *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/be_main_best.test
+++ b/benchmark/tests/test_files/be_main_best.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25             *
 [94m* test                                                                                                                  *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took    0.80 seconds,   0.00 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/be_main_complete.test
+++ b/benchmark/tests/test_files/be_main_complete.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43             *
 [94m* test                                                                                                                  *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took    0.48 seconds,   0.00 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/be_main_dataset.test
+++ b/benchmark/tests/test_files/be_main_dataset.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28             *
 [94m* Test with only one dataset                                                                                            *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took    0.06 seconds,   0.00 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/be_main_grid.test
+++ b/benchmark/tests/test_files/be_main_grid.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06             *
 [94m* test                                                                                                                  *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took    0.89 seconds,   0.00 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/report.test
+++ b/benchmark/tests/test_files/report.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07             *
 [94m* With gridsearched hyperparameters                                                                                     *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took  624.25 seconds,   0.17 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************
--- a/benchmark/tests/test_files/report_compared.test
+++ b/benchmark/tests/test_files/report_compared.test
@@ -1,7 +1,7 @@
 [94m*************************************************************************************************************************
 [94m* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07             *
 [94m* With gridsearched hyperparameters                                                                                     *
-[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False                                              *
+[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False  Discretized: False                          *
 [94m* Execution took  624.25 seconds,   0.17 hours, on iMac27                                                               *
 [94m* Score is accuracy                                                                                                     *
 [94m*************************************************************************************************************************