Mirror of https://github.com/Doctorado-ML/STree.git
Fix mistake in computing multiclass node belief
- Set default criterion for split to entropy instead of gini.
- Set default max_iter to 1e5 instead of 1e3.
- Change the up-down criterion to match SVC multiclass behaviour.
- Fix the impurity method of splitting nodes.
- Update Jupyter notebooks.
File diff suppressed because one or more lines are too long
@@ -61,7 +61,13 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28) y.shape (100492,)\nFraud: 0.644% 647\nValid: 99.356% 99845\n"
+"text": [
+"Fraud: 0.173% 492\n",
+"Valid: 99.827% 284315\n",
+"X.shape (100492, 28) y.shape (100492,)\n",
+"Fraud: 0.652% 655\n",
+"Valid: 99.348% 99837\n"
+]
 }
 ],
 "source": [
@@ -129,12 +135,14 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Score Train: 0.9985784146480154\nScore Test: 0.9981093273185617\nTook 73.27 seconds\n"
+"text": [
+"Score Train: 0.9985073353804162\nScore Test: 0.9983746848878864\nTook 35.80 seconds\n"
+]
 }
 ],
 "source": [
 "now = time.time()\n",
-"clf = Stree(max_depth=3, random_state=random_state)\n",
+"clf = Stree(max_depth=3, random_state=random_state, max_iter=1e3)\n",
 "clf.fit(Xtrain, ytrain)\n",
 "print(\"Score Train: \", clf.score(Xtrain, ytrain))\n",
 "print(\"Score Test: \", clf.score(Xtest, ytest))\n",
@@ -169,13 +177,17 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Kernel: linear\tTime: 93.78 seconds\tScore Train: 0.9983083\tScore Test: 0.9983083\nKernel: rbf\tTime: 18.32 seconds\tScore Train: 0.9935602\tScore Test: 0.9935651\nKernel: poly\tTime: 69.68 seconds\tScore Train: 0.9973132\tScore Test: 0.9972801\n"
+"text": [
+"Kernel: linear\tTime: 49.66 seconds\tScore Train: 0.9983225\tScore Test: 0.9983083\n",
+"Kernel: rbf\tTime: 12.73 seconds\tScore Train: 0.9934891\tScore Test: 0.9934656\n",
+"Kernel: poly\tTime: 76.24 seconds\tScore Train: 0.9972706\tScore Test: 0.9969152\n"
+]
 }
 ],
 "source": [
 "for kernel in ['linear', 'rbf', 'poly']:\n",
 " now = time.time()\n",
-" clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
+" clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state, max_iter=1e3), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
 " clf.fit(Xtrain, ytrain)\n",
 " score_train = clf.score(Xtrain, ytrain)\n",
 " score_test = clf.score(Xtest, ytest)\n",
@@ -210,13 +222,17 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Kernel: linear\tTime: 387.06 seconds\tScore Train: 0.9985784\tScore Test: 0.9981093\nKernel: rbf\tTime: 144.00 seconds\tScore Train: 0.9992750\tScore Test: 0.9983415\nKernel: poly\tTime: 101.78 seconds\tScore Train: 0.9992466\tScore Test: 0.9981757\n"
+"text": [
+"Kernel: linear\tTime: 231.51 seconds\tScore Train: 0.9984931\tScore Test: 0.9983083\n",
+"Kernel: rbf\tTime: 114.77 seconds\tScore Train: 0.9992323\tScore Test: 0.9983083\n",
+"Kernel: poly\tTime: 67.87 seconds\tScore Train: 0.9993319\tScore Test: 0.9985074\n"
+]
 }
 ],
 "source": [
 "for kernel in ['linear', 'rbf', 'poly']:\n",
 " now = time.time()\n",
-" clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
+" clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state, max_iter=1e3), n_estimators=n_estimators, random_state=random_state)\n",
 " clf.fit(Xtrain, ytrain)\n",
 " score_train = clf.score(Xtrain, ytrain)\n",
 " score_test = clf.score(Xtest, ytest)\n",
@@ -235,12 +251,12 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.7.6-final"
+"version": "3.8.4-final"
 },
 "orig_nbformat": 2,
 "kernelspec": {
-"name": "python37664bitgeneralvenve3128601eb614c5da59c5055670b6040",
-"display_name": "Python 3.7.6 64-bit ('general': venv)"
+"name": "python38464bitgeneralf6de308d3831407c8bd68d4a5e328a38",
+"display_name": "Python 3.8.4 64-bit ('general')"
 },
 },
 "nbformat": 4,
File diff suppressed because one or more lines are too long
@@ -113,7 +113,9 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 32.976% 492\nValid: 67.024% 1000\n"
+"text": [
+"Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
+]
 }
 ]
 },
@@ -132,15 +134,38 @@
 "colab": {}
 },
 "source": [
-"parameters = {\n",
+"parameters = [{\n",
 " 'base_estimator': [Stree()],\n",
 " 'n_estimators': [10, 25],\n",
 " 'learning_rate': [.5, 1],\n",
 " 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
 " 'base_estimator__tol': [.1, 1e-02],\n",
-" 'base_estimator__max_depth': [3, 5],\n",
-" 'base_estimator__C': [7, 55],\n",
-" 'base_estimator__kernel': ['linear', 'poly', 'rbf']\n",
-"}"
+" 'base_estimator__max_depth': [3, 5, 7],\n",
+" 'base_estimator__C': [1, 7, 55],\n",
+" 'base_estimator__kernel': ['linear']\n",
+"},\n",
+"{\n",
+" 'base_estimator': [Stree()],\n",
+" 'n_estimators': [10, 25],\n",
+" 'learning_rate': [.5, 1],\n",
+" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
+" 'base_estimator__tol': [.1, 1e-02],\n",
+" 'base_estimator__max_depth': [3, 5, 7],\n",
+" 'base_estimator__C': [1, 7, 55],\n",
+" 'base_estimator__degree': [3, 5, 7],\n",
+" 'base_estimator__kernel': ['poly']\n",
+"},\n",
+"{\n",
+" 'base_estimator': [Stree()],\n",
+" 'n_estimators': [10, 25],\n",
+" 'learning_rate': [.5, 1],\n",
+" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
+" 'base_estimator__tol': [.1, 1e-02],\n",
+" 'base_estimator__max_depth': [3, 5, 7],\n",
+" 'base_estimator__C': [1, 7, 55],\n",
+" 'base_estimator__gamma': [.1, 1, 10],\n",
+" 'base_estimator__kernel': ['rbf']\n",
+"}]"
 ],
 "execution_count": 5,
 "outputs": []
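Passing param_grid as a list of dicts, rather than one flat dict, keeps kernel-specific hyperparameters (degree for poly, gamma for rbf) out of the other kernels' grids, so the grid search never fits an invalid combination. A minimal standalone sketch of the idea (SVC and the values shown are illustrative, not the notebook's grid):

    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    # Each dict is searched independently: 2 linear candidates + 4 rbf
    # candidates = 6 fits per CV fold, instead of a full cross-product.
    param_grid = [
        {"kernel": ["linear"], "C": [1, 10]},
        {"kernel": ["rbf"], "C": [1, 10], "gamma": [0.1, 1]},
    ]
    search = GridSearchCV(SVC(), param_grid, cv=5)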
@@ -153,7 +178,21 @@
 {
 "output_type": "execute_result",
 "data": {
-"text/plain": "{'C': 1.0,\n 'criterion': 'gini',\n 'degree': 3,\n 'gamma': 'scale',\n 'kernel': 'linear',\n 'max_depth': None,\n 'max_features': None,\n 'max_iter': 1000,\n 'min_samples_split': 0,\n 'random_state': None,\n 'split_criteria': 'max_samples',\n 'splitter': 'random',\n 'tol': 0.0001}"
+"text/plain": [
+"{'C': 1.0,\n",
+" 'criterion': 'entropy',\n",
+" 'degree': 3,\n",
+" 'gamma': 'scale',\n",
+" 'kernel': 'linear',\n",
+" 'max_depth': None,\n",
+" 'max_features': None,\n",
+" 'max_iter': 100000.0,\n",
+" 'min_samples_split': 0,\n",
+" 'random_state': None,\n",
+" 'split_criteria': 'impurity',\n",
+" 'splitter': 'random',\n",
+" 'tol': 0.0001}"
+]
 },
 "metadata": {},
 "execution_count": 6
@@ -183,18 +222,156 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Fitting 5 folds for each of 96 candidates, totalling 480 fits\n[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 2.0s\n[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 2.4s\n[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 2.7s\n[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 3.3s\n[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 4.3s\n[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 5.3s\n[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 6.6s\n[Parallel(n_jobs=-1)]: Done 69 tasks | elapsed: 8.1s\n[Parallel(n_jobs=-1)]: Done 82 tasks | elapsed: 9.4s\n[Parallel(n_jobs=-1)]: Done 97 tasks | elapsed: 10.1s\n[Parallel(n_jobs=-1)]: Done 112 tasks | elapsed: 11.1s\n[Parallel(n_jobs=-1)]: Done 129 tasks | elapsed: 12.3s\n[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 13.6s\n[Parallel(n_jobs=-1)]: Done 165 tasks | elapsed: 14.9s\n[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 16.2s\n[Parallel(n_jobs=-1)]: Done 205 tasks | elapsed: 17.6s\n[Parallel(n_jobs=-1)]: Done 226 tasks | elapsed: 19.1s\n[Parallel(n_jobs=-1)]: Done 249 tasks | elapsed: 21.6s\n[Parallel(n_jobs=-1)]: Done 272 tasks | elapsed: 25.9s\n[Parallel(n_jobs=-1)]: Done 297 tasks | elapsed: 30.4s\n[Parallel(n_jobs=-1)]: Done 322 tasks | elapsed: 36.7s\n[Parallel(n_jobs=-1)]: Done 349 tasks | elapsed: 38.1s\n[Parallel(n_jobs=-1)]: Done 376 tasks | elapsed: 39.6s\n[Parallel(n_jobs=-1)]: Done 405 tasks | elapsed: 41.9s\n[Parallel(n_jobs=-1)]: Done 434 tasks | elapsed: 44.9s\n[Parallel(n_jobs=-1)]: Done 465 tasks | elapsed: 48.2s\n[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed: 49.2s finished\n"
+"text": [
+"Fitting 5 folds for each of 1008 candidates, totalling 5040 fits\n",
+"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+"[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 2.6s\n",
+"[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 3.2s\n",
+"[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 3.5s\n",
+"[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 4.0s\n",
+"[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 4.5s\n",
+"[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 5.0s\n",
+"[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 5.5s\n",
+"[Parallel(n_jobs=-1)]: Done 69 tasks | elapsed: 6.2s\n",
+"[Parallel(n_jobs=-1)]: Done 82 tasks | elapsed: 7.1s\n",
+"[Parallel(n_jobs=-1)]: Done 97 tasks | elapsed: 8.2s\n",
+"[Parallel(n_jobs=-1)]: Done 112 tasks | elapsed: 9.6s\n",
+"[Parallel(n_jobs=-1)]: Done 129 tasks | elapsed: 11.0s\n",
+"[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 12.5s\n",
+"[Parallel(n_jobs=-1)]: Done 165 tasks | elapsed: 14.3s\n",
+"[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 16.0s\n",
+"[Parallel(n_jobs=-1)]: Done 205 tasks | elapsed: 18.1s\n",
+"[Parallel(n_jobs=-1)]: Done 226 tasks | elapsed: 20.1s\n",
+"[Parallel(n_jobs=-1)]: Done 249 tasks | elapsed: 21.9s\n",
+"[Parallel(n_jobs=-1)]: Done 272 tasks | elapsed: 23.4s\n",
+"[Parallel(n_jobs=-1)]: Done 297 tasks | elapsed: 24.9s\n",
+"[Parallel(n_jobs=-1)]: Done 322 tasks | elapsed: 26.6s\n",
+"[Parallel(n_jobs=-1)]: Done 349 tasks | elapsed: 29.3s\n",
+"[Parallel(n_jobs=-1)]: Done 376 tasks | elapsed: 31.9s\n",
+"[Parallel(n_jobs=-1)]: Done 405 tasks | elapsed: 35.5s\n",
+"[Parallel(n_jobs=-1)]: Done 434 tasks | elapsed: 38.7s\n",
+"[Parallel(n_jobs=-1)]: Done 465 tasks | elapsed: 42.1s\n",
+"[Parallel(n_jobs=-1)]: Done 496 tasks | elapsed: 46.1s\n",
+"[Parallel(n_jobs=-1)]: Done 529 tasks | elapsed: 52.7s\n",
+"[Parallel(n_jobs=-1)]: Done 562 tasks | elapsed: 58.1s\n",
+"[Parallel(n_jobs=-1)]: Done 597 tasks | elapsed: 1.1min\n",
+"[Parallel(n_jobs=-1)]: Done 632 tasks | elapsed: 1.3min\n",
+"[Parallel(n_jobs=-1)]: Done 669 tasks | elapsed: 1.5min\n",
+"[Parallel(n_jobs=-1)]: Done 706 tasks | elapsed: 1.6min\n",
+"[Parallel(n_jobs=-1)]: Done 745 tasks | elapsed: 1.7min\n",
+"[Parallel(n_jobs=-1)]: Done 784 tasks | elapsed: 1.8min\n",
+"[Parallel(n_jobs=-1)]: Done 825 tasks | elapsed: 1.8min\n",
+"[Parallel(n_jobs=-1)]: Done 866 tasks | elapsed: 1.8min\n",
+"[Parallel(n_jobs=-1)]: Done 909 tasks | elapsed: 1.9min\n",
+"[Parallel(n_jobs=-1)]: Done 952 tasks | elapsed: 1.9min\n",
+"[Parallel(n_jobs=-1)]: Done 997 tasks | elapsed: 2.0min\n",
+"[Parallel(n_jobs=-1)]: Done 1042 tasks | elapsed: 2.0min\n",
+"[Parallel(n_jobs=-1)]: Done 1089 tasks | elapsed: 2.1min\n",
+"[Parallel(n_jobs=-1)]: Done 1136 tasks | elapsed: 2.2min\n",
+"[Parallel(n_jobs=-1)]: Done 1185 tasks | elapsed: 2.2min\n",
+"[Parallel(n_jobs=-1)]: Done 1234 tasks | elapsed: 2.3min\n",
+"[Parallel(n_jobs=-1)]: Done 1285 tasks | elapsed: 2.4min\n",
+"[Parallel(n_jobs=-1)]: Done 1336 tasks | elapsed: 2.4min\n",
+"[Parallel(n_jobs=-1)]: Done 1389 tasks | elapsed: 2.5min\n",
+"[Parallel(n_jobs=-1)]: Done 1442 tasks | elapsed: 2.6min\n",
+"[Parallel(n_jobs=-1)]: Done 1497 tasks | elapsed: 2.6min\n",
+"[Parallel(n_jobs=-1)]: Done 1552 tasks | elapsed: 2.7min\n",
+"[Parallel(n_jobs=-1)]: Done 1609 tasks | elapsed: 2.8min\n",
+"[Parallel(n_jobs=-1)]: Done 1666 tasks | elapsed: 2.8min\n",
+"[Parallel(n_jobs=-1)]: Done 1725 tasks | elapsed: 2.9min\n",
+"[Parallel(n_jobs=-1)]: Done 1784 tasks | elapsed: 3.0min\n",
+"[Parallel(n_jobs=-1)]: Done 1845 tasks | elapsed: 3.0min\n",
+"[Parallel(n_jobs=-1)]: Done 1906 tasks | elapsed: 3.1min\n",
+"[Parallel(n_jobs=-1)]: Done 1969 tasks | elapsed: 3.2min\n",
+"[Parallel(n_jobs=-1)]: Done 2032 tasks | elapsed: 3.3min\n",
+"[Parallel(n_jobs=-1)]: Done 2097 tasks | elapsed: 3.3min\n",
+"[Parallel(n_jobs=-1)]: Done 2162 tasks | elapsed: 3.4min\n",
+"[Parallel(n_jobs=-1)]: Done 2229 tasks | elapsed: 3.5min\n",
+"[Parallel(n_jobs=-1)]: Done 2296 tasks | elapsed: 3.6min\n",
+"[Parallel(n_jobs=-1)]: Done 2365 tasks | elapsed: 3.6min\n",
+"[Parallel(n_jobs=-1)]: Done 2434 tasks | elapsed: 3.7min\n",
+"[Parallel(n_jobs=-1)]: Done 2505 tasks | elapsed: 3.8min\n",
+"[Parallel(n_jobs=-1)]: Done 2576 tasks | elapsed: 3.8min\n",
+"[Parallel(n_jobs=-1)]: Done 2649 tasks | elapsed: 3.9min\n",
+"[Parallel(n_jobs=-1)]: Done 2722 tasks | elapsed: 4.0min\n",
+"[Parallel(n_jobs=-1)]: Done 2797 tasks | elapsed: 4.1min\n",
+"[Parallel(n_jobs=-1)]: Done 2872 tasks | elapsed: 4.2min\n",
+"[Parallel(n_jobs=-1)]: Done 2949 tasks | elapsed: 4.3min\n",
+"[Parallel(n_jobs=-1)]: Done 3026 tasks | elapsed: 4.5min\n",
+"[Parallel(n_jobs=-1)]: Done 3105 tasks | elapsed: 4.7min\n",
+"[Parallel(n_jobs=-1)]: Done 3184 tasks | elapsed: 4.9min\n",
+"[Parallel(n_jobs=-1)]: Done 3265 tasks | elapsed: 5.0min\n",
+"[Parallel(n_jobs=-1)]: Done 3346 tasks | elapsed: 5.2min\n",
+"[Parallel(n_jobs=-1)]: Done 3429 tasks | elapsed: 5.4min\n",
+"[Parallel(n_jobs=-1)]: Done 3512 tasks | elapsed: 5.6min\n",
+"[Parallel(n_jobs=-1)]: Done 3597 tasks | elapsed: 5.9min\n",
+"[Parallel(n_jobs=-1)]: Done 3682 tasks | elapsed: 6.1min\n",
+"[Parallel(n_jobs=-1)]: Done 3769 tasks | elapsed: 6.3min\n",
+"[Parallel(n_jobs=-1)]: Done 3856 tasks | elapsed: 6.6min\n",
+"[Parallel(n_jobs=-1)]: Done 3945 tasks | elapsed: 6.9min\n",
+"[Parallel(n_jobs=-1)]: Done 4034 tasks | elapsed: 7.1min\n",
+"[Parallel(n_jobs=-1)]: Done 4125 tasks | elapsed: 7.4min\n",
+"[Parallel(n_jobs=-1)]: Done 4216 tasks | elapsed: 7.6min\n",
+"[Parallel(n_jobs=-1)]: Done 4309 tasks | elapsed: 7.8min\n",
+"[Parallel(n_jobs=-1)]: Done 4402 tasks | elapsed: 8.1min\n",
+"[Parallel(n_jobs=-1)]: Done 4497 tasks | elapsed: 8.5min\n",
+"[Parallel(n_jobs=-1)]: Done 4592 tasks | elapsed: 8.8min\n",
+"[Parallel(n_jobs=-1)]: Done 4689 tasks | elapsed: 9.0min\n",
+"[Parallel(n_jobs=-1)]: Done 4786 tasks | elapsed: 9.3min\n",
+"[Parallel(n_jobs=-1)]: Done 4885 tasks | elapsed: 9.6min\n",
+"[Parallel(n_jobs=-1)]: Done 4984 tasks | elapsed: 9.8min\n",
+"[Parallel(n_jobs=-1)]: Done 5040 out of 5040 | elapsed: 10.0min finished\n"
+]
 },
 {
 "output_type": "execute_result",
 "data": {
-"text/plain": "GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n n_jobs=-1,\n param_grid={'base_estimator': [Stree(C=55, max_depth=3, tol=0.01)],\n 'base_estimator__C': [7, 55],\n 'base_estimator__kernel': ['linear', 'poly', 'rbf'],\n 'base_estimator__max_depth': [3, 5],\n 'base_estimator__tol': [0.1, 0.01],\n 'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n return_train_score=True, verbose=10)"
+"text/plain": [
+"GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
+" n_jobs=-1,\n",
+" param_grid=[{'base_estimator': [Stree(C=7, max_depth=5,\n",
+" split_criteria='max_samples',\n",
+" tol=0.01)],\n",
+" 'base_estimator__C': [1, 7, 55],\n",
+" 'base_estimator__kernel': ['linear'],\n",
+" 'base_estimator__max_depth': [3, 5, 7],\n",
+" 'base_estimator__split_criteria': ['max_samples',\n",
+" 'impurity'],\n",
+" 'base_e...\n",
+" 'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
+" {'base_estimator': [Stree()],\n",
+" 'base_estimator__C': [1, 7, 55],\n",
+" 'base_estimator__gamma': [0.1, 1, 10],\n",
+" 'base_estimator__kernel': ['rbf'],\n",
+" 'base_estimator__max_depth': [3, 5, 7],\n",
+" 'base_estimator__split_criteria': ['max_samples',\n",
+" 'impurity'],\n",
+" 'base_estimator__tol': [0.1, 0.01],\n",
+" 'learning_rate': [0.5, 1],\n",
+" 'n_estimators': [10, 25]}],\n",
+" return_train_score=True, verbose=10)"
+]
 },
 "metadata": {},
 "execution_count": 7
 }
 ]
 },
+{
+"source": [
+"GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
+" n_jobs=-1,\n",
+" param_grid={'base_estimator': [Stree(C=55, max_depth=3, tol=0.01)],\n",
+" 'base_estimator__C': [7, 55],\n",
+" 'base_estimator__kernel': ['linear', 'poly', 'rbf'],\n",
+" 'base_estimator__max_depth': [3, 5],\n",
+" 'base_estimator__tol': [0.1, 0.01],\n",
+" 'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
+" return_train_score=True, verbose=10)"
+],
+"cell_type": "markdown",
+"metadata": {}
+},
 {
 "cell_type": "code",
 "metadata": {
@@ -214,9 +391,31 @@
 {
 "output_type": "stream",
 "name": "stdout",
-"text": "Best estimator: AdaBoostClassifier(algorithm='SAMME',\n base_estimator=Stree(C=55, max_depth=3, tol=0.01),\n learning_rate=0.5, n_estimators=25, random_state=2020)\nBest hyperparameters: {'base_estimator': Stree(C=55, max_depth=3, tol=0.01), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 3, 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\nBest accuracy: 0.9559440559440558\n"
+"text": [
+"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n base_estimator=Stree(C=7, max_depth=5,\n split_criteria='max_samples',\n tol=0.01),\n learning_rate=0.5, n_estimators=25, random_state=2020)\nBest hyperparameters: {'base_estimator': Stree(C=7, max_depth=5, split_criteria='max_samples', tol=0.01), 'base_estimator__C': 7, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 5, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\nBest accuracy: 0.9549825174825175\n"
+]
 }
 ]
 },
+{
+"source": [
+"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n",
+" base_estimator=Stree(C=55, max_depth=3, tol=0.01),\n",
+" learning_rate=0.5, n_estimators=25, random_state=2020)\n",
+"\n",
+"Best hyperparameters: {'base_estimator': Stree(C=55, max_depth=3, tol=0.01), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 3, 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\n",
+"\n",
+"Best accuracy: 0.9559440559440558"
+],
+"cell_type": "markdown",
+"metadata": {}
+},
+{
+"source": [
+"0.9511547662863451"
+],
+"cell_type": "markdown",
+"metadata": {}
+}
 ],
 "metadata": {
@@ -230,12 +429,12 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.7.6-final"
+"version": "3.8.4-final"
 },
 "orig_nbformat": 2,
 "kernelspec": {
-"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
-"display_name": "Python 3.7.6 64-bit ('general': venv)"
+"name": "python38464bitgeneralvenv77203c0a6afd4428bd66253ef62753dc",
+"display_name": "Python 3.8.4 64-bit ('general': venv)"
 },
 "colab": {
 "name": "gridsearch.ipynb",
setup.py
@@ -1,6 +1,6 @@
 import setuptools

-__version__ = "0.9rc5"
+__version__ = "0.9rc6"
 __author__ = "Ricardo Montañana Gómez"
@@ -100,9 +100,8 @@ class Snode:
         classes, card = np.unique(self._y, return_counts=True)
         if len(classes) > 1:
             max_card = max(card)
-            min_card = min(card)
             self._class = classes[card == max_card][0]
-            self._belief = max_card / (max_card + min_card)
+            self._belief = max_card / np.sum(card)
         else:
             self._belief = 1
         try:
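The one-line fix matters as soon as a node holds more than two classes: dividing by max_card + min_card ignores every middle class, so the old belief overstated the majority-class proportion. A quick numeric check with made-up counts:

    import numpy as np

    card = np.array([10, 5, 5])  # hypothetical counts for a 3-class node
    max_card, min_card = card.max(), card.min()

    old_belief = max_card / (max_card + min_card)  # 10/15 = 0.667, wrong
    new_belief = max_card / np.sum(card)           # 10/20 = 0.5, true proportion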
@@ -111,18 +110,18 @@ class Snode:
             self._class = None

     def __str__(self) -> str:
+        count_values = np.unique(self._y, return_counts=True)
         if self.is_leaf():
-            count_values = np.unique(self._y, return_counts=True)
-            result = (
+            return (
                 f"{self._title} - Leaf class={self._class} belief="
                 f"{self._belief: .6f} impurity={self._impurity:.4f} "
                 f"counts={count_values}"
             )
-            return result
         else:
             return (
                 f"{self._title} feaures={self._features} impurity="
                 f"{self._impurity:.4f} "
+                f"counts={count_values}"
             )

@@ -273,33 +272,31 @@ class Splitter:

     def get_subspace(
         self, dataset: np.array, labels: np.array, max_features: int
-    ) -> list:
+    ) -> tuple:
         """Return the best/random subspace to make a split"""
         indices = self._get_subspaces_set(dataset, labels, max_features)
         return dataset[:, indices], indices

-    def _impurity(self, data: np.array, _) -> np.array:
+    def _impurity(self, data: np.array, y: np.array) -> np.array:
         """return column of dataset to be taken into account to split dataset

         :param data: distances to hyper plane of every class
         :type data: np.array (m, n_classes)
-        :param _: enable call compat with other measures
-        :type _: None
+        :param y: vector of labels (classes)
+        :type y: np.array (m,)
         :return: vector with the class assigned to each sample values
-            (can be 0, 1, ...)
+            (can be 0, 1, ...) -1 if none produces information gain
         :rtype: np.array shape (m,)
         """
-        min_impurity = float("inf")
-        selected = 0
-        y = data.copy()
-        y[data <= 0] = 0
-        y[data > 0] = 1
-        y = y.astype(int)
+        max_gain = 0
+        selected = -1
         for col in range(data.shape[1]):
-            impurity_of_class = self.partition_impurity(y[:, col])
-            if impurity_of_class < min_impurity:
+            tup = y[data[:, col] > 0]
+            tdn = y[data[:, col] <= 0]
+            info_gain = self.information_gain(y, tup, tdn)
+            if info_gain > max_gain:
                 selected = col
-                min_impurity = impurity_of_class
+                max_gain = info_gain
         return selected

     @staticmethod
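Instead of binarizing the distance matrix and minimizing partition impurity, _impurity now asks which classifier column splits the true labels with the greatest information gain, returning -1 when no column helps. A self-contained sketch of that selection rule with entropy as the impurity measure (the helper functions below stand in for the Splitter methods and are not the library's API):

    import numpy as np

    def label_entropy(labels: np.ndarray) -> float:
        # Shannon entropy of a label vector; empty vectors contribute nothing.
        if labels.size == 0:
            return 0.0
        _, counts = np.unique(labels, return_counts=True)
        p = counts / counts.sum()
        return float(-np.sum(p * np.log2(p)))

    def information_gain(y, y_up, y_down) -> float:
        # Impurity decrease from splitting y into the up/down partitions.
        n = y.shape[0]
        return (
            label_entropy(y)
            - (y_up.shape[0] / n) * label_entropy(y_up)
            - (y_down.shape[0] / n) * label_entropy(y_down)
        )

    def best_column(data: np.ndarray, y: np.ndarray) -> int:
        # Mirrors the new rule: keep the column with max gain, -1 if none gains.
        max_gain, selected = 0.0, -1
        for col in range(data.shape[1]):
            up, down = y[data[:, col] > 0], y[data[:, col] <= 0]
            gain = information_gain(y, up, down)
            if gain > max_gain:
                selected, max_gain = col, gain
        return selected

    distances = np.array([[0.3, -0.2], [0.5, -0.1], [-0.4, -0.3]])
    labels = np.array([1, 1, 0])
    print(best_column(distances, labels))  # -> 0, column 1 yields no gain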
@@ -326,7 +323,8 @@ class Splitter:
         # array of (m, nc) nc = # classes
         data = self._distances(node, samples)
         if data.shape[0] < self._min_samples_split:
-            self._down = np.ones((data.shape[0]), dtype=bool)
+            # there aren't enough samples to split
+            self._up = np.ones((data.shape[0]), dtype=bool)
             return
         if data.ndim > 1:
             # split criteria for multiclass
@@ -340,8 +338,28 @@ class Splitter:
             # in predcit time just use the column computed in train time
             # is taking the classifier of class <col>
             col = node.get_partition_column()
+            if col == -1:
+                # No partition is producing information gain
+                data = np.ones(data.shape)
             data = data[:, col]
-        self._down = data > 0
+        self._up = data > 0

+    def part(self, origin: np.array) -> list:
+        """Split an array in two based on indices (down) and its complement
+        partition has to be called first to establish down indices
+
+        :param origin: dataset to split
+        :type origin: np.array
+        :param down: indices to use to split array
+        :type down: np.array
+        :return: list with two splits of the array
+        :rtype: list
+        """
+        down = ~self._up
+        return [
+            origin[self._up] if any(self._up) else None,
+            origin[down] if any(down) else None,
+        ]
+
     @staticmethod
     def _distances(node: Snode, data: np.ndarray) -> np.array:
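With the split now expressed through _up (the samples on the positive side of the chosen hyperplane), part simply slices by that mask and its complement, handing back None for an empty side. The masking logic in isolation, as a hedged sketch:

    import numpy as np

    def part(origin: np.ndarray, up: np.ndarray) -> list:
        # Split `origin` by the boolean mask `up`; an empty side becomes None.
        down = ~up
        return [
            origin[up] if up.any() else None,
            origin[down] if down.any() else None,
        ]

    samples = np.arange(6)
    mask = np.array([True, False, True, True, False, False])
    print(part(samples, mask))  # [array([0, 2, 3]), array([1, 4, 5])]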
@@ -357,23 +375,6 @@ class Splitter:
         """
         return node._clf.decision_function(data[:, node._features])

-    def part(self, origin: np.array) -> list:
-        """Split an array in two based on indices (down) and its complement
-        partition has to be called first to establish down indices
-
-        :param origin: dataset to split
-        :type origin: np.array
-        :param down: indices to use to split array
-        :type down: np.array
-        :return: list with two splits of the array
-        :rtype: list
-        """
-        up = ~self._down
-        return [
-            origin[up] if any(up) else None,
-            origin[self._down] if any(self._down) else None,
-        ]
-

 class Stree(BaseEstimator, ClassifierMixin):
     """Estimator that is based on binary trees of svm nodes
@@ -387,14 +388,14 @@ class Stree(BaseEstimator, ClassifierMixin):
         self,
         C: float = 1.0,
         kernel: str = "linear",
-        max_iter: int = 1000,
+        max_iter: int = 1e5,
         random_state: int = None,
         max_depth: int = None,
         tol: float = 1e-4,
         degree: int = 3,
         gamma="scale",
         split_criteria: str = "impurity",
-        criterion: str = "gini",
+        criterion: str = "entropy",
         min_samples_split: int = 0,
         max_features=None,
         splitter: str = "random",
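After this change a bare Stree() trains with the entropy criterion and up to 1e5 SVC iterations; reproducing the old defaults now takes explicit arguments. A usage sketch (parameter names as in the constructor above):

    from stree import Stree

    clf_new = Stree()  # criterion="entropy", max_iter=1e5
    clf_old_style = Stree(criterion="gini", max_iter=1000)  # pre-commit defaults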
@@ -138,7 +138,7 @@ class Splitter_test(unittest.TestCase):
                 [-0.1, 0.2, 0.3],
             ]
         )
-        expected = np.array([-0.1, 0.7, 0.7, 0.1, -0.1, -0.1])
+        expected = data[:, 0]
         y = [1, 2, 1, 0, 0, 0]
         computed = tcl._max_samples(data, y)
         self.assertEqual(0, computed)
@@ -158,9 +158,10 @@ class Splitter_test(unittest.TestCase):
                 [-0.1, 0.2, 0.3],
             ]
         )
-        expected = np.array([0.2, 0.01, -0.9, 0.2, 0.2, 0.2])
-        computed = tcl._impurity(data, None)
-        self.assertEqual(1, computed)
+        expected = data[:, 2]
+        y = np.array([1, 2, 1, 0, 0, 0])
+        computed = tcl._impurity(data, y)
+        self.assertEqual(2, computed)
         computed_data = data[:, computed]
         self.assertEqual((6,), computed_data.shape)
         self.assertListEqual(expected.tolist(), computed_data.tolist())
@@ -176,9 +177,9 @@ class Splitter_test(unittest.TestCase):
     def test_splitter_parameter(self):
         expected_values = [
             [0, 1, 7, 9],  # best entropy max_samples
-            [0, 2, 4, 5],  # best entropy impurity
+            [3, 8, 10, 11],  # best entropy impurity
             [0, 2, 8, 12],  # best gini max_samples
-            [4, 5, 9, 12],  # best gini impurity
+            [1, 2, 5, 12],  # best gini impurity
             [1, 2, 5, 10],  # random entropy max_samples
             [4, 8, 9, 12],  # random entropy impurity
             [3, 9, 11, 12],  # random gini max_samples
@@ -5,6 +5,7 @@ import warnings
 import numpy as np
 from sklearn.datasets import load_iris, load_wine
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.svm import LinearSVC

 from stree import Stree, Snode
 from .utils import load_dataset
@@ -41,17 +42,17 @@ class Stree_test(unittest.TestCase):
         _, count_u = np.unique(y_up, return_counts=True)
         #
         for i in unique_y:
-            number_down = count_d[i]
-            try:
-                number_up = count_u[i]
+            number_up = count_u[i]
+            try:
+                number_down = count_d[i]
             except IndexError:
-                number_up = 0
+                number_down = 0
             self.assertEqual(count_y[i], number_down + number_up)
         # Is the partition made the same as the prediction?
         # as the node is not a leaf...
         _, count_yp = np.unique(y_prediction, return_counts=True)
-        self.assertEqual(count_yp[0], y_up.shape[0])
-        self.assertEqual(count_yp[1], y_down.shape[0])
+        self.assertEqual(count_yp[1], y_up.shape[0])
+        self.assertEqual(count_yp[0], y_down.shape[0])
         self._check_tree(node.get_down())
         self._check_tree(node.get_up())
@@ -100,17 +101,20 @@ class Stree_test(unittest.TestCase):
     def test_iterator_and_str(self):
         """Check preorder iterator"""
         expected = [
-            "root feaures=(0, 1, 2) impurity=0.5000",
-            "root - Down feaures=(0, 1, 2) impurity=0.0671",
-            "root - Down - Down, <cgaf> - Leaf class=1 belief= 0.975989 "
-            "impurity=0.0469 counts=(array([0, 1]), array([ 17, 691]))",
-            "root - Down - Up feaures=(0, 1, 2) impurity=0.3967",
-            "root - Down - Up - Down, <cgaf> - Leaf class=1 belief= 0.750000 "
-            "impurity=0.3750 counts=(array([0, 1]), array([1, 3]))",
-            "root - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 "
-            "impurity=0.0000 counts=(array([0]), array([7]))",
-            "root - Up, <cgaf> - Leaf class=0 belief= 0.928297 impurity=0.1331"
-            " counts=(array([0, 1]), array([725, 56]))",
+            "root feaures=(0, 1, 2) impurity=1.0000 counts=(array([0, 1]), arr"
+            "ay([750, 750]))",
+            "root - Down, <cgaf> - Leaf class=0 belief= 0.928297 impurity=0.37"
+            "22 counts=(array([0, 1]), array([725, 56]))",
+            "root - Up feaures=(0, 1, 2) impurity=0.2178 counts=(array([0, 1])"
+            ", array([ 25, 694]))",
+            "root - Up - Down feaures=(0, 1, 2) impurity=0.8454 counts=(array("
+            "[0, 1]), array([8, 3]))",
+            "root - Up - Down - Down, <pure> - Leaf class=0 belief= 1.000000 i"
+            "mpurity=0.0000 counts=(array([0]), array([7]))",
+            "root - Up - Down - Up, <cgaf> - Leaf class=1 belief= 0.750000 imp"
+            "urity=0.8113 counts=(array([0, 1]), array([1, 3]))",
+            "root - Up - Up, <cgaf> - Leaf class=1 belief= 0.975989 impurity=0"
+            ".1634 counts=(array([0, 1]), array([ 17, 691]))",
         ]
         computed = []
         expected_string = ""
@@ -186,39 +190,43 @@ class Stree_test(unittest.TestCase):
     def test_muticlass_dataset(self):
         datasets = {
             "Synt": load_dataset(random_state=self._random_state, n_classes=3),
-            "Iris": load_iris(return_X_y=True),
+            "Iris": load_wine(return_X_y=True),
         }
         outcomes = {
             "Synt": {
-                "max_samples linear": 0.9533333333333334,
-                "max_samples rbf": 0.836,
-                "max_samples poly": 0.9473333333333334,
-                "impurity linear": 0.9533333333333334,
-                "impurity rbf": 0.836,
-                "impurity poly": 0.9473333333333334,
+                "max_samples linear": 0.9606666666666667,
+                "max_samples rbf": 0.7133333333333334,
+                "max_samples poly": 0.49066666666666664,
+                "impurity linear": 0.9606666666666667,
+                "impurity rbf": 0.7133333333333334,
+                "impurity poly": 0.49066666666666664,
             },
             "Iris": {
-                "max_samples linear": 0.98,
-                "max_samples rbf": 1.0,
-                "max_samples poly": 1.0,
-                "impurity linear": 0.98,
-                "impurity rbf": 1,
-                "impurity poly": 1,
+                "max_samples linear": 1.0,
+                "max_samples rbf": 0.6910112359550562,
+                "max_samples poly": 0.6966292134831461,
+                "impurity linear": 1,
+                "impurity rbf": 0.6910112359550562,
+                "impurity poly": 0.6966292134831461,
             },
         }

         for name, dataset in datasets.items():
             px, py = dataset
             for criteria in ["max_samples", "impurity"]:
                 for kernel in self._kernels:
                     clf = Stree(
-                        C=1e4,
-                        max_iter=1e4,
+                        C=55,
+                        max_iter=1e5,
                         kernel=kernel,
                         random_state=self._random_state,
                     )
                     clf.fit(px, py)
-                    # print(f"{name} {criteria} {kernel}")
                     outcome = outcomes[name][f"{criteria} {kernel}"]
+                    # print(
+                    #     f"{name} {criteria} {kernel} {outcome} {clf.score(px"
+                    #     ", py)}"
+                    # )
                     self.assertAlmostEqual(outcome, clf.score(px, py))

     def test_max_features(self):
@@ -305,65 +313,7 @@ class Stree_test(unittest.TestCase):
         X, y = load_dataset(self._random_state)
         clf = Stree(random_state=self._random_state, max_features=2)
         clf.fit(X, y)
-        self.assertAlmostEqual(0.9426666666666667, clf.score(X, y))
-
-    def test_score_multi_class(self):
-        warnings.filterwarnings("ignore")
-        accuracies = [
-            0.7022472,  # Wine linear impurity
-            0.8314607,  # Wine linear max_samples
-            0.4044944,  # Wine rbf impurity
-            0.4044944,  # Wine rbf max_samples
-            0.3988764,  # Wine poly impurity
-            0.7640449,  # Wine poly max_samples
-            0.6600000,  # Iris linear impurity
-            0.9666667,  # Iris linear max_samples
-            0.3333333,  # Iris rbf impurity
-            0.9800000,  # Iris rbf max_samples
-            0.3333333,  # Iris poly impurity
-            1.0000000,  # Iris poly max_samples
-            0.7153333,  # Synthetic linear impurity
-            0.9313333,  # Synthetic linear max_samples
-            0.4806667,  # Synthetic rbf impurity
-            0.8320000,  # Synthetic rbf max_samples
-            0.4786667,  # Synthetic poly impurity
-            0.6340000,  # Synthetic poly max_samples
-        ]
-        datasets = [
-            ("Wine", load_wine(return_X_y=True)),
-            ("Iris", load_iris(return_X_y=True)),
-            (
-                "Synthetic",
-                load_dataset(self._random_state, n_classes=3, n_features=5),
-            ),
-        ]
-        for dataset_name, dataset in datasets:
-            X, y = dataset
-            for kernel in self._kernels:
-                for criteria in [
-                    "impurity",
-                    "max_samples",
-                ]:
-                    clf = Stree(
-                        C=17,
-                        random_state=self._random_state,
-                        kernel=kernel,
-                        split_criteria=criteria,
-                        degree=5,
-                        gamma="auto",
-                    )
-                    clf.fit(X, y)
-                    accuracy_score = clf.score(X, y)
-                    yp = clf.predict(X)
-                    accuracy_computed = np.mean(yp == y)
-                    # print(
-                    #     "{:.7f}, # {:7} {:5} {}".format(
-                    #         accuracy_score, dataset_name, kernel, criteria
-                    #     )
-                    # )
-                    accuracy_expected = accuracies.pop(0)
-                    self.assertEqual(accuracy_score, accuracy_computed)
-                    self.assertAlmostEqual(accuracy_expected, accuracy_score)
+        self.assertAlmostEqual(0.944, clf.score(X, y))

     def test_bogus_splitter_parameter(self):
         clf = Stree(splitter="duck")
@@ -406,3 +356,89 @@ class Stree_test(unittest.TestCase):
         # zero weights are ok when they don't erase a class
         _ = clf.train(X, y, weights_no_zero, 1, "test")
         self.assertListEqual(weights_no_zero.tolist(), original.tolist())
+
+    def test_multiclass_classifier_integrity(self):
+        """Checks if the multiclass operation is done right"""
+        X, y = load_iris(return_X_y=True)
+        clf = Stree(random_state=0)
+        clf.fit(X, y)
+        score = clf.score(X, y)
+        # Check accuracy of the whole model
+        self.assertAlmostEquals(0.98, score, 5)
+        svm = LinearSVC(random_state=0)
+        svm.fit(X, y)
+        self.assertAlmostEquals(0.9666666666666667, svm.score(X, y), 5)
+        data = svm.decision_function(X)
+        expected = [
+            0.4444444444444444,
+            0.35777777777777775,
+            0.4569777777777778,
+        ]
+        ty = data.copy()
+        ty[data <= 0] = 0
+        ty[data > 0] = 1
+        ty = ty.astype(int)
+        for i in range(3):
+            self.assertAlmostEquals(
+                expected[i],
+                clf.splitter_._gini(ty[:, i]),
+            )
+        # 1st Branch
+        # up has to have 50 samples of class 0
+        # down should have 100 [50, 50]
+        up = data[:, 2] > 0
+        resup = np.unique(y[up], return_counts=True)
+        resdn = np.unique(y[~up], return_counts=True)
+        self.assertListEqual([1, 2], resup[0].tolist())
+        self.assertListEqual([3, 50], resup[1].tolist())
+        self.assertListEqual([0, 1], resdn[0].tolist())
+        self.assertListEqual([50, 47], resdn[1].tolist())
+        # 2nd Branch
+        # up should have 53 samples of classes [1, 2] [3, 50]
+        # down shoud have 47 samples of class 1
+        node_up = clf.tree_.get_down().get_up()
+        node_dn = clf.tree_.get_down().get_down()
+        resup = np.unique(node_up._y, return_counts=True)
+        resdn = np.unique(node_dn._y, return_counts=True)
+        self.assertListEqual([1, 2], resup[0].tolist())
+        self.assertListEqual([3, 50], resup[1].tolist())
+        self.assertListEqual([1], resdn[0].tolist())
+        self.assertListEqual([47], resdn[1].tolist())
+
+    def test_score_multiclass_rbf(self):
+        X, y = load_dataset(
+            random_state=self._random_state,
+            n_classes=3,
+            n_features=5,
+            n_samples=500,
+        )
+        clf = Stree(kernel="rbf", random_state=self._random_state)
+        self.assertEqual(0.824, clf.fit(X, y).score(X, y))
+        X, y = load_wine(return_X_y=True)
+        self.assertEqual(0.6741573033707865, clf.fit(X, y).score(X, y))
+
+    def test_score_multiclass_poly(self):
+        X, y = load_dataset(
+            random_state=self._random_state,
+            n_classes=3,
+            n_features=5,
+            n_samples=500,
+        )
+        clf = Stree(
+            kernel="poly", random_state=self._random_state, C=10, degree=5
+        )
+        self.assertEqual(0.786, clf.fit(X, y).score(X, y))
+        X, y = load_wine(return_X_y=True)
+        self.assertEqual(0.702247191011236, clf.fit(X, y).score(X, y))
+
+    def test_score_multiclass_linear(self):
+        X, y = load_dataset(
+            random_state=self._random_state,
+            n_classes=3,
+            n_features=5,
+            n_samples=1500,
+        )
+        clf = Stree(kernel="linear", random_state=self._random_state)
+        self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y))
+        X, y = load_wine(return_X_y=True)
+        self.assertEqual(0.9550561797752809, clf.fit(X, y).score(X, y))
@@ -1,9 +1,9 @@
 from sklearn.datasets import make_classification


-def load_dataset(random_state=0, n_classes=2, n_features=3):
+def load_dataset(random_state=0, n_classes=2, n_features=3, n_samples=1500):
     X, y = make_classification(
-        n_samples=1500,
+        n_samples=n_samples,
         n_features=n_features,
         n_informative=3,
         n_redundant=0,