Refactor predict and score and make mypy --strict

First Approach
2025-08-17 16:36:01 +00:00 · 2020-07-01 18:37:10 +02:00 · 2020-06-28 02:46:20 +02:00
11 changed files with 548 additions and 1107 deletions
--- a/notebooks/benchmark.ipynb
+++ b/notebooks/benchmark.ipynb
@@ -17,7 +17,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -29,7 +29,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -39,13 +39,13 @@
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import tree\n",
    "from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
-    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
    "from stree import Stree"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -64,17 +64,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 6,
+   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "2020-11-01 11:14:06\n"
-     ]
+     "text": "2020-06-15 10:17:17\n"
    }
   ],
   "source": [
@@ -90,7 +86,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -102,17 +98,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 8,
+   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "Fraud: 0.173% 492\nValid: 99.827% 284,315\n"
-     ]
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284,315\n"
    }
   ],
   "source": [
@@ -122,7 +114,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -134,17 +126,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 10,
+   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "X shape: (284807, 29)\ny shape: (284807,)\n"
-     ]
+     "text": "X shape: (284807, 29)\ny shape: (284807,)\n"
    }
   ],
   "source": [
@@ -163,7 +151,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -174,7 +162,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -184,7 +172,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -194,17 +182,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stree\n",
-    "stree = Stree(random_state=random_state, C=.01, max_iter=1e3)"
+    "stree = Stree(random_state=random_state, C=.01)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -214,12 +202,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Bagging\n",
-    "bagging = BaggingClassifier(random_state=random_state)"
+    "# Gradient Boosting\n",
+    "gradient = GradientBoostingClassifier(random_state=random_state)"
   ]
  },
  {
@@ -231,7 +219,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -256,163 +244,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 18,
+   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "************************** Linear Tree **********************\n",
-      "Train Model Linear Tree took: 15.14 seconds\n",
-      "=========== Linear Tree - Train 199,364 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   1.000000  1.000000  1.000000    199020\n",
-      "           1   1.000000  1.000000  1.000000       344\n",
-      "\n",
-      "    accuracy                       1.000000    199364\n",
-      "   macro avg   1.000000  1.000000  1.000000    199364\n",
-      "weighted avg   1.000000  1.000000  1.000000    199364\n",
-      "\n",
-      "=========== Linear Tree - Test 85,443 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999578  0.999613  0.999596     85295\n",
-      "           1   0.772414  0.756757  0.764505       148\n",
-      "\n",
-      "    accuracy                       0.999192     85443\n",
-      "   macro avg   0.885996  0.878185  0.882050     85443\n",
-      "weighted avg   0.999184  0.999192  0.999188     85443\n",
-      "\n",
-      "Confusion Matrix in Train\n",
-      "[[199020      0]\n",
-      " [     0    344]]\n",
-      "Confusion Matrix in Test\n",
-      "[[85262    33]\n",
-      " [   36   112]]\n",
-      "************************** Random Forest **********************\n",
-      "Train Model Random Forest took: 181.1 seconds\n",
-      "=========== Random Forest - Train 199,364 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   1.000000  1.000000  1.000000    199020\n",
-      "           1   1.000000  1.000000  1.000000       344\n",
-      "\n",
-      "    accuracy                       1.000000    199364\n",
-      "   macro avg   1.000000  1.000000  1.000000    199364\n",
-      "weighted avg   1.000000  1.000000  1.000000    199364\n",
-      "\n",
-      "=========== Random Forest - Test 85,443 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999660  0.999965  0.999812     85295\n",
-      "           1   0.975410  0.804054  0.881481       148\n",
-      "\n",
-      "    accuracy                       0.999625     85443\n",
-      "   macro avg   0.987535  0.902009  0.940647     85443\n",
-      "weighted avg   0.999618  0.999625  0.999607     85443\n",
-      "\n",
-      "Confusion Matrix in Train\n",
-      "[[199020      0]\n",
-      " [     0    344]]\n",
-      "Confusion Matrix in Test\n",
-      "[[85292     3]\n",
-      " [   29   119]]\n",
-      "************************** Stree (SVM Tree) **********************\n",
-      "Train Model Stree (SVM Tree) took: 36.6 seconds\n",
-      "=========== Stree (SVM Tree) - Train 199,364 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999623  0.999864  0.999744    199020\n",
-      "           1   0.908784  0.781977  0.840625       344\n",
-      "\n",
-      "    accuracy                       0.999488    199364\n",
-      "   macro avg   0.954204  0.890921  0.920184    199364\n",
-      "weighted avg   0.999467  0.999488  0.999469    199364\n",
-      "\n",
-      "=========== Stree (SVM Tree) - Test 85,443 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999637  0.999918  0.999777     85295\n",
-      "           1   0.943548  0.790541  0.860294       148\n",
-      "\n",
-      "    accuracy                       0.999555     85443\n",
-      "   macro avg   0.971593  0.895229  0.930036     85443\n",
-      "weighted avg   0.999540  0.999555  0.999536     85443\n",
-      "\n",
-      "Confusion Matrix in Train\n",
-      "[[198993     27]\n",
-      " [    75    269]]\n",
-      "Confusion Matrix in Test\n",
-      "[[85288     7]\n",
-      " [   31   117]]\n",
-      "************************** AdaBoost model **********************\n",
-      "Train Model AdaBoost model took: 46.14 seconds\n",
-      "=========== AdaBoost model - Train 199,364 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999392  0.999678  0.999535    199020\n",
-      "           1   0.777003  0.648256  0.706815       344\n",
-      "\n",
-      "    accuracy                       0.999072    199364\n",
-      "   macro avg   0.888198  0.823967  0.853175    199364\n",
-      "weighted avg   0.999008  0.999072  0.999030    199364\n",
-      "\n",
-      "=========== AdaBoost model - Test 85,443 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999484  0.999707  0.999596     85295\n",
-      "           1   0.806202  0.702703  0.750903       148\n",
-      "\n",
-      "    accuracy                       0.999192     85443\n",
-      "   macro avg   0.902843  0.851205  0.875249     85443\n",
-      "weighted avg   0.999149  0.999192  0.999165     85443\n",
-      "\n",
-      "Confusion Matrix in Train\n",
-      "[[198956     64]\n",
-      " [   121    223]]\n",
-      "Confusion Matrix in Test\n",
-      "[[85270    25]\n",
-      " [   44   104]]\n",
-      "************************** Bagging model **********************\n",
-      "Train Model Bagging model took: 77.73 seconds\n",
-      "=========== Bagging model - Train 199,364 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999864  1.000000  0.999932    199020\n",
-      "           1   1.000000  0.921512  0.959153       344\n",
-      "\n",
-      "    accuracy                       0.999865    199364\n",
-      "   macro avg   0.999932  0.960756  0.979542    199364\n",
-      "weighted avg   0.999865  0.999865  0.999862    199364\n",
-      "\n",
-      "=========== Bagging model - Test 85,443 samples =============\n",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "           0   0.999637  0.999953  0.999795     85295\n",
-      "           1   0.966942  0.790541  0.869888       148\n",
-      "\n",
-      "    accuracy                       0.999590     85443\n",
-      "   macro avg   0.983289  0.895247  0.934842     85443\n",
-      "weighted avg   0.999580  0.999590  0.999570     85443\n",
-      "\n",
-      "Confusion Matrix in Train\n",
-      "[[199020      0]\n",
-      " [    27    317]]\n",
-      "Confusion Matrix in Test\n",
-      "[[85291     4]\n",
-      " [   31   117]]\n"
-     ]
+     "text": "************************** Linear Tree **********************\nTrain Model Linear Tree took: 13.91 seconds\n=========== Linear Tree - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   1.000000  1.000000  1.000000    199020\n           1   1.000000  1.000000  1.000000       344\n\n    accuracy                       1.000000    199364\n   macro avg   1.000000  1.000000  1.000000    199364\nweighted avg   1.000000  1.000000  1.000000    199364\n\n=========== Linear Tree - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999578  0.999613  0.999596     85295\n           1   0.772414  0.756757  0.764505       148\n\n    accuracy                       0.999192     85443\n   macro avg   0.885996  0.878185  0.882050     85443\nweighted avg   0.999184  0.999192  0.999188     85443\n\nConfusion Matrix in Train\n[[199020      0]\n [     0    344]]\nConfusion Matrix in Test\n[[85262    33]\n [   36   112]]\n************************** Random Forest **********************\nTrain Model Random Forest took: 173.1 seconds\n=========== Random Forest - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   1.000000  1.000000  1.000000    199020\n           1   1.000000  1.000000  1.000000       344\n\n    accuracy                       1.000000    199364\n   macro avg   1.000000  1.000000  1.000000    199364\nweighted avg   1.000000  1.000000  1.000000    199364\n\n=========== Random Forest - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999660  0.999965  0.999812     85295\n           1   0.975410  0.804054  0.881481       148\n\n    accuracy                       0.999625     85443\n   macro avg   0.987535  0.902009  0.940647     85443\nweighted avg   0.999618  0.999625  0.999607     85443\n\nConfusion Matrix in Train\n[[199020      0]\n 
[     0    344]]\nConfusion Matrix in Test\n[[85292     3]\n [   29   119]]\n************************** Stree (SVM Tree) **********************\nTrain Model Stree (SVM Tree) took: 38.4 seconds\n=========== Stree (SVM Tree) - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999623  0.999864  0.999744    199020\n           1   0.908784  0.781977  0.840625       344\n\n    accuracy                       0.999488    199364\n   macro avg   0.954204  0.890921  0.920184    199364\nweighted avg   0.999467  0.999488  0.999469    199364\n\n=========== Stree (SVM Tree) - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999637  0.999918  0.999777     85295\n           1   0.943548  0.790541  0.860294       148\n\n    accuracy                       0.999555     85443\n   macro avg   0.971593  0.895229  0.930036     85443\nweighted avg   0.999540  0.999555  0.999536     85443\n\nConfusion Matrix in Train\n[[198993     27]\n [    75    269]]\nConfusion Matrix in Test\n[[85288     7]\n [   31   117]]\n************************** AdaBoost model **********************\nTrain Model AdaBoost model took: 47.21 seconds\n=========== AdaBoost model - Train 199,364 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999392  0.999678  0.999535    199020\n           1   0.777003  0.648256  0.706815       344\n\n    accuracy                       0.999072    199364\n   macro avg   0.888198  0.823967  0.853175    199364\nweighted avg   0.999008  0.999072  0.999030    199364\n\n=========== AdaBoost model - Test 85,443 samples =============\n              precision    recall  f1-score   support\n\n           0   0.999484  0.999707  0.999596     85295\n           1   0.806202  0.702703  0.750903       148\n\n    accuracy                       0.999192     85443\n   macro avg   0.902843  0.851205  0.875249     85443\nweighted avg   
0.999149  0.999192  0.999165     85443\n\nConfusion Matrix in Train\n[[198956     64]\n [   121    223]]\nConfusion Matrix in Test\n[[85270    25]\n [   44   104]]\n"
    }
   ],
   "source": [
    "# Train & Test models\n",
    "models = {\n",
    "    'Linear Tree':linear_tree, 'Random Forest': random_forest, 'Stree (SVM Tree)': stree,  \n",
-    "    'AdaBoost model': adaboost, 'Bagging model': bagging\n",
+    "    'AdaBoost model': adaboost\n",
    "}\n",
    "\n",
    "best_f1 = 0\n",
@@ -428,17 +273,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 19,
+   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 181.07 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time:  15.14 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 181.07 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time:  36.60 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time:  46.14 seconds\t f1: 0.7509\nModel: Bagging model\t Time:  77.73 seconds\t f1: 0.8699\n"
-     ]
+     "text": "**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 173.095 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time:  13.91 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 173.09 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time:  38.40 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time:  47.21 seconds\t f1: 0.7509\n"
    }
   ],
   "source": [
@@ -473,53 +314,20 @@
    "******************************************************************************************************************\n",
    "Model: Linear Tree       Time:  23.05 seconds\t f1: 0.7645\n",
    "Model: Random Forest\t Time: 218.97 seconds\t f1: 0.8815\n",
-    "Model: Stree (SVM Tree)\t Time:  49.45 seconds\t f1: 0.8603\n",
+    "Model: Stree (SVM Tree)\t Time:  49.45 seconds\t f1: 0.8467\n",
    "Model: AdaBoost model\t Time:  73.83 seconds\t f1: 0.7509\n",
+    "Model: Gradient Boost.\t Time: 388.69 seconds\t f1: 0.5259\n",
    "Model: Neural Network\t Time:  25.47 seconds\t f1: 0.8328\n",
-    "Model: Bagging model\t Time:  77.93 seconds\t f1: 0.8699\n",
-    "\n",
    "```"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "{'C': 0.01,\n",
-       " 'criterion': 'entropy',\n",
-       " 'degree': 3,\n",
-       " 'gamma': 'scale',\n",
-       " 'kernel': 'linear',\n",
-       " 'max_depth': None,\n",
-       " 'max_features': None,\n",
-       " 'max_iter': 1000.0,\n",
-       " 'min_samples_split': 0,\n",
-       " 'random_state': 2020,\n",
-       " 'split_criteria': 'impurity',\n",
-       " 'splitter': 'random',\n",
-       " 'tol': 0.0001}"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 18
-    }
-   ],
-   "source": [
-    "stree.get_params()"
-   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
-   "display_name": "Python 3.8.4 64-bit ('general': venv)",
+   "display_name": "Python 3.7.6 64-bit ('general': venv)",
   "language": "python",
-   "name": "python38464bitgeneralvenv77203c0a6afd4428bd66253ef62753dc"
+   "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
  },
  "language_info": {
   "codemirror_mode": {
@@ -531,7 +339,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.4-final"
+   "version": "3.7.6-final"
  },
  "toc": {
   "base_numbering": 1,
--- a/notebooks/ensemble.ipynb
+++ b/notebooks/ensemble.ipynb
@@ -61,13 +61,7 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "Fraud: 0.173% 492\n",
-      "Valid: 99.827% 284315\n",
-      "X.shape (100492, 28)  y.shape (100492,)\n",
-      "Fraud: 0.652% 655\n",
-      "Valid: 99.348% 99837\n"
-     ]
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.644% 647\nValid: 99.356% 99845\n"
    }
   ],
   "source": [
@@ -135,14 +129,12 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "Score Train:  0.9985073353804162\nScore Test:  0.9983746848878864\nTook 35.80 seconds\n"
-     ]
+     "text": "Score Train:  0.9985784146480154\nScore Test:  0.9981093273185617\nTook 73.27 seconds\n"
    }
   ],
   "source": [
    "now = time.time()\n",
-    "clf = Stree(max_depth=3, random_state=random_state, max_iter=1e3)\n",
+    "clf = Stree(max_depth=3, random_state=random_state)\n",
    "clf.fit(Xtrain, ytrain)\n",
    "print(\"Score Train: \", clf.score(Xtrain, ytrain))\n",
    "print(\"Score Test: \", clf.score(Xtest, ytest))\n",
@@ -177,17 +169,13 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "Kernel: linear\tTime: 49.66 seconds\tScore Train: 0.9983225\tScore Test: 0.9983083\n",
-      "Kernel: rbf\tTime: 12.73 seconds\tScore Train: 0.9934891\tScore Test: 0.9934656\n",
-      "Kernel: poly\tTime: 76.24 seconds\tScore Train: 0.9972706\tScore Test: 0.9969152\n"
-     ]
+     "text": "Kernel: linear\tTime: 93.78 seconds\tScore Train: 0.9983083\tScore Test: 0.9983083\nKernel: rbf\tTime: 18.32 seconds\tScore Train: 0.9935602\tScore Test: 0.9935651\nKernel: poly\tTime: 69.68 seconds\tScore Train: 0.9973132\tScore Test: 0.9972801\n"
    }
   ],
   "source": [
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
-    "    clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state, max_iter=1e3), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
+    "    clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    score_train = clf.score(Xtrain, ytrain)\n",
    "    score_test = clf.score(Xtest, ytest)\n",
@@ -222,17 +210,13 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": [
-      "Kernel: linear\tTime: 231.51 seconds\tScore Train: 0.9984931\tScore Test: 0.9983083\n",
-      "Kernel: rbf\tTime: 114.77 seconds\tScore Train: 0.9992323\tScore Test: 0.9983083\n",
-      "Kernel: poly\tTime: 67.87 seconds\tScore Train: 0.9993319\tScore Test: 0.9985074\n"
-     ]
+     "text": "Kernel: linear\tTime: 387.06 seconds\tScore Train: 0.9985784\tScore Test: 0.9981093\nKernel: rbf\tTime: 144.00 seconds\tScore Train: 0.9992750\tScore Test: 0.9983415\nKernel: poly\tTime: 101.78 seconds\tScore Train: 0.9992466\tScore Test: 0.9981757\n"
    }
   ],
   "source": [
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
-    "    clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state, max_iter=1e3), n_estimators=n_estimators, random_state=random_state)\n",
+    "    clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    score_train = clf.score(Xtrain, ytrain)\n",
    "    score_test = clf.score(Xtest, ytest)\n",
@@ -251,12 +235,12 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.4-final"
+   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
-   "name": "python38464bitgeneralf6de308d3831407c8bd68d4a5e328a38",
-   "display_name": "Python 3.8.4 64-bit ('general')"
+   "name": "python37664bitgeneralvenve3128601eb614c5da59c5055670b6040",
+   "display_name": "Python 3.7.6 64-bit ('general': venv)"
  }
 },
 "nbformat": 4,
--- a/notebooks/features.ipynb
+++ b/notebooks/features.ipynb
--- a/notebooks/gridsearch.ipynb
+++ b/notebooks/gridsearch.ipynb
@@ -66,8 +66,7 @@
        "id": "z9Q-YUfBDZEq",
        "colab_type": "code",
        "colab": {},
-        "outputId": "afc822fb-f16a-4302-8a67-2b9e2880159b",
-        "tags": []
+        "outputId": "afc822fb-f16a-4302-8a67-2b9e2880159b"
      },
      "source": [
        "random_state=1\n",
@@ -113,9 +112,7 @@
        {
          "output_type": "stream",
          "name": "stdout",
-          "text": [
-            "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
-          ]
+          "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.244% 496\nValid: 66.756% 996\n"
        }
      ]
    },
@@ -134,68 +131,31 @@
        "colab": {}
      },
      "source": [
-        "parameters = [{\n",
+        "parameters = {\n",
        "    'base_estimator': [Stree()],\n",
        "    'n_estimators': [10, 25],\n",
        "    'learning_rate': [.5, 1],\n",
-        "    'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
        "    'base_estimator__tol': [.1,  1e-02],\n",
-        "    'base_estimator__max_depth': [3, 5, 7],\n",
-        "    'base_estimator__C': [1, 7, 55],\n",
-        "    'base_estimator__kernel': ['linear']\n",
-        "},\n",
-        "{\n",
-        "    'base_estimator': [Stree()],\n",
-        "    'n_estimators': [10, 25],\n",
-        "    'learning_rate': [.5, 1],\n",
-        "    'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
-        "    'base_estimator__tol': [.1,  1e-02],\n",
-        "    'base_estimator__max_depth': [3, 5, 7],\n",
-        "    'base_estimator__C': [1, 7, 55],\n",
-        "    'base_estimator__degree': [3, 5, 7],\n",
-        "    'base_estimator__kernel': ['poly']\n",
-        "},\n",
-        "{\n",
-        "    'base_estimator': [Stree()],\n",
-        "    'n_estimators': [10, 25],\n",
-        "    'learning_rate': [.5, 1],\n",
-        "    'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
-        "    'base_estimator__tol': [.1,  1e-02],\n",
-        "    'base_estimator__max_depth': [3, 5, 7],\n",
-        "    'base_estimator__C': [1, 7, 55],\n",
-        "    'base_estimator__gamma': [.1, 1, 10],\n",
-        "    'base_estimator__kernel': ['rbf']\n",
-        "}]"
+        "    'base_estimator__max_depth': [3, 5],\n",
+        "    'base_estimator__C': [1, 3],\n",
+        "    'base_estimator__kernel': ['linear', 'poly', 'rbf']\n",
+        "}"
      ],
-      "execution_count": 5,
+      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 14,
      "metadata": {},
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
-            "text/plain": [
-              "{'C': 1.0,\n",
-              " 'criterion': 'entropy',\n",
-              " 'degree': 3,\n",
-              " 'gamma': 'scale',\n",
-              " 'kernel': 'linear',\n",
-              " 'max_depth': None,\n",
-              " 'max_features': None,\n",
-              " 'max_iter': 100000.0,\n",
-              " 'min_samples_split': 0,\n",
-              " 'random_state': None,\n",
-              " 'split_criteria': 'impurity',\n",
-              " 'splitter': 'random',\n",
-              " 'tol': 0.0001}"
-            ]
+            "text/plain": "{'C': 1.0,\n 'degree': 3,\n 'gamma': 'scale',\n 'kernel': 'linear',\n 'max_depth': None,\n 'max_iter': 1000,\n 'min_samples_split': 0,\n 'random_state': None,\n 'tol': 0.0001}"
          },
          "metadata": {},
-          "execution_count": 6
+          "execution_count": 14
        }
      ],
      "source": [
@@ -208,214 +168,52 @@
        "id": "CrcB8o6EDZE5",
        "colab_type": "code",
        "colab": {},
-        "outputId": "7703413a-d563-4289-a13b-532f38f82762",
-        "tags": []
+        "outputId": "7703413a-d563-4289-a13b-532f38f82762"
      },
      "source": [
        "random_state=2020\n",
-        "clf = AdaBoostClassifier(random_state=random_state, algorithm=\"SAMME\")\n",
+        "clf = AdaBoostClassifier(random_state=random_state)\n",
        "grid = GridSearchCV(clf, parameters, verbose=10, n_jobs=-1, return_train_score=True)\n",
        "grid.fit(Xtrain, ytrain)"
      ],
-      "execution_count": 7,
+      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
-          "text": [
-            "Fitting 5 folds for each of 1008 candidates, totalling 5040 fits\n",
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
-            "[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    2.6s\n",
-            "[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    3.2s\n",
-            "[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    3.5s\n",
-            "[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.0s\n",
-            "[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    4.5s\n",
-            "[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    5.0s\n",
-            "[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    5.5s\n",
-            "[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:    6.2s\n",
-            "[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:    7.1s\n",
-            "[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:    8.2s\n",
-            "[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:    9.6s\n",
-            "[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:   11.0s\n",
-            "[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   12.5s\n",
-            "[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:   14.3s\n",
-            "[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   16.0s\n",
-            "[Parallel(n_jobs=-1)]: Done 205 tasks      | elapsed:   18.1s\n",
-            "[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   20.1s\n",
-            "[Parallel(n_jobs=-1)]: Done 249 tasks      | elapsed:   21.9s\n",
-            "[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:   23.4s\n",
-            "[Parallel(n_jobs=-1)]: Done 297 tasks      | elapsed:   24.9s\n",
-            "[Parallel(n_jobs=-1)]: Done 322 tasks      | elapsed:   26.6s\n",
-            "[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   29.3s\n",
-            "[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:   31.9s\n",
-            "[Parallel(n_jobs=-1)]: Done 405 tasks      | elapsed:   35.5s\n",
-            "[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   38.7s\n",
-            "[Parallel(n_jobs=-1)]: Done 465 tasks      | elapsed:   42.1s\n",
-            "[Parallel(n_jobs=-1)]: Done 496 tasks      | elapsed:   46.1s\n",
-            "[Parallel(n_jobs=-1)]: Done 529 tasks      | elapsed:   52.7s\n",
-            "[Parallel(n_jobs=-1)]: Done 562 tasks      | elapsed:   58.1s\n",
-            "[Parallel(n_jobs=-1)]: Done 597 tasks      | elapsed:  1.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  1.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 669 tasks      | elapsed:  1.5min\n",
-            "[Parallel(n_jobs=-1)]: Done 706 tasks      | elapsed:  1.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 745 tasks      | elapsed:  1.7min\n",
-            "[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  1.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 825 tasks      | elapsed:  1.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 866 tasks      | elapsed:  1.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 909 tasks      | elapsed:  1.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 952 tasks      | elapsed:  1.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 997 tasks      | elapsed:  2.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 1042 tasks      | elapsed:  2.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 1089 tasks      | elapsed:  2.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 1136 tasks      | elapsed:  2.2min\n",
-            "[Parallel(n_jobs=-1)]: Done 1185 tasks      | elapsed:  2.2min\n",
-            "[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:  2.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 1285 tasks      | elapsed:  2.4min\n",
-            "[Parallel(n_jobs=-1)]: Done 1336 tasks      | elapsed:  2.4min\n",
-            "[Parallel(n_jobs=-1)]: Done 1389 tasks      | elapsed:  2.5min\n",
-            "[Parallel(n_jobs=-1)]: Done 1442 tasks      | elapsed:  2.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 1497 tasks      | elapsed:  2.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 1552 tasks      | elapsed:  2.7min\n",
-            "[Parallel(n_jobs=-1)]: Done 1609 tasks      | elapsed:  2.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 1666 tasks      | elapsed:  2.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 1725 tasks      | elapsed:  2.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:  3.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 1845 tasks      | elapsed:  3.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 1906 tasks      | elapsed:  3.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 1969 tasks      | elapsed:  3.2min\n",
-            "[Parallel(n_jobs=-1)]: Done 2032 tasks      | elapsed:  3.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 2097 tasks      | elapsed:  3.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 2162 tasks      | elapsed:  3.4min\n",
-            "[Parallel(n_jobs=-1)]: Done 2229 tasks      | elapsed:  3.5min\n",
-            "[Parallel(n_jobs=-1)]: Done 2296 tasks      | elapsed:  3.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 2365 tasks      | elapsed:  3.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:  3.7min\n",
-            "[Parallel(n_jobs=-1)]: Done 2505 tasks      | elapsed:  3.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 2576 tasks      | elapsed:  3.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 2649 tasks      | elapsed:  3.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 2722 tasks      | elapsed:  4.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 2797 tasks      | elapsed:  4.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 2872 tasks      | elapsed:  4.2min\n",
-            "[Parallel(n_jobs=-1)]: Done 2949 tasks      | elapsed:  4.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 3026 tasks      | elapsed:  4.5min\n",
-            "[Parallel(n_jobs=-1)]: Done 3105 tasks      | elapsed:  4.7min\n",
-            "[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:  4.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 3265 tasks      | elapsed:  5.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 3346 tasks      | elapsed:  5.2min\n",
-            "[Parallel(n_jobs=-1)]: Done 3429 tasks      | elapsed:  5.4min\n",
-            "[Parallel(n_jobs=-1)]: Done 3512 tasks      | elapsed:  5.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 3597 tasks      | elapsed:  5.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 3682 tasks      | elapsed:  6.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 3769 tasks      | elapsed:  6.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 3856 tasks      | elapsed:  6.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 3945 tasks      | elapsed:  6.9min\n",
-            "[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  7.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 4125 tasks      | elapsed:  7.4min\n",
-            "[Parallel(n_jobs=-1)]: Done 4216 tasks      | elapsed:  7.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 4309 tasks      | elapsed:  7.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 4402 tasks      | elapsed:  8.1min\n",
-            "[Parallel(n_jobs=-1)]: Done 4497 tasks      | elapsed:  8.5min\n",
-            "[Parallel(n_jobs=-1)]: Done 4592 tasks      | elapsed:  8.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 4689 tasks      | elapsed:  9.0min\n",
-            "[Parallel(n_jobs=-1)]: Done 4786 tasks      | elapsed:  9.3min\n",
-            "[Parallel(n_jobs=-1)]: Done 4885 tasks      | elapsed:  9.6min\n",
-            "[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  9.8min\n",
-            "[Parallel(n_jobs=-1)]: Done 5040 out of 5040 | elapsed: 10.0min finished\n"
-          ]
+          "text": "Fitting 5 folds for each of 96 candidates, totalling 480 fits\n[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.6s\n[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    4.2s\n[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    4.8s\n[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    5.3s\n[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    6.2s\n[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    7.2s\n[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    8.9s\n[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:   10.7s\n[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:   12.7s\n[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:   16.7s\n[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   19.4s\n[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:   24.4s\n[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   29.3s\n[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:   32.7s\n[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   36.4s\n[Parallel(n_jobs=-1)]: Done 205 tasks      | elapsed:   39.7s\n[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:   43.7s\n[Parallel(n_jobs=-1)]: Done 249 tasks      | elapsed:   46.6s\n[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:   48.8s\n[Parallel(n_jobs=-1)]: Done 297 tasks      | elapsed:   52.0s\n[Parallel(n_jobs=-1)]: Done 322 tasks      | elapsed:   55.9s\n[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  1.0min\n[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:  1.2min\n[Parallel(n_jobs=-1)]: Done 405 tasks      | elapsed:  1.3min\n[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  1.3min\n[Parallel(n_jobs=-1)]: Done 465 tasks      | elapsed:  1.4min\n[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:  1.5min finished\n"
        },
        {
          "output_type": "execute_result",
          "data": {
-            "text/plain": [
-              "GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
-              "             n_jobs=-1,\n",
-              "             param_grid=[{'base_estimator': [Stree(C=7, max_depth=5,\n",
-              "                                                   split_criteria='max_samples',\n",
-              "                                                   tol=0.01)],\n",
-              "                          'base_estimator__C': [1, 7, 55],\n",
-              "                          'base_estimator__kernel': ['linear'],\n",
-              "                          'base_estimator__max_depth': [3, 5, 7],\n",
-              "                          'base_estimator__split_criteria': ['max_samples',\n",
-              "                                                             'impurity'],\n",
-              "                          'base_e...\n",
-              "                          'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
-              "                         {'base_estimator': [Stree()],\n",
-              "                          'base_estimator__C': [1, 7, 55],\n",
-              "                          'base_estimator__gamma': [0.1, 1, 10],\n",
-              "                          'base_estimator__kernel': ['rbf'],\n",
-              "                          'base_estimator__max_depth': [3, 5, 7],\n",
-              "                          'base_estimator__split_criteria': ['max_samples',\n",
-              "                                                             'impurity'],\n",
-              "                          'base_estimator__tol': [0.1, 0.01],\n",
-              "                          'learning_rate': [0.5, 1],\n",
-              "                          'n_estimators': [10, 25]}],\n",
-              "             return_train_score=True, verbose=10)"
-            ]
+            "text/plain": "GridSearchCV(estimator=AdaBoostClassifier(random_state=2020), n_jobs=-1,\n             param_grid={'base_estimator': [Stree(C=1, max_depth=3, tol=0.1)],\n                         'base_estimator__C': [1, 3],\n                         'base_estimator__kernel': ['linear', 'poly', 'rbf'],\n                         'base_estimator__max_depth': [3, 5],\n                         'base_estimator__tol': [0.1, 0.01],\n                         'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n             return_train_score=True, verbose=10)"
          },
          "metadata": {},
-          "execution_count": 7
+          "execution_count": 11
        }
      ]
    },
-    {
-      "source": [
-        "GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
-        "             n_jobs=-1,\n",
-        "             param_grid={'base_estimator': [Stree(C=55, max_depth=3, tol=0.01)],\n",
-        "                         'base_estimator__C': [7, 55],\n",
-        "                         'base_estimator__kernel': ['linear', 'poly', 'rbf'],\n",
-        "                         'base_estimator__max_depth': [3, 5],\n",
-        "                         'base_estimator__tol': [0.1, 0.01],\n",
-        "                         'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
-        "             return_train_score=True, verbose=10)"
-      ],
-      "cell_type": "markdown",
-      "metadata": {}
-    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZjX88NoYDZE8",
        "colab_type": "code",
        "colab": {},
-        "outputId": "285163c8-fa33-4915-8ae7-61c4f7844344",
-        "tags": []
+        "outputId": "285163c8-fa33-4915-8ae7-61c4f7844344"
      },
      "source": [
        "print(\"Best estimator: \", grid.best_estimator_)\n",
        "print(\"Best hyperparameters: \", grid.best_params_)\n",
        "print(\"Best accuracy: \", grid.best_score_)"
      ],
-      "execution_count": 8,
+      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
-          "text": [
-            "Best estimator:  AdaBoostClassifier(algorithm='SAMME',\n                   base_estimator=Stree(C=7, max_depth=5,\n                                        split_criteria='max_samples',\n                                        tol=0.01),\n                   learning_rate=0.5, n_estimators=25, random_state=2020)\nBest hyperparameters:  {'base_estimator': Stree(C=7, max_depth=5, split_criteria='max_samples', tol=0.01), 'base_estimator__C': 7, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 5, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\nBest accuracy:  0.9549825174825175\n"
-          ]
+          "text": "Best estimator:  AdaBoostClassifier(base_estimator=Stree(C=1, max_depth=3, tol=0.1),\n                   learning_rate=0.5, n_estimators=10, random_state=2020)\nBest hyperparameters:  {'base_estimator': Stree(C=1, max_depth=3, tol=0.1), 'base_estimator__C': 1, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 3, 'base_estimator__tol': 0.1, 'learning_rate': 0.5, 'n_estimators': 10}\nBest accuracy:  0.9492316893632683\n"
        }
      ]
-    },
-    {
-      "source": [
-        "Best estimator:  AdaBoostClassifier(algorithm='SAMME',\n",
-        "                   base_estimator=Stree(C=55, max_depth=3, tol=0.01),\n",
-        "                   learning_rate=0.5, n_estimators=25, random_state=2020)\n",
-        "\n",
-        "Best hyperparameters:  {'base_estimator': Stree(C=55, max_depth=3, tol=0.01), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 3, 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\n",
-        "\n",
-        "Best accuracy:  0.9559440559440558"
-      ],
-      "cell_type": "markdown",
-      "metadata": {}
-    },
-    {
-      "source": [
-        "0.9511547662863451"
-      ],
-      "cell_type": "markdown",
-      "metadata": {}
    }
  ],
  "metadata": {
@@ -429,12 +227,12 @@
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
-      "version": "3.8.4-final"
+      "version": "3.7.6-final"
    },
    "orig_nbformat": 2,
    "kernelspec": {
-      "name": "python38464bitgeneralvenv77203c0a6afd4428bd66253ef62753dc",
-      "display_name": "Python 3.8.4 64-bit ('general': venv)"
+      "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
+      "display_name": "Python 3.7.6 64-bit ('general': venv)"
    },
    "colab": {
      "name": "gridsearch.ipynb",
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 import setuptools

-__version__ = "0.9rc6"
+__version__ = "0.9rc4"
 __author__ = "Ricardo Montañana Gómez"


@@ -25,7 +25,7 @@ setuptools.setup(
    classifiers=[
        "Development Status :: 4 - Beta",
        "License :: OSI Approved :: MIT License",
-        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.7",
        "Natural Language :: English",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Intended Audience :: Science/Research",
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -6,25 +6,24 @@ __version__ = "0.9"
 Build an oblique tree classifier based on SVM Trees
 """

+from __future__ import annotations
 import os
-import numbers
 import random
 import warnings
-from math import log, factorial
-from typing import Optional
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.svm import SVC, LinearSVC
-from sklearn.utils import check_consistent_length
-from sklearn.utils.multiclass import check_classification_targets
-from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.validation import (
-    check_X_y,
-    check_array,
+from typing import Optional, List, Union, Tuple
+from math import log
+from itertools import combinations
+import numpy as np  # type: ignore
+from sklearn.base import BaseEstimator, ClassifierMixin  # type: ignore
+from sklearn.svm import SVC, LinearSVC  # type: ignore
+from sklearn.utils.multiclass import (  # type: ignore
+    check_classification_targets,
+)
+from sklearn.exceptions import ConvergenceWarning  # type: ignore
+from sklearn.utils.validation import (  # type: ignore
    check_is_fitted,
    _check_sample_weight,
 )
-from sklearn.metrics._classification import _weighted_sum, _check_targets


 class Snode:
@@ -34,7 +33,7 @@ class Snode:

    def __init__(
        self,
-        clf: SVC,
+        clf: Union[SVC, LinearSVC],
        X: np.ndarray,
        y: np.ndarray,
        features: np.array,
@@ -42,25 +41,25 @@ class Snode:
        title: str,
        weight: np.ndarray = None,
    ):
-        self._clf = clf
-        self._title = title
-        self._belief = 0.0
+        self._clf: Union[SVC, LinearSVC] = clf
+        self._title: str = title
+        self._belief: float = 0.0
        # Only store dataset in Testing
-        self._X = X if os.environ.get("TESTING", "NS") != "NS" else None
-        self._y = y
-        self._down = None
-        self._up = None
+        self._X: Optional[np.array] = X if os.environ.get(
+            "TESTING", "NS"
+        ) != "NS" else None
+        self._y: np.array = y
+        self._down: Optional[Snode] = None
+        self._up: Optional[Snode] = None
        self._class = None
-        self._feature = None
-        self._sample_weight = (
+        self._sample_weight: Optional[np.array] = (
            weight if os.environ.get("TESTING", "NS") != "NS" else None
        )
-        self._features = features
-        self._impurity = impurity
-        self._partition_column: int = -1
+        self._features: Tuple[int, ...] = features
+        self._impurity: float = impurity

    @classmethod
-    def copy(cls, node: "Snode") -> "Snode":
+    def copy(cls, node: Snode) -> Snode:
        return cls(
            node._clf,
            node._X,
@@ -70,28 +69,22 @@ class Snode:
            node._title,
        )

-    def set_partition_column(self, col: int):
-        self._partition_column = col
-
-    def get_partition_column(self) -> int:
-        return self._partition_column
-
-    def set_down(self, son):
+    def set_down(self, son: Snode) -> None:
        self._down = son

-    def set_up(self, son):
+    def set_up(self, son: Snode) -> None:
        self._up = son

    def is_leaf(self) -> bool:
        return self._up is None and self._down is None

-    def get_down(self) -> "Snode":
+    def get_down(self) -> Optional[Snode]:
        return self._down

-    def get_up(self) -> "Snode":
+    def get_up(self) -> Optional[Snode]:
        return self._up

-    def make_predictor(self):
+    def make_predictor(self) -> None:
        """Compute the class of the predictor and its belief based on the
        subdataset of the node only if it is a leaf
        """
@@ -100,8 +93,9 @@ class Snode:
        classes, card = np.unique(self._y, return_counts=True)
        if len(classes) > 1:
            max_card = max(card)
+            min_card = min(card)
            self._class = classes[card == max_card][0]
-            self._belief = max_card / np.sum(card)
+            self._belief = max_card / (max_card + min_card)
        else:
            self._belief = 1
            try:
@@ -110,29 +104,30 @@ class Snode:
                self._class = None

    def __str__(self) -> str:
-        count_values = np.unique(self._y, return_counts=True)
        if self.is_leaf():
-            return (
+            count_values = np.unique(self._y, return_counts=True)
+            result = (
                f"{self._title} - Leaf class={self._class} belief="
                f"{self._belief: .6f} impurity={self._impurity:.4f} "
                f"counts={count_values}"
            )
+            return result
        else:
            return (
                f"{self._title} feaures={self._features} impurity="
-                f"{self._impurity:.4f} "
-                f"counts={count_values}"
+                f"{self._impurity:.4f}"
            )


 class Siterator:
-    """Stree preorder iterator"""
+    """Stree preorder iterator
+    """

-    def __init__(self, tree: Snode):
-        self._stack = []
+    def __init__(self, tree: Optional[Snode]):
+        self._stack: List[Snode] = []
        self._push(tree)

-    def _push(self, node: Snode):
+    def _push(self, node: Optional[Snode]) -> None:
        if node is not None:
            self._stack.append(node)

@@ -148,21 +143,21 @@ class Siterator:
 class Splitter:
    def __init__(
        self,
-        clf: SVC = None,
-        criterion: str = None,
-        splitter_type: str = None,
-        criteria: str = None,
-        min_samples_split: int = None,
-        random_state=None,
+        clf: Union[SVC, LinearSVC] = None,
+        criterion: str = "",
+        splitter_type: str = "",
+        criteria: str = "",
+        min_samples_split: int = 0,
+        random_state: Optional[int] = None,
    ):
-        self._clf = clf
-        self._random_state = random_state
+        self._clf: Union[SVC, LinearSVC] = clf
+        self._random_state: Optional[int] = random_state
        if random_state is not None:
            random.seed(random_state)
-        self._criterion = criterion
-        self._min_samples_split = min_samples_split
-        self._criteria = criteria
-        self._splitter_type = splitter_type
+        self._criterion: str = criterion
+        self._min_samples_split: int = min_samples_split
+        self._criteria: str = criteria
+        self._splitter_type: str = splitter_type

        if clf is None:
            raise ValueError(f"clf has to be a sklearn estimator, got({clf})")
@@ -172,28 +167,26 @@ class Splitter:
                f"criterion must be gini or entropy got({criterion})"
            )

-        if criteria not in [
-            "max_samples",
-            "impurity",
-        ]:
+        if criteria not in ["min_distance", "max_samples", "max_distance"]:
            raise ValueError(
-                f"criteria has to be max_samples or impurity; got ({criteria})"
+                "split_criteria has to be min_distance "
+                f"max_distance or max_samples got ({criteria})"
            )

        if splitter_type not in ["random", "best"]:
            raise ValueError(
-                f"splitter must be either random or best, got({splitter_type})"
+                f"splitter must be either random or best got({splitter_type})"
            )
        self.criterion_function = getattr(self, f"_{self._criterion}")
        self.decision_criteria = getattr(self, f"_{self._criteria}")

-    def partition_impurity(self, y: np.array) -> np.array:
+    def impurity(self, y: np.array) -> np.array:
        return self.criterion_function(y)

    @staticmethod
    def _gini(y: np.array) -> float:
        _, count = np.unique(y, return_counts=True)
-        return 1 - np.sum(np.square(count / np.sum(count)))
+        return float(1 - np.sum(np.square(count / np.sum(count))))

    @staticmethod
    def _entropy(y: np.array) -> float:
@@ -227,7 +220,7 @@ class Splitter:
        if samples == 0:
            return 0.0
        else:
-            result = (
+            result = float(
                imp_prev
                - (card_up / samples) * imp_up
                - (card_dn / samples) * imp_dn
@@ -235,17 +228,20 @@ class Splitter:
            return result

    def _select_best_set(
-        self, dataset: np.array, labels: np.array, features_sets: list
-    ) -> list:
-        max_gain = 0
-        selected = None
+        self,
+        dataset: np.array,
+        labels: np.array,
+        features_sets: List[Tuple[int, ...]],
+    ) -> Tuple[int, ...]:
+        max_gain: float = 0.0
+        selected: Union[Tuple[int, ...], None] = None
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        for feature_set in features_sets:
            self._clf.fit(dataset[:, feature_set], labels)
            node = Snode(
                self._clf, dataset, labels, feature_set, 0.0, "subset"
            )
-            self.partition(dataset, node, train=True)
+            self.partition(dataset, node)
            y1, y2 = self.part(labels)
            gain = self.information_gain(labels, y1, y2)
            if gain > max_gain:
@@ -253,125 +249,93 @@ class Splitter:
                selected = feature_set
        return selected if selected is not None else feature_set

-    @staticmethod
-    def _generate_spaces(features: int, max_features: int) -> list:
-        comb = set()
-        # Generate at most 5 combinations
-        if max_features == features:
-            set_length = 1
-        else:
-            number = factorial(features) / (
-                factorial(max_features) * factorial(features - max_features)
-            )
-            set_length = min(5, number)
-        while len(comb) < set_length:
-            comb.add(
-                tuple(sorted(random.sample(range(features), max_features)))
-            )
-        return list(comb)
-
    def _get_subspaces_set(
        self, dataset: np.array, labels: np.array, max_features: int
    ) -> np.array:
-        features_sets = self._generate_spaces(dataset.shape[1], max_features)
+        features = range(dataset.shape[1])
+        features_sets = list(combinations(features, max_features))
        if len(features_sets) > 1:
            if self._splitter_type == "random":
                index = random.randint(0, len(features_sets) - 1)
                return features_sets[index]
            else:
+                # get only 3 sets at most
+                if len(features_sets) > 3:
+                    features_sets = random.sample(features_sets, 3)
                return self._select_best_set(dataset, labels, features_sets)
        else:
            return features_sets[0]

    def get_subspace(
        self, dataset: np.array, labels: np.array, max_features: int
-    ) -> tuple:
-        """Return the best/random subspace to make a split"""
+    ) -> Tuple[np.array, np.array]:
+        """Return the best subspace to make a split
+        """
        indices = self._get_subspaces_set(dataset, labels, max_features)
        return dataset[:, indices], indices

-    def _impurity(self, data: np.array, y: np.array) -> np.array:
-        """return column of dataset to be taken into account to split dataset
+    @staticmethod
+    def _min_distance(data: np.array, _: np.array) -> np.array:
+        """Assign class to min distances

+        return a vector of classes so partition can separate class 0 from
+        the rest of classes, i.e. class 0 goes to one split node and the
+        rest of classes go to the other
        :param data: distances to hyper plane of every class
        :type data: np.array (m, n_classes)
-        :param y: vector of labels (classes)
-        :type y: np.array (m,)
-        :return: column of dataset to be taken into account to split dataset
-        :rtype: int
+        :param _: enable call compat with other measures
+        :type _: None
+        :return: vector with the class assigned to each sample
+        :rtype: np.array shape (m,)
        """
-        max_gain = 0
-        selected = -1
-        for col in range(data.shape[1]):
-            tup = y[data[:, col] > 0]
-            tdn = y[data[:, col] <= 0]
-            info_gain = self.information_gain(y, tup, tdn)
-            if info_gain > max_gain:
-                selected = col
-                max_gain = info_gain
-        return selected
+        return np.argmin(data, axis=1)
+
+    @staticmethod
+    def _max_distance(data: np.array, _: np.array) -> np.array:
+        """Assign class to max distances
+
+        return a vector of classes so partition can separate class 0 from
+        the rest of classes, i.e. class 0 goes to one split node and the
+        rest of classes go to the other
+        :param data: distances to hyper plane of every class
+        :type data: np.array (m, n_classes)
+        :param _: enable call compat with other measures
+        :type _: None
+        :return: vector with the class assigned to each sample
+        (values can be 0, 1, ...)
+        :rtype: np.array shape (m,)
+        """
+        return np.argmax(data, axis=1)

    @staticmethod
    def _max_samples(data: np.array, y: np.array) -> np.array:
-        """return column of dataset to be taken into account to split dataset
+        """return distances of the class with the most samples

        :param data: distances to hyper plane of every class
        :type data: np.array (m, n_classes)
        :param y: vector of labels (classes)
        :type y: np.array (m,)
-        :return: column of dataset to be taken into account to split dataset
-        :rtype: int
+        :return: vector with distances to hyperplane (can be positive or neg.)
+        :rtype: np.array shape (m,)
        """
        # select the class with max number of samples
        _, samples = np.unique(y, return_counts=True)
-        return np.argmax(samples)
+        selected = np.argmax(samples)
+        return data[:, selected]

-    def partition(self, samples: np.array, node: Snode, train: bool):
+    def partition(self, samples: np.array, node: Snode) -> None:
        """Set the criteria to split arrays. Compute the indices of the samples
        that should go to one side of the tree (down)

        """
-        # data contains the distances of every sample to every class hyperplane
-        # array of (m, nc) nc = # classes
        data = self._distances(node, samples)
        if data.shape[0] < self._min_samples_split:
-            # there aren't enough samples to split
-            self._up = np.ones((data.shape[0]), dtype=bool)
+            self._down = np.ones((data.shape[0]), dtype=bool)
            return
        if data.ndim > 1:
            # split criteria for multiclass
-            # Convert data to a (m, 1) array selecting values for samples
-            if train:
-                # in train time we have to compute the column to take into
-                # account to split the dataset
-                col = self.decision_criteria(data, node._y)
-                node.set_partition_column(col)
-            else:
-                # in predcit time just use the column computed in train time
-                # is taking the classifier of class <col>
-                col = node.get_partition_column()
-                if col == -1:
-                    # No partition is producing information gain
-                    data = np.ones(data.shape)
-            data = data[:, col]
-        self._up = data > 0
-
-    def part(self, origin: np.array) -> list:
-        """Split an array in two based on indices (down) and its complement
-        partition has to be called first to establish down indices
-
-        :param origin: dataset to split
-        :type origin: np.array
-        :param down: indices to use to split array
-        :type down: np.array
-        :return: list with two splits of the array
-        :rtype: list
-        """
-        down = ~self._up
-        return [
-            origin[self._up] if any(self._up) else None,
-            origin[down] if any(down) else None,
-        ]
+            data = self.decision_criteria(data, node._y)
+        self._down = data > 0

    @staticmethod
    def _distances(node: Snode, data: np.ndarray) -> np.array:
@@ -381,14 +345,30 @@ class Splitter:
        :type node: Snode
        :param data: samples to find out distance to hyperplane
        :type data: np.ndarray
-        :return: array of shape (m, nc) with the distances of every sample to
-        the hyperplane of every class. nc = # of classes
+        :return: array of shape (m,) with the distances of every sample to
+        the hyperplane of the node, or (m, nc) in multiclass (nc = # classes)
        :rtype: np.array
        """
        return node._clf.decision_function(data[:, node._features])

+    def part(self, origin: np.array) -> Tuple[np.array, np.array]:
+        """Split an array in two based on indices (down) and its complement

-class Stree(BaseEstimator, ClassifierMixin):
+        :param origin: dataset to split
+        :type origin: np.array
+        :return: tuple (up, down) with the two splits of the array, selected
+        with the indices stored by the last call to partition; a split is
+        None when no sample falls in it
+        :rtype: Tuple[np.array, np.array]
+        """
+        up = ~self._down
+        return (
+            origin[up] if any(up) else None,
+            origin[self._down] if any(self._down) else None,
+        )
+
+
+class Stree(BaseEstimator, ClassifierMixin):  # type: ignore
    """Estimator that is based on binary trees of svm nodes
    can deal with sample_weights in predict, used in boosting sklearn methods
    inheriting from BaseEstimator implements get_params and set_params methods
@@ -400,43 +380,35 @@ class Stree(BaseEstimator, ClassifierMixin):
        self,
        C: float = 1.0,
        kernel: str = "linear",
-        max_iter: int = 1e5,
-        random_state: int = None,
-        max_depth: int = None,
+        max_iter: int = 1000,
+        random_state: Optional[int] = None,
+        max_depth: Optional[int] = None,
        tol: float = 1e-4,
        degree: int = 3,
-        gamma="scale",
-        split_criteria: str = "impurity",
-        criterion: str = "entropy",
+        gamma: Union[float, str] = "scale",
+        split_criteria: str = "max_samples",
+        criterion: str = "gini",
        min_samples_split: int = 0,
-        max_features=None,
+        max_features: Optional[Union[str, int, float]] = None,
        splitter: str = "random",
    ):
        self.max_iter = max_iter
-        self.C = C
-        self.kernel = kernel
-        self.random_state = random_state
-        self.max_depth = max_depth
-        self.tol = tol
-        self.gamma = gamma
-        self.degree = degree
-        self.min_samples_split = min_samples_split
-        self.split_criteria = split_criteria
-        self.max_features = max_features
-        self.criterion = criterion
-        self.splitter = splitter
-
-    def _more_tags(self) -> dict:
-        """Required by sklearn to supply features of the classifier
-
-        :return: the tag required
-        :rtype: dict
-        """
-        return {"requires_y": True}
+        self.C: float = C
+        self.kernel: str = kernel
+        self.random_state: Optional[int] = random_state
+        self.max_depth: Optional[int] = max_depth
+        self.tol: float = tol
+        self.gamma: Union[float, str] = gamma
+        self.degree: int = degree
+        self.min_samples_split: int = min_samples_split
+        self.split_criteria: str = split_criteria
+        self.max_features: Union[str, int, float, None] = max_features
+        self.criterion: str = criterion
+        self.splitter: str = splitter

    def fit(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.array = None
-    ) -> "Stree":
+    ) -> Stree:
        """Build the tree based on the dataset of samples and its labels

        :param X: dataset of samples to make predictions
@@ -465,13 +437,11 @@ class Stree(BaseEstimator, ClassifierMixin):
                f"Maximum depth has to be greater than 1... got (max_depth=\
                    {self.max_depth})"
            )
-
        check_classification_targets(y)
-        X, y = check_X_y(X, y)
+        X, y = self._validate_data(X, y)
        sample_weight = _check_sample_weight(
            sample_weight, X, dtype=np.float64
        )
-        check_classification_targets(y)
        # Initialize computed parameters
        self.splitter_ = Splitter(
            clf=self._build_clf(),
@@ -487,8 +457,6 @@ class Stree(BaseEstimator, ClassifierMixin):
        self.n_classes_ = self.classes_.shape[0]
        self.n_iter_ = self.max_iter
        self.depth_ = 0
-        self.n_features_ = X.shape[1]
-        self.n_features_in_ = X.shape[1]
        self.max_features_ = self._initialize_max_features()
        self.tree_ = self.train(X, y, sample_weight, 1, "root")
        self._build_predictor()
@@ -544,10 +512,10 @@ class Stree(BaseEstimator, ClassifierMixin):
            if np.unique(y_next).shape[0] != self.n_classes_:
                sample_weight += 1e-5
        clf.fit(Xs, y, sample_weight=sample_weight)
-        impurity = self.splitter_.partition_impurity(y)
+        impurity = self.splitter_.impurity(y)
        node = Snode(clf, X, y, features, impurity, title, sample_weight)
        self.depth_ = max(depth, self.depth_)
-        self.splitter_.partition(X, node, True)
+        self.splitter_.partition(X, node)
        X_U, X_D = self.splitter_.part(X)
        y_u, y_d = self.splitter_.part(y)
        sw_u, sw_d = self.splitter_.part(sample_weight)
@@ -562,14 +530,25 @@ class Stree(BaseEstimator, ClassifierMixin):
                title=title + ", <cgaf>",
                weight=sample_weight,
            )
-        node.set_up(self.train(X_U, y_u, sw_u, depth + 1, title + " - Up"))
-        node.set_down(self.train(X_D, y_d, sw_d, depth + 1, title + " - Down"))
+        node.set_up(
+            self.train(  # type: ignore
+                X_U, y_u, sw_u, depth + 1, title + " - Up"
+            )
+        )
+        node.set_down(
+            self.train(  # type: ignore
+                X_D, y_d, sw_d, depth + 1, title + " - Down"
+            )
+        )
        return node

-    def _build_predictor(self):
-        """Process the leaves to make them predictors"""
+    def _build_predictor(self) -> None:
+        """Process the leaves to make them predictors
+        """

-        def run_tree(node: Snode):
+        def run_tree(node: Optional[Snode]) -> None:
+            if node is None:
+                raise ValueError("Can't build predictors on None")
            if node.is_leaf():
                node.make_predictor()
                return
@@ -578,8 +557,9 @@ class Stree(BaseEstimator, ClassifierMixin):

        run_tree(self.tree_)

-    def _build_clf(self):
-        """Build the correct classifier for the node"""
+    def _build_clf(self) -> Union[LinearSVC, SVC]:
+        """ Build the correct classifier for the node
+        """
        return (
            LinearSVC(
                max_iter=self.max_iter,
@@ -626,30 +606,30 @@ class Stree(BaseEstimator, ClassifierMixin):
        """

        def predict_class(
-            xp: np.array, indices: np.array, node: Snode
+            xp: np.array, indices: np.array, node: Optional[Snode]
        ) -> np.array:
            if xp is None:
                return [], []
-            if node.is_leaf():
+            if node.is_leaf():  # type: ignore
                # set a class for every sample in dataset
-                prediction = np.full((xp.shape[0], 1), node._class)
+                prediction = np.full(
+                    (xp.shape[0], 1), node._class  # type: ignore
+                )
                return prediction, indices
-            self.splitter_.partition(xp, node, train=False)
+            self.splitter_.partition(xp, node)  # type: ignore
            x_u, x_d = self.splitter_.part(xp)
            i_u, i_d = self.splitter_.part(indices)
-            prx_u, prin_u = predict_class(x_u, i_u, node.get_up())
-            prx_d, prin_d = predict_class(x_d, i_d, node.get_down())
+            prx_u, prin_u = predict_class(
+                x_u, i_u, node.get_up()  # type: ignore
+            )
+            prx_d, prin_d = predict_class(
+                x_d, i_d, node.get_down()  # type: ignore
+            )
            return np.append(prx_u, prx_d), np.append(prin_u, prin_d)

-        # sklearn check
-        check_is_fitted(self, ["tree_"])
+        check_is_fitted(self, "n_features_in_")
        # Input validation
-        X = check_array(X)
-        if X.shape[1] != self.n_features_:
-            raise ValueError(
-                f"Expected {self.n_features_} features but got "
-                f"({X.shape[1]})"
-            )
+        X = self._validate_data(X, reset=False)
        # setup prediction & make it happen
        indices = np.arange(X.shape[0])
        result = (
@@ -659,32 +639,6 @@ class Stree(BaseEstimator, ClassifierMixin):
        )
        return self.classes_[result]

-    def score(
-        self, X: np.array, y: np.array, sample_weight: np.array = None
-    ) -> float:
-        """Compute accuracy of the prediction
-
-        :param X: dataset of samples to make predictions
-        :type X: np.array
-        :param y_true: samples labels
-        :type y_true: np.array
-        :param sample_weight: weights of the samples. Rescale C per sample.
-        Hi' weights force the classifier to put more emphasis on these points
-        :type sample_weight: np.array optional
-        :return: accuracy of the prediction
-        :rtype: float
-        """
-        # sklearn check
-        check_is_fitted(self)
-        check_classification_targets(y)
-        X, y = check_X_y(X, y)
-        y_pred = self.predict(X).reshape(y.shape)
-        # Compute accuracy for each possible representation
-        _, y_true, y_pred = _check_targets(y, y_pred)
-        check_consistent_length(y_true, y_pred, sample_weight)
-        score = y_true == y_pred
-        return _weighted_sum(score, sample_weight, normalize=True)
-
    def __iter__(self) -> Siterator:
        """Create an iterator to be able to visit the nodes of the tree in
        preorder, can make a list with all the nodes in preorder
@@ -712,11 +666,11 @@ class Stree(BaseEstimator, ClassifierMixin):
    def _initialize_max_features(self) -> int:
        if isinstance(self.max_features, str):
            if self.max_features == "auto":
-                max_features = max(1, int(np.sqrt(self.n_features_)))
+                max_features = max(1, int(np.sqrt(self.n_features_in_)))
            elif self.max_features == "sqrt":
-                max_features = max(1, int(np.sqrt(self.n_features_)))
+                max_features = max(1, int(np.sqrt(self.n_features_in_)))
            elif self.max_features == "log2":
-                max_features = max(1, int(np.log2(self.n_features_)))
+                max_features = max(1, int(np.log2(self.n_features_in_)))
            else:
                raise ValueError(
                    "Invalid value for max_features. "
@@ -724,13 +678,13 @@ class Stree(BaseEstimator, ClassifierMixin):
                    "'sqrt' or 'log2'."
                )
        elif self.max_features is None:
-            max_features = self.n_features_
-        elif isinstance(self.max_features, numbers.Integral):
+            max_features = self.n_features_in_
+        elif isinstance(self.max_features, int):
            max_features = self.max_features
        else:  # float
            if self.max_features > 0.0:
                max_features = max(
-                    1, int(self.max_features * self.n_features_)
+                    1, int(self.max_features * self.n_features_in_)
                )
            else:
                raise ValueError(
--- a/stree/tests/Snode_test.py
+++ b/stree/tests/Snode_test.py
@@ -1,3 +1,4 @@
+# type: ignore
 import os
 import unittest

@@ -40,13 +41,12 @@ class Snode_test(unittest.TestCase):
            # Check Class
            class_computed = classes[card == max_card]
            self.assertEqual(class_computed, node._class)
-            # Check Partition column
-            self.assertEqual(node._partition_column, -1)

        check_leave(self._clf.tree_)

    def test_nodes_coefs(self):
-        """Check if the nodes of the tree have the right attributes filled"""
+        """Check if the nodes of the tree have the right attributes filled
+        """

        def run_tree(node: Snode):
            if node._belief < 1:
@@ -55,19 +55,16 @@ class Snode_test(unittest.TestCase):
                self.assertIsNotNone(node._clf.coef_)
            if node.is_leaf():
                return
-            run_tree(node.get_up())
            run_tree(node.get_down())
+            run_tree(node.get_up())

-        model = Stree(self._random_state)
-        model.fit(*load_dataset(self._random_state, 3, 4))
-        run_tree(model.tree_)
+        run_tree(self._clf.tree_)

    def test_make_predictor_on_leaf(self):
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
        test.make_predictor()
        self.assertEqual(1, test._class)
        self.assertEqual(0.75, test._belief)
-        self.assertEqual(-1, test._partition_column)

    def test_make_predictor_on_not_leaf(self):
        test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
@@ -75,14 +72,11 @@ class Snode_test(unittest.TestCase):
        test.make_predictor()
        self.assertIsNone(test._class)
        self.assertEqual(0, test._belief)
-        self.assertEqual(-1, test._partition_column)
-        self.assertEqual(-1, test.get_up()._partition_column)

    def test_make_predictor_on_leaf_bogus_data(self):
        test = Snode(None, [1, 2, 3, 4], [], [], 0.0, "test")
        test.make_predictor()
        self.assertIsNone(test._class)
-        self.assertEqual(-1, test._partition_column)

    def test_copy_node(self):
        px = [1, 2, 3, 4]
@@ -93,4 +87,3 @@ class Snode_test(unittest.TestCase):
        self.assertListEqual(computed._y, py)
        self.assertEqual("test", computed._title)
        self.assertIsInstance(computed._clf, Stree)
-        self.assertEqual(test._partition_column, computed._partition_column)
--- a/stree/tests/Splitter_test.py
+++ b/stree/tests/Splitter_test.py
@@ -1,3 +1,4 @@
+# type: ignore
 import os
 import unittest
 import random
@@ -19,7 +20,7 @@ class Splitter_test(unittest.TestCase):
        min_samples_split=0,
        splitter_type="random",
        criterion="gini",
-        criteria="max_samples",
+        criteria="min_distance",
        random_state=None,
    ):
        return Splitter(
@@ -46,7 +47,11 @@ class Splitter_test(unittest.TestCase):
            _ = Splitter(clf=None)
        for splitter_type in ["best", "random"]:
            for criterion in ["gini", "entropy"]:
-                for criteria in ["max_samples", "impurity"]:
+                for criteria in [
+                    "min_distance",
+                    "max_samples",
+                    "max_distance",
+                ]:
                    tcl = self.build(
                        splitter_type=splitter_type,
                        criterion=criterion,
@@ -134,45 +139,43 @@ class Splitter_test(unittest.TestCase):
                [0.7, 0.01, -0.1],
                [0.7, -0.9, 0.5],
                [0.1, 0.2, 0.3],
-                [-0.1, 0.2, 0.3],
-                [-0.1, 0.2, 0.3],
            ]
        )
-        expected = data[:, 0]
-        y = [1, 2, 1, 0, 0, 0]
+        expected = np.array([0.2, 0.01, -0.9, 0.2])
+        y = [1, 2, 1, 0]
        computed = tcl._max_samples(data, y)
-        self.assertEqual(0, computed)
-        computed_data = data[:, computed]
-        self.assertEqual((6,), computed_data.shape)
-        self.assertListEqual(expected.tolist(), computed_data.tolist())
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())

-    def test_impurity(self):
-        tcl = self.build(criteria="impurity")
+    def test_min_distance(self):
+        tcl = self.build()
        data = np.array(
            [
                [-0.1, 0.2, -0.3],
                [0.7, 0.01, -0.1],
                [0.7, -0.9, 0.5],
                [0.1, 0.2, 0.3],
-                [-0.1, 0.2, 0.3],
-                [-0.1, 0.2, 0.3],
            ]
        )
-        expected = data[:, 2]
-        y = np.array([1, 2, 1, 0, 0, 0])
-        computed = tcl._impurity(data, y)
-        self.assertEqual(2, computed)
-        computed_data = data[:, computed]
-        self.assertEqual((6,), computed_data.shape)
-        self.assertListEqual(expected.tolist(), computed_data.tolist())
+        expected = np.array([2, 2, 1, 0])
+        computed = tcl._min_distance(data, None)
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())

-    def test_generate_subspaces(self):
-        features = 250
-        for max_features in range(2, features):
-            num = len(Splitter._generate_spaces(features, max_features))
-            self.assertEqual(5, num)
-        self.assertEqual(3, len(Splitter._generate_spaces(3, 2)))
-        self.assertEqual(4, len(Splitter._generate_spaces(4, 3)))
+    def test_max_distance(self):
+        tcl = self.build(criteria="max_distance")
+        data = np.array(
+            [
+                [-0.1, 0.2, -0.3],
+                [0.7, 0.01, -0.1],
+                [0.7, -0.9, 0.5],
+                [0.1, 0.2, 0.3],
+            ]
+        )
+        expected = np.array([1, 0, 0, 2])
+        computed = tcl._max_distance(data, None)
+        self.assertEqual((4,), computed.shape)
+        self.assertListEqual(expected.tolist(), computed.tolist())

    def test_best_splitter_few_sets(self):
        X, y = load_iris(return_X_y=True)
@@ -184,22 +187,27 @@ class Splitter_test(unittest.TestCase):

    def test_splitter_parameter(self):
        expected_values = [
-            [1, 4, 9, 12],  # best   entropy max_samples
-            [1, 3, 6, 10],  # best   entropy impurity
-            [6, 8, 10, 12],  # best   gini    max_samples
-            [7, 8, 10, 11],  # best   gini    impurity
-            [0, 3, 8, 12],  # random entropy max_samples
-            [0, 3, 9, 11],  # random entropy impurity
-            [0, 4, 7, 12],  # random gini    max_samples
-            [0, 2, 5, 6],  # random gini    impurity
+            [2, 3, 5, 7],  # best   entropy min_distance
+            [0, 2, 4, 5],  # best   entropy max_samples
+            [0, 2, 8, 12],  # best   entropy max_distance
+            [1, 2, 5, 12],  # best   gini    min_distance
+            [0, 3, 4, 10],  # best   gini    max_samples
+            [1, 2, 9, 12],  # best   gini    max_distance
+            [3, 9, 11, 12],  # random entropy min_distance
+            [1, 5, 6, 9],  # random entropy max_samples
+            [1, 2, 4, 8],  # random entropy max_distance
+            [2, 6, 7, 12],  # random gini    min_distance
+            [3, 9, 10, 11],  # random gini    max_samples
+            [2, 5, 8, 12],  # random gini    max_distance
        ]
        X, y = load_wine(return_X_y=True)
        rn = 0
        for splitter_type in ["best", "random"]:
            for criterion in ["entropy", "gini"]:
                for criteria in [
+                    "min_distance",
                    "max_samples",
-                    "impurity",
+                    "max_distance",
                ]:
                    tcl = self.build(
                        splitter_type=splitter_type,
@@ -212,10 +220,8 @@ class Splitter_test(unittest.TestCase):
                    dataset, computed = tcl.get_subspace(X, y, max_features=4)
                    # print(
                    #     "{},  # {:7s}{:8s}{:15s}".format(
-                    #         list(computed),
-                    #         splitter_type,
-                    #         criterion,
-                    #         criteria,
+                    #         list(computed), splitter_type, criterion,
+                    #           criteria,
                    #     )
                    # )
                    self.assertListEqual(expected, list(computed))
--- a/stree/tests/Stree_test.py
+++ b/stree/tests/Stree_test.py
@@ -1,3 +1,4 @@
+# type: ignore
 import os
 import unittest
 import warnings
@@ -5,7 +6,6 @@ import warnings
 import numpy as np
 from sklearn.datasets import load_iris, load_wine
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.svm import LinearSVC

 from stree import Stree, Snode
 from .utils import load_dataset
@@ -42,22 +42,23 @@ class Stree_test(unittest.TestCase):
        _, count_u = np.unique(y_up, return_counts=True)
        #
        for i in unique_y:
-            number_up = count_u[i]
+            number_down = count_d[i]
            try:
-                number_down = count_d[i]
+                number_up = count_u[i]
            except IndexError:
-                number_down = 0
+                number_up = 0
            self.assertEqual(count_y[i], number_down + number_up)
        # Is the partition made the same as the prediction?
        # as the node is not a leaf...
        _, count_yp = np.unique(y_prediction, return_counts=True)
-        self.assertEqual(count_yp[1], y_up.shape[0])
-        self.assertEqual(count_yp[0], y_down.shape[0])
+        self.assertEqual(count_yp[0], y_up.shape[0])
+        self.assertEqual(count_yp[1], y_down.shape[0])
        self._check_tree(node.get_down())
        self._check_tree(node.get_up())

    def test_build_tree(self):
-        """Check if the tree is built the same way as predictions of models"""
+        """Check if the tree is built the same way as predictions of models
+        """
        warnings.filterwarnings("ignore")
        for kernel in self._kernels:
            clf = Stree(kernel=kernel, random_state=self._random_state)
@@ -99,22 +100,20 @@ class Stree_test(unittest.TestCase):
            self.assertListEqual(yp_line.tolist(), yp_once.tolist())

    def test_iterator_and_str(self):
-        """Check preorder iterator"""
+        """Check preorder iterator
+        """
        expected = [
-            "root feaures=(0, 1, 2) impurity=1.0000 counts=(array([0, 1]), arr"
-            "ay([750, 750]))",
-            "root - Down, <cgaf> - Leaf class=0 belief= 0.928297 impurity=0.37"
-            "22 counts=(array([0, 1]), array([725,  56]))",
-            "root - Up feaures=(0, 1, 2) impurity=0.2178 counts=(array([0, 1])"
-            ", array([ 25, 694]))",
-            "root - Up - Down feaures=(0, 1, 2) impurity=0.8454 counts=(array("
-            "[0, 1]), array([8, 3]))",
-            "root - Up - Down - Down, <pure> - Leaf class=0 belief= 1.000000 i"
-            "mpurity=0.0000 counts=(array([0]), array([7]))",
-            "root - Up - Down - Up, <cgaf> - Leaf class=1 belief= 0.750000 imp"
-            "urity=0.8113 counts=(array([0, 1]), array([1, 3]))",
-            "root - Up - Up, <cgaf> - Leaf class=1 belief= 0.975989 impurity=0"
-            ".1634 counts=(array([0, 1]), array([ 17, 691]))",
+            "root feaures=(0, 1, 2) impurity=0.5000",
+            "root - Down feaures=(0, 1, 2) impurity=0.0671",
+            "root - Down - Down, <cgaf> - Leaf class=1 belief= 0.975989 "
+            "impurity=0.0469 counts=(array([0, 1]), array([ 17, 691]))",
+            "root - Down - Up feaures=(0, 1, 2) impurity=0.3967",
+            "root - Down - Up - Down, <cgaf> - Leaf class=1 belief= 0.750000 "
+            "impurity=0.3750 counts=(array([0, 1]), array([1, 3]))",
+            "root - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 "
+            "impurity=0.0000 counts=(array([0]), array([7]))",
+            "root - Up, <cgaf> - Leaf class=0 belief= 0.928297 impurity=0.1331"
+            " counts=(array([0, 1]), array([725,  56]))",
        ]
        computed = []
        expected_string = ""
@@ -190,43 +189,44 @@ class Stree_test(unittest.TestCase):
    def test_muticlass_dataset(self):
        datasets = {
            "Synt": load_dataset(random_state=self._random_state, n_classes=3),
-            "Iris": load_wine(return_X_y=True),
+            "Iris": load_iris(return_X_y=True),
        }
        outcomes = {
            "Synt": {
-                "max_samples linear": 0.9606666666666667,
-                "max_samples rbf": 0.7133333333333334,
-                "max_samples poly": 0.49066666666666664,
-                "impurity linear": 0.9606666666666667,
-                "impurity rbf": 0.7133333333333334,
-                "impurity poly": 0.49066666666666664,
+                "max_samples linear": 0.9533333333333334,
+                "max_samples rbf": 0.836,
+                "max_samples poly": 0.9473333333333334,
+                "min_distance linear": 0.9533333333333334,
+                "min_distance rbf": 0.836,
+                "min_distance poly": 0.9473333333333334,
+                "max_distance linear": 0.9533333333333334,
+                "max_distance rbf": 0.836,
+                "max_distance poly": 0.9473333333333334,
            },
            "Iris": {
-                "max_samples linear": 1.0,
-                "max_samples rbf": 0.6910112359550562,
-                "max_samples poly": 0.6966292134831461,
-                "impurity linear": 1,
-                "impurity rbf": 0.6910112359550562,
-                "impurity poly": 0.6966292134831461,
+                "max_samples linear": 0.98,
+                "max_samples rbf": 1.0,
+                "max_samples poly": 1.0,
+                "min_distance linear": 0.98,
+                "min_distance rbf": 1.0,
+                "min_distance poly": 1.0,
+                "max_distance linear": 0.98,
+                "max_distance rbf": 1.0,
+                "max_distance poly": 1.0,
            },
        }
-
        for name, dataset in datasets.items():
            px, py = dataset
-            for criteria in ["max_samples", "impurity"]:
+            for criteria in ["max_samples", "min_distance", "max_distance"]:
                for kernel in self._kernels:
                    clf = Stree(
-                        C=55,
-                        max_iter=1e5,
+                        C=1e4,
+                        max_iter=1e4,
                        kernel=kernel,
                        random_state=self._random_state,
                    )
                    clf.fit(px, py)
                    outcome = outcomes[name][f"{criteria} {kernel}"]
-                    # print(
-                    #     f"{name} {criteria} {kernel} {outcome} {clf.score(px"
-                    #     ", py)}"
-                    # )
                    self.assertAlmostEqual(outcome, clf.score(px, py))

    def test_max_features(self):
@@ -240,7 +240,7 @@ class Stree_test(unittest.TestCase):
            (None, 16),
        ]
        clf = Stree()
-        clf.n_features_ = n_features
+        clf.n_features_in_ = n_features
        for max_features, expected in expected_values:
            clf.set_params(**dict(max_features=max_features))
            computed = clf._initialize_max_features()
@@ -298,10 +298,7 @@ class Stree_test(unittest.TestCase):
            0.9433333333333334,
        ]
        for kernel, accuracy_expected in zip(self._kernels, accuracies):
-            clf = Stree(
-                random_state=self._random_state,
-                kernel=kernel,
-            )
+            clf = Stree(random_state=self._random_state, kernel=kernel,)
            clf.fit(X, y)
            accuracy_score = clf.score(X, y)
            yp = clf.predict(X)
@@ -313,7 +310,75 @@ class Stree_test(unittest.TestCase):
        X, y = load_dataset(self._random_state)
        clf = Stree(random_state=self._random_state, max_features=2)
        clf.fit(X, y)
-        self.assertAlmostEqual(0.9246666666666666, clf.score(X, y))
+        self.assertAlmostEqual(0.9426666666666667, clf.score(X, y))
+
+    def test_score_multi_class(self):
+        warnings.filterwarnings("ignore")
+        accuracies = [
+            0.8258427,  # Wine    linear min_distance
+            0.6741573,  # Wine    linear max_distance
+            0.8314607,  # Wine    linear max_samples
+            0.6629213,  # Wine    rbf   min_distance
+            1.0000000,  # Wine    rbf   max_distance
+            0.4044944,  # Wine    rbf   max_samples
+            0.9157303,  # Wine    poly  min_distance
+            1.0000000,  # Wine    poly  max_distance
+            0.7640449,  # Wine    poly  max_samples
+            0.9933333,  # Iris    linear min_distance
+            0.9666667,  # Iris    linear max_distance
+            0.9666667,  # Iris    linear max_samples
+            0.9800000,  # Iris    rbf   min_distance
+            0.9800000,  # Iris    rbf   max_distance
+            0.9800000,  # Iris    rbf   max_samples
+            1.0000000,  # Iris    poly  min_distance
+            1.0000000,  # Iris    poly  max_distance
+            1.0000000,  # Iris    poly  max_samples
+            0.8993333,  # Synthetic linear min_distance
+            0.6533333,  # Synthetic linear max_distance
+            0.9313333,  # Synthetic linear max_samples
+            0.8320000,  # Synthetic rbf   min_distance
+            0.6660000,  # Synthetic rbf   max_distance
+            0.8320000,  # Synthetic rbf   max_samples
+            0.6066667,  # Synthetic poly  min_distance
+            0.6840000,  # Synthetic poly  max_distance
+            0.6340000,  # Synthetic poly  max_samples
+        ]
+        datasets = [
+            ("Wine", load_wine(return_X_y=True)),
+            ("Iris", load_iris(return_X_y=True)),
+            (
+                "Synthetic",
+                load_dataset(self._random_state, n_classes=3, n_features=5),
+            ),
+        ]
+        for dataset_name, dataset in datasets:
+            X, y = dataset
+            for kernel in self._kernels:
+                for criteria in [
+                    "min_distance",
+                    "max_distance",
+                    "max_samples",
+                ]:
+                    clf = Stree(
+                        C=17,
+                        random_state=self._random_state,
+                        kernel=kernel,
+                        split_criteria=criteria,
+                        degree=5,
+                        gamma="auto",
+                    )
+                    clf.fit(X, y)
+                    accuracy_score = clf.score(X, y)
+                    yp = clf.predict(X)
+                    accuracy_computed = np.mean(yp == y)
+                    # print(
+                    #     "{:.7f},  # {:7} {:5} {}".format(
+                    #         accuracy_score, dataset_name, kernel, criteria
+                    #     )
+                    # )
+                    accuracy_expected = accuracies.pop(0)
+                    self.assertEqual(accuracy_score, accuracy_computed)
+                    self.assertAlmostEqual(accuracy_expected, accuracy_score)

    def test_bogus_splitter_parameter(self):
        clf = Stree(splitter="duck")
@@ -343,13 +408,7 @@ class Stree_test(unittest.TestCase):
        original = weights_no_zero.copy()
        clf = Stree()
        clf.fit(X, y)
-        node = clf.train(
-            X,
-            y,
-            weights,
-            1,
-            "test",
-        )
+        node = clf.train(X, y, weights, 1, "test",)
        # if a class is lost with zero weights the patch adds epsilon
        self.assertListEqual(weights.tolist(), weights_epsilon)
        self.assertListEqual(node._sample_weight.tolist(), weights_epsilon)
@@ -357,88 +416,23 @@ class Stree_test(unittest.TestCase):
        _ = clf.train(X, y, weights_no_zero, 1, "test")
        self.assertListEqual(weights_no_zero.tolist(), original.tolist())

-    def test_multiclass_classifier_integrity(self):
-        """Checks if the multiclass operation is done right"""
-        X, y = load_iris(return_X_y=True)
-        clf = Stree(random_state=0)
+    def test_build_predictor(self):
+        X, y = load_dataset(self._random_state)
+        clf = Stree(random_state=self._random_state)
+        with self.assertRaises(ValueError):
+            clf.tree_ = None
+            clf._build_predictor()
        clf.fit(X, y)
-        score = clf.score(X, y)
-        # Check accuracy of the whole model
-        self.assertAlmostEquals(0.98, score, 5)
-        svm = LinearSVC(random_state=0)
-        svm.fit(X, y)
-        self.assertAlmostEquals(0.9666666666666667, svm.score(X, y), 5)
-        data = svm.decision_function(X)
-        expected = [
-            0.4444444444444444,
-            0.35777777777777775,
-            0.4569777777777778,
-        ]
-        ty = data.copy()
-        ty[data <= 0] = 0
-        ty[data > 0] = 1
-        ty = ty.astype(int)
-        for i in range(3):
-            self.assertAlmostEquals(
-                expected[i],
-                clf.splitter_._gini(ty[:, i]),
-            )
-        # 1st Branch
-        # up has to have 50 samples of class 0
-        # down should have 100 [50, 50]
-        up = data[:, 2] > 0
-        resup = np.unique(y[up], return_counts=True)
-        resdn = np.unique(y[~up], return_counts=True)
-        self.assertListEqual([1, 2], resup[0].tolist())
-        self.assertListEqual([3, 50], resup[1].tolist())
-        self.assertListEqual([0, 1], resdn[0].tolist())
-        self.assertListEqual([50, 47], resdn[1].tolist())
-        # 2nd Branch
-        # up  should have 53 samples of classes [1, 2] [3, 50]
-        # down shoud have 47 samples of class 1
-        node_up = clf.tree_.get_down().get_up()
-        node_dn = clf.tree_.get_down().get_down()
-        resup = np.unique(node_up._y, return_counts=True)
-        resdn = np.unique(node_dn._y, return_counts=True)
-        self.assertListEqual([1, 2], resup[0].tolist())
-        self.assertListEqual([3, 50], resup[1].tolist())
-        self.assertListEqual([1], resdn[0].tolist())
-        self.assertListEqual([47], resdn[1].tolist())
-
-    def test_score_multiclass_rbf(self):
-        X, y = load_dataset(
-            random_state=self._random_state,
-            n_classes=3,
-            n_features=5,
-            n_samples=500,
-        )
-        clf = Stree(kernel="rbf", random_state=self._random_state)
-        self.assertEqual(0.824, clf.fit(X, y).score(X, y))
-        X, y = load_wine(return_X_y=True)
-        self.assertEqual(0.6741573033707865, clf.fit(X, y).score(X, y))
-
-    def test_score_multiclass_poly(self):
-        X, y = load_dataset(
-            random_state=self._random_state,
-            n_classes=3,
-            n_features=5,
-            n_samples=500,
-        )
-        clf = Stree(
-            kernel="poly", random_state=self._random_state, C=10, degree=5
-        )
-        self.assertEqual(0.786, clf.fit(X, y).score(X, y))
-        X, y = load_wine(return_X_y=True)
-        self.assertEqual(0.702247191011236, clf.fit(X, y).score(X, y))
-
-    def test_score_multiclass_linear(self):
-        X, y = load_dataset(
-            random_state=self._random_state,
-            n_classes=3,
-            n_features=5,
-            n_samples=1500,
-        )
-        clf = Stree(kernel="linear", random_state=self._random_state)
-        self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y))
-        X, y = load_wine(return_X_y=True)
-        self.assertEqual(0.9550561797752809, clf.fit(X, y).score(X, y))
+        node = clf.tree_.get_down().get_down()
+        expected_impurity = 0.04686951386893923
+        expected_class = 1
+        expected_belief = 0.9759887005649718
+        self.assertAlmostEqual(expected_impurity, node._impurity)
+        self.assertAlmostEqual(expected_belief, node._belief)
+        self.assertEqual(expected_class, node._class)
+        node._belief = 0.0
+        node._class = None
+        clf._build_predictor()
+        node = clf.tree_.get_down().get_down()
+        self.assertAlmostEqual(expected_belief, node._belief)
+        self.assertEqual(expected_class, node._class)
--- a/stree/tests/init.py
+++ b/stree/tests/init.py
@@ -1,3 +1,4 @@
+# type: ignore
 from .Stree_test import Stree_test
 from .Snode_test import Snode_test
 from .Splitter_test import Splitter_test
--- a/stree/tests/utils.py
+++ b/stree/tests/utils.py
@@ -1,9 +1,10 @@
+# type: ignore
 from sklearn.datasets import make_classification


-def load_dataset(random_state=0, n_classes=2, n_features=3, n_samples=1500):
+def load_dataset(random_state=0, n_classes=2, n_features=3):
    X, y = make_classification(
-        n_samples=n_samples,
+        n_samples=1500,
        n_features=n_features,
        n_informative=3,
        n_redundant=0,
Author	SHA1	Message	Date
Ricardo Montañana	d1e30a3372	Refactor predict and score and make mypy --strict	2020-07-01 18:37:10 +02:00
Ricardo Montañana	fa001f97a4	First Approach	2020-06-28 02:46:20 +02:00