From 724a4855fb490f2e37c7e43ebba7c7e39f025521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Sat, 30 May 2020 11:09:59 +0200
Subject: [PATCH] Adapt some notebooks

---
 README.md                   |   2 +-
 notebooks/adaboost.ipynb    | 190 ++++++++++++++++++++++++++++++++++++
 notebooks/gridsearch.ipynb  | 152 ++++++++++++++++-------------
 notebooks/test_graphs.ipynb |   4 +-
 stree/Strees.py             |   9 +-
 5 files changed, 279 insertions(+), 78 deletions(-)
 create mode 100644 notebooks/adaboost.ipynb

diff --git a/README.md b/README.md
index c104d30..540658c 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 # Stree

-Oblique Tree classifier based on SVM nodes
+Oblique Tree classifier based on SVM nodes. The nodes are built and split with sklearn LinearSVC models. Stree is a sklearn estimator and can be integrated into pipelines, grid searches, etc.

 ![Stree](https://raw.github.com/doctorado-ml/stree/master/example.png)

diff --git a/notebooks/adaboost.ipynb b/notebooks/adaboost.ipynb
new file mode 100644
index 0000000..cfed940
--- /dev/null
+++ b/notebooks/adaboost.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "from sklearn.ensemble import AdaBoostClassifier\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.svm import LinearSVC\n",
+    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
+    "from sklearn.datasets import load_iris\n",
+    "from stree import Stree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "if not os.path.isfile('data/creditcard.csv'):\n",
+    "    !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
+    "    !tar xzf creditcard.tgz"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.722% 200\nValid: 83.278% 996\n"
+    }
+   ],
+   "source": [
+    "random_state=1\n",
+    "\n",
+    "def load_creditcard(n_examples=0):\n",
+    "    import pandas as pd\n",
+    "    import numpy as np\n",
+    "    import random\n",
+    "    df = pd.read_csv('data/creditcard.csv')\n",
+    "    print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
+    "    print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
+    "    y = df.Class\n",
+    "    X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
+    "    if n_examples > 0:\n",
+    "        # Take first n_examples samples\n",
+    "        X = X[:n_examples, :]\n",
+    "        y = y[:n_examples]\n",
+    "    else:\n",
+    "        # Take all the positive samples with a number of random negatives\n",
+    "        if n_examples < 0:\n",
+    "            Xt = X[(y == 1).ravel()]\n",
+    "            yt = y[(y == 1).ravel()]\n",
+    "            indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
+    "            X = np.append(Xt, X[indices], axis=0)\n",
+    "            y = np.append(yt, y[indices], axis=0)\n",
+    "    print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
+    "    print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
+    "    print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
+    "    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
+    "    return 
Xtrain, Xtest, ytrain, ytest\n", + "\n", + "data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n", + "# data = load_creditcard(5000) # Take the first 5000 samples\n", + "# data = load_creditcard(0) # Take all the samples\n", + "\n", + "Xtrain = data[0]\n", + "Xtest = data[1]\n", + "ytrain = data[2]\n", + "ytest = data[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Score Train: 0.986857825567503\nScore Test: 0.9805013927576601\nTook 0.12 seconds\n" + } + ], + "source": [ + "now = time.time()\n", + "clf = Stree(max_depth=3, random_state=random_state)\n", + "clf.fit(Xtrain, ytrain)\n", + "print(\"Score Train: \", clf.score(Xtrain, ytrain))\n", + "print(\"Score Test: \", clf.score(Xtest, ytest))\n", + "print(f\"Took {time.time() - now:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Score Train: 0.997610513739546\nScore Test: 0.9721448467966574\nTook 7.80 seconds\n" + } + ], + "source": [ + "now = time.time()\n", + "clf2 = AdaBoostClassifier(Stree(max_depth=3, random_state=random_state), n_estimators=100, random_state=random_state)\n", + "clf2.fit(Xtrain, ytrain)\n", + "print(\"Score Train: \", clf2.score(Xtrain, ytrain))\n", + "print(\"Score Test: \", clf2.score(Xtest, ytest))\n", + "print(f\"Took {time.time() - now:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Score Train: 0.9796893667861409\nScore Test: 0.9554317548746518\nTook 0.48 seconds\n" + } + ], + "source": [ + "now = time.time()\n", + "clf3 = AdaBoostClassifier(LinearSVC(random_state=random_state), n_estimators=100, random_state=random_state, algorithm='SAMME')\n", + "clf3.fit(Xtrain, ytrain)\n", + "print(\"Score Train: \", clf3.score(Xtrain, ytrain))\n", + "print(\"Score Test: \", clf3.score(Xtest, ytest))\n", + "print(f\"Took {time.time() - now:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Score Train: 1.0\nScore Test: 0.9721448467966574\nTook 0.86 seconds\n" + } + ], + "source": [ + "now = time.time()\n", + "clf4 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=random_state), n_estimators=100, random_state=random_state)\n", + "clf4.fit(Xtrain, ytrain)\n", + "print(\"Score Train: \", clf4.score(Xtrain, ytrain))\n", + "print(\"Score Test: \", clf4.score(Xtest, ytest))\n", + "print(f\"Took {time.time() - now:.2f} seconds\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39", + "display_name": "Python 3.7.6 64-bit ('general': venv)" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/notebooks/gridsearch.ipynb b/notebooks/gridsearch.ipynb index adc4978..0d5ae8e 100644 --- a/notebooks/gridsearch.ipynb +++ 
b/notebooks/gridsearch.ipynb @@ -20,8 +20,10 @@ "metadata": {}, "outputs": [], "source": [ - "#X, y = load_iris(return_X_y=True)\n", - "#y[y==2] = 0" + "import os\n", + "if not os.path.isfile('data/creditcard.csv'):\n", + " !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n", + " !tar xzf creditcard.tgz" ] }, { @@ -32,7 +34,7 @@ { "output_type": "stream", "name": "stdout", - "text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.472% 197\nValid: 83.528% 999\n" + "text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.555% 198\nValid: 83.445% 998\n" } ], "source": [ @@ -79,88 +81,54 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "root\nroot - Down - Leaf class=1.0 belief=0.976000 counts=(array([0., 1.]), array([ 3, 122]))\nroot - Up - Leaf class=0.0 belief=0.977528 counts=(array([0., 1.]), array([696, 16]))\n\n" - } - ], + "outputs": [], "source": [ - "c = Stree(max_depth=2)\n", - "c.fit(Xtrain, ytrain)\n", - "print(c)" + "parameters = {\n", + " 'base_estimator': [Stree()],\n", + " 'n_estimators': [50, 100, 150],\n", + " 'learning_rate': [.5, 1],\n", + " 'base_estimator__tol': [.1, 1e-02],\n", + " 'base_estimator__max_depth': [5, 7],\n", + " 'base_estimator__C': [1, 3]\n", + "}\n", + "#'max_depth': [3, 5]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "({'C': 1.0,\n 'class_weight': None,\n 'dual': True,\n 'fit_intercept': True,\n 'intercept_scaling': 1,\n 'loss': 'squared_hinge',\n 'max_iter': 1000,\n 'multi_class': 'ovr',\n 'penalty': 'l2',\n 'random_state': None,\n 'tol': 0.0001,\n 'verbose': 0},\n {'ccp_alpha': 0.0,\n 'class_weight': None,\n 'criterion': 'gini',\n 'max_depth': None,\n 'max_features': None,\n 'max_leaf_nodes': None,\n 'min_impurity_decrease': 0.0,\n 'min_impurity_split': None,\n 'min_samples_leaf': 1,\n 'min_samples_split': 2,\n 'min_weight_fraction_leaf': 0.0,\n 'presort': 'deprecated',\n 'random_state': None,\n 'splitter': 'best'})" + }, + "metadata": {}, + "execution_count": 5 + } + ], "source": [ - "#'base_estimator': [DecisionTreeClassifier(max_depth=1), Stree(max_depth=2), Stree(max_depth=3)],\n", - "parameters = {\n", - " 'base_estimator': [LinearSVC(), Stree(max_depth=2), Stree(max_depth=3)],\n", - " 'n_estimators': [20, 50, 100, 150],\n", - " 'learning_rate': [.5, 1, 1.5] \n", - "}" + "LinearSVC().get_params(), DecisionTreeClassifier().get_params()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [ - "#parameters = {\n", - "# 'base_estimator': [DecisionTreeClassifier(max_depth=1), DecisionTreeClassifier(max_depth=5), Stree(), Stree(C=.1), Stree(C=.01), Stree(C=3)],\n", - "# 'n_estimators': [20, 50, 100, 150],\n", - "# 'learning_rate': [.5, 1, 1.5] \n", - "#}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", - "text": "(X: numpy.ndarray, y: numpy.ndarray, sample_weight: = None) -> 'Stree'\n" - } - ], - "source": [ - "from inspect import signature\n", - "print(signature(c.fit))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.utils.validation import _check_sample_weight" - ] - }, - { - "cell_type": "code", - 
"execution_count": 9, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 1.3s\n[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 1.3s\n[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 1.3s\n[Parallel(n_jobs=-1)]: Batch computation too fast (0.1671s.) Setting batch_size=2.\n[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 1.3s\n[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 1.4s\n[Parallel(n_jobs=-1)]: Batch computation too fast (0.0413s.) Setting batch_size=4.\n[Parallel(n_jobs=-1)]: Done 50 tasks | elapsed: 1.4s\n[Parallel(n_jobs=-1)]: Batch computation too slow (7.7880s.) Setting batch_size=1.\n[Parallel(n_jobs=-1)]: Done 74 tasks | elapsed: 9.2s\n[Parallel(n_jobs=-1)]: Done 121 tasks | elapsed: 48.9s\n[Parallel(n_jobs=-1)]: Done 140 tasks | elapsed: 1.0min\n[Parallel(n_jobs=-1)]: Done 161 tasks | elapsed: 1.3min\n[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed: 1.6min finished\n" + "text": "Fitting 5 folds for each of 48 candidates, totalling 240 fits\n[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 4.6s\n[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 7.1s\n[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 10.4s\n[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 17.9s\n[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 22.8s\n[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 27.4s\n[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 33.0s\n[Parallel(n_jobs=-1)]: Done 69 tasks | elapsed: 39.3s\n[Parallel(n_jobs=-1)]: Done 82 tasks | elapsed: 48.6s\n[Parallel(n_jobs=-1)]: Done 97 tasks | elapsed: 57.5s\n[Parallel(n_jobs=-1)]: Done 112 tasks | elapsed: 1.1min\n[Parallel(n_jobs=-1)]: Done 129 tasks | elapsed: 1.3min\n[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 1.5min\n[Parallel(n_jobs=-1)]: Done 165 tasks | elapsed: 1.7min\n[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 1.9min\n[Parallel(n_jobs=-1)]: Done 205 tasks | elapsed: 2.2min\n[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 2.6min finished\n" }, { "output_type": "execute_result", "data": { - "text/plain": "GridSearchCV(estimator=AdaBoostClassifier(random_state=2020), n_jobs=-1,\n param_grid={'base_estimator': [LinearSVC(), Stree(max_depth=2),\n Stree(max_depth=3)],\n 'learning_rate': [0.5, 1, 1.5],\n 'n_estimators': [20, 50, 100, 150]},\n return_train_score=True, verbose=10)" + "text/plain": "GridSearchCV(estimator=AdaBoostClassifier(random_state=2020), n_jobs=-1,\n param_grid={'base_estimator': [Stree(C=1, max_depth=7, tol=0.1)],\n 'base_estimator__C': [1, 3],\n 'base_estimator__max_depth': [5, 7],\n 'base_estimator__tol': [0.1, 0.01],\n 'learning_rate': [0.5, 1],\n 'n_estimators': [50, 100, 150]},\n return_train_score=True, verbose=10)" }, "metadata": {}, - "execution_count": 9 + "execution_count": 6 } ], "source": [ @@ -172,25 +140,75 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "(AdaBoostClassifier(base_estimator=Stree(C=1, max_depth=7, tol=0.1),\n learning_rate=0.5, random_state=2020),\n 0.9808810949529512)" + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "grid.best_estimator_, grid.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + 
"outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "[{'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 150},\n 
{'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 1,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 5,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 
'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.1,\n 'learning_rate': 1,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 0.5,\n 'n_estimators': 150},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 50},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 100},\n {'base_estimator': Stree(C=1, max_depth=7, tol=0.1),\n 'base_estimator__C': 3,\n 'base_estimator__max_depth': 7,\n 'base_estimator__tol': 0.01,\n 'learning_rate': 1,\n 'n_estimators': 150}]" + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "grid.cv_results_[\"params\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", - "text": "AdaBoostClassifier(base_estimator=Stree(max_depth=2), learning_rate=0.5,\n n_estimators=150, random_state=2020)\n" + "text": "base_estimator base_estimator__C base_estimator__max_depth \\\n0 1 5 \n1 1 5 \n2 1 5 \n3 1 5 \n4 1 5 \n5 1 5 \n6 1 5 \n7 1 5 \n8 1 5 \n9 1 5 \n10 1 5 \n11 1 5 \n12 1 7 \n13 1 7 \n14 1 7 \n15 1 7 \n16 1 7 \n17 1 7 \n18 1 7 \n19 1 7 \n20 1 7 \n21 1 7 \n22 1 7 \n23 1 7 \n24 3 5 \n25 3 5 \n26 3 5 \n27 3 5 \n28 3 5 \n29 3 5 \n30 3 5 \n31 3 5 \n32 3 5 \n33 3 5 \n34 3 5 \n35 3 5 \n36 3 7 \n37 3 7 \n38 3 7 \n39 3 7 \n40 3 7 \n41 3 7 \n42 3 7 \n43 3 7 \n44 3 7 \n45 3 7 \n46 3 7 \n47 3 7 \n\n base_estimator__tol learning_rate n_estimators Accuracy \n0 0.10 0.5 50 0.979691 \n1 0.10 0.5 100 0.978493 \n2 0.10 0.5 150 0.978493 \n3 0.10 1.0 50 0.978486 \n4 0.10 1.0 100 0.978493 \n5 0.10 1.0 150 0.979691 \n6 0.01 0.5 50 0.979691 \n7 0.01 0.5 100 0.978493 \n8 0.01 0.5 150 0.978493 \n9 0.01 1.0 50 0.978486 \n10 0.01 1.0 100 0.978493 \n11 0.01 1.0 150 0.979691 \n12 0.10 0.5 50 0.980881 \n13 0.10 0.5 100 0.980881 \n14 0.10 0.5 150 0.978486 \n15 0.10 1.0 50 
0.979691 \n16 0.10 1.0 100 0.976098 \n17 0.10 1.0 150 0.976098 \n18 0.01 0.5 50 0.980881 \n19 0.01 0.5 100 0.980881 \n20 0.01 0.5 150 0.978486 \n21 0.01 1.0 50 0.979691 \n22 0.01 1.0 100 0.976098 \n23 0.01 1.0 150 0.976098 \n24 0.10 0.5 50 0.979691 \n25 0.10 0.5 100 0.979683 \n26 0.10 0.5 150 0.977303 \n27 0.10 1.0 50 0.978493 \n28 0.10 1.0 100 0.977295 \n29 0.10 1.0 150 0.977295 \n30 0.01 0.5 50 0.979691 \n31 0.01 0.5 100 0.979683 \n32 0.01 0.5 150 0.977303 \n33 0.01 1.0 50 0.978493 \n34 0.01 1.0 100 0.977295 \n35 0.01 1.0 150 0.977295 \n36 0.10 0.5 50 0.980881 \n37 0.10 0.5 100 0.977303 \n38 0.10 0.5 150 0.978500 \n39 0.10 1.0 50 0.977303 \n40 0.10 1.0 100 0.978493 \n41 0.10 1.0 150 0.978493 \n42 0.01 0.5 50 0.980881 \n43 0.01 0.5 100 0.977303 \n44 0.01 0.5 150 0.978500 \n45 0.01 1.0 50 0.977303 \n46 0.01 1.0 100 0.978493 \n47 0.01 1.0 150 0.978493 \n" } ], "source": [ - "print(grid.best_estimator_)" + "import pandas as pd\n", + "res = pd.concat([pd.DataFrame(grid.cv_results_[\"params\"]),pd.DataFrame(grid.cv_results_[\"mean_test_score\"], columns=[\"Accuracy\"])], axis=1)\n", + "\n", + "#print(res.sort_values(['Accuracy'], ascending=False))\n", + "print(res)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 10, "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": "{'cv': None,\n 'error_score': nan,\n 'estimator__algorithm': 'SAMME.R',\n 'estimator__base_estimator': None,\n 'estimator__learning_rate': 1.0,\n 'estimator__n_estimators': 50,\n 'estimator__random_state': 2020,\n 'estimator': AdaBoostClassifier(random_state=2020),\n 'iid': 'deprecated',\n 'n_jobs': -1,\n 'param_grid': {'base_estimator': [Stree(C=1, max_depth=7, tol=0.1)],\n 'n_estimators': [50, 100, 150],\n 'learning_rate': [0.5, 1],\n 'base_estimator__tol': [0.1, 0.01],\n 'base_estimator__max_depth': [5, 7],\n 'base_estimator__C': [1, 3]},\n 'pre_dispatch': '2*n_jobs',\n 'refit': True,\n 'return_train_score': True,\n 'scoring': None,\n 'verbose': 10}" + }, + "metadata": {}, + "execution_count": 10 + } + ], "source": [ - "AdaBoostClassifier(base_estimator=Stree(max_depth=3), learning_rate=0.5,\n", - " n_estimators=20, random_state=2020)" + "grid.get_params()" ] } ], diff --git a/notebooks/test_graphs.ipynb b/notebooks/test_graphs.ipynb index a288634..5c07aec 100644 --- a/notebooks/test_graphs.ipynb +++ b/notebooks/test_graphs.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -24,7 +24,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmake_blobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msvm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearSVC\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mstree\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mStree\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStree_grapher\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmake_blobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msvm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearSVC\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mstree\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mStree\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStree_grapher\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'stree'"
     ]
    }
diff --git a/stree/Strees.py b/stree/Strees.py
index 072dd62..5ed2e03 100644
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -152,11 +152,6 @@ class Stree(BaseEstimator, ClassifierMixin):
         # doesn't work with multiclass as each sample has to do inner product with its own coefficients
         # computes the position of every sample w.r.t. the hyperplane
         res = self._linear_function(data, node)
-        # data_up, data_down = self._split_array(data, down)
-        # indices_up, indices_down = self._split_array(indices, down)
-        # res_up, res_down = self._split_array(res, down)
-        # weight_up, weight_down = self._split_array(weights, down)
-        #return [data_up, indices_up, data_down, indices_down, weight_up, weight_down, res_up, res_down]
         return res

     def _split_criteria(self, data: np.array) -> np.array:
@@ -176,7 +171,6 @@
         sample_weight = _check_sample_weight(sample_weight, X)
         check_classification_targets(y)
         # Initialize computed parameters
-        #self.random_state = check_random_state(self.random_state)
         self.classes_ = np.unique(y)
         self.n_iter_ = self.max_iter
         self.depth_ = 0
@@ -316,8 +310,7 @@
         # sklearn check
         check_is_fitted(self)
         yp = self.predict(X).reshape(y.shape)
-        right = (yp == y).astype(int)
-        return np.sum(right) / len(y)
+        return np.mean(yp == y)

     def __iter__(self) -> Siterator:
         try:
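
Note: the `base_estimator__*` keys used in the gridsearch notebook above rely on sklearn's standard nested-parameter routing, which lets one search tune the AdaBoost ensemble and the inner Stree at the same time. A minimal, self-contained sketch of the same pattern follows; the toy dataset and the reduced grid are illustrative only, not the run above, and it assumes the stree package is installed (AdaBoost's default SAMME.R algorithm needs predict_proba on the base estimator, which the runs above imply Stree provides).

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.model_selection import GridSearchCV
    from stree import Stree

    # Toy binary problem; the creditcard data is only needed for the real run.
    X, y = make_classification(n_samples=500, n_features=10, random_state=2020)

    # Keys prefixed with base_estimator__ are forwarded to the inner Stree,
    # so ensemble and tree hyperparameters are explored in a single search.
    param_grid = {
        "n_estimators": [50, 100],
        "learning_rate": [0.5, 1],
        "base_estimator__max_depth": [5, 7],
        "base_estimator__C": [1, 3],
    }
    clf = AdaBoostClassifier(base_estimator=Stree(), random_state=2020)
    grid = GridSearchCV(clf, param_grid, n_jobs=-1, return_train_score=True)
    grid.fit(X, y)
    print(grid.best_estimator_, grid.best_score_)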