#3 Add degree hyperparam and update notebooks

Update readme to add new notebooks
2025-08-15 23:46:02 +00:00 · 2020-06-08 20:16:42 +02:00
parent d7c0bc3bc5
commit 26273e936a
8 changed files with 680 additions and 591 deletions
--- a/notebooks/adaboost.ipynb
+++ b/notebooks/adaboost.ipynb
@@ -1,15 +1,42 @@
 {
 "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test AdaBoost with different configurations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Setup\n",
+    "Uncomment the next cell if STree is not already installed"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
+   "source": [
+    "#\n",
+    "# Google Colab setup\n",
+    "#\n",
+    "#!pip install git+https://github.com/doctorado-ml/stree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import time\n",
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
-    "from sklearn.svm import SVC\n",
+    "from sklearn.svm import LinearSVC, SVC\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
    "from sklearn.datasets import load_iris\n",
    "from stree import Stree"
@@ -17,7 +44,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -29,13 +56,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28)  y.shape (1196,)\nFraud: 16.722% 200\nValid: 83.278% 996\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28)  y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
    }
   ],
   "source": [
@@ -68,9 +95,10 @@
    "    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
    "    return Xtrain, Xtest, ytrain, ytest\n",
    "\n",
-    "data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
+    "# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
    "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
    "# data = load_creditcard(0) # Take all the samples\n",
+    "data = load_creditcard(-100000)\n",
    "\n",
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
@@ -78,15 +106,29 @@
    "ytest = data[3]"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## STree alone on the whole dataset and linear kernel"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Score Train:  0.986857825567503\nScore Test:  0.9805013927576601\nTook 0.12 seconds\n"
+     "text": "Score Train:  0.9985499829409757\nScore Test:  0.998407854584052\nTook 39.45 seconds\n"
    }
   ],
   "source": [
@@ -99,43 +141,21 @@
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": 5,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "Score Train:  0.997610513739546\nScore Test:  0.9721448467966574\nTook 7.80 seconds\n"
-    }
-   ],
   "source": [
-    "now = time.time()\n",
-    "clf2 = AdaBoostClassifier(Stree(max_depth=3, random_state=random_state), n_estimators=100, random_state=random_state)\n",
-    "clf2.fit(Xtrain, ytrain)\n",
-    "print(\"Score Train: \", clf2.score(Xtrain, ytrain))\n",
-    "print(\"Score Test: \", clf2.score(Xtest, ytest))\n",
-    "print(f\"Took {time.time() - now:.2f} seconds\")"
+    "## Different kernels with different configurations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "Score Train:  0.9796893667861409\nScore Test:  0.9554317548746518\nTook 0.48 seconds\n"
-    }
-   ],
+   "outputs": [],
   "source": [
-    "now = time.time()\n",
-    "clf3 = AdaBoostClassifier(SVC(kernel="linear",random_state=random_state), n_estimators=100, random_state=random_state, algorithm='SAMME')\n",
-    "clf3.fit(Xtrain, ytrain)\n",
-    "print(\"Score Train: \", clf3.score(Xtrain, ytrain))\n",
-    "print(\"Score Test: \", clf3.score(Xtest, ytest))\n",
-    "print(f\"Took {time.time() - now:.2f} seconds\")"
+    "n_estimators = 10\n",
+    "C = 7\n",
+    "max_depth = 3"
   ]
  },
  {
@@ -146,24 +166,46 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Score Train:  1.0\nScore Test:  0.9721448467966574\nTook 0.86 seconds\n"
+     "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
    }
   ],
   "source": [
-    "now = time.time()\n",
-    "clf4 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=random_state), n_estimators=100, random_state=random_state)\n",
-    "clf4.fit(Xtrain, ytrain)\n",
-    "print(\"Score Train: \", clf4.score(Xtrain, ytrain))\n",
-    "print(\"Score Test: \", clf4.score(Xtest, ytest))\n",
-    "print(f\"Took {time.time() - now:.2f} seconds\")"
+    "for kernel in ['linear', 'rbf', 'poly']:\n",
+    "    now = time.time()\n",
+    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
+    "    clf.fit(Xtrain, ytrain)\n",
+    "    score_train = clf.score(Xtrain, ytrain)\n",
+    "    score_test = clf.score(Xtest, ytest)\n",
+    "    print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test algorithm SAMME in AdaBoost to check speed/accuracy"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
+    }
+   ],
+   "source": [
+    "for kernel in ['linear', 'rbf', 'poly']:\n",
+    "    now = time.time()\n",
+    "    clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
+    "    clf.fit(Xtrain, ytrain)\n",
+    "    score_train = clf.score(Xtrain, ytrain)\n",
+    "    score_test = clf.score(Xtest, ytest)\n",
+    "    print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
+   ]
  }
 ],
 "metadata": {
--- a/notebooks/crcard_graphs.ipynb
+++ b/notebooks/crcard_graphs.ipynb
--- a/notebooks/gridsearch.ipynb
+++ b/notebooks/gridsearch.ipynb
--- a/notebooks/test.ipynb
+++ b/notebooks/test.ipynb
@@ -1,5 +1,20 @@
 {
 "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Compare STree with different estimators"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Setup\n",
+    "Uncomment the next cell if STree is not already installed"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 1,
@@ -40,6 +55,13 @@
    "    !tar xzf creditcard.tgz"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tests"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 4,
@@ -55,6 +77,13 @@
    "print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load dataset and normalize values"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 5,
@@ -113,6 +142,13 @@
    "print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build the models"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 9,
@@ -174,6 +210,13 @@
    "gradient = GradientBoostingClassifier(random_state=random_state)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Do the test"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 15,
--- a/notebooks/test2.ipynb
+++ b/notebooks/test2.ipynb
@@ -1,8 +1,23 @@
 {
 "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test sample_weight, kernels, C, sklearn estimator"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Setup\n",
+    "Uncomment the next cell if STree is not already installed"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -14,7 +29,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -30,7 +45,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -42,13 +57,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28)  y.shape (1196,)\nFraud: 16.472% 197\nValid: 83.528% 999\n"
+     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
    }
   ],
   "source": [
@@ -94,15 +109,30 @@
    "weights[ytrain==1] = 1.99755 "
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test sample_weights\n",
+    "Compute accuracy with weights in samples. The weights are set based on the inverse of the number of samples of each class"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "Accuracy of Train without weights 1.0\nAccuracy of Train with    weights 1.0\nAccuracy of Tests without weights 0.9554317548746518\nAccuracy of Tests with    weights 0.9777158774373259\n"
+     "text": "Accuracy of Train without weights 0.9770114942528736\nAccuracy of Train with    weights 0.9818007662835249\nAccuracy of Tests without weights 0.953125\nAccuracy of Tests with    weights 0.9419642857142857\n"
    }
   ],
   "source": [
@@ -113,9 +143,46 @@
    "print(\"Accuracy of Tests with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test accuracy with different kernels\n",
+    "Compute accuracy on train and test set with default hyperparameters of every kernel"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "Time: 0.20s\tKernel: linear\tAccuracy_train: 0.9712643678160919\tAccuracy_test: 0.9575892857142857\nTime: 0.09s\tKernel: rbf\tAccuracy_train: 0.9932950191570882\tAccuracy_test: 0.9620535714285714\nTime: 0.09s\tKernel: poly\tAccuracy_train: 0.9904214559386973\tAccuracy_test: 0.9508928571428571\n"
+    }
+   ],
+   "source": [
+    "random_state=1\n",
+    "for kernel in ['linear', 'rbf', 'poly']:\n",
+    "    now = time.time()\n",
+    "    clf = Stree(C=7, kernel=kernel, random_state=random_state).fit(Xtrain, ytrain)\n",
+    "    accuracy_train = clf.score(Xtrain, ytrain)\n",
+    "    accuracy_test = clf.score(Xtest, ytest)\n",
+    "    time_spent = time.time() - now\n",
+    "    print(f\"Time: {time_spent:.2f}s\\tKernel: {kernel}\\tAccuracy_train: {accuracy_train}\\tAccuracy_test: {accuracy_test}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test different values of C"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
   "metadata": {
    "tags": [
     "outputPrepend"
@@ -125,7 +192,7 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9749\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([117]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.970833 counts=(array([0, 1]), array([699,  21]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9797\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([121]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.976257 counts=(array([0, 1]), array([699,  17]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9869\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([127]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.984507 counts=(array([0, 1]), array([699,  11]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9892\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([3]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.987216 counts=(array([0, 1]), array([695,   9]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9952\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, <pure> 
- Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630,   4]))\n\n**************************************************\n0.1084 secs\n"
+     "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.980583 counts=(array([0, 1]), array([  6, 303]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.943836 counts=(array([0, 1]), array([689,  41]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.990228 counts=(array([0, 1]), array([  3, 304]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.942935 counts=(array([0, 1]), array([694,  42]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9665\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.951989 counts=(array([0, 1]), array([694,  35]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9674\nClassifier's accuracy (test) : 0.9621\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([312]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 
counts=(array([0]), array([4]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.953039 counts=(array([0, 1]), array([690,  34]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9770\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, 
<pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n\n**************************************************\n0.9578 secs\n"
    }
   ],
   "source": [
@@ -143,13 +210,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "[[0.95399748 0.04600252]\n [0.92625258 0.07374742]\n [0.97804877 0.02195123]\n [0.94803313 0.05196687]]\n"
+     "text": "[[0.88204928 0.11795072]\n [0.8640131  0.1359869 ]\n [0.94207521 0.05792479]\n [0.90219947 0.09780053]]\n"
    }
   ],
   "source": [
@@ -165,48 +232,11 @@
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": 6,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630,   4]))\n"
-    }
-   ],
   "source": [
-    "#check iterator\n",
-    "for i in list(clf):\n",
-    "    print(i)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630,   4]))\n"
-    }
-   ],
-   "source": [
-    "#check iterator again\n",
-    "for i in clf:\n",
-    "    print(i)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Check if the classifier is a sklearn estimator\n",
-    "from sklearn.utils.estimator_checks import check_estimator\n",
-    "check_estimator(Stree())"
+    "## Test iterator\n",
+    "Check different ways of using the iterator"
   ]
  },
  {
@@ -217,7 +247,48 @@
    {
     "output_type": "stream",
     "name": "stdout",
-     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x128922b90>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x128918cb0>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x128918b90>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x1289144d0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x1289145f0>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x128914710>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x128914830>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x128914950>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x128914b00>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x128914a70>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x128918dd0>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x128918a70>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x128918ef0>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x128922a70>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x1289143b0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12891e170>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x128922dd0>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12891e830>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x128922290>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12891e290>, 'Stree')\n23 functools.partial(<function 
check_classifiers_train at 0x12891e950>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x1289278c0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12890c4d0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12891eef0>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12891edd0>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x128927440>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x1289279e0>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x128918050>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x128918200>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x128918320>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x128918440>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x128918560>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x128927680>, 'Stree')\n38 functools.partial(<function check_set_params at 0x1289277a0>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x128914c20>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x128914ef0>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x128927b90>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x128927c20>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x128927cb0>, 'Stree')\n"
+     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n"
+    }
+   ],
+   "source": [
+    "#check iterator\n",
+    "for i in list(clf):\n",
+    "    print(i)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n"
+    }
+   ],
+   "source": [
+    "#check iterator again\n",
+    "for i in clf:\n",
+    "    print(i)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test STree is a sklearn estimator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12bd3b5f0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12bd31710>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12bd315f0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12bd21ef0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12bd2d050>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12bd2d170>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12bd2d290>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12bd2d3b0>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12bd2d560>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12bd2d4d0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12bd31830>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12bd314d0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12bd31950>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12bd3b4d0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12bd21dd0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12bd31b90>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12bd3b830>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12bd37290>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12bd37cb0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12bd31cb0>, 'Stree')\n23 functools.partial(<function 
check_classifiers_train at 0x12bd373b0>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12bd40320>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12bd20ef0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12bd37950>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12bd37830>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12bd3be60>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12bd40440>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12bd2da70>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12bd2dc20>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12bd2dd40>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12bd2de60>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12bd2df80>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12bd400e0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12bd40200>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12bd2d680>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12bd2d950>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12bd405f0>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12bd40680>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12bd40710>, 'Stree')\n"
    }
   ],
   "source": [
@@ -232,10 +303,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
-   "source": []
+   "source": [
+    "# Check if the classifier is a sklearn estimator\n",
+    "from sklearn.utils.estimator_checks import check_estimator\n",
+    "check_estimator(Stree())"
+   ]
  }
 ],
 "metadata": {
--- a/notebooks/test_graphs.ipynb
+++ b/notebooks/test_graphs.ipynb