#3 Add sample_weights to score, update notebooks

Update readme to use new names of notebooks
2025-08-17 08:26:00 +00:00 · 2020-06-09 01:46:38 +02:00
parent 26273e936a
commit 7e932de072
5 changed files with 406 additions and 351 deletions
--- a/README.md
+++ b/README.md
@@ -18,21 +18,17 @@ pip install git+https://github.com/doctorado-ml/stree
 ### Jupyter notebooks
-##### Slow launch but better integration
+* [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/notebooks/benchmark.ipynb) Benchmark
-* [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/notebooks/test.ipynb) Test notebook
+* [![Benchmark](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/benchmark.ipynb) Benchmark
-##### Fast launch but have to run first commented out cell for setup
+* [![Features](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/features.ipynb) Test features
 * [![Test](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test.ipynb) Test notebook
 * [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test2.ipynb) Another Test notebook
 * [![Adaboost](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/adaboost.ipynb) Adaboost
 * [![Gridsearch](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/gridsearch.ipynb) Gridsearch
-* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics notebook
+* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics
 ### Command line
--- a/notebooks/benchmark.ipynb
+++ b/notebooks/benchmark.ipynb
--- a/notebooks/features.ipynb
+++ b/notebooks/features.ipynb
@@ -0,0 +1,370 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test smple_weight, kernels, C, sklearn estimator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n",
    "Uncomment the next cell if STree is not already installed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "# Google Colab setup\n",
    "#\n",
    "#!pip install git+https://github.com/doctorado-ml/stree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.utils.estimator_checks import check_estimator\n",
    "from sklearn.datasets import make_classification, load_iris, load_wine\n",
    "from sklearn.model_selection import train_test_split\n",
    "from stree import Stree\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "if not os.path.isfile('data/creditcard.csv'):\n",
    "    !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
    "    !tar xzf creditcard.tgz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
    }
   ],
   "source": [
    "random_state=1\n",
    "\n",
    "def load_creditcard(n_examples=0):\n",
    "    import pandas as pd\n",
    "    import numpy as np\n",
    "    import random\n",
    "    df = pd.read_csv('data/creditcard.csv')\n",
    "    print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
    "    print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
    "    y = df.Class\n",
    "    X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
    "    if n_examples > 0:\n",
    "        # Take first n_examples samples\n",
    "        X = X[:n_examples, :]\n",
    "        y = y[:n_examples, :]\n",
    "    else:\n",
    "        # Take all the positive samples with a number of random negatives\n",
    "        if n_examples < 0:\n",
    "            Xt = X[(y == 1).ravel()]\n",
    "            yt = y[(y == 1).ravel()]\n",
    "            indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
    "            X = np.append(Xt, X[indices], axis=0)\n",
    "            y = np.append(yt, y[indices], axis=0)\n",
    "    print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
    "    print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
    "    print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
    "    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
    "    return Xtrain, Xtest, ytrain, ytest\n",
    "\n",
    "# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
    "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
    "data = load_creditcard(-1000) # Take all the samples\n",
    "\n",
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
    "ytrain = data[2]\n",
    "ytest = data[3]\n",
    "# Set weights inverse to its count class in dataset\n",
    "weights = np.ones(Xtrain.shape[0],) * 1.00244\n",
    "weights[ytrain==1] = 1.99755\n",
    "weights_test = np.ones(Xtest.shape[0],) * 1.00244\n",
    "weights_test[ytest==1] = 1.99755 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test smple_weights\n",
    "Compute accuracy with weights in samples. The weights are set based on the inverse of the number of samples of each class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Accuracy of Train without weights 0.9722222222222222\nAccuracy of Train with    weights 0.9875478927203065\nAccuracy of Tests without weights 0.9508928571428571\nAccuracy of Tests with    weights 0.9486607142857143\n"
    }
   ],
   "source": [
    "C = 23\n",
    "print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n",
    "print(\"Accuracy of Train with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtrain, ytrain))\n",
    "print(\"Accuracy of Tests without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtest, ytest))\n",
    "print(\"Accuracy of Tests with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test accuracy with different kernels\n",
    "Compute accuracy on train and test set with default hyperparmeters of every kernel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Time: 0.27s\tKernel: linear\tAccuracy_train: 0.9712643678160919\tAccuracy_test: 0.953125\nTime: 0.08s\tKernel: rbf\tAccuracy_train: 0.9932950191570882\tAccuracy_test: 0.9620535714285714\nTime: 0.05s\tKernel: poly\tAccuracy_train: 0.9923371647509579\tAccuracy_test: 0.9419642857142857\n"
    }
   ],
   "source": [
    "random_state=1\n",
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
    "    clf = Stree(C=7, kernel=kernel, random_state=random_state).fit(Xtrain, ytrain)\n",
    "    accuracy_train = clf.score(Xtrain, ytrain)\n",
    "    accuracy_test = clf.score(Xtest, ytest)\n",
    "    time_spent = time.time() - now\n",
    "    print(f\"Time: {time_spent:.2f}s\\tKernel: {kernel}\\tAccuracy_train: {accuracy_train}\\tAccuracy_test: {accuracy_test}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test diferent values of C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9554\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.977636 counts=(array([0, 1]), array([  7, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.945280 counts=(array([0, 1]), array([691,  40]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9554\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.983923 counts=(array([0, 1]), array([  5, 306]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.945430 counts=(array([0, 1]), array([693,  40]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9665\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.951989 counts=(array([0, 1]), array([694,  35]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9703\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([310]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([5]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 
1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.957004 counts=(array([0, 1]), array([690,  31]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9799\nClassifier's accuracy (test) : 0.9531\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([310]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([15]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([9]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([10]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.969653 counts=(array([0, 1]), array([671,  21]))\n\n**************************************************\n0.5032 secs\n"
    }
   ],
   "source": [
    "t = time.time()\n",
    "for C in (.001, .01, 1, 5, 17):\n",
    "    clf = Stree(C=C, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    print(f\"************** C={C} ****************************\")\n",
    "    print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
    "    print(f\"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}\")\n",
    "    print(clf)\n",
    "    print(f\"**************************************************\")\n",
    "print(f\"{time.time() - t:.4f} secs\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test iterator\n",
    "Check different weays of using the iterator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([310]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([15]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([9]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([10]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.969653 counts=(array([0, 1]), array([671,  21]))\n"
    }
   ],
   "source": [
    "#check iterator\n",
    "for i in list(clf):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([310]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([15]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([9]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([10]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.969653 counts=(array([0, 1]), array([671,  21]))\n"
    }
   ],
   "source": [
    "#check iterator again\n",
    "for i in clf:\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test STree is a sklearn estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x124d443b0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x124d3b4d0>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x124d3b3b0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x124d33cb0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x124d33dd0>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x124d33ef0>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x124d35050>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x124d35170>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x124d3e4d0>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x124d3e4d0>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x124d35320>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x124d35290>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x124d3b5f0>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x124d3b290>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x124d3b710>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x124d44290>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x124d33b90>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x124d3b950>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x124d445f0>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x124d3e050>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x124d3ea70>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x124d3ba70>, 'Stree')\n23 functools.partial(<function 
check_classifiers_train at 0x124d3e170>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x124d3e170>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x124d3e170>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x124d480e0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x124d2d9e0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x124d3e710>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x124d3e5f0>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x124d44c20>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x124d48200>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x124d35830>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x124d359e0>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x124d35b00>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x124d35c20>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x124d35d40>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x124d44e60>, 'Stree')\n38 functools.partial(<function check_set_params at 0x124d44f80>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x124d35440>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x124d35710>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x124d483b0>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x124d48440>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x124d484d0>, 'Stree')\n"
    }
   ],
   "source": [
    "# Make checks one by one\n",
    "c = 0\n",
    "checks = check_estimator(Stree(), generate_only=True)\n",
    "for check in checks:\n",
    "    c += 1\n",
    "    print(c, check[1])\n",
    "    check[1](check[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check if the classifier is a sklearn estimator\n",
    "check_estimator(Stree())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare to SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "== Not Weighted ===\nSVC train score ..: 0.9530651340996169\nSTree train score : 0.960727969348659\nSVC test score ...: 0.9620535714285714\nSTree test score .: 0.9642857142857143\n==== Weighted =====\nSVC train score ..: 0.960727969348659\nSTree train score : 0.960727969348659\nSVC test score ...: 0.953125\nSTree test score .: 0.9553571428571429\n*SVC test score ..: 0.9397723008352139\n*STree test score : 0.9431162390279932\n"
    }
   ],
   "source": [
    "svc = SVC(C=7, kernel='rbf', gamma=.001, random_state=random_state)\n",
    "clf = Stree(C=17, kernel='rbf', gamma=.001, random_state=random_state)\n",
    "svc.fit(Xtrain, ytrain)\n",
    "clf.fit(Xtrain, ytrain)\n",
    "print(\"== Not Weighted ===\")\n",
    "print(\"SVC train score ..:\", svc.score(Xtrain, ytrain))\n",
    "print(\"STree train score :\", clf.score(Xtrain, ytrain))\n",
    "print(\"SVC test score ...:\", svc.score(Xtest, ytest))\n",
    "print(\"STree test score .:\", clf.score(Xtest, ytest))\n",
    "svc.fit(Xtrain, ytrain, weights)\n",
    "clf.fit(Xtrain, ytrain, weights)\n",
    "print(\"==== Weighted =====\")\n",
    "print(\"SVC train score ..:\", svc.score(Xtrain, ytrain))\n",
    "print(\"STree train score :\", clf.score(Xtrain, ytrain))\n",
    "print(\"SVC test score ...:\", svc.score(Xtest, ytest))\n",
    "print(\"STree test score .:\", clf.score(Xtest, ytest))\n",
    "print(\"*SVC test score ..:\", svc.score(Xtest, ytest, weights_test))\n",
    "print(\"*STree test score :\", clf.score(Xtest, ytest, weights_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "root\nroot - Down, <cgaf> - Leaf class=1 belief= 0.978056 counts=(array([0, 1]), array([  7, 312]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.953103 counts=(array([0, 1]), array([691,  34]))\n\n"
    }
   ],
   "source": [
    "print(clf)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.6 64-bit ('general': venv)",
   "language": "python",
   "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6-final"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/notebooks/test2.ipynb
+++ b/notebooks/test2.ipynb
@@ -1,337 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test smple_weight, kernels, C, sklearn estimator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n",
    "Uncomment the next cell if STree is not already installed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "# Google Colab setup\n",
    "#\n",
    "#!pip install git+https://github.com/doctorado-ml/stree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.datasets import make_classification, load_iris, load_wine\n",
    "from sklearn.model_selection import train_test_split\n",
    "from stree import Stree\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "if not os.path.isfile('data/creditcard.csv'):\n",
    "    !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
    "    !tar xzf creditcard.tgz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28)  y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
    }
   ],
   "source": [
    "random_state=1\n",
    "\n",
    "def load_creditcard(n_examples=0):\n",
    "    import pandas as pd\n",
    "    import numpy as np\n",
    "    import random\n",
    "    df = pd.read_csv('data/creditcard.csv')\n",
    "    print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
    "    print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
    "    y = df.Class\n",
    "    X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
    "    if n_examples > 0:\n",
    "        # Take first n_examples samples\n",
    "        X = X[:n_examples, :]\n",
    "        y = y[:n_examples, :]\n",
    "    else:\n",
    "        # Take all the positive samples with a number of random negatives\n",
    "        if n_examples < 0:\n",
    "            Xt = X[(y == 1).ravel()]\n",
    "            yt = y[(y == 1).ravel()]\n",
    "            indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
    "            X = np.append(Xt, X[indices], axis=0)\n",
    "            y = np.append(yt, y[indices], axis=0)\n",
    "    print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
    "    print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
    "    print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
    "    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
    "    return Xtrain, Xtest, ytrain, ytest\n",
    "\n",
    "# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
    "# data = load_creditcard(5000)  # Take the first 5000 samples\n",
    "data = load_creditcard(-1000) # Take all the samples\n",
    "\n",
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
    "ytrain = data[2]\n",
    "ytest = data[3]\n",
    "# Set weights inverse to its count class in dataset\n",
    "weights = np.ones(Xtrain.shape[0],) * 1.00244\n",
    "weights[ytrain==1] = 1.99755 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test smple_weights\n",
    "Compute accuracy with weights in samples. The weights are set based on the inverse of the number of samples of each class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Accuracy of Train without weights 0.9770114942528736\nAccuracy of Train with    weights 0.9818007662835249\nAccuracy of Tests without weights 0.953125\nAccuracy of Tests with    weights 0.9419642857142857\n"
    }
   ],
   "source": [
    "C = 23\n",
    "print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n",
    "print(\"Accuracy of Train with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtrain, ytrain))\n",
    "print(\"Accuracy of Tests without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtest, ytest))\n",
    "print(\"Accuracy of Tests with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test accuracy with different kernels\n",
    "Compute accuracy on train and test set with default hyperparmeters of every kernel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Time: 0.20s\tKernel: linear\tAccuracy_train: 0.9712643678160919\tAccuracy_test: 0.9575892857142857\nTime: 0.09s\tKernel: rbf\tAccuracy_train: 0.9932950191570882\tAccuracy_test: 0.9620535714285714\nTime: 0.09s\tKernel: poly\tAccuracy_train: 0.9904214559386973\tAccuracy_test: 0.9508928571428571\n"
    }
   ],
   "source": [
    "random_state=1\n",
    "for kernel in ['linear', 'rbf', 'poly']:\n",
    "    now = time.time()\n",
    "    clf = Stree(C=7, kernel=kernel, random_state=random_state).fit(Xtrain, ytrain)\n",
    "    accuracy_train = clf.score(Xtrain, ytrain)\n",
    "    accuracy_test = clf.score(Xtest, ytest)\n",
    "    time_spent = time.time() - now\n",
    "    print(f\"Time: {time_spent:.2f}s\\tKernel: {kernel}\\tAccuracy_train: {accuracy_train}\\tAccuracy_test: {accuracy_test}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test diferent values of C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.980583 counts=(array([0, 1]), array([  6, 303]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.943836 counts=(array([0, 1]), array([689,  41]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.990228 counts=(array([0, 1]), array([  3, 304]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.942935 counts=(array([0, 1]), array([694,  42]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9665\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.951989 counts=(array([0, 1]), array([694,  35]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9674\nClassifier's accuracy (test) : 0.9621\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([312]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 
counts=(array([0]), array([4]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.953039 counts=(array([0, 1]), array([690,  34]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9770\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, 
<pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n\n**************************************************\n0.9578 secs\n"
    }
   ],
   "source": [
    "t = time.time()\n",
    "for C in (.001, .01, 1, 5, 17):\n",
    "    clf = Stree(C=C, random_state=random_state)\n",
    "    clf.fit(Xtrain, ytrain)\n",
    "    print(f\"************** C={C} ****************************\")\n",
    "    print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
    "    print(f\"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}\")\n",
    "    print(clf)\n",
    "    print(f\"**************************************************\")\n",
    "print(f\"{time.time() - t:.4f} secs\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "[[0.88204928 0.11795072]\n [0.8640131  0.1359869 ]\n [0.94207521 0.05792479]\n [0.90219947 0.09780053]]\n"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.calibration import CalibratedClassifierCV\n",
    "scaler = StandardScaler()\n",
    "cclf = CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5)\n",
    "cclf.fit(Xtrain, ytrain)\n",
    "res = cclf.predict_proba(Xtest)\n",
    "print(res[:4, :])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test iterator\n",
    "Check different ways of using the iterator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n"
    }
   ],
   "source": [
    "#check iterator\n",
    "for i in list(clf):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676,  24]))\n"
    }
   ],
   "source": [
    "#check iterator again\n",
    "for i in clf:\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test STree is a sklearn estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12bd3b5f0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12bd31710>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12bd315f0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12bd21ef0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12bd2d050>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12bd2d170>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12bd2d290>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12bd2d3b0>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12bd2d560>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12bd2d4d0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12bd31830>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12bd314d0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12bd31950>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12bd3b4d0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12bd21dd0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12bd31b90>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12bd3b830>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12bd37290>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12bd37cb0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12bd31cb0>, 'Stree')\n23 functools.partial(<function 
check_classifiers_train at 0x12bd373b0>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12bd40320>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12bd20ef0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12bd37950>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12bd37830>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12bd3be60>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12bd40440>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12bd2da70>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12bd2dc20>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12bd2dd40>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12bd2de60>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12bd2df80>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12bd400e0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12bd40200>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12bd2d680>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12bd2d950>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12bd405f0>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12bd40680>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12bd40710>, 'Stree')\n"
    }
   ],
   "source": [
    "# Make checks one by one\n",
    "c = 0\n",
    "checks = check_estimator(Stree(), generate_only=True)\n",
    "for check in checks:\n",
    "    c += 1\n",
    "    print(c, check[1])\n",
    "    check[1](check[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check if the classifier is a sklearn estimator\n",
    "from sklearn.utils.estimator_checks import check_estimator\n",
    "check_estimator(Stree())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.6 64-bit ('general': venv)",
   "language": "python",
   "name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6-final"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -11,6 +11,7 @@ import os
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.svm import SVC, LinearSVC
 from sklearn.utils import check_consistent_length
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import (
    check_X_y,
@@ -18,6 +19,8 @@ from sklearn.utils.validation import (
    check_is_fitted,
    _check_sample_weight,
 )
 from sklearn.utils.sparsefuncs import count_nonzero
 from sklearn.metrics._classification import _weighted_sum, _check_targets
 class Snode:
@@ -201,6 +204,13 @@ class Stree(BaseEstimator, ClassifierMixin):
    ) -> "Stree":
        """Build the tree based on the dataset of samples and its labels
        :param X: dataset of samples to make predictions
        :type X: np.array
        :param y: samples labels
        :type y: np.array
        :param sample_weight: weights of the samples. Rescale C per sample.
        High weights force the classifier to put more emphasis on these points
        :type sample_weight: np.array optional
        :raises ValueError: if parameters C or max_depth are out of bounds
        :return: itself to be able to chain actions: fit().predict() ...
        :rtype: Stree
@@ -284,7 +294,8 @@ class Stree(BaseEstimator, ClassifierMixin):
        :type X: np.ndarray
        :param y: samples labels
        :type y: np.ndarray
-        :param sample_weight: weight of samples (used in boosting)
+        :param sample_weight: weight of samples. Rescale C per sample.
        High weights force the classifier to put more emphasis on these points.
        :type sample_weight: np.ndarray
        :param depth: actual depth in the tree
        :type depth: int
@@ -435,20 +446,35 @@ class Stree(BaseEstimator, ClassifierMixin):
        result[:, 0] = 1 - result[:, 1]
        return self._reorder_results(result, indices)
-    def score(self, X: np.array, y: np.array) -> float:
+    def score(
        self, X: np.array, y: np.array, sample_weight: np.array = None
    ) -> float:
        """Compute accuracy of the prediction
        :param X: dataset of samples to make predictions
        :type X: np.array
-        :param y: samples labels
+        :param y: samples labels
-        :type y: np.array
+        :type y: np.array
        :param sample_weight: weights of the samples. Each sample contributes
        to the accuracy in proportion to its weight
        :type sample_weight: np.array optional
        :return: accuracy of the prediction
        :rtype: float
        """
        # sklearn check
        check_is_fitted(self)
-        yp = self.predict(X).reshape(y.shape)
+
-        return np.mean(yp == y)
+        y_pred = self.predict(X).reshape(y.shape)
        # Compute accuracy for each possible representation
        y_type, y_true, y_pred = _check_targets(y, y_pred)
        check_consistent_length(y_true, y_pred, sample_weight)
        if y_type.startswith("multilabel"):
            differing_labels = count_nonzero(y_true - y_pred, axis=1)
            score = differing_labels == 0
        else:
            score = y_true == y_pred
        return _weighted_sum(score, sample_weight, normalize=True)
    def __iter__(self) -> Siterator:
        """Create an iterator to be able to visit the nodes of the tree in preorder,