First approach

Added max_depth, tol, weighted samples
2020-05-29 12:46:10 +02:00
parent e95bd9697a
commit ed98054f0d
10 changed files with 676 additions and 516 deletions
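
The hyperparameters named in the commit message are added to the `Stree` constructor in the source diff below. A minimal usage sketch, assuming an iris-style binary dataset (the data setup is illustrative and not part of the commit):

```python
# Hypothetical usage of the hyperparameters added in this commit; Stree and the
# parameter names (max_depth, tol) come from the diff below, the iris setup is
# only an example.
from sklearn.datasets import load_iris
from stree import Stree

X, y = load_iris(return_X_y=True)
y[y == 2] = 0                         # Stree is tagged binary_only at this commit

clf = Stree(C=1.0, max_depth=2, tol=1e-4, random_state=0)
clf.fit(X, y)
print(clf.score(X, y))                # accuracy on the training data
print(clf)                            # the induced tree of hyperplanes
```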

README.md

@@ -18,15 +18,15 @@ pip install git+https://github.com/doctorado-ml/stree
##### Slow launch but better integration
* [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/test.ipynb) Test notebook
* [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Doctorado-ML/STree/master?urlpath=lab/tree/notebooks/test.ipynb) Test notebook
##### Fast launch but you have to run the first commented-out cell for setup
* [![Test](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test.ipynb) Test notebook
* [![Test](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test.ipynb) Test notebook
* [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test2.ipynb) Another Test notebook
* [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test2.ipynb) Another Test notebook
* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/test_graphs.ipynb) Test Graphics notebook
* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics notebook
### Command line

notebooks/gridsearch.ipynb (new normal file, 162 lines)

@@ -0,0 +1,162 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.datasets import load_iris\n",
"from stree import Stree"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#X, y = load_iris(return_X_y=True)\n",
"#y[y==2] = 0"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('data/creditcard.csv')\n",
"y = df.Class.values\n",
"X = df.drop(['Class', 'Time', 'Amount'], axis=1).values"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "\n"
}
],
"source": [
"c = Stree(C=17, max_depth=2)\n",
"print(c)\n",
"c.fit(X, y)\n",
"print(len(list(c)))\n",
"print(c)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#'base_estimator': [DecisionTreeClassifier(max_depth=1), Stree(max_depth=2), Stree(max_depth=3)],\n",
"parameters = {\n",
" 'base_estimator': [Stree(max_depth=2), Stree(max_depth=3)],\n",
" 'n_estimators': [20, 50, 100, 150],\n",
" 'learning_rate': [.5, 1, 1.5] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#parameters = {\n",
"# 'base_estimator': [DecisionTreeClassifier(max_depth=1), DecisionTreeClassifier(max_depth=5), Stree(), Stree(C=.1), Stree(C=.01), Stree(C=3)],\n",
"# 'n_estimators': [20, 50, 100, 150],\n",
"# 'learning_rate': [.5, 1, 1.5] \n",
"#}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Fitting 5 folds for each of 24 candidates, totalling 120 fits\n[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 2.5s\n[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 2.6s\n[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 2.6s\n[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 2.6s\n[Parallel(n_jobs=-1)]: Batch computation too fast (0.1837s.) Setting batch_size=2.\n[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 2.7s\n[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 2.7s\n[Parallel(n_jobs=-1)]: Batch computation too fast (0.0313s.) Setting batch_size=4.\n[Parallel(n_jobs=-1)]: Done 64 tasks | elapsed: 2.7s\n[Parallel(n_jobs=-1)]: Batch computation too fast (0.0302s.) Setting batch_size=8.\n[Parallel(n_jobs=-1)]: Done 92 out of 120 | elapsed: 2.7s remaining: 0.8s\n[Parallel(n_jobs=-1)]: Done 118 out of 120 | elapsed: 2.7s remaining: 0.0s\n[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 2.7s finished\n"
},
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Stree doesn't support sample_weight.",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-1a0e5b8c6bec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAdaBoostClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mgrid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_train_score\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/.virtualenvs/general/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 71\u001b[0m FutureWarning)\n\u001b[1;32m 72\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minner_f\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/general/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m 763\u001b[0m \u001b[0mrefit_start_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 765\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 766\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 767\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/general/lib/python3.7/site-packages/sklearn/ensemble/_weight_boosting.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0;31m# Fit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 443\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 444\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_validate_estimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/general/lib/python3.7/site-packages/sklearn/ensemble/_weight_boosting.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;31m# Check parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_estimator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;31m# Clear any previous fit results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.virtualenvs/general/lib/python3.7/site-packages/sklearn/ensemble/_weight_boosting.py\u001b[0m in \u001b[0;36m_validate_estimator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 459\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhas_fit_parameter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase_estimator_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"sample_weight\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m raise ValueError(\"%s doesn't support sample_weight.\"\n\u001b[0;32m--> 461\u001b[0;31m % self.base_estimator_.__class__.__name__)\n\u001b[0m\u001b[1;32m 462\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_boost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miboost\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Stree doesn't support sample_weight."
]
}
],
"source": [
"random_state=2020\n",
"clf = AdaBoostClassifier(random_state=random_state)\n",
"grid = GridSearchCV(clf, parameters, verbose=10, n_jobs=-1, return_train_score=True)\n",
"grid.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "AdaBoostClassifier(base_estimator=Stree(max_depth=2), learning_rate=0.5,\n n_estimators=20, random_state=2020)\n"
}
],
"source": [
"print(grid.best_estimator_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
"display_name": "Python 3.7.6 64-bit ('general': venv)"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
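
The ValueError in the grid search cell above is raised by sklearn itself: AdaBoostClassifier only accepts base estimators whose fit() signature exposes an explicit sample_weight parameter, and at this commit Stree.fit takes weighted_samples instead (see the source diff further down). A short sketch of that check, assuming the same environment as the notebook:

```python
# sklearn's AdaBoost validation in a nutshell: it inspects the base estimator's
# fit() signature for an explicit sample_weight parameter.
from sklearn.utils.validation import has_fit_parameter
from stree import Stree

print(has_fit_parameter(Stree(), "sample_weight"))   # False at this commit
# Because fit() only exposes weighted_samples/**fitparams, AdaBoostClassifier
# raises "Stree doesn't support sample_weight." before any boosting starts.
```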

notebooks/test2.ipynb (new normal file, 233 lines)

@@ -0,0 +1,233 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Google Colab setup\n",
"#\n",
"#!pip install git+https://github.com/doctorado-ml/stree"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.datasets import make_classification, load_iris, load_wine\n",
"from sklearn.model_selection import train_test_split\n",
"from stree import Stree\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"if not os.path.isfile('data/creditcard.csv'):\n",
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
" !tar xzf creditcard.tgz"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.043% 493\nValid: 66.957% 999\n"
}
],
"source": [
"random_state=1\n",
"\n",
"def load_creditcard(n_examples=0):\n",
" import pandas as pd\n",
" import numpy as np\n",
" import random\n",
" df = pd.read_csv('data/creditcard.csv')\n",
" print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
" print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
" y = df.Class\n",
" X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
" if n_examples > 0:\n",
" # Take first n_examples samples\n",
" X = X[:n_examples, :]\n",
" y = y[:n_examples, :]\n",
" else:\n",
" # Take all the positive samples with a number of random negatives\n",
" if n_examples < 0:\n",
" Xt = X[(y == 1).ravel()]\n",
" yt = y[(y == 1).ravel()]\n",
" indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
" X = np.append(Xt, X[indices], axis=0)\n",
" y = np.append(yt, y[indices], axis=0)\n",
" print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
" print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
" print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
" return Xtrain, Xtest, ytrain, ytest\n",
"\n",
"# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
"data = load_creditcard(-1000) # Take all the samples\n",
"\n",
"Xtrain = data[0]\n",
"Xtest = data[1]\n",
"ytrain = data[2]\n",
"ytest = data[3]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "depth: 1\ndepth: 2\ndepth: 2\n************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.983766 counts=(array([0, 1]), array([ 5, 303]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.942935 counts=(array([0, 1]), array([694, 42]))\n\n**************************************************\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 3\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief=0.990196 counts=(array([0, 1]), array([ 3, 303]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([2]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.942935 counts=(array([0, 1]), array([694, 42]))\n\n**************************************************\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 4\ndepth: 3\ndepth: 2\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9684\nClassifier's accuracy (test) : 0.9688\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([310]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([4]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.954608 counts=(array([0, 1]), array([694, 33]))\n\n**************************************************\ndepth: 1\ndepth: 2\ndepth: 2\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9693\nClassifier's accuracy (test) : 0.9710\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([313]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([7]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.955801 counts=(array([0, 1]), array([692, 32]))\n\n**************************************************\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 4\ndepth: 5\ndepth: 6\ndepth: 6\ndepth: 5\ndepth: 4\ndepth: 3\ndepth: 2\ndepth: 3\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9818\nClassifier's accuracy (test) : 0.9554\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([307]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([8]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([25]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([5]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 
belief=1.000000 counts=(array([0]), array([5]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.972263 counts=(array([0, 1]), array([666, 19]))\n\n**************************************************\n0.6576 secs\n"
}
],
"source": [
"t = time.time()\n",
"for C in (.001, .01, 1, 5, 17):\n",
" clf = Stree(C=C, random_state=random_state)\n",
" clf.fit(Xtrain, ytrain)\n",
" print(f\"************** C={C} ****************************\")\n",
" print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
" print(f\"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}\")\n",
" print(clf)\n",
" print(f\"**************************************************\")\n",
"print(f\"{time.time() - t:.4f} secs\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.calibration import CalibratedClassifierCV\n",
"scaler = StandardScaler()\n",
"cclf = CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5)\n",
"cclf.fit(Xtrain, ytrain)\n",
"res = cclf.predict_proba(Xtest)\n",
"#an array containing probabilities of belonging to the 1st class"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([307]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([8]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([25]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([5]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([5]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.972263 counts=(array([0, 1]), array([666, 19]))\n"
}
],
"source": [
"#check iterator\n",
"for i in list(clf):\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([307]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([8]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([25]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([3]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([5]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([5]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.972263 counts=(array([0, 1]), array([666, 19]))\n"
}
],
"source": [
"#check iterator again\n",
"for i in clf:\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "depth: 1\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 2\ndepth: 1\ndepth: 1\ndepth: 1\ndepth: 1\n"
}
],
"source": [
"# Check if the classifier is a sklearn estimator\n",
"from sklearn.utils.estimator_checks import check_estimator\n",
"check_estimator(Stree())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12959d320>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x129595440>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 1\ndepth: 1\n3 functools.partial(<function check_fit_score_takes_y at 0x129595320>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12958ed40>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12958ee60>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12958ef80>, 'Stree')\n7 functools.partial(<function check_sample_weights_invariance at 0x1295900e0>, 'Stree')\n8 functools.partial(<function check_estimators_fit_returns_self at 0x129598440>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 2\n9 functools.partial(<function check_estimators_fit_returns_self at 0x129598440>, 'Stree', readonly_memmap=True)\ndepth: 1\ndepth: 2\ndepth: 2\n10 functools.partial(<function check_complex_data at 0x129590290>, 'Stree')\n11 functools.partial(<function check_dtype_object at 0x129590200>, 'Stree')\ndepth: 1\n12 functools.partial(<function check_estimators_empty_data_messages at 0x129595560>, 'Stree')\n13 functools.partial(<function check_pipeline_consistency at 0x129595200>, 'Stree')\ndepth: 1\ndepth: 1\n14 functools.partial(<function check_estimators_nan_inf at 0x129595680>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\n15 functools.partial(<function check_estimators_overwrite_params at 0x12959d200>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 2\n16 functools.partial(<function check_estimator_sparse_data at 0x12958ec20>, 'Stree')\n17 functools.partial(<function check_estimators_pickle at 0x1295958c0>, 'Stree')\ndepth: 1\n18 functools.partial(<function check_classifier_data_not_an_array at 0x12959d560>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\n19 functools.partial(<function check_classifiers_one_label at 0x129595f80>, 'Stree')\n20 functools.partial(<function check_classifiers_classes at 0x1295989e0>, 'Stree')\ndepth: 1\ndepth: 1\ndepth: 1\n21 functools.partial(<function check_estimators_partial_fit_n_features at 0x1295959e0>, 'Stree')\n22 functools.partial(<function check_classifiers_train at 0x1295980e0>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\n23 functools.partial(<function check_classifiers_train at 0x1295980e0>, 'Stree', readonly_memmap=True)\ndepth: 1\ndepth: 2\ndepth: 2\ndepth: 1\ndepth: 2\ndepth: 2\n24 functools.partial(<function check_classifiers_regression_target at 0x12959df80>, 'Stree')\n25 functools.partial(<function check_supervised_y_no_nan at 0x12958cd40>, 'Stree')\n26 functools.partial(<function check_supervised_y_2d at 0x129598680>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\ndepth: 1\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 2\ndepth: 3\ndepth: 3\ndepth: 4\ndepth: 4\ndepth: 5\n27 functools.partial(<function check_estimators_unfitted at 0x129598560>, 'Stree')\n28 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12959db00>, 'Stree')\ndepth: 1\ndepth: 2\ndepth: 3\n29 functools.partial(<function check_decision_proba_consistency at 0x1295a20e0>, 'Stree')\n30 functools.partial(<function check_fit2d_predict1d at 0x1295907a0>, 'Stree')\ndepth: 1\n31 functools.partial(<function check_methods_subset_invariance at 0x129590950>, 'Stree')\ndepth: 
1\n32 functools.partial(<function check_fit2d_1sample at 0x129590a70>, 'Stree')\n33 functools.partial(<function check_fit2d_1feature at 0x129590b90>, 'Stree')\ndepth: 1\ndepth: 2\n34 functools.partial(<function check_fit1d at 0x129590cb0>, 'Stree')\n35 functools.partial(<function check_get_params_invariance at 0x12959dd40>, 'Stree')\n36 functools.partial(<function check_set_params at 0x12959de60>, 'Stree')\n37 functools.partial(<function check_dict_unchanged at 0x1295903b0>, 'Stree')\ndepth: 1\n38 functools.partial(<function check_dont_overwrite_parameters at 0x129590680>, 'Stree')\ndepth: 1\n39 functools.partial(<function check_fit_idempotent at 0x1295a2290>, 'Stree')\ndepth: 1\ndepth: 1\n"
}
],
"source": [
"# Make checks one by one\n",
"c = 0\n",
"checks = check_estimator(Stree(), generate_only=True)\n",
"for check in checks:\n",
" c += 1\n",
" print(c, check[1])\n",
" check[1](check[0])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
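
A short legend for the tree printouts in notebooks/test2.ipynb above, inferred from the source diff further down; the wine dataset below is only a self-contained stand-in for the notebook's data:

```python
# Legend for the printed trees (inferred from the source diff, not documented
# in the notebook itself):
#   "Down"   branch: samples whose decision value w.r.t. the node's LinearSVC is > 0
#   "Up"     branch: the remaining samples
#   "<pure>" leaf  : contains a single class
#   "<cgaf>" leaf  : kept when the split separated nothing; predicts the
#                    majority class with the printed belief
from sklearn.datasets import load_wine
from stree import Stree

X, y = load_wine(return_X_y=True)
y[y == 2] = 0                         # make the problem binary, as in the notebooks
print(Stree(C=1.0, random_state=1).fit(X, y))
```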

notebooks/test_graphs.ipynb (new normal file, 197 lines)

@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Google Colab setup\n",
"#\n",
"#!pip install git+https://github.com/doctorado-ml/stree"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "ModuleNotFoundError",
"evalue": "No module named 'stree'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-36af63297651>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmake_blobs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msvm\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLinearSVC\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mstree\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mStree\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStree_grapher\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'stree'"
]
}
],
"source": [
"import time\n",
"import random\n",
"import numpy as np\n",
"from sklearn.datasets import make_blobs\n",
"from sklearn.svm import LinearSVC\n",
"from stree import Stree, Stree_grapher"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def build_data(random_state):\n",
" random.seed(random_state)\n",
" X, y = make_blobs(centers=10, n_features=3, n_samples=500, random_state=random_state)\n",
" def make_binary(y):\n",
" for i in range(2, 10):\n",
" y[y==i] = random.randint(0, 1)\n",
" return y\n",
" y = make_binary(y)\n",
" #print(X.shape, np.unique(y), y[y==0].shape, y[y==1].shape)\n",
" return X, y"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'Stree_grapher' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-b909470cb406>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStree_grapher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.01\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_iter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m200\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#gr.save_all(save_folder='data/', save_prefix='7')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'Stree_grapher' is not defined"
]
}
],
"source": [
"X, y = build_data(10)\n",
"gr = Stree_grapher(dict(C=.01, max_iter=200))\n",
"gr.fit(X, y)\n",
"#gr.save_all(save_folder='data/', save_prefix='7')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'gr' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-efa3db892bfd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'gr' is not defined"
]
}
],
"source": [
"print(gr)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'gr' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-0e62f081c9aa>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Agg'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_all\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_folder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'data/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'gr' is not defined"
]
}
],
"source": [
"import matplotlib\n",
"matplotlib.use('Agg')\n",
"gr.save_all(save_folder='data/')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'gr' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-b0484cfe9d26>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m#%matplotlib inline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'widget'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_tree_gr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_hyperplane\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'gr' is not defined"
]
}
],
"source": [
"#Uncomment one of the following lines to display graphics: static(inline), dynamic(widget)\n",
"#%matplotlib inline\n",
"%matplotlib widget\n",
"gr._tree_gr.plot_hyperplane()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'gr' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-4277c1aacbe2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#%matplotlib widget\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_all\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'gr' is not defined"
]
}
],
"source": [
"#Uncomment one of the following lines to display graphics: static(inline), dynamic(widget)\n",
"%matplotlib inline\n",
"#%matplotlib widget\n",
"gr.plot_all()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
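
Every error in notebooks/test_graphs.ipynb above cascades from the single failed import in its second cell: stree is not installed in that kernel, so gr is never defined and each later cell raises NameError. A hypothetical in-notebook recovery, reusing the install command already present (commented out) in the first cell:

```python
# Install the package into the running kernel, then retry the import.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install",
                       "git+https://github.com/doctorado-ml/stree"])
from stree import Stree, Stree_grapher   # should now resolve
```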


@@ -104,23 +104,28 @@ class Stree(BaseEstimator, ClassifierMixin):
"""
__folder = 'data/'
def __init__(self, C: float = 1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
def __init__(self, C: float = 1.0, max_iter: int = 1000, random_state: int = 0,
max_depth: int=None, tol: float=1e-4, use_predictions: bool = False):
self.max_iter = max_iter
self.C = C
self.random_state = random_state
self.use_predictions = use_predictions
self.max_depth = max_depth
self.tol = tol
def get_params(self, deep=True):
def get_params(self, deep: bool=True) -> dict:
"""Get a dict with the hyperparameters and their values, as required by sklearn
"""
return {
'C': self.C,
'random_state': self.random_state,
'max_iter': self.max_iter,
'use_predictions': self.use_predictions
'use_predictions': self.use_predictions,
'max_depth': self.max_depth,
'tol': self.tol
}
def set_params(self, **parameters):
def set_params(self, **parameters: dict):
"""Set hyperparameters as specified by sklearn, needed for grid searches
"""
for parameter, value in parameters.items():
@@ -128,13 +133,18 @@ class Stree(BaseEstimator, ClassifierMixin):
return self
# Added binary_only tag as required by sklearn check_estimator
def _more_tags(self):
def _more_tags(self) -> dict:
return {'binary_only': True}
def _linear_function(self, data: np.array, node: Snode) -> np.array:
coef = node._vector[0, :].reshape(-1, data.shape[1])
return data.dot(coef.T) + node._interceptor[0]
def _split_array(self, origin: np.array, down: np.array) -> list:
up = ~down
return origin[up[:, 0]] if any(up) else None, \
origin[down[:, 0]] if any(down) else None
def _split_data(self, node: Snode, data: np.ndarray, indices: np.ndarray) -> list:
if self.use_predictions:
yp = node._clf.predict(data)
@@ -145,25 +155,30 @@ class Stree(BaseEstimator, ClassifierMixin):
# computes the position of every sample w.r.t. the hyperplane
res = self._linear_function(data, node)
down = res > 0
up = ~down
data_down = data[down[:, 0]] if any(down) else None
indices_down = indices[down[:, 0]] if any(down) else None
res_down = res[down[:, 0]] if any(down) else None
data_up = data[up[:, 0]] if any(up) else None
indices_up = indices[up[:, 0]] if any(up) else None
res_up = res[up[:, 0]] if any(up) else None
data_up, data_down = self._split_array(data, down)
indices_up, indices_down = self._split_array(indices, down)
res_up, res_down = self._split_array(res, down)
return [data_up, indices_up, data_down, indices_down, res_up, res_down]
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
def fit(self, X: np.ndarray, y: np.ndarray, weighted_samples: np.array=None, **fitparams: dict) -> 'Stree':
from sklearn.utils.multiclass import check_classification_targets
if fitparams is not None:
self.set_params(**fitparams)
if type(y).__name__ == 'np.ndarray':
y = y.ravel()
if self.C < 0:
raise ValueError(f"Penalty term must be positive... got (C={self.C:f})")
self.__max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth
if self.__max_depth < 1:
raise ValueError(f"Maximum depth has to be greater than 0... got (max_depth={self.max_depth})")
check_classification_targets(y)
X, y = check_X_y(X, y)
self.classes_ = np.unique(y)
self.n_iter_ = self.max_iter
self.depth_ = 0
check_classification_targets(y)
self.n_features_in_ = X.shape[1]
self.tree_ = self.train(X, y.ravel(), title)
self.tree_ = self.train(X, y, 1, 'root')
self._build_predictor()
return self
@@ -180,8 +195,11 @@ class Stree(BaseEstimator, ClassifierMixin):
run_tree(self.tree_)
def train(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> Snode:
if np.unique(y).shape[0] == 1:
def train(self, X: np.ndarray, y: np.ndarray, depth: int, title: str = 'root') -> Snode:
if depth > self.__max_depth:
return None
if np.unique(y).shape[0] == 1:
# only 1 class => pure dataset
return Snode(None, X, y, title + ', <pure>')
# Train the model
@@ -189,12 +207,13 @@ class Stree(BaseEstimator, ClassifierMixin):
random_state=self.random_state)
clf.fit(X, y)
tree = Snode(clf, X, y, title)
self.depth_ = max(depth, self.depth_)
X_U, y_u, X_D, y_d, _, _ = self._split_data(tree, X, y)
if X_U is None or X_D is None:
# didn't split anything
return Snode(clf, X, y, title + ', <cgaf>')
tree.set_up(self.train(X_U, y_u, title + ' - Up'))
tree.set_down(self.train(X_D, y_d, title + ' - Down'))
tree.set_up(self.train(X_U, y_u, depth + 1, title + ' - Up'))
tree.set_down(self.train(X_D, y_d, depth + 1, title + ' - Down'))
return tree
def _reorder_results(self, y: np.array, indices: np.array, proba=False) -> np.array:
@@ -273,8 +292,9 @@ class Stree(BaseEstimator, ClassifierMixin):
result = result.reshape(X.shape[0], 2)
# Turn distances to hyperplane into probabilities based on fitting distances
# of samples to its hyperplane that classified them, to the sigmoid function
result[:, 1] = 1 / (1 + np.exp(-result[:, 1])) # Probability of being 1
result[:, 0] = 1 - result[:, 1] # Probability of being 0
# Probability of being 1
result[:, 1] = 1 / (1 + np.exp(-result[:, 1]))
result[:, 0] = 1 - result[:, 1] # Probability of being 0
return self._reorder_results(result, indices, proba=True)
def score(self, X: np.array, y: np.array) -> float:
@@ -286,8 +306,12 @@ class Stree(BaseEstimator, ClassifierMixin):
right = (yp == y).astype(int)
return np.sum(right) / len(y)
def __iter__(self):
return Siterator(self.tree_)
def __iter__(self) -> Siterator:
try:
tree = self.tree_
except:
tree = None
return Siterator(tree)
def __str__(self) -> str:
output = ''
@@ -295,6 +319,9 @@ class Stree(BaseEstimator, ClassifierMixin):
output += str(i) + '\n'
return output
def get_folder(self) -> str:
return self.__folder
def _save_datasets(self, tree: Snode, catalog: typing.TextIO, number: int):
"""Save the dataset of the node in a csv file
@@ -324,4 +351,3 @@ class Stree(BaseEstimator, ClassifierMixin):
os.mkdir(self.__folder)
with open(self.get_catalog_name(), 'w', encoding='utf-8') as catalog:
self._save_datasets(self.tree_, catalog, 1)
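
The hunk above also introduces _split_array, a helper that replaces six near-identical slicings in _split_data. A standalone sketch of the idea (not the library's code):

```python
import numpy as np

def split_array(origin: np.ndarray, down: np.ndarray):
    """Return (up_part, down_part); 'down' is a boolean column vector."""
    up = ~down
    return (origin[up[:, 0]] if up.any() else None,
            origin[down[:, 0]] if down.any() else None)

res = np.array([[1.5], [-0.2], [3.0]])    # signed distances to a hyperplane
data = np.arange(6).reshape(3, 2)         # three samples, two features
data_up, data_down = split_array(data, res > 0)
print(data_up)                            # the row with res <= 0
print(data_down)                          # the rows with res > 0
```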


@@ -118,6 +118,12 @@ class Stree_test(unittest.TestCase):
x_file, y_file = self._get_file_data(row[0])
y_original = np.array(self._find_out(x_file, X, y), dtype=int)
self.assertTrue(np.array_equal(y_file, y_original))
if os.path.isdir(self._clf.get_folder()):
try:
os.remove(f"{self._clf.get_folder()}*")
os.rmdir(self._clf.get_folder())
except:
pass
def test_single_prediction(self):
X, y = self._get_Xy()
@@ -253,6 +259,30 @@ class Stree_test(unittest.TestCase):
from sklearn.utils.estimator_checks import check_estimator
check_estimator(Stree())
def test_exception_if_C_is_negative(self):
tclf = Stree(C=-1)
with self.assertRaises(ValueError):
tclf.fit(*self._get_Xy())
def test_check_max_depth_is_positive_or_None(self):
tcl = Stree()
self.assertIsNone(tcl.max_depth)
tcl = Stree(max_depth=1)
self.assertGreaterEqual(1, tcl.max_depth)
with self.assertRaises(ValueError):
tcl = Stree(max_depth=-1)
tcl.fit(*self._get_Xy())
def test_check_max_depth(self):
depth = 3
tcl = Stree(random_state=self._random_state, max_depth=depth)
tcl.fit(*self._get_Xy())
self.assertEqual(depth, tcl.depth_)
def test_unfitted_tree_is_iterable(self):
tcl = Stree()
self.assertEqual(0, len(list(tcl)))
class Snode_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
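
A minimal sketch of the behaviour the new tests above assert, using iris as a stand-in for whatever the suite's _get_Xy fixture returns:

```python
from sklearn.datasets import load_iris
from stree import Stree

X, y = load_iris(return_X_y=True)
y[y == 2] = 0                         # keep the problem binary

# C must be positive and is validated at fit time
try:
    Stree(C=-1).fit(X, y)
except ValueError as err:
    print(err)                        # "Penalty term must be positive..."

# max_depth is validated at fit time and bounds the tree
clf = Stree(max_depth=3, random_state=1).fit(X, y)
print(clf.depth_ <= 3)                # True

# an unfitted tree iterates as empty
print(len(list(Stree())) == 0)        # True
```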

test2.ipynb (deleted file, 227 lines)

@@ -1,227 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#\n",
"# Google Colab setup\n",
"#\n",
"#!pip install git+https://github.com/doctorado-ml/stree"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.datasets import make_classification, load_iris, load_wine\n",
"from sklearn.model_selection import train_test_split\n",
"from stree import Stree\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"if not os.path.isfile('data/creditcard.csv'):\n",
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
" !tar xzf creditcard.tgz"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 32.976% 492\nValid: 67.024% 1000\n"
}
],
"source": [
"random_state=1\n",
"\n",
"def load_creditcard(n_examples=0):\n",
" import pandas as pd\n",
" import numpy as np\n",
" import random\n",
" df = pd.read_csv('data/creditcard.csv')\n",
" print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
" print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
" y = df.Class\n",
" X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
" if n_examples > 0:\n",
" # Take first n_examples samples\n",
" X = X[:n_examples, :]\n",
" y = y[:n_examples, :]\n",
" else:\n",
" # Take all the positive samples with a number of random negatives\n",
" if n_examples < 0:\n",
" Xt = X[(y == 1).ravel()]\n",
" yt = y[(y == 1).ravel()]\n",
" indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
" X = np.append(Xt, X[indices], axis=0)\n",
" y = np.append(yt, y[indices], axis=0)\n",
" print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
" print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
" print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
" return Xtrain, Xtest, ytrain, ytest\n",
"\n",
"# data = load_creditcard(-5000) # Take all true samples + 5000 of the others\n",
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
"data = load_creditcard(-1000) # Take all the samples\n",
"\n",
"Xtrain = data[0]\n",
"Xtest = data[1]\n",
"ytrain = data[2]\n",
"ytest = data[3]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9579\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.987013 counts=(array([0, 1]), array([ 4, 304]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.945652 counts=(array([0, 1]), array([696, 40]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9579\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down, <cgaf> - Leaf class=1 belief=0.990196 counts=(array([0, 1]), array([ 3, 303]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.944444 counts=(array([0, 1]), array([697, 41]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9693\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([6]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief=0.955923 counts=(array([0, 1]), array([694, 32]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9713\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([314]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([6]))\nroot - Up, <cgaf> - Leaf class=0 belief=0.958564 counts=(array([0, 1]), array([694, 30]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9780\nClassifier's accuracy (test) : 0.9420\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n\n**************************************************\n0.4537 secs\n"
}
],
"source": [
"t = time.time()\n",
"for C in (.001, .01, 1, 5, 17):\n",
" clf = Stree(C=C, random_state=random_state)\n",
" clf.fit(Xtrain, ytrain)\n",
" print(f\"************** C={C} ****************************\")\n",
" print(f\"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}\")\n",
" print(f\"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}\")\n",
" print(clf)\n",
" print(f\"**************************************************\")\n",
"print(f\"{time.time() - t:.4f} secs\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.calibration import CalibratedClassifierCV\n",
"scaler = StandardScaler()\n",
"cclf = CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5)\n",
"cclf.fit(Xtrain, ytrain)\n",
"res = cclf.predict_proba(Xtest)\n",
"#an array containing probabilities of belonging to the 1st class"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n"
}
],
"source": [
"#check iterator\n",
"for i in list(clf):\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([301]))\nroot - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([13]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([17]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([3]))\nroot - Up - Up\nroot - Up - Up - Down\nroot - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief=1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief=1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief=0.967376 counts=(array([0, 1]), array([682, 23]))\n"
}
],
"source": [
"#check iterator again\n",
"for i in clf:\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Check if the classifier is a sklearn estimator\n",
"from sklearn.utils.estimator_checks import check_estimator\n",
"check_estimator(Stree())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12d18e0e0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12d185200>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12d1850e0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12d17eb00>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12d17ec20>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12d17ed40>, 'Stree')\n7 functools.partial(<function check_sample_weights_invariance at 0x12d17ee60>, 'Stree')\n8 functools.partial(<function check_estimators_fit_returns_self at 0x12d189200>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12d189200>, 'Stree', readonly_memmap=True)\n10 functools.partial(<function check_complex_data at 0x12d181050>, 'Stree')\n11 functools.partial(<function check_dtype_object at 0x12d17ef80>, 'Stree')\n12 functools.partial(<function check_estimators_empty_data_messages at 0x12d185320>, 'Stree')\n13 functools.partial(<function check_pipeline_consistency at 0x12d181f80>, 'Stree')\n14 functools.partial(<function check_estimators_nan_inf at 0x12d185440>, 'Stree')\n15 functools.partial(<function check_estimators_overwrite_params at 0x12d189f80>, 'Stree')\n16 functools.partial(<function check_estimator_sparse_data at 0x12d17e9e0>, 'Stree')\n17 functools.partial(<function check_estimators_pickle at 0x12d185680>, 'Stree')\n18 functools.partial(<function check_classifier_data_not_an_array at 0x12d18e320>, 'Stree')\n19 functools.partial(<function check_classifiers_one_label at 0x12d185d40>, 'Stree')\n20 functools.partial(<function check_classifiers_classes at 0x12d1897a0>, 'Stree')\n21 functools.partial(<function check_estimators_partial_fit_n_features at 0x12d1857a0>, 'Stree')\n22 functools.partial(<function check_classifiers_train at 0x12d185e60>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 0x12d185e60>, 'Stree', readonly_memmap=True)\n24 functools.partial(<function check_classifiers_regression_target at 0x12d18ed40>, 'Stree')\n25 functools.partial(<function check_supervised_y_no_nan at 0x12d17cb00>, 'Stree')\n26 functools.partial(<function check_supervised_y_2d at 0x12d189440>, 'Stree')\n27 functools.partial(<function check_estimators_unfitted at 0x12d189320>, 'Stree')\n28 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12d18e8c0>, 'Stree')\n29 functools.partial(<function check_decision_proba_consistency at 0x12d18ee60>, 'Stree')\n30 functools.partial(<function check_fit2d_predict1d at 0x12d181560>, 'Stree')\n31 functools.partial(<function check_methods_subset_invariance at 0x12d181710>, 'Stree')\n32 functools.partial(<function check_fit2d_1sample at 0x12d181830>, 'Stree')\n33 functools.partial(<function check_fit2d_1feature at 0x12d181950>, 'Stree')\n34 functools.partial(<function check_fit1d at 0x12d181a70>, 'Stree')\n35 functools.partial(<function check_get_params_invariance at 0x12d18eb00>, 'Stree')\n36 functools.partial(<function check_set_params at 0x12d18ec20>, 'Stree')\n37 functools.partial(<function check_dict_unchanged at 0x12d181170>, 'Stree')\n38 functools.partial(<function check_dont_overwrite_parameters at 0x12d181440>, 'Stree')\n39 functools.partial(<function check_fit_idempotent at 0x12d192050>, 'Stree')\n"
}
],
"source": [
"# Make checks one by one\n",
"c = 0\n",
"checks = check_estimator(Stree(), generate_only=True)\n",
"for check in checks:\n",
" c += 1\n",
" print(c, check[1])\n",
" check[1](check[0])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long