#3 Add degree hyperparam and update notebooks

Update readme to add new notebooks
This commit is contained in:
2020-06-08 20:16:42 +02:00
parent d7c0bc3bc5
commit 26273e936a
8 changed files with 680 additions and 591 deletions

View File

@@ -4,7 +4,7 @@
# Stree # Stree
Oblique Tree classifier based on SVM nodes. The nodes are built and splitted with sklearn SVC models.Stree is a sklearn estimator and can be integrated in pipelines, grid searches, etc. Oblique Tree classifier based on SVM nodes. The nodes are built and split with sklearn SVC models. Stree is a sklearn estimator and can be integrated in pipelines, grid searches, etc.
![Stree](https://raw.github.com/doctorado-ml/stree/master/example.png) ![Stree](https://raw.github.com/doctorado-ml/stree/master/example.png)
@@ -28,6 +28,10 @@ pip install git+https://github.com/doctorado-ml/stree
* [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test2.ipynb) Another Test notebook * [![Test2](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test2.ipynb) Another Test notebook
* [![Adaboost](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/adaboost.ipynb) Adaboost
* [![Gridsearch](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/gridsearch.ipynb) Gridsearch
* [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics notebook * [![Test Graphics](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/STree/blob/master/notebooks/test_graphs.ipynb) Test Graphics notebook
### Command line ### Command line

View File

@@ -1,15 +1,42 @@
{ {
"cells": [ "cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test AdaBoost with different configurations"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"Uncomment the next cell if STree is not already installed"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [
"#\n",
"# Google Colab setup\n",
"#\n",
"#!pip install git+https://github.com/doctorado-ml/stree"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import time\n", "import time\n",
"from sklearn.ensemble import AdaBoostClassifier\n", "from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.svm import SVC\n", "from sklearn.svm import LinearSVC, SVC\n",
"from sklearn.model_selection import GridSearchCV, train_test_split\n", "from sklearn.model_selection import GridSearchCV, train_test_split\n",
"from sklearn.datasets import load_iris\n", "from sklearn.datasets import load_iris\n",
"from stree import Stree" "from stree import Stree"
@@ -17,7 +44,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -29,13 +56,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.722% 200\nValid: 83.278% 996\n" "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28) y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
} }
], ],
"source": [ "source": [
@@ -68,9 +95,10 @@
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n", " Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
" return Xtrain, Xtest, ytrain, ytest\n", " return Xtrain, Xtest, ytrain, ytest\n",
"\n", "\n",
"data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n", "# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
"# data = load_creditcard(5000) # Take the first 5000 samples\n", "# data = load_creditcard(5000) # Take the first 5000 samples\n",
"# data = load_creditcard(0) # Take all the samples\n", "# data = load_creditcard(0) # Take all the samples\n",
"data = load_creditcard(-100000)\n",
"\n", "\n",
"Xtrain = data[0]\n", "Xtrain = data[0]\n",
"Xtest = data[1]\n", "Xtest = data[1]\n",
@@ -78,15 +106,29 @@
"ytest = data[3]" "ytest = data[3]"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tests"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## STree alone on the whole dataset and linear kernel"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "Score Train: 0.986857825567503\nScore Test: 0.9805013927576601\nTook 0.12 seconds\n" "text": "Score Train: 0.9985499829409757\nScore Test: 0.998407854584052\nTook 39.45 seconds\n"
} }
], ],
"source": [ "source": [
@@ -99,43 +141,21 @@
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Score Train: 0.997610513739546\nScore Test: 0.9721448467966574\nTook 7.80 seconds\n"
}
],
"source": [ "source": [
"now = time.time()\n", "## Different kernels with different configurations"
"clf2 = AdaBoostClassifier(Stree(max_depth=3, random_state=random_state), n_estimators=100, random_state=random_state)\n",
"clf2.fit(Xtrain, ytrain)\n",
"print(\"Score Train: \", clf2.score(Xtrain, ytrain))\n",
"print(\"Score Test: \", clf2.score(Xtest, ytest))\n",
"print(f\"Took {time.time() - now:.2f} seconds\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"output_type": "stream",
"name": "stdout",
"text": "Score Train: 0.9796893667861409\nScore Test: 0.9554317548746518\nTook 0.48 seconds\n"
}
],
"source": [ "source": [
"now = time.time()\n", "n_estimators = 10\n",
"clf3 = AdaBoostClassifier(SVC(kernel="linear",random_state=random_state), n_estimators=100, random_state=random_state, algorithm='SAMME')\n", "C = 7\n",
"clf3.fit(Xtrain, ytrain)\n", "max_depth = 3"
"print(\"Score Train: \", clf3.score(Xtrain, ytrain))\n",
"print(\"Score Test: \", clf3.score(Xtest, ytest))\n",
"print(f\"Took {time.time() - now:.2f} seconds\")"
] ]
}, },
{ {
@@ -146,24 +166,46 @@
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "Score Train: 1.0\nScore Test: 0.9721448467966574\nTook 0.86 seconds\n" "text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
} }
], ],
"source": [ "source": [
"now = time.time()\n", "for kernel in ['linear', 'rbf', 'poly']:\n",
"clf4 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=random_state), n_estimators=100, random_state=random_state)\n", " now = time.time()\n",
"clf4.fit(Xtrain, ytrain)\n", " clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
"print(\"Score Train: \", clf4.score(Xtrain, ytrain))\n", " clf.fit(Xtrain, ytrain)\n",
"print(\"Score Test: \", clf4.score(Xtest, ytest))\n", " score_train = clf.score(Xtrain, ytrain)\n",
"print(f\"Took {time.time() - now:.2f} seconds\")" " score_test = clf.score(Xtest, ytest)\n",
" print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test algorithm SAMME in AdaBoost to check speed/accuracy"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"output_type": "stream",
"name": "stdout",
"text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
}
],
"source": [
"for kernel in ['linear', 'rbf', 'poly']:\n",
" now = time.time()\n",
" clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
" clf.fit(Xtrain, ytrain)\n",
" score_train = clf.score(Xtrain, ytrain)\n",
" score_test = clf.score(Xtest, ytest)\n",
" print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
]
} }
], ],
"metadata": { "metadata": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,20 @@
{ {
"cells": [ "cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare STree with different estimators"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"Uncomment the next cell if STree is not already installed"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
@@ -40,6 +55,13 @@
" !tar xzf creditcard.tgz" " !tar xzf creditcard.tgz"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tests"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,
@@ -55,6 +77,13 @@
"print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))" "print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load dataset and normalize values"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 5,
@@ -113,6 +142,13 @@
"print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")" "print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build the models"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 9,
@@ -174,6 +210,13 @@
"gradient = GradientBoostingClassifier(random_state=random_state)" "gradient = GradientBoostingClassifier(random_state=random_state)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do the test"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 15,

View File

@@ -1,8 +1,23 @@
{ {
"cells": [ "cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test sample_weight, kernels, C, sklearn estimator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"Uncomment the next cell if STree is not already installed"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -14,7 +29,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -30,7 +45,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -42,13 +57,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.472% 197\nValid: 83.528% 999\n" "text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
} }
], ],
"source": [ "source": [
@@ -94,15 +109,30 @@
"weights[ytrain==1] = 1.99755 " "weights[ytrain==1] = 1.99755 "
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tests"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test sample_weights\n",
"Compute accuracy with weights in samples. The weights are set based on the inverse of the number of samples of each class"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "Accuracy of Train without weights 1.0\nAccuracy of Train with weights 1.0\nAccuracy of Tests without weights 0.9554317548746518\nAccuracy of Tests with weights 0.9777158774373259\n" "text": "Accuracy of Train without weights 0.9770114942528736\nAccuracy of Train with weights 0.9818007662835249\nAccuracy of Tests without weights 0.953125\nAccuracy of Tests with weights 0.9419642857142857\n"
} }
], ],
"source": [ "source": [
@@ -113,9 +143,46 @@
"print(\"Accuracy of Tests with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))" "print(\"Accuracy of Tests with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test accuracy with different kernels\n",
"Compute accuracy on train and test set with default hyperparameters of every kernel"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Time: 0.20s\tKernel: linear\tAccuracy_train: 0.9712643678160919\tAccuracy_test: 0.9575892857142857\nTime: 0.09s\tKernel: rbf\tAccuracy_train: 0.9932950191570882\tAccuracy_test: 0.9620535714285714\nTime: 0.09s\tKernel: poly\tAccuracy_train: 0.9904214559386973\tAccuracy_test: 0.9508928571428571\n"
}
],
"source": [
"random_state=1\n",
"for kernel in ['linear', 'rbf', 'poly']:\n",
" now = time.time()\n",
" clf = Stree(C=7, kernel=kernel, random_state=random_state).fit(Xtrain, ytrain)\n",
" accuracy_train = clf.score(Xtrain, ytrain)\n",
" accuracy_test = clf.score(Xtest, ytest)\n",
" time_spent = time.time() - now\n",
" print(f\"Time: {time_spent:.2f}s\\tKernel: {kernel}\\tAccuracy_train: {accuracy_train}\\tAccuracy_test: {accuracy_test}\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test different values of C"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": { "metadata": {
"tags": [ "tags": [
"outputPrepend" "outputPrepend"
@@ -125,7 +192,7 @@
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9749\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([117]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.970833 counts=(array([0, 1]), array([699, 21]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9797\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([121]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.976257 counts=(array([0, 1]), array([699, 17]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9869\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([127]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.984507 counts=(array([0, 1]), array([699, 11]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9892\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([3]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.987216 counts=(array([0, 1]), array([695, 9]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9952\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf 
class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n\n**************************************************\n0.1084 secs\n" "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.980583 counts=(array([0, 1]), array([ 6, 303]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.943836 counts=(array([0, 1]), array([689, 41]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.990228 counts=(array([0, 1]), array([ 3, 304]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.942935 counts=(array([0, 1]), array([694, 42]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9665\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([311]))\nroot - Down 
- Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.951989 counts=(array([0, 1]), array([694, 35]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9674\nClassifier's accuracy (test) : 0.9621\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([312]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.953039 counts=(array([0, 1]), array([690, 34]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9770\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot 
- Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n\n**************************************************\n0.9578 secs\n"
} }
], ],
"source": [ "source": [
@@ -143,13 +210,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "[[0.95399748 0.04600252]\n [0.92625258 0.07374742]\n [0.97804877 0.02195123]\n [0.94803313 0.05196687]]\n" "text": "[[0.88204928 0.11795072]\n [0.8640131 0.1359869 ]\n [0.94207521 0.05792479]\n [0.90219947 0.09780053]]\n"
} }
], ],
"source": [ "source": [
@@ -165,48 +232,11 @@
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n"
}
],
"source": [ "source": [
"#check iterator\n", "## Test iterator\n",
"for i in list(clf):\n", "Check different ways of using the iterator"
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n"
}
],
"source": [
"#check iterator again\n",
"for i in clf:\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Check if the classifier is a sklearn estimator\n",
"from sklearn.utils.estimator_checks import check_estimator\n",
"check_estimator(Stree())"
] ]
}, },
{ {
@@ -217,7 +247,48 @@
{ {
"output_type": "stream", "output_type": "stream",
"name": "stdout", "name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x128922b90>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x128918cb0>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x128918b90>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x1289144d0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x1289145f0>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x128914710>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x128914830>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x128914950>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x128914b00>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x128914a70>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x128918dd0>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x128918a70>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x128918ef0>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x128922a70>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x1289143b0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12891e170>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x128922dd0>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12891e830>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x128922290>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12891e290>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 
0x12891e950>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x1289278c0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12890c4d0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12891eef0>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12891edd0>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x128927440>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x1289279e0>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x128918050>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x128918200>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x128918320>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x128918440>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x128918560>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x128927680>, 'Stree')\n38 functools.partial(<function check_set_params at 0x1289277a0>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x128914c20>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x128914ef0>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x128927b90>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x128927c20>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x128927cb0>, 'Stree')\n" "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - 
Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n"
}
],
"source": [
"#check iterator\n",
"for i in list(clf):\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n"
}
],
"source": [
"#check iterator again\n",
"for i in clf:\n",
" print(i)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test STree is a sklearn estimator"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12bd3b5f0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12bd31710>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12bd315f0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12bd21ef0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12bd2d050>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12bd2d170>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12bd2d290>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12bd2d3b0>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12bd2d560>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12bd2d4d0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12bd31830>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12bd314d0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12bd31950>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12bd3b4d0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12bd21dd0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12bd31b90>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12bd3b830>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12bd37290>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12bd37cb0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12bd31cb0>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 
0x12bd373b0>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12bd40320>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12bd20ef0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12bd37950>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12bd37830>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12bd3be60>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12bd40440>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12bd2da70>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12bd2dc20>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12bd2dd40>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12bd2de60>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12bd2df80>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12bd400e0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12bd40200>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12bd2d680>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12bd2d950>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12bd405f0>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12bd40680>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12bd40710>, 'Stree')\n"
} }
], ],
"source": [ "source": [
@@ -232,10 +303,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"# Check if the classifier is a sklearn estimator\n",
"from sklearn.utils.estimator_checks import check_estimator\n",
"check_estimator(Stree())"
]
} }
], ],
"metadata": { "metadata": {

File diff suppressed because one or more lines are too long

View File

@@ -126,6 +126,7 @@ class Stree(BaseEstimator, ClassifierMixin):
random_state: int = None, random_state: int = None,
max_depth: int = None, max_depth: int = None,
tol: float = 1e-4, tol: float = 1e-4,
degree: int = 3,
gamma="scale", gamma="scale",
min_samples_split: int = 0, min_samples_split: int = 0,
): ):
@@ -136,6 +137,7 @@ class Stree(BaseEstimator, ClassifierMixin):
self.max_depth = max_depth self.max_depth = max_depth
self.tol = tol self.tol = tol
self.gamma = gamma self.gamma = gamma
self.degree = degree
self.min_samples_split = min_samples_split self.min_samples_split = min_samples_split
def _more_tags(self) -> dict: def _more_tags(self) -> dict:
@@ -263,6 +265,7 @@ class Stree(BaseEstimator, ClassifierMixin):
tol=self.tol, tol=self.tol,
C=self.C, C=self.C,
gamma=self.gamma, gamma=self.gamma,
degree=self.degree,
) )
) )