mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 23:46:02 +00:00
#3 Add degree hyperparam and update notebooks
Update readme to add new notebooks
This commit is contained in:
@@ -1,15 +1,42 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test AdaBoost with different configurations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if STree is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Google Colab setup\n",
|
||||
"#\n",
|
||||
"#!pip install git+https://github.com/doctorado-ml/stree"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"from sklearn.ensemble import AdaBoostClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from sklearn.svm import LinearSVC, SVC\n",
|
||||
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"from stree import Stree"
|
||||
@@ -17,7 +44,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -29,13 +56,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.722% 200\nValid: 83.278% 996\n"
|
||||
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28) y.shape (100492,)\nFraud: 0.659% 662\nValid: 99.341% 99830\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -68,9 +95,10 @@
|
||||
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
|
||||
" return Xtrain, Xtest, ytrain, ytest\n",
|
||||
"\n",
|
||||
"data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
|
||||
"# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
|
||||
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
|
||||
"# data = load_creditcard(0) # Take all the samples\n",
|
||||
"data = load_creditcard(-100000)\n",
|
||||
"\n",
|
||||
"Xtrain = data[0]\n",
|
||||
"Xtest = data[1]\n",
|
||||
@@ -78,15 +106,29 @@
|
||||
"ytest = data[3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## STree alone on the whole dataset and linear kernel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Score Train: 0.986857825567503\nScore Test: 0.9805013927576601\nTook 0.12 seconds\n"
|
||||
"text": "Score Train: 0.9985499829409757\nScore Test: 0.998407854584052\nTook 39.45 seconds\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -99,43 +141,21 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Score Train: 0.997610513739546\nScore Test: 0.9721448467966574\nTook 7.80 seconds\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"now = time.time()\n",
|
||||
"clf2 = AdaBoostClassifier(Stree(max_depth=3, random_state=random_state), n_estimators=100, random_state=random_state)\n",
|
||||
"clf2.fit(Xtrain, ytrain)\n",
|
||||
"print(\"Score Train: \", clf2.score(Xtrain, ytrain))\n",
|
||||
"print(\"Score Test: \", clf2.score(Xtest, ytest))\n",
|
||||
"print(f\"Took {time.time() - now:.2f} seconds\")"
|
||||
"## Different kernels with different configuations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Score Train: 0.9796893667861409\nScore Test: 0.9554317548746518\nTook 0.48 seconds\n"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"now = time.time()\n",
|
||||
"clf3 = AdaBoostClassifier(SVC(kernel="linear",random_state=random_state), n_estimators=100, random_state=random_state, algorithm='SAMME')\n",
|
||||
"clf3.fit(Xtrain, ytrain)\n",
|
||||
"print(\"Score Train: \", clf3.score(Xtrain, ytrain))\n",
|
||||
"print(\"Score Test: \", clf3.score(Xtest, ytest))\n",
|
||||
"print(f\"Took {time.time() - now:.2f} seconds\")"
|
||||
"n_estimators = 10\n",
|
||||
"C = 7\n",
|
||||
"max_depth = 3"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,24 +166,46 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Score Train: 1.0\nScore Test: 0.9721448467966574\nTook 0.86 seconds\n"
|
||||
"text": "Kernel: linear\tTime: 87.00 seconds\tScore Train: 0.9982372\tScore Test: 0.9981425\nKernel: rbf\tTime: 60.60 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 88.08 seconds\tScore Train: 0.9937450\tScore Test: 0.9938968\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"now = time.time()\n",
|
||||
"clf4 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, random_state=random_state), n_estimators=100, random_state=random_state)\n",
|
||||
"clf4.fit(Xtrain, ytrain)\n",
|
||||
"print(\"Score Train: \", clf4.score(Xtrain, ytrain))\n",
|
||||
"print(\"Score Test: \", clf4.score(Xtest, ytest))\n",
|
||||
"print(f\"Took {time.time() - now:.2f} seconds\")"
|
||||
"for kernel in ['linear', 'rbf', 'poly']:\n",
|
||||
" now = time.time()\n",
|
||||
" clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
|
||||
" clf.fit(Xtrain, ytrain)\n",
|
||||
" score_train = clf.score(Xtrain, ytrain)\n",
|
||||
" score_test = clf.score(Xtest, ytest)\n",
|
||||
" print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test algorithm SAMME in AdaBoost to check speed/accuracy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Kernel: linear\tTime: 58.75 seconds\tScore Train: 0.9980524\tScore Test: 0.9978771\nKernel: rbf\tTime: 12.49 seconds\tScore Train: 0.9934181\tScore Test: 0.9933992\nKernel: poly\tTime: 97.85 seconds\tScore Train: 0.9972137\tScore Test: 0.9971806\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for kernel in ['linear', 'rbf', 'poly']:\n",
|
||||
" now = time.time()\n",
|
||||
" clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||
" clf.fit(Xtrain, ytrain)\n",
|
||||
" score_train = clf.score(Xtrain, ytrain)\n",
|
||||
" score_test = clf.score(Xtest, ytest)\n",
|
||||
" print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,5 +1,20 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Compare STree with different estimators"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if STree is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -40,6 +55,13 @@
|
||||
" !tar xzf creditcard.tgz"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
@@ -55,6 +77,13 @@
|
||||
"print(datetime.date.today(), time.strftime(\"%H:%M:%S\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load dataset and normalize values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
@@ -113,6 +142,13 @@
|
||||
"print(f\"X shape: {X.shape}\\ny shape: {y.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build the models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
@@ -174,6 +210,13 @@
|
||||
"gradient = GradientBoostingClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Do the test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
|
@@ -1,8 +1,23 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test smple_weight, kernels, C, sklearn estimator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if STree is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -14,7 +29,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -30,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -42,13 +57,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Fraud: 0.244% 196\nValid: 99.755% 80234\nX.shape (1196, 28) y.shape (1196,)\nFraud: 16.472% 197\nValid: 83.528% 999\n"
|
||||
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -94,15 +109,30 @@
|
||||
"weights[ytrain==1] = 1.99755 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test smple_weights\n",
|
||||
"Compute accuracy with weights in samples. The weights are set based on the inverse of the number of samples of each class"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Accuracy of Train without weights 1.0\nAccuracy of Train with weights 1.0\nAccuracy of Tests without weights 0.9554317548746518\nAccuracy of Tests with weights 0.9777158774373259\n"
|
||||
"text": "Accuracy of Train without weights 0.9770114942528736\nAccuracy of Train with weights 0.9818007662835249\nAccuracy of Tests without weights 0.953125\nAccuracy of Tests with weights 0.9419642857142857\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -113,9 +143,46 @@
|
||||
"print(\"Accuracy of Tests with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test accuracy with different kernels\n",
|
||||
"Compute accuracy on train and test set with default hyperparmeters of every kernel"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Time: 0.20s\tKernel: linear\tAccuracy_train: 0.9712643678160919\tAccuracy_test: 0.9575892857142857\nTime: 0.09s\tKernel: rbf\tAccuracy_train: 0.9932950191570882\tAccuracy_test: 0.9620535714285714\nTime: 0.09s\tKernel: poly\tAccuracy_train: 0.9904214559386973\tAccuracy_test: 0.9508928571428571\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"random_state=1\n",
|
||||
"for kernel in ['linear', 'rbf', 'poly']:\n",
|
||||
" now = time.time()\n",
|
||||
" clf = Stree(C=7, kernel=kernel, random_state=random_state).fit(Xtrain, ytrain)\n",
|
||||
" accuracy_train = clf.score(Xtrain, ytrain)\n",
|
||||
" accuracy_test = clf.score(Xtest, ytest)\n",
|
||||
" time_spent = time.time() - now\n",
|
||||
" print(f\"Time: {time_spent:.2f}s\\tKernel: {kernel}\\tAccuracy_train: {accuracy_train}\\tAccuracy_test: {accuracy_test}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test diferent values of C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"outputPrepend"
|
||||
@@ -125,7 +192,7 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9749\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([117]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.970833 counts=(array([0, 1]), array([699, 21]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9797\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([121]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.976257 counts=(array([0, 1]), array([699, 17]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9869\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([127]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.984507 counts=(array([0, 1]), array([699, 11]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9892\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([3]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.987216 counts=(array([0, 1]), array([695, 9]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9952\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n\n**************************************************\n0.1084 secs\n"
|
||||
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9550\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.980583 counts=(array([0, 1]), array([ 6, 303]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.943836 counts=(array([0, 1]), array([689, 41]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9569\nClassifier's accuracy (test) : 0.9576\nroot\nroot - Down\nroot - Down - Down, <cgaf> - Leaf class=1 belief= 0.990228 counts=(array([0, 1]), array([ 3, 304]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.942935 counts=(array([0, 1]), array([694, 42]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9665\nClassifier's accuracy (test) : 0.9598\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([311]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up, <cgaf> - Leaf class=0 belief= 0.951989 counts=(array([0, 1]), array([694, 35]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9674\nClassifier's accuracy (test) : 0.9621\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([312]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([4]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.953039 counts=(array([0, 1]), array([690, 34]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9770\nClassifier's accuracy (test) : 0.9509\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n\n**************************************************\n0.9578 secs\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -143,13 +210,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "[[0.95399748 0.04600252]\n [0.92625258 0.07374742]\n [0.97804877 0.02195123]\n [0.94803313 0.05196687]]\n"
|
||||
"text": "[[0.88204928 0.11795072]\n [0.8640131 0.1359869 ]\n [0.94207521 0.05792479]\n [0.90219947 0.09780053]]\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -165,48 +232,11 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#check iterator\n",
|
||||
"for i in list(clf):\n",
|
||||
" print(i)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([120]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([57]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([14]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.993691 counts=(array([0, 1]), array([630, 4]))\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#check iterator again\n",
|
||||
"for i in clf:\n",
|
||||
" print(i)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check if the classifier is a sklearn estimator\n",
|
||||
"from sklearn.utils.estimator_checks import check_estimator\n",
|
||||
"check_estimator(Stree())"
|
||||
"## Test iterator\n",
|
||||
"Check different weays of using the iterator"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -217,7 +247,48 @@
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x128922b90>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x128918cb0>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x128918b90>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x1289144d0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x1289145f0>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x128914710>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x128914830>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x128914950>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12891ecb0>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x128914b00>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x128914a70>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x128918dd0>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x128918a70>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x128918ef0>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x128922a70>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x1289143b0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12891e170>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x128922dd0>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12891e830>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x128922290>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12891e290>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12891e950>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x1289278c0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12890c4d0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12891eef0>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12891edd0>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x128927440>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x1289279e0>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x128918050>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x128918200>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x128918320>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x128918440>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x128918560>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x128927680>, 'Stree')\n38 functools.partial(<function check_set_params at 0x1289277a0>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x128914c20>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x128914ef0>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x128927b90>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x128927c20>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x128927cb0>, 'Stree')\n"
|
||||
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#check iterator\n",
|
||||
"for i in list(clf):\n",
|
||||
" print(i)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([314]))\nroot - Down - Up\nroot - Down - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Down - Up - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([12]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([3]))\nroot - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up - Up - Up - Up - Up - Up\nroot - Up - Up - Up - Up - Up - Up - Up - Down\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.965714 counts=(array([0, 1]), array([676, 24]))\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#check iterator again\n",
|
||||
"for i in clf:\n",
|
||||
" print(i)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test STree is a sklearn estimator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12bd3b5f0>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12bd31710>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12bd315f0>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12bd21ef0>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12bd2d050>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12bd2d170>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12bd2d290>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12bd2d3b0>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12bd37710>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12bd2d560>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12bd2d4d0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12bd31830>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12bd314d0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12bd31950>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12bd3b4d0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12bd21dd0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12bd31b90>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12bd3b830>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12bd37290>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12bd37cb0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12bd31cb0>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12bd373b0>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12bd40320>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12bd20ef0>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12bd37950>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12bd37830>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12bd3be60>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12bd40440>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12bd2da70>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12bd2dc20>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12bd2dd40>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12bd2de60>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12bd2df80>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12bd400e0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12bd40200>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12bd2d680>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12bd2d950>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12bd405f0>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12bd40680>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12bd40710>, 'Stree')\n"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -232,10 +303,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# Check if the classifier is a sklearn estimator\n",
|
||||
"from sklearn.utils.estimator_checks import check_estimator\n",
|
||||
"check_estimator(Stree())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user