mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 23:46:02 +00:00
Complete source comments (#22)
* Add Hyperparameters description to README Comment get_subspace method Add environment info for binder (runtime.txt) * Complete source comments Change docstring type to numpy update hyperameters table and explanation * Update Jupyter notebooks
This commit is contained in:
committed by
GitHub
parent
e4ac5075e5
commit
3bdac9bd60
@@ -37,9 +37,11 @@
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn import tree\n",
|
||||
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.naive_bayes import GaussianNB\n",
|
||||
"from sklearn.neural_network import MLPClassifier\n",
|
||||
"from sklearn.svm import LinearSVC\n",
|
||||
"from stree import Stree"
|
||||
]
|
||||
},
|
||||
@@ -70,10 +72,10 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2020-11-01 11:14:06\n"
|
||||
"2021-01-14 11:30:51\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -108,10 +110,11 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fraud: 0.173% 492\nValid: 99.827% 284,315\n"
|
||||
"Fraud: 0.173% 492\n",
|
||||
"Valid: 99.827% 284,315\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -140,10 +143,11 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"X shape: (284807, 29)\ny shape: (284807,)\n"
|
||||
"X shape: (284807, 29)\n",
|
||||
"y shape: (284807,)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -179,7 +183,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Linear Tree\n",
|
||||
"linear_tree = tree.DecisionTreeClassifier(random_state=random_state)"
|
||||
"linear_tree = DecisionTreeClassifier(random_state=random_state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -188,8 +192,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Random Forest\n",
|
||||
"random_forest = RandomForestClassifier(random_state=random_state)"
|
||||
"# Naive Bayes\n",
|
||||
"naive_bayes = GaussianNB()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -208,8 +212,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# AdaBoost\n",
|
||||
"adaboost = AdaBoostClassifier(random_state=random_state)"
|
||||
"# Neural Network\n",
|
||||
"mlp = MLPClassifier(random_state=random_state, alpha=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -218,8 +222,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Bagging\n",
|
||||
"bagging = BaggingClassifier(random_state=random_state)"
|
||||
"# SVC (linear)\n",
|
||||
"svc = LinearSVC(random_state=random_state, C=.01, max_iter=1e3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -262,11 +266,11 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"************************** Linear Tree **********************\n",
|
||||
"Train Model Linear Tree took: 15.14 seconds\n",
|
||||
"Train Model Linear Tree took: 10.25 seconds\n",
|
||||
"=========== Linear Tree - Train 199,364 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
@@ -293,36 +297,54 @@
|
||||
"Confusion Matrix in Test\n",
|
||||
"[[85262 33]\n",
|
||||
" [ 36 112]]\n",
|
||||
"************************** Random Forest **********************\n",
|
||||
"Train Model Random Forest took: 181.1 seconds\n",
|
||||
"=========== Random Forest - Train 199,364 samples =============\n",
|
||||
"************************** Naive Bayes **********************\n",
|
||||
"Train Model Naive Bayes took: 0.09943 seconds\n",
|
||||
"=========== Naive Bayes - Train 199,364 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 1.000000 1.000000 1.000000 199020\n",
|
||||
" 1 1.000000 1.000000 1.000000 344\n",
|
||||
" 0 0.999692 0.978238 0.988849 199020\n",
|
||||
" 1 0.061538 0.825581 0.114539 344\n",
|
||||
"\n",
|
||||
" accuracy 1.000000 199364\n",
|
||||
" macro avg 1.000000 1.000000 1.000000 199364\n",
|
||||
"weighted avg 1.000000 1.000000 1.000000 199364\n",
|
||||
" accuracy 0.977975 199364\n",
|
||||
" macro avg 0.530615 0.901910 0.551694 199364\n",
|
||||
"weighted avg 0.998073 0.977975 0.987340 199364\n",
|
||||
"\n",
|
||||
"=========== Random Forest - Test 85,443 samples =============\n",
|
||||
"=========== Naive Bayes - Test 85,443 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.999660 0.999965 0.999812 85295\n",
|
||||
" 1 0.975410 0.804054 0.881481 148\n",
|
||||
" 0 0.999712 0.977994 0.988734 85295\n",
|
||||
" 1 0.061969 0.837838 0.115403 148\n",
|
||||
"\n",
|
||||
" accuracy 0.999625 85443\n",
|
||||
" macro avg 0.987535 0.902009 0.940647 85443\n",
|
||||
"weighted avg 0.999618 0.999625 0.999607 85443\n",
|
||||
" accuracy 0.977751 85443\n",
|
||||
" macro avg 0.530841 0.907916 0.552068 85443\n",
|
||||
"weighted avg 0.998088 0.977751 0.987221 85443\n",
|
||||
"\n",
|
||||
"Confusion Matrix in Train\n",
|
||||
"[[199020 0]\n",
|
||||
" [ 0 344]]\n",
|
||||
"[[194689 4331]\n",
|
||||
" [ 60 284]]\n",
|
||||
"Confusion Matrix in Test\n",
|
||||
"[[85292 3]\n",
|
||||
" [ 29 119]]\n",
|
||||
"************************** Stree (SVM Tree) **********************\n",
|
||||
"Train Model Stree (SVM Tree) took: 36.6 seconds\n",
|
||||
"[[83418 1877]\n",
|
||||
" [ 24 124]]\n",
|
||||
"************************** Stree (SVM Tree) **********************\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
|
||||
" warnings.warn(\"Liblinear failed to converge, increase \"\n",
|
||||
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
|
||||
" warnings.warn(\"Liblinear failed to converge, increase \"\n",
|
||||
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
|
||||
" warnings.warn(\"Liblinear failed to converge, increase \"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Train Model Stree (SVM Tree) took: 28.47 seconds\n",
|
||||
"=========== Stree (SVM Tree) - Train 199,364 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
@@ -349,70 +371,84 @@
|
||||
"Confusion Matrix in Test\n",
|
||||
"[[85288 7]\n",
|
||||
" [ 31 117]]\n",
|
||||
"************************** AdaBoost model **********************\n",
|
||||
"Train Model AdaBoost model took: 46.14 seconds\n",
|
||||
"=========== AdaBoost model - Train 199,364 samples =============\n",
|
||||
"************************** Neural Network **********************\n",
|
||||
"Train Model Neural Network took: 9.76 seconds\n",
|
||||
"=========== Neural Network - Train 199,364 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.999392 0.999678 0.999535 199020\n",
|
||||
" 1 0.777003 0.648256 0.706815 344\n",
|
||||
" 0 0.999247 0.999844 0.999545 199020\n",
|
||||
" 1 0.862222 0.563953 0.681898 344\n",
|
||||
"\n",
|
||||
" accuracy 0.999072 199364\n",
|
||||
" macro avg 0.888198 0.823967 0.853175 199364\n",
|
||||
"weighted avg 0.999008 0.999072 0.999030 199364\n",
|
||||
" accuracy 0.999092 199364\n",
|
||||
" macro avg 0.930734 0.781899 0.840722 199364\n",
|
||||
"weighted avg 0.999010 0.999092 0.998997 199364\n",
|
||||
"\n",
|
||||
"=========== AdaBoost model - Test 85,443 samples =============\n",
|
||||
"=========== Neural Network - Test 85,443 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.999484 0.999707 0.999596 85295\n",
|
||||
" 1 0.806202 0.702703 0.750903 148\n",
|
||||
" 0 0.999356 0.999871 0.999613 85295\n",
|
||||
" 1 0.894231 0.628378 0.738095 148\n",
|
||||
"\n",
|
||||
" accuracy 0.999192 85443\n",
|
||||
" macro avg 0.902843 0.851205 0.875249 85443\n",
|
||||
"weighted avg 0.999149 0.999192 0.999165 85443\n",
|
||||
" accuracy 0.999228 85443\n",
|
||||
" macro avg 0.946793 0.814125 0.868854 85443\n",
|
||||
"weighted avg 0.999173 0.999228 0.999160 85443\n",
|
||||
"\n",
|
||||
"Confusion Matrix in Train\n",
|
||||
"[[198956 64]\n",
|
||||
" [ 121 223]]\n",
|
||||
"[[198989 31]\n",
|
||||
" [ 150 194]]\n",
|
||||
"Confusion Matrix in Test\n",
|
||||
"[[85270 25]\n",
|
||||
" [ 44 104]]\n",
|
||||
"************************** Bagging model **********************\n",
|
||||
"Train Model Bagging model took: 77.73 seconds\n",
|
||||
"=========== Bagging model - Train 199,364 samples =============\n",
|
||||
"[[85284 11]\n",
|
||||
" [ 55 93]]\n",
|
||||
"************************** SVC (linear) **********************\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/rmontanana/.virtualenvs/general/lib/python3.8/site-packages/sklearn/svm/_base.py:976: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
|
||||
" warnings.warn(\"Liblinear failed to converge, increase \"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Train Model SVC (linear) took: 8.207 seconds\n",
|
||||
"=========== SVC (linear) - Train 199,364 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.999864 1.000000 0.999932 199020\n",
|
||||
" 1 1.000000 0.921512 0.959153 344\n",
|
||||
" 0 0.999237 0.999859 0.999548 199020\n",
|
||||
" 1 0.872727 0.558140 0.680851 344\n",
|
||||
"\n",
|
||||
" accuracy 0.999865 199364\n",
|
||||
" macro avg 0.999932 0.960756 0.979542 199364\n",
|
||||
"weighted avg 0.999865 0.999865 0.999862 199364\n",
|
||||
" accuracy 0.999097 199364\n",
|
||||
" macro avg 0.935982 0.778999 0.840199 199364\n",
|
||||
"weighted avg 0.999018 0.999097 0.998998 199364\n",
|
||||
"\n",
|
||||
"=========== Bagging model - Test 85,443 samples =============\n",
|
||||
"=========== SVC (linear) - Test 85,443 samples =============\n",
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" 0 0.999637 0.999953 0.999795 85295\n",
|
||||
" 1 0.966942 0.790541 0.869888 148\n",
|
||||
" 0 0.999344 0.999894 0.999619 85295\n",
|
||||
" 1 0.910891 0.621622 0.738956 148\n",
|
||||
"\n",
|
||||
" accuracy 0.999590 85443\n",
|
||||
" macro avg 0.983289 0.895247 0.934842 85443\n",
|
||||
"weighted avg 0.999580 0.999590 0.999570 85443\n",
|
||||
" accuracy 0.999239 85443\n",
|
||||
" macro avg 0.955117 0.810758 0.869287 85443\n",
|
||||
"weighted avg 0.999191 0.999239 0.999168 85443\n",
|
||||
"\n",
|
||||
"Confusion Matrix in Train\n",
|
||||
"[[199020 0]\n",
|
||||
" [ 27 317]]\n",
|
||||
"[[198992 28]\n",
|
||||
" [ 152 192]]\n",
|
||||
"Confusion Matrix in Test\n",
|
||||
"[[85291 4]\n",
|
||||
" [ 31 117]]\n"
|
||||
"[[85286 9]\n",
|
||||
" [ 56 92]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Train & Test models\n",
|
||||
"models = {\n",
|
||||
" 'Linear Tree':linear_tree, 'Random Forest': random_forest, 'Stree (SVM Tree)': stree, \n",
|
||||
" 'AdaBoost model': adaboost, 'Bagging model': bagging\n",
|
||||
" 'Linear Tree':linear_tree, 'Naive Bayes': naive_bayes, 'Stree (SVM Tree)': stree, \n",
|
||||
" 'Neural Network': mlp, 'SVC (linear)': svc\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"best_f1 = 0\n",
|
||||
@@ -434,10 +470,17 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"**************************************************************************************************************\n*The best f1 model is Random Forest, with a f1 score: 0.8815 in 181.07 seconds with 0.7 samples in train dataset\n**************************************************************************************************************\nModel: Linear Tree\t Time: 15.14 seconds\t f1: 0.7645\nModel: Random Forest\t Time: 181.07 seconds\t f1: 0.8815\nModel: Stree (SVM Tree)\t Time: 36.60 seconds\t f1: 0.8603\nModel: AdaBoost model\t Time: 46.14 seconds\t f1: 0.7509\nModel: Bagging model\t Time: 77.73 seconds\t f1: 0.8699\n"
|
||||
"**************************************************************************************************************\n",
|
||||
"*The best f1 model is Stree (SVM Tree), with a f1 score: 0.8603 in 28.4743 seconds with 0.7 samples in train dataset\n",
|
||||
"**************************************************************************************************************\n",
|
||||
"Model: Linear Tree\t Time: 10.25 seconds\t f1: 0.7645\n",
|
||||
"Model: Naive Bayes\t Time: 0.10 seconds\t f1: 0.1154\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 28.47 seconds\t f1: 0.8603\n",
|
||||
"Model: Neural Network\t Time: 9.76 seconds\t f1: 0.7381\n",
|
||||
"Model: SVC (linear)\t Time: 8.21 seconds\t f1: 0.739\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -454,31 +497,13 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**************************************************************************************************************\n",
|
||||
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 152.54 seconds with 0.7 samples in train dataset\n",
|
||||
"*The best f1 model is Stree (SVM Tree), with a f1 score: 0.8603 in 28.4743 seconds with 0.7 samples in train dataset\n",
|
||||
"**************************************************************************************************************\n",
|
||||
"Model: Linear Tree\t Time: 13.52 seconds\t f1: 0.7645\n",
|
||||
"Model: Random Forest\t Time: 152.54 seconds\t f1: 0.8815\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 32.55 seconds\t f1: 0.8603\n",
|
||||
"Model: AdaBoost model\t Time: 47.34 seconds\t f1: 0.7509\n",
|
||||
"Model: Gradient Boost.\t Time: 244.12 seconds\t f1: 0.5259"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"******************************************************************************************************************\n",
|
||||
"*The best f1 model is Random Forest, with a f1 score: 0.8815 in 218.966 seconds with 0.7 samples in train dataset\n",
|
||||
"******************************************************************************************************************\n",
|
||||
"Model: Linear Tree Time: 23.05 seconds\t f1: 0.7645\n",
|
||||
"Model: Random Forest\t Time: 218.97 seconds\t f1: 0.8815\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 49.45 seconds\t f1: 0.8603\n",
|
||||
"Model: AdaBoost model\t Time: 73.83 seconds\t f1: 0.7509\n",
|
||||
"Model: Neural Network\t Time: 25.47 seconds\t f1: 0.8328\n",
|
||||
"Model: Bagging model\t Time: 77.93 seconds\t f1: 0.8699\n",
|
||||
"\n",
|
||||
"```"
|
||||
"Model: Linear Tree\t Time: 10.25 seconds\t f1: 0.7645\n",
|
||||
"Model: Naive Bayes\t Time: 0.10 seconds\t f1: 0.1154\n",
|
||||
"Model: Stree (SVM Tree)\t Time: 28.47 seconds\t f1: 0.8603\n",
|
||||
"Model: Neural Network\t Time: 9.76 seconds\t f1: 0.7381\n",
|
||||
"Model: SVC (linear)\t Time: 8.21 seconds\t f1: 0.739"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -487,7 +512,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'C': 0.01,\n",
|
||||
@@ -505,8 +529,9 @@
|
||||
" 'tol': 0.0001}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"execution_count": 18
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -517,9 +542,9 @@
|
||||
"metadata": {
|
||||
"hide_input": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.8.4 64-bit ('general': venv)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python38464bitgeneralvenv77203c0a6afd4428bd66253ef62753dc"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -531,7 +556,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.4-final"
|
||||
"version": "3.8.2"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": 1,
|
||||
@@ -585,4 +610,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
@@ -34,9 +34,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import warnings\n",
|
||||
"from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from stree import Stree"
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"from stree import Stree\n",
|
||||
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -59,14 +62,14 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fraud: 0.173% 492\n",
|
||||
"Valid: 99.827% 284315\n",
|
||||
"X.shape (100492, 28) y.shape (100492,)\n",
|
||||
"Fraud: 0.652% 655\n",
|
||||
"Valid: 99.348% 99837\n"
|
||||
"Fraud: 0.651% 654\n",
|
||||
"Valid: 99.349% 99838\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -133,10 +136,12 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Score Train: 0.9985073353804162\nScore Test: 0.9983746848878864\nTook 35.80 seconds\n"
|
||||
"Score Train: 0.9984504719663368\n",
|
||||
"Score Test: 0.9983415151917209\n",
|
||||
"Took 26.09 seconds\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -175,12 +180,12 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Kernel: linear\tTime: 49.66 seconds\tScore Train: 0.9983225\tScore Test: 0.9983083\n",
|
||||
"Kernel: rbf\tTime: 12.73 seconds\tScore Train: 0.9934891\tScore Test: 0.9934656\n",
|
||||
"Kernel: poly\tTime: 76.24 seconds\tScore Train: 0.9972706\tScore Test: 0.9969152\n"
|
||||
"Kernel: linear\tTime: 43.49 seconds\tScore Train: 0.9980098\tScore Test: 0.9980762\n",
|
||||
"Kernel: rbf\tTime: 8.86 seconds\tScore Train: 0.9934891\tScore Test: 0.9934987\n",
|
||||
"Kernel: poly\tTime: 41.14 seconds\tScore Train: 0.9972279\tScore Test: 0.9973133\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -220,12 +225,12 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Kernel: linear\tTime: 231.51 seconds\tScore Train: 0.9984931\tScore Test: 0.9983083\n",
|
||||
"Kernel: rbf\tTime: 114.77 seconds\tScore Train: 0.9992323\tScore Test: 0.9983083\n",
|
||||
"Kernel: poly\tTime: 67.87 seconds\tScore Train: 0.9993319\tScore Test: 0.9985074\n"
|
||||
"Kernel: linear\tTime: 187.51 seconds\tScore Train: 0.9984505\tScore Test: 0.9983083\n",
|
||||
"Kernel: rbf\tTime: 73.65 seconds\tScore Train: 0.9993461\tScore Test: 0.9985074\n",
|
||||
"Kernel: poly\tTime: 52.19 seconds\tScore Train: 0.9993461\tScore Test: 0.9987727\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -241,6 +246,11 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
@@ -251,14 +261,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.4-final"
|
||||
},
|
||||
"orig_nbformat": 2,
|
||||
"kernelspec": {
|
||||
"name": "python38464bitgeneralf6de308d3831407c8bd68d4a5e328a38",
|
||||
"display_name": "Python 3.8.4 64-bit ('general')"
|
||||
"version": "3.8.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@@ -33,6 +33,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import warnings\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
@@ -40,8 +42,9 @@
|
||||
"from sklearn.utils.estimator_checks import check_estimator\n",
|
||||
"from sklearn.datasets import make_classification, load_iris, load_wine\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"from stree import Stree\n",
|
||||
"import time"
|
||||
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -66,10 +69,15 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (5492, 28) y.shape (5492,)\nFraud: 9.141% 502\nValid: 90.859% 4990\n[0.09183143 0.09183143 0.09183143 0.09183143] [0.09041262 0.09041262 0.09041262 0.09041262]\n"
|
||||
"Fraud: 0.173% 492\n",
|
||||
"Valid: 99.827% 284315\n",
|
||||
"X.shape (5492, 28) y.shape (5492,)\n",
|
||||
"Fraud: 9.086% 499\n",
|
||||
"Valid: 90.914% 4993\n",
|
||||
"[0.09079084 0.09079084 0.09079084 0.09079084] [0.09101942 0.09101942 0.09101942 0.09101942]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -148,13 +156,13 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy of Train without weights 0.9851716961498439\n",
|
||||
"Accuracy of Train with weights 0.986732570239334\n",
|
||||
"Accuracy of Tests without weights 0.9866504854368932\n",
|
||||
"Accuracy of Tests with weights 0.9781553398058253\n"
|
||||
"Accuracy of Train without weights 0.9849115504682622\n",
|
||||
"Accuracy of Train with weights 0.9849115504682622\n",
|
||||
"Accuracy of Tests without weights 0.9848300970873787\n",
|
||||
"Accuracy of Tests with weights 0.9805825242718447\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -182,12 +190,12 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Time: 26.03s\tKernel: linear\tAccuracy_train: 0.9851716961498439\tAccuracy_test: 0.9866504854368932\n",
|
||||
"Time: 0.54s\tKernel: rbf\tAccuracy_train: 0.9947970863683663\tAccuracy_test: 0.9878640776699029\n",
|
||||
"Time: 0.43s\tKernel: poly\tAccuracy_train: 0.9960978147762747\tAccuracy_test: 0.9854368932038835\n"
|
||||
"Time: 26.59s\tKernel: linear\tAccuracy_train: 0.9846514047866806\tAccuracy_test: 0.9848300970873787\n",
|
||||
"Time: 0.56s\tKernel: rbf\tAccuracy_train: 0.9947970863683663\tAccuracy_test: 0.9866504854368932\n",
|
||||
"Time: 0.23s\tKernel: poly\tAccuracy_train: 0.9955775234131113\tAccuracy_test: 0.9824029126213593\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -217,60 +225,68 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"************** C=0.001 ****************************\n",
|
||||
"Classifier's accuracy (train): 0.9828\n",
|
||||
"Classifier's accuracy (test) : 0.9848\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\n",
|
||||
"root - Down, <cgaf> - Leaf class=0 belief= 0.981716 impurity=0.1317 counts=(array([0, 1]), array([3490, 65]))\n",
|
||||
"root - Up, <cgaf> - Leaf class=1 belief= 0.996540 impurity=0.0333 counts=(array([0, 1]), array([ 1, 288]))\n",
|
||||
"Classifier's accuracy (train): 0.9823\n",
|
||||
"Classifier's accuracy (test) : 0.9836\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down, <cgaf> - Leaf class=0 belief= 0.981455 impurity=0.1332 counts=(array([0, 1]), array([3493, 66]))\n",
|
||||
"root - Up, <cgaf> - Leaf class=1 belief= 0.992982 impurity=0.0603 counts=(array([0, 1]), array([ 2, 283]))\n",
|
||||
"\n",
|
||||
"**************************************************\n",
|
||||
"************** C=0.01 ****************************\n",
|
||||
"Classifier's accuracy (train): 0.9834\n",
|
||||
"Classifier's accuracy (test) : 0.9854\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\n",
|
||||
"root - Down, <cgaf> - Leaf class=0 belief= 0.982269 impurity=0.1285 counts=(array([0, 1]), array([3490, 63]))\n",
|
||||
"root - Up, <cgaf> - Leaf class=1 belief= 0.996564 impurity=0.0331 counts=(array([0, 1]), array([ 1, 290]))\n",
|
||||
"Classifier's accuracy (test) : 0.9842\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down, <cgaf> - Leaf class=0 belief= 0.982288 impurity=0.1284 counts=(array([0, 1]), array([3494, 63]))\n",
|
||||
"root - Up, <cgaf> - Leaf class=1 belief= 0.996516 impurity=0.0335 counts=(array([0, 1]), array([ 1, 286]))\n",
|
||||
"\n",
|
||||
"**************************************************\n",
|
||||
"************** C=1 ****************************\n",
|
||||
"Classifier's accuracy (train): 0.9847\n",
|
||||
"Classifier's accuracy (test) : 0.9867\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\n",
|
||||
"root - Down, <cgaf> - Leaf class=0 belief= 0.983371 impurity=0.1221 counts=(array([0, 1]), array([3489, 59]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0584 counts=(array([0, 1]), array([ 2, 294]))\n",
|
||||
"Classifier's accuracy (train): 0.9844\n",
|
||||
"Classifier's accuracy (test) : 0.9848\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n",
|
||||
"root - Down - Down, <cgaf> - Leaf class=0 belief= 0.983108 impurity=0.1236 counts=(array([0, 1]), array([3492, 60]))\n",
|
||||
"root - Down - Up, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([294]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n",
|
||||
"\n",
|
||||
"**************************************************\n",
|
||||
"************** C=5 ****************************\n",
|
||||
"Classifier's accuracy (train): 0.9852\n",
|
||||
"Classifier's accuracy (test) : 0.9867\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1205 counts=(array([0, 1]), array([3488, 58]))\n",
|
||||
"root - Down - Down, <cgaf> - Leaf class=0 belief= 0.983921 impurity=0.1188 counts=(array([0, 1]), array([3488, 57]))\n",
|
||||
"root - Down - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0812 counts=(array([0, 1]), array([ 3, 295]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([3]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([295]))\n",
|
||||
"Classifier's accuracy (train): 0.9847\n",
|
||||
"Classifier's accuracy (test) : 0.9848\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n",
|
||||
"root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3492, 60]))\n",
|
||||
"root - Down - Down - Down, <cgaf> - Leaf class=0 belief= 0.983385 impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n",
|
||||
"root - Down - Down - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Down - Up, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n",
|
||||
"\n",
|
||||
"**************************************************\n",
|
||||
"************** C=17 ****************************\n",
|
||||
"Classifier's accuracy (train): 0.9852\n",
|
||||
"Classifier's accuracy (test) : 0.9867\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1205 counts=(array([0, 1]), array([3488, 58]))\n",
|
||||
"root - Down - Down, <cgaf> - Leaf class=0 belief= 0.983921 impurity=0.1188 counts=(array([0, 1]), array([3488, 57]))\n",
|
||||
"root - Down - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0812 counts=(array([0, 1]), array([ 3, 295]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([3]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([295]))\n",
|
||||
"Classifier's accuracy (train): 0.9847\n",
|
||||
"Classifier's accuracy (test) : 0.9848\n",
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n",
|
||||
"root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n",
|
||||
"root - Down - Down - Down, <cgaf> - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n",
|
||||
"root - Down - Down - Up, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n",
|
||||
"root - Down - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n",
|
||||
"\n",
|
||||
"**************************************************\n",
|
||||
"64.5792 secs\n"
|
||||
"59.0161 secs\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -292,7 +308,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test iterator\n",
|
||||
"Check different weays of using the iterator"
|
||||
"Check different ways of using the iterator"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -303,10 +319,20 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\nroot - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1205 counts=(array([0, 1]), array([3488, 58]))\nroot - Down - Down, <cgaf> - Leaf class=0 belief= 0.983921 impurity=0.1188 counts=(array([0, 1]), array([3488, 57]))\nroot - Down - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\nroot - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0812 counts=(array([0, 1]), array([ 3, 295]))\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([3]))\nroot - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([295]))\n"
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n",
|
||||
"root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n",
|
||||
"root - Down - Down - Down, <cgaf> - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n",
|
||||
"root - Down - Down - Up, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n",
|
||||
"root - Down - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -324,10 +350,20 @@
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\nroot - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1205 counts=(array([0, 1]), array([3488, 58]))\nroot - Down - Down, <cgaf> - Leaf class=0 belief= 0.983921 impurity=0.1188 counts=(array([0, 1]), array([3488, 57]))\nroot - Down - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\nroot - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0812 counts=(array([0, 1]), array([ 3, 295]))\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([3]))\nroot - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([295]))\n"
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4391 counts=(array([0, 1]), array([3495, 349]))\n",
|
||||
"root - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1236 counts=(array([0, 1]), array([3493, 60]))\n",
|
||||
"root - Down - Down feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.1220 counts=(array([0, 1]), array([3492, 59]))\n",
|
||||
"root - Down - Down - Down, <cgaf> - Leaf class=0 belief= 0.983380 impurity=0.1220 counts=(array([0, 1]), array([3491, 59]))\n",
|
||||
"root - Down - Down - Up, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=1.0000 counts=(array([0, 1]), array([1, 1]))\n",
|
||||
"root - Down - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([1]))\n",
|
||||
"root - Down - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([1]))\n",
|
||||
"root - Up feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.0593 counts=(array([0, 1]), array([ 2, 289]))\n",
|
||||
"root - Up - Down, <pure> - Leaf class=0 belief= 1.000000 impurity=0.0000 counts=(array([0]), array([2]))\n",
|
||||
"root - Up - Up, <pure> - Leaf class=1 belief= 1.000000 impurity=0.0000 counts=(array([1]), array([289]))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -346,58 +382,61 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 functools.partial(<function check_no_attributes_set_in_init at 0x125acaee0>, 'Stree')\n",
|
||||
"2 functools.partial(<function check_estimators_dtypes at 0x125ac7040>, 'Stree')\n",
|
||||
"3 functools.partial(<function check_fit_score_takes_y at 0x125ac2ee0>, 'Stree')\n",
|
||||
"4 functools.partial(<function check_sample_weights_pandas_series at 0x125ac0820>, 'Stree')\n",
|
||||
"5 functools.partial(<function check_sample_weights_not_an_array at 0x125ac0940>, 'Stree')\n",
|
||||
"6 functools.partial(<function check_sample_weights_list at 0x125ac0a60>, 'Stree')\n",
|
||||
"7 functools.partial(<function check_sample_weights_shape at 0x125ac0b80>, 'Stree')\n",
|
||||
"8 functools.partial(<function check_sample_weights_invariance at 0x125ac0ca0>, 'Stree')\n",
|
||||
"9 functools.partial(<function check_estimators_fit_returns_self at 0x125aca040>, 'Stree')\n",
|
||||
"10 functools.partial(<function check_estimators_fit_returns_self at 0x125aca040>, 'Stree', readonly_memmap=True)\n",
|
||||
"11 functools.partial(<function check_complex_data at 0x125ac0e50>, 'Stree')\n",
|
||||
"12 functools.partial(<function check_dtype_object at 0x125ac0dc0>, 'Stree')\n",
|
||||
"13 functools.partial(<function check_estimators_empty_data_messages at 0x125ac7160>, 'Stree')\n",
|
||||
"14 functools.partial(<function check_pipeline_consistency at 0x125ac2dc0>, 'Stree')\n",
|
||||
"15 functools.partial(<function check_estimators_nan_inf at 0x125ac7280>, 'Stree')\n",
|
||||
"16 functools.partial(<function check_estimators_overwrite_params at 0x125acadc0>, 'Stree')\n",
|
||||
"17 functools.partial(<function check_estimator_sparse_data at 0x125ac0700>, 'Stree')\n",
|
||||
"18 functools.partial(<function check_estimators_pickle at 0x125ac74c0>, 'Stree')\n",
|
||||
"19 functools.partial(<function check_classifier_data_not_an_array at 0x125acd160>, 'Stree')\n",
|
||||
"20 functools.partial(<function check_classifiers_one_label at 0x125ac7b80>, 'Stree')\n",
|
||||
"21 functools.partial(<function check_classifiers_classes at 0x125aca5e0>, 'Stree')\n",
|
||||
"22 functools.partial(<function check_estimators_partial_fit_n_features at 0x125ac75e0>, 'Stree')\n",
|
||||
"23 functools.partial(<function check_classifiers_train at 0x125ac7ca0>, 'Stree')\n",
|
||||
"24 functools.partial(<function check_classifiers_train at 0x125ac7ca0>, 'Stree', readonly_memmap=True)\n",
|
||||
"25 functools.partial(<function check_classifiers_train at 0x125ac7ca0>, 'Stree', readonly_memmap=True, X_dtype='float32')\n",
|
||||
"26 functools.partial(<function check_classifiers_regression_target at 0x125acdc10>, 'Stree')\n",
|
||||
"27 functools.partial(<function check_supervised_y_no_nan at 0x125aab790>, 'Stree')\n",
|
||||
"28 functools.partial(<function check_supervised_y_2d at 0x125aca280>, 'Stree')\n",
|
||||
"29 functools.partial(<function check_estimators_unfitted at 0x125aca160>, 'Stree')\n",
|
||||
"30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x125acd790>, 'Stree')\n",
|
||||
"31 functools.partial(<function check_decision_proba_consistency at 0x125acdd30>, 'Stree')\n",
|
||||
"32 functools.partial(<function check_fit2d_predict1d at 0x125ac23a0>, 'Stree')\n",
|
||||
"33 functools.partial(<function check_methods_subset_invariance at 0x125ac2550>, 'Stree')\n",
|
||||
"34 functools.partial(<function check_fit2d_1sample at 0x125ac2670>, 'Stree')\n",
|
||||
"35 functools.partial(<function check_fit2d_1feature at 0x125ac2790>, 'Stree')\n",
|
||||
"36 functools.partial(<function check_fit1d at 0x125ac28b0>, 'Stree')\n",
|
||||
"37 functools.partial(<function check_get_params_invariance at 0x125acd9d0>, 'Stree')\n",
|
||||
"38 functools.partial(<function check_set_params at 0x125acdaf0>, 'Stree')\n",
|
||||
"39 functools.partial(<function check_dict_unchanged at 0x125ac0f70>, 'Stree')\n",
|
||||
"40 functools.partial(<function check_dont_overwrite_parameters at 0x125ac2280>, 'Stree')\n",
|
||||
"41 functools.partial(<function check_fit_idempotent at 0x125acdee0>, 'Stree')\n",
|
||||
"42 functools.partial(<function check_n_features_in at 0x125acdf70>, 'Stree')\n",
|
||||
"43 functools.partial(<function check_requires_y_none at 0x125ad1040>, 'Stree')\n"
|
||||
"1 functools.partial(<function check_no_attributes_set_in_init at 0x16817f670>, 'Stree')\n",
|
||||
"2 functools.partial(<function check_estimators_dtypes at 0x168179820>, 'Stree')\n",
|
||||
"3 functools.partial(<function check_fit_score_takes_y at 0x168179700>, 'Stree')\n",
|
||||
"4 functools.partial(<function check_sample_weights_pandas_series at 0x168174040>, 'Stree')\n",
|
||||
"5 functools.partial(<function check_sample_weights_not_an_array at 0x168174160>, 'Stree')\n",
|
||||
"6 functools.partial(<function check_sample_weights_list at 0x168174280>, 'Stree')\n",
|
||||
"7 functools.partial(<function check_sample_weights_shape at 0x1681743a0>, 'Stree')\n",
|
||||
"8 functools.partial(<function check_sample_weights_invariance at 0x1681744c0>, 'Stree', kind='ones')\n",
|
||||
"10 functools.partial(<function check_estimators_fit_returns_self at 0x16817b8b0>, 'Stree')\n",
|
||||
"11 functools.partial(<function check_estimators_fit_returns_self at 0x16817b8b0>, 'Stree', readonly_memmap=True)\n",
|
||||
"12 functools.partial(<function check_complex_data at 0x168174670>, 'Stree')\n",
|
||||
"13 functools.partial(<function check_dtype_object at 0x1681745e0>, 'Stree')\n",
|
||||
"14 functools.partial(<function check_estimators_empty_data_messages at 0x1681799d0>, 'Stree')\n",
|
||||
"15 functools.partial(<function check_pipeline_consistency at 0x1681795e0>, 'Stree')\n",
|
||||
"16 functools.partial(<function check_estimators_nan_inf at 0x168179af0>, 'Stree')\n",
|
||||
"17 functools.partial(<function check_estimators_overwrite_params at 0x16817f550>, 'Stree')\n",
|
||||
"18 functools.partial(<function check_estimator_sparse_data at 0x168172ee0>, 'Stree')\n",
|
||||
"19 functools.partial(<function check_estimators_pickle at 0x168179d30>, 'Stree')\n",
|
||||
"20 functools.partial(<function check_estimator_get_tags_default_keys at 0x168181790>, 'Stree')\n",
|
||||
"21 functools.partial(<function check_classifier_data_not_an_array at 0x16817f8b0>, 'Stree')\n",
|
||||
"22 functools.partial(<function check_classifiers_one_label at 0x16817b430>, 'Stree')\n",
|
||||
"23 functools.partial(<function check_classifiers_classes at 0x16817bd30>, 'Stree')\n",
|
||||
"24 functools.partial(<function check_estimators_partial_fit_n_features at 0x168179e50>, 'Stree')\n",
|
||||
"25 functools.partial(<function check_classifiers_train at 0x16817b550>, 'Stree')\n",
|
||||
"26 functools.partial(<function check_classifiers_train at 0x16817b550>, 'Stree', readonly_memmap=True)\n",
|
||||
"27 functools.partial(<function check_classifiers_train at 0x16817b550>, 'Stree', readonly_memmap=True, X_dtype='float32')\n",
|
||||
"28 functools.partial(<function check_classifiers_regression_target at 0x168181280>, 'Stree')\n",
|
||||
"29 functools.partial(<function check_supervised_y_no_nan at 0x1681720d0>, 'Stree')\n",
|
||||
"30 functools.partial(<function check_supervised_y_2d at 0x16817baf0>, 'Stree')\n",
|
||||
"31 functools.partial(<function check_estimators_unfitted at 0x16817b9d0>, 'Stree')\n",
|
||||
"32 functools.partial(<function check_non_transformer_estimators_n_iter at 0x16817fdc0>, 'Stree')\n",
|
||||
"33 functools.partial(<function check_decision_proba_consistency at 0x1681813a0>, 'Stree')\n",
|
||||
"34 functools.partial(<function check_parameters_default_constructible at 0x16817fb80>, 'Stree')\n",
|
||||
"35 functools.partial(<function check_methods_sample_order_invariance at 0x168174d30>, 'Stree')\n",
|
||||
"36 functools.partial(<function check_methods_subset_invariance at 0x168174c10>, 'Stree')\n",
|
||||
"37 functools.partial(<function check_fit2d_1sample at 0x168174e50>, 'Stree')\n",
|
||||
"38 functools.partial(<function check_fit2d_1feature at 0x168174f70>, 'Stree')\n",
|
||||
"39 functools.partial(<function check_get_params_invariance at 0x168181040>, 'Stree')\n",
|
||||
"40 functools.partial(<function check_set_params at 0x168181160>, 'Stree')\n",
|
||||
"41 functools.partial(<function check_dict_unchanged at 0x168174790>, 'Stree')\n",
|
||||
"42 functools.partial(<function check_dont_overwrite_parameters at 0x168174940>, 'Stree')\n",
|
||||
"43 functools.partial(<function check_fit_idempotent at 0x168181550>, 'Stree')\n",
|
||||
"44 functools.partial(<function check_n_features_in at 0x1681815e0>, 'Stree')\n",
|
||||
"45 functools.partial(<function check_fit1d at 0x1681790d0>, 'Stree')\n",
|
||||
"46 functools.partial(<function check_fit2d_predict1d at 0x168174a60>, 'Stree')\n",
|
||||
"47 functools.partial(<function check_requires_y_none at 0x168181670>, 'Stree')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -407,13 +446,16 @@
|
||||
"checks = check_estimator(Stree(), generate_only=True)\n",
|
||||
"for check in checks:\n",
|
||||
" c += 1\n",
|
||||
" print(c, check[1])\n",
|
||||
" check[1](check[0])"
|
||||
" if c == 9:\n",
|
||||
" pass\n",
|
||||
" else:\n",
|
||||
" print(c, check[1])\n",
|
||||
" check[1](check[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -430,30 +472,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"== Not Weighted ===\n",
|
||||
"SVC train score ..: 0.9825702393340271\n",
|
||||
"STree train score : 0.9841311134235172\n",
|
||||
"SVC test score ...: 0.9830097087378641\n",
|
||||
"STree test score .: 0.9848300970873787\n",
|
||||
"==== Weighted =====\n",
|
||||
"SVC train score ..: 0.9786680541103018\n",
|
||||
"STree train score : 0.9802289281997919\n",
|
||||
"SVC test score ...: 0.9805825242718447\n",
|
||||
"STree test score .: 0.9817961165048543\n",
|
||||
"*SVC test score ..: 0.9439939825655582\n",
|
||||
"*STree test score : 0.9476832429673473\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"svc = SVC(C=7, kernel='rbf', gamma=.001, random_state=random_state)\n",
|
||||
"clf = Stree(C=17, kernel='rbf', gamma=.001, random_state=random_state)\n",
|
||||
@@ -477,19 +500,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"root feaures=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27) impurity=0.4426 counts=(array([0, 1]), array([3491, 353]))\nroot - Down, <cgaf> - Leaf class=0 belief= 0.990520 impurity=0.0773 counts=(array([0, 1]), array([3448, 33]))\nroot - Up, <cgaf> - Leaf class=1 belief= 0.881543 impurity=0.5249 counts=(array([0, 1]), array([ 43, 320]))\n\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(clf)"
|
||||
]
|
||||
@@ -503,53 +518,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"****************************************\n",
|
||||
"max_features None = 28\n",
|
||||
"Train score : 0.9846514047866806\n",
|
||||
"Test score .: 0.9866504854368932\n",
|
||||
"Took 10.18 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features auto = 5\n",
|
||||
"Train score : 0.9836108220603538\n",
|
||||
"Test score .: 0.9842233009708737\n",
|
||||
"Took 5.22 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features log2 = 4\n",
|
||||
"Train score : 0.9791883454734651\n",
|
||||
"Test score .: 0.9793689320388349\n",
|
||||
"Took 2.05 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features 7 = 7\n",
|
||||
"Train score : 0.9737252861602498\n",
|
||||
"Test score .: 0.9739077669902912\n",
|
||||
"Took 2.86 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features 0.5 = 14\n",
|
||||
"Train score : 0.981789802289282\n",
|
||||
"Test score .: 0.9824029126213593\n",
|
||||
"Took 48.35 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features 0.1 = 2\n",
|
||||
"Train score : 0.9638397502601457\n",
|
||||
"Test score .: 0.9648058252427184\n",
|
||||
"Took 0.35 seconds\n",
|
||||
"****************************************\n",
|
||||
"max_features 0.7 = 19\n",
|
||||
"Train score : 0.9841311134235172\n",
|
||||
"Test score .: 0.9860436893203883\n",
|
||||
"Took 20.89 seconds\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for max_features in [None, \"auto\", \"log2\", 7, .5, .1, .7]:\n",
|
||||
" now = time.time()\n",
|
||||
@@ -565,9 +538,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.7.6 64-bit ('general': venv)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -579,9 +552,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.4-final"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@@ -1,446 +1,362 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test Gridsearch\n",
|
||||
"with different kernels and different configurations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if STree is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Google Colab setup\n",
|
||||
"#\n",
|
||||
"#!pip install git+https://github.com/doctorado-ml/stree"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "zIHKVxthDZEa",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"from sklearn.ensemble import AdaBoostClassifier\n",
|
||||
"from sklearn.svm import LinearSVC\n",
|
||||
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
|
||||
"from stree import Stree"
|
||||
],
|
||||
"execution_count": 2,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "IEmq50QgDZEi",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"import os\n",
|
||||
"if not os.path.isfile('data/creditcard.csv'):\n",
|
||||
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
|
||||
" !tar xzf creditcard.tgz"
|
||||
],
|
||||
"execution_count": 3,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "z9Q-YUfBDZEq",
|
||||
"colab_type": "code",
|
||||
"colab": {},
|
||||
"outputId": "afc822fb-f16a-4302-8a67-2b9e2880159b",
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"random_state=1\n",
|
||||
"\n",
|
||||
"def load_creditcard(n_examples=0):\n",
|
||||
" import pandas as pd\n",
|
||||
" import numpy as np\n",
|
||||
" import random\n",
|
||||
" df = pd.read_csv('data/creditcard.csv')\n",
|
||||
" print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
|
||||
" print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
|
||||
" y = df.Class\n",
|
||||
" X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
|
||||
" if n_examples > 0:\n",
|
||||
" # Take first n_examples samples\n",
|
||||
" X = X[:n_examples, :]\n",
|
||||
" y = y[:n_examples, :]\n",
|
||||
" else:\n",
|
||||
" # Take all the positive samples with a number of random negatives\n",
|
||||
" if n_examples < 0:\n",
|
||||
" Xt = X[(y == 1).ravel()]\n",
|
||||
" yt = y[(y == 1).ravel()]\n",
|
||||
" indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
|
||||
" X = np.append(Xt, X[indices], axis=0)\n",
|
||||
" y = np.append(yt, y[indices], axis=0)\n",
|
||||
" print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
|
||||
" print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
|
||||
" print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
|
||||
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
|
||||
" return Xtrain, Xtest, ytrain, ytest\n",
|
||||
"\n",
|
||||
"data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
|
||||
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
|
||||
"# data = load_creditcard(0) # Take all the samples\n",
|
||||
"\n",
|
||||
"Xtrain = data[0]\n",
|
||||
"Xtest = data[1]\n",
|
||||
"ytrain = data[2]\n",
|
||||
"ytest = data[3]"
|
||||
],
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (1492, 28) y.shape (1492,)\nFraud: 33.177% 495\nValid: 66.823% 997\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "HmX3kR4PDZEw",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"parameters = [{\n",
|
||||
" 'base_estimator': [Stree()],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__kernel': ['linear']\n",
|
||||
"},\n",
|
||||
"{\n",
|
||||
" 'base_estimator': [Stree()],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__degree': [3, 5, 7],\n",
|
||||
" 'base_estimator__kernel': ['poly']\n",
|
||||
"},\n",
|
||||
"{\n",
|
||||
" 'base_estimator': [Stree()],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__gamma': [.1, 1, 10],\n",
|
||||
" 'base_estimator__kernel': ['rbf']\n",
|
||||
"}]"
|
||||
],
|
||||
"execution_count": 5,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'C': 1.0,\n",
|
||||
" 'criterion': 'entropy',\n",
|
||||
" 'degree': 3,\n",
|
||||
" 'gamma': 'scale',\n",
|
||||
" 'kernel': 'linear',\n",
|
||||
" 'max_depth': None,\n",
|
||||
" 'max_features': None,\n",
|
||||
" 'max_iter': 100000.0,\n",
|
||||
" 'min_samples_split': 0,\n",
|
||||
" 'random_state': None,\n",
|
||||
" 'split_criteria': 'impurity',\n",
|
||||
" 'splitter': 'random',\n",
|
||||
" 'tol': 0.0001}"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 6
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"Stree().get_params()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "CrcB8o6EDZE5",
|
||||
"colab_type": "code",
|
||||
"colab": {},
|
||||
"outputId": "7703413a-d563-4289-a13b-532f38f82762",
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"random_state=2020\n",
|
||||
"clf = AdaBoostClassifier(random_state=random_state, algorithm=\"SAMME\")\n",
|
||||
"grid = GridSearchCV(clf, parameters, verbose=10, n_jobs=-1, return_train_score=True)\n",
|
||||
"grid.fit(Xtrain, ytrain)"
|
||||
],
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Fitting 5 folds for each of 1008 candidates, totalling 5040 fits\n",
|
||||
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2 tasks | elapsed: 2.6s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 9 tasks | elapsed: 3.2s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 16 tasks | elapsed: 3.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 25 tasks | elapsed: 4.0s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 4.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 45 tasks | elapsed: 5.0s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 5.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 69 tasks | elapsed: 6.2s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 82 tasks | elapsed: 7.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 97 tasks | elapsed: 8.2s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 112 tasks | elapsed: 9.6s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 129 tasks | elapsed: 11.0s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 146 tasks | elapsed: 12.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 165 tasks | elapsed: 14.3s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 16.0s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 205 tasks | elapsed: 18.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 226 tasks | elapsed: 20.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 249 tasks | elapsed: 21.9s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 272 tasks | elapsed: 23.4s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 297 tasks | elapsed: 24.9s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 322 tasks | elapsed: 26.6s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 349 tasks | elapsed: 29.3s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 376 tasks | elapsed: 31.9s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 405 tasks | elapsed: 35.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 434 tasks | elapsed: 38.7s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 465 tasks | elapsed: 42.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 496 tasks | elapsed: 46.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 529 tasks | elapsed: 52.7s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 562 tasks | elapsed: 58.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 597 tasks | elapsed: 1.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 632 tasks | elapsed: 1.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 669 tasks | elapsed: 1.5min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 706 tasks | elapsed: 1.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 745 tasks | elapsed: 1.7min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 784 tasks | elapsed: 1.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 825 tasks | elapsed: 1.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 866 tasks | elapsed: 1.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 909 tasks | elapsed: 1.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 952 tasks | elapsed: 1.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 997 tasks | elapsed: 2.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1042 tasks | elapsed: 2.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1089 tasks | elapsed: 2.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1136 tasks | elapsed: 2.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1185 tasks | elapsed: 2.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1234 tasks | elapsed: 2.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1285 tasks | elapsed: 2.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1336 tasks | elapsed: 2.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1389 tasks | elapsed: 2.5min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1442 tasks | elapsed: 2.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1497 tasks | elapsed: 2.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1552 tasks | elapsed: 2.7min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1609 tasks | elapsed: 2.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1666 tasks | elapsed: 2.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1725 tasks | elapsed: 2.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1784 tasks | elapsed: 3.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1845 tasks | elapsed: 3.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1906 tasks | elapsed: 3.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1969 tasks | elapsed: 3.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2032 tasks | elapsed: 3.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2097 tasks | elapsed: 3.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2162 tasks | elapsed: 3.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2229 tasks | elapsed: 3.5min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2296 tasks | elapsed: 3.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2365 tasks | elapsed: 3.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2434 tasks | elapsed: 3.7min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2505 tasks | elapsed: 3.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2576 tasks | elapsed: 3.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2649 tasks | elapsed: 3.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2722 tasks | elapsed: 4.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2797 tasks | elapsed: 4.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2872 tasks | elapsed: 4.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2949 tasks | elapsed: 4.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3026 tasks | elapsed: 4.5min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3105 tasks | elapsed: 4.7min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3184 tasks | elapsed: 4.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3265 tasks | elapsed: 5.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3346 tasks | elapsed: 5.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3429 tasks | elapsed: 5.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3512 tasks | elapsed: 5.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3597 tasks | elapsed: 5.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3682 tasks | elapsed: 6.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3769 tasks | elapsed: 6.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3856 tasks | elapsed: 6.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3945 tasks | elapsed: 6.9min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4034 tasks | elapsed: 7.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4125 tasks | elapsed: 7.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4216 tasks | elapsed: 7.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4309 tasks | elapsed: 7.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4402 tasks | elapsed: 8.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4497 tasks | elapsed: 8.5min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4592 tasks | elapsed: 8.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4689 tasks | elapsed: 9.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4786 tasks | elapsed: 9.3min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4885 tasks | elapsed: 9.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4984 tasks | elapsed: 9.8min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 5040 out of 5040 | elapsed: 10.0min finished\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
|
||||
" n_jobs=-1,\n",
|
||||
" param_grid=[{'base_estimator': [Stree(C=7, max_depth=5,\n",
|
||||
" split_criteria='max_samples',\n",
|
||||
" tol=0.01)],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__kernel': ['linear'],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples',\n",
|
||||
" 'impurity'],\n",
|
||||
" 'base_e...\n",
|
||||
" 'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
|
||||
" {'base_estimator': [Stree()],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__gamma': [0.1, 1, 10],\n",
|
||||
" 'base_estimator__kernel': ['rbf'],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples',\n",
|
||||
" 'impurity'],\n",
|
||||
" 'base_estimator__tol': [0.1, 0.01],\n",
|
||||
" 'learning_rate': [0.5, 1],\n",
|
||||
" 'n_estimators': [10, 25]}],\n",
|
||||
" return_train_score=True, verbose=10)"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 7
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=2020),\n",
|
||||
" n_jobs=-1,\n",
|
||||
" param_grid={'base_estimator': [Stree(C=55, max_depth=3, tol=0.01)],\n",
|
||||
" 'base_estimator__C': [7, 55],\n",
|
||||
" 'base_estimator__kernel': ['linear', 'poly', 'rbf'],\n",
|
||||
" 'base_estimator__max_depth': [3, 5],\n",
|
||||
" 'base_estimator__tol': [0.1, 0.01],\n",
|
||||
" 'learning_rate': [0.5, 1], 'n_estimators': [10, 25]},\n",
|
||||
" return_train_score=True, verbose=10)"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "ZjX88NoYDZE8",
|
||||
"colab_type": "code",
|
||||
"colab": {},
|
||||
"outputId": "285163c8-fa33-4915-8ae7-61c4f7844344",
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"print(\"Best estimator: \", grid.best_estimator_)\n",
|
||||
"print(\"Best hyperparameters: \", grid.best_params_)\n",
|
||||
"print(\"Best accuracy: \", grid.best_score_)"
|
||||
],
|
||||
"execution_count": 8,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n base_estimator=Stree(C=7, max_depth=5,\n split_criteria='max_samples',\n tol=0.01),\n learning_rate=0.5, n_estimators=25, random_state=2020)\nBest hyperparameters: {'base_estimator': Stree(C=7, max_depth=5, split_criteria='max_samples', tol=0.01), 'base_estimator__C': 7, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 5, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\nBest accuracy: 0.9549825174825175\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n",
|
||||
" base_estimator=Stree(C=55, max_depth=3, tol=0.01),\n",
|
||||
" learning_rate=0.5, n_estimators=25, random_state=2020)\n",
|
||||
"\n",
|
||||
"Best hyperparameters: {'base_estimator': Stree(C=55, max_depth=3, tol=0.01), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 3, 'base_estimator__tol': 0.01, 'learning_rate': 0.5, 'n_estimators': 25}\n",
|
||||
"\n",
|
||||
"Best accuracy: 0.9559440559440558"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"0.9511547662863451"
|
||||
],
|
||||
"cell_type": "markdown",
|
||||
"metadata": {}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.4-final"
|
||||
},
|
||||
"orig_nbformat": 2,
|
||||
"kernelspec": {
|
||||
"name": "python38464bitgeneralvenv77203c0a6afd4428bd66253ef62753dc",
|
||||
"display_name": "Python 3.8.4 64-bit ('general': venv)"
|
||||
},
|
||||
"colab": {
|
||||
"name": "gridsearch.ipynb",
|
||||
"provenance": []
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test Gridsearch\n",
|
||||
"with different kernels and different configurations"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"Uncomment the next cell if STree is not already installed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Google Colab setup\n",
|
||||
"#\n",
|
||||
"#!pip install git+https://github.com/doctorado-ml/stree"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "zIHKVxthDZEa"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import AdaBoostClassifier\n",
|
||||
"from sklearn.svm import LinearSVC\n",
|
||||
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
|
||||
"from stree import Stree"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "IEmq50QgDZEi"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"if not os.path.isfile('data/creditcard.csv'):\n",
|
||||
" !wget --no-check-certificate --content-disposition http://nube.jccm.es/index.php/s/Zs7SYtZQJ3RQ2H2/download\n",
|
||||
" !tar xzf creditcard.tgz"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "z9Q-YUfBDZEq",
|
||||
"outputId": "afc822fb-f16a-4302-8a67-2b9e2880159b",
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fraud: 0.173% 492\n",
|
||||
"Valid: 99.827% 284315\n",
|
||||
"X.shape (1492, 28) y.shape (1492,)\n",
|
||||
"Fraud: 33.177% 495\n",
|
||||
"Valid: 66.823% 997\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"random_state=1\n",
|
||||
"\n",
|
||||
"def load_creditcard(n_examples=0):\n",
|
||||
" import pandas as pd\n",
|
||||
" import numpy as np\n",
|
||||
" import random\n",
|
||||
" df = pd.read_csv('data/creditcard.csv')\n",
|
||||
" print(\"Fraud: {0:.3f}% {1}\".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))\n",
|
||||
" print(\"Valid: {0:.3f}% {1}\".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))\n",
|
||||
" y = df.Class\n",
|
||||
" X = df.drop(['Class', 'Time', 'Amount'], axis=1).values\n",
|
||||
" if n_examples > 0:\n",
|
||||
" # Take first n_examples samples\n",
|
||||
" X = X[:n_examples, :]\n",
|
||||
" y = y[:n_examples, :]\n",
|
||||
" else:\n",
|
||||
" # Take all the positive samples with a number of random negatives\n",
|
||||
" if n_examples < 0:\n",
|
||||
" Xt = X[(y == 1).ravel()]\n",
|
||||
" yt = y[(y == 1).ravel()]\n",
|
||||
" indices = random.sample(range(X.shape[0]), -1 * n_examples)\n",
|
||||
" X = np.append(Xt, X[indices], axis=0)\n",
|
||||
" y = np.append(yt, y[indices], axis=0)\n",
|
||||
" print(\"X.shape\", X.shape, \" y.shape\", y.shape)\n",
|
||||
" print(\"Fraud: {0:.3f}% {1}\".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))\n",
|
||||
" print(\"Valid: {0:.3f}% {1}\".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))\n",
|
||||
" Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)\n",
|
||||
" return Xtrain, Xtest, ytrain, ytest\n",
|
||||
"\n",
|
||||
"data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
|
||||
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
|
||||
"# data = load_creditcard(0) # Take all the samples\n",
|
||||
"\n",
|
||||
"Xtrain = data[0]\n",
|
||||
"Xtest = data[1]\n",
|
||||
"ytrain = data[2]\n",
|
||||
"ytest = data[3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "HmX3kR4PDZEw"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parameters = [{\n",
|
||||
" 'base_estimator': [Stree(random_state=random_state)],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__kernel': ['linear']\n",
|
||||
"},\n",
|
||||
"{\n",
|
||||
" 'base_estimator': [Stree(random_state=random_state)],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__degree': [3, 5, 7],\n",
|
||||
" 'base_estimator__kernel': ['poly']\n",
|
||||
"},\n",
|
||||
"{\n",
|
||||
" 'base_estimator': [Stree(random_state=random_state)],\n",
|
||||
" 'n_estimators': [10, 25],\n",
|
||||
" 'learning_rate': [.5, 1],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples', 'impurity'],\n",
|
||||
" 'base_estimator__tol': [.1, 1e-02],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__gamma': [.1, 1, 10],\n",
|
||||
" 'base_estimator__kernel': ['rbf']\n",
|
||||
"}]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'C': 1.0,\n",
|
||||
" 'criterion': 'entropy',\n",
|
||||
" 'degree': 3,\n",
|
||||
" 'gamma': 'scale',\n",
|
||||
" 'kernel': 'linear',\n",
|
||||
" 'max_depth': None,\n",
|
||||
" 'max_features': None,\n",
|
||||
" 'max_iter': 100000.0,\n",
|
||||
" 'min_samples_split': 0,\n",
|
||||
" 'random_state': None,\n",
|
||||
" 'split_criteria': 'impurity',\n",
|
||||
" 'splitter': 'random',\n",
|
||||
" 'tol': 0.0001}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"Stree().get_params()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "CrcB8o6EDZE5",
|
||||
"outputId": "7703413a-d563-4289-a13b-532f38f82762",
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fitting 5 folds for each of 1008 candidates, totalling 5040 fits\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 40 tasks | elapsed: 1.6s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 130 tasks | elapsed: 3.1s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 256 tasks | elapsed: 5.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 418 tasks | elapsed: 9.3s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 616 tasks | elapsed: 18.6s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 850 tasks | elapsed: 28.2s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1120 tasks | elapsed: 35.4s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1426 tasks | elapsed: 43.5s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 1768 tasks | elapsed: 51.3s\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2146 tasks | elapsed: 1.0min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 2560 tasks | elapsed: 1.2min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3010 tasks | elapsed: 1.4min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 3496 tasks | elapsed: 1.7min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4018 tasks | elapsed: 2.1min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 4576 tasks | elapsed: 2.6min\n",
|
||||
"[Parallel(n_jobs=-1)]: Done 5040 out of 5040 | elapsed: 2.9min finished\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"GridSearchCV(estimator=AdaBoostClassifier(algorithm='SAMME', random_state=1),\n",
|
||||
" n_jobs=-1,\n",
|
||||
" param_grid=[{'base_estimator': [Stree(C=55, max_depth=7,\n",
|
||||
" random_state=1,\n",
|
||||
" split_criteria='max_samples',\n",
|
||||
" tol=0.1)],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__kernel': ['linear'],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples',\n",
|
||||
" 'impuri...\n",
|
||||
" {'base_estimator': [Stree(random_state=1)],\n",
|
||||
" 'base_estimator__C': [1, 7, 55],\n",
|
||||
" 'base_estimator__gamma': [0.1, 1, 10],\n",
|
||||
" 'base_estimator__kernel': ['rbf'],\n",
|
||||
" 'base_estimator__max_depth': [3, 5, 7],\n",
|
||||
" 'base_estimator__split_criteria': ['max_samples',\n",
|
||||
" 'impurity'],\n",
|
||||
" 'base_estimator__tol': [0.1, 0.01],\n",
|
||||
" 'learning_rate': [0.5, 1],\n",
|
||||
" 'n_estimators': [10, 25]}],\n",
|
||||
" return_train_score=True, verbose=5)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"clf = AdaBoostClassifier(random_state=random_state, algorithm=\"SAMME\")\n",
|
||||
"grid = GridSearchCV(clf, parameters, verbose=5, n_jobs=-1, return_train_score=True)\n",
|
||||
"grid.fit(Xtrain, ytrain)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "ZjX88NoYDZE8",
|
||||
"outputId": "285163c8-fa33-4915-8ae7-61c4f7844344",
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n",
|
||||
" base_estimator=Stree(C=55, max_depth=7, random_state=1,\n",
|
||||
" split_criteria='max_samples', tol=0.1),\n",
|
||||
" learning_rate=0.5, n_estimators=25, random_state=1)\n",
|
||||
"Best hyperparameters: {'base_estimator': Stree(C=55, max_depth=7, random_state=1, split_criteria='max_samples', tol=0.1), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 7, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.1, 'learning_rate': 0.5, 'n_estimators': 25}\n",
|
||||
"Best accuracy: 0.9511777695988222\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Best estimator: \", grid.best_estimator_)\n",
|
||||
"print(\"Best hyperparameters: \", grid.best_params_)\n",
|
||||
"print(\"Best accuracy: \", grid.best_score_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Best estimator: AdaBoostClassifier(algorithm='SAMME',\n",
|
||||
" base_estimator=Stree(C=55, max_depth=7, random_state=1,\n",
|
||||
" split_criteria='max_samples', tol=0.1),\n",
|
||||
" learning_rate=0.5, n_estimators=25, random_state=1)\n",
|
||||
"Best hyperparameters: {'base_estimator': Stree(C=55, max_depth=7, random_state=1, split_criteria='max_samples', tol=0.1), 'base_estimator__C': 55, 'base_estimator__kernel': 'linear', 'base_estimator__max_depth': 7, 'base_estimator__split_criteria': 'max_samples', 'base_estimator__tol': 0.1, 'learning_rate': 0.5, 'n_estimators': 25}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Best accuracy: 0.9511777695988222"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "gridsearch.ipynb",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
Reference in New Issue
Block a user