Add test for getting 3 feature_sets in Splitter

Add ensemble notebook
This commit is contained in:
2020-06-28 02:45:08 +02:00
parent 5e3a8e3ec5
commit be552fdd6c
2 changed files with 61 additions and 15 deletions

View File

@@ -34,11 +34,8 @@
"outputs": [],
"source": [
"import time\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.svm import LinearSVC, SVC\n",
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"from stree import Stree"
]
},
@@ -64,7 +61,7 @@
{
"output_type": "stream",
"name": "stdout",
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (284807, 28) y.shape (284807,)\nFraud: 0.173% 492\nValid: 99.827% 284315\n"
"text": "Fraud: 0.173% 492\nValid: 99.827% 284315\nX.shape (100492, 28) y.shape (100492,)\nFraud: 0.644% 647\nValid: 99.356% 99845\n"
}
],
"source": [
@@ -99,8 +96,8 @@
"\n",
"# data = load_creditcard(-1000) # Take all true samples + 1000 of the others\n",
"# data = load_creditcard(5000) # Take the first 5000 samples\n",
"data = load_creditcard(0) # Take all the samples\n",
"# data = load_creditcard(-100000)\n",
"# data = load_creditcard(0) # Take all the samples\n",
"data = load_creditcard(-100000)\n",
"\n",
"Xtrain = data[0]\n",
"Xtest = data[1]\n",
@@ -119,7 +116,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## STree alone on the whole dataset and linear kernel"
"## STree alone with 100,000 samples and linear kernel"
]
},
{
@@ -132,7 +129,7 @@
{
"output_type": "stream",
"name": "stdout",
"text": "Score Train: 0.9994632932726069\nScore Test: 0.9994967405170698\nTook 140.74 seconds\n"
"text": "Score Train: 0.9985784146480154\nScore Test: 0.9981093273185617\nTook 73.27 seconds\n"
}
],
"source": [
@@ -148,7 +145,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Different kernels with different configurations"
"## Adaboost"
]
},
{
@@ -172,13 +169,54 @@
{
"output_type": "stream",
"name": "stdout",
"text": "Kernel: linear\tTime: 307.83 seconds\tScore Train: 0.9991924\tScore Test: 0.9994616\nKernel: rbf\tTime: 29.22 seconds\tScore Train: 0.9982745\tScore Test: 0.9982679\nKernel: poly\tTime: 207.48 seconds\tScore Train: 0.9988062\tScore Test: 0.9990403\n"
"text": "Kernel: linear\tTime: 93.78 seconds\tScore Train: 0.9983083\tScore Test: 0.9983083\nKernel: rbf\tTime: 18.32 seconds\tScore Train: 0.9935602\tScore Test: 0.9935651\nKernel: poly\tTime: 69.68 seconds\tScore Train: 0.9973132\tScore Test: 0.9972801\n"
}
],
"source": [
"for kernel in ['linear', 'rbf', 'poly']:\n",
" now = time.time()\n",
" clf = AdaBoostClassifier(Stree(C=7, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
" clf = AdaBoostClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), algorithm=\"SAMME\", n_estimators=n_estimators, random_state=random_state)\n",
" clf.fit(Xtrain, ytrain)\n",
" score_train = clf.score(Xtrain, ytrain)\n",
" score_test = clf.score(Xtest, ytest)\n",
" print(f\"Kernel: {kernel}\\tTime: {time.time() - now:.2f} seconds\\tScore Train: {score_train:.7f}\\tScore Test: {score_test:.7f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Bagging"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"n_estimators = 10\n",
"C = 7\n",
"max_depth = 3"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Kernel: linear\tTime: 387.06 seconds\tScore Train: 0.9985784\tScore Test: 0.9981093\nKernel: rbf\tTime: 144.00 seconds\tScore Train: 0.9992750\tScore Test: 0.9983415\nKernel: poly\tTime: 101.78 seconds\tScore Train: 0.9992466\tScore Test: 0.9981757\n"
}
],
"source": [
"for kernel in ['linear', 'rbf', 'poly']:\n",
" now = time.time()\n",
" clf = BaggingClassifier(base_estimator=Stree(C=C, kernel=kernel, max_depth=max_depth, random_state=random_state), n_estimators=n_estimators, random_state=random_state)\n",
" clf.fit(Xtrain, ytrain)\n",
" score_train = clf.score(Xtrain, ytrain)\n",
" score_test = clf.score(Xtest, ytest)\n",
@@ -201,7 +239,7 @@
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
"name": "python37664bitgeneralvenve3128601eb614c5da59c5055670b6040",
"display_name": "Python 3.7.6 64-bit ('general': venv)"
}
},