mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 16:22:00 +00:00
fix flake error msg
Fix wine_iris notebook
This commit is contained in:
parent
19543b48fa
commit
e1bfa9f9bf
177
notebooks/wine_iris.ipynb
Normal file
177
notebooks/wine_iris.ipynb
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import datetime, time\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.model_selection import train_test_split, cross_validate\n",
|
||||||
|
"from sklearn import tree\n",
|
||||||
|
"from sklearn.metrics import classification_report, confusion_matrix, f1_score\n",
|
||||||
|
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n",
|
||||||
|
"from stree import Stree\n",
|
||||||
|
"from odte import Odte\n",
|
||||||
|
"\n",
|
||||||
|
"random_state = 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import load_wine\n",
|
||||||
|
"X, y = load_wine(return_X_y=True)\n",
|
||||||
|
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"n_estimators = 20\n",
|
||||||
|
"clf = {}\n",
|
||||||
|
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=5)\n",
|
||||||
|
"clf[\"stree\"].set_params(**dict(splitter=\"best\", kernel=\"linear\", max_features=\"auto\"))\n",
|
||||||
|
"clf[\"odte\"] = Odte(base_estimator=clf[\"stree\"], random_state=random_state, n_estimators=n_estimators, max_features=.8)\n",
|
||||||
|
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||||
|
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": "****************************** Results for wine ******************************\nTraining stree...\nScore: 94.444 in 0.17 seconds\nTraining odte...\nScore: 97.222 in 2.70 seconds\nTraining adaboost...\nScore: 94.444 in 0.60 seconds\nTraining bagging...\nScore: 100.000 in 2.55 seconds\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(\"*\"*30,\"Results for wine\", \"*\"*30)\n",
|
||||||
|
"for clf_type, item in clf.items():\n",
|
||||||
|
" print(f\"Training {clf_type}...\")\n",
|
||||||
|
" now = time.time()\n",
|
||||||
|
" item.fit(Xtrain, ytrain)\n",
|
||||||
|
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import load_iris\n",
|
||||||
|
"X, y = load_iris(return_X_y=True)\n",
|
||||||
|
"Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=.2, random_state=random_state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"n_estimators = 10\n",
|
||||||
|
"clf = {}\n",
|
||||||
|
"clf[\"stree\"] = Stree(random_state=random_state, max_depth=3)\n",
|
||||||
|
"clf[\"odte\"] = Odte(random_state=random_state, n_estimators=n_estimators, max_features=1.0)\n",
|
||||||
|
"clf[\"adaboost\"] = AdaBoostClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators, random_state=random_state, algorithm=\"SAMME\")\n",
|
||||||
|
"clf[\"bagging\"] = BaggingClassifier(base_estimator=clf[\"stree\"], n_estimators=n_estimators)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": "****************************** Results for iris ******************************\nTraining stree...\nScore: 100.000 in 0.02 seconds\nTraining odte...\nScore: 93.333 in 0.12 seconds\nTraining adaboost...\nScore: 83.333 in 0.01 seconds\nTraining bagging...\nScore: 100.000 in 0.11 seconds\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(\"*\"*30,\"Results for iris\", \"*\"*30)\n",
|
||||||
|
"for clf_type, item in clf.items():\n",
|
||||||
|
" print(f\"Training {clf_type}...\")\n",
|
||||||
|
" now = time.time()\n",
|
||||||
|
" item.fit(Xtrain, ytrain)\n",
|
||||||
|
" print(f\"Score: {item.score(Xtest, ytest) * 100:.3f} in {time.time()-now:.2f} seconds\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": "{'fit_time': array([0.15752316, 0.18354201, 0.14742589, 0.13827896, 0.14534211]), 'score_time': array([0.00940681, 0.01064587, 0.01085019, 0.00925183, 0.00878191]), 'test_score': array([0.8 , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.875 , 0.95 , 0.98333333, 0.98333333, 0.95833333])}\n91.333 +- 0.058\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"cross = cross_validate(estimator=clf[\"odte\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
|
||||||
|
"print(cross)\n",
|
||||||
|
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": "{'fit_time': array([0.01752877, 0.03304005, 0.03542018, 0.03398919, 0.03945518]), 'score_time': array([0.00135112, 0.00164104, 0.00159597, 0.0018959 , 0.00189495]), 'test_score': array([1. , 0.93333333, 0.93333333, 0.93333333, 0.96666667]), 'train_score': array([0.93333333, 0.96666667, 0.96666667, 0.96666667, 0.95 ])}\n95.333 +- 0.027\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"cross = cross_validate(estimator=clf[\"adaboost\"], X=X, y=y, n_jobs=-1, return_train_score=True)\n",
|
||||||
|
"print(cross)\n",
|
||||||
|
"print(f\"{np.mean(cross['test_score'])*100:.3f} +- {np.std(cross['test_score']):.3f}\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.6-final"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 2,
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python37664bitgeneralvenvfbd0a23e74cf4e778460f5ffc6761f39",
|
||||||
|
"display_name": "Python 3.7.6 64-bit ('general': venv)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -192,10 +192,12 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
|||||||
# Input validation
|
# Input validation
|
||||||
X = check_array(X)
|
X = check_array(X)
|
||||||
if self.n_features_ != X.shape[1]:
|
if self.n_features_ != X.shape[1]:
|
||||||
raise ValueError("Number of features of the model must "
|
raise ValueError(
|
||||||
|
"Number of features of the model must "
|
||||||
"match the input. Model n_features is {0} and "
|
"match the input. Model n_features is {0} and "
|
||||||
"input n_features is {1}."
|
"input n_features is {1}."
|
||||||
"".format(self.n_features_, X.shape[1]))
|
"".format(self.n_features_, X.shape[1])
|
||||||
|
)
|
||||||
for tree, features in zip(self.estimators_, self.subspaces_):
|
for tree, features in zip(self.estimators_, self.subspaces_):
|
||||||
n_samples = X.shape[0]
|
n_samples = X.shape[0]
|
||||||
result = np.zeros((n_samples, self.n_classes_))
|
result = np.zeros((n_samples, self.n_classes_))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user