Añade Classification Report
This commit is contained in:
@@ -8,9 +8,8 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.model_selection import cross_validate, StratifiedKFold\n",
|
||||
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.model_selection import cross_validate, StratifiedKFold, train_test_split\n",
|
||||
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"random_state = 19\n",
|
||||
@@ -34,27 +33,43 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Column Timestamp is not useful, dropping it\n",
|
||||
"Column Analog Channel 0 has only one unique value, dropping it\n",
|
||||
"Column Analog Channel 1 has only one unique value, dropping it\n",
|
||||
"Column Analog Channel 2 has only one unique value, dropping it\n",
|
||||
"Column Marker Channel has only one unique value, dropping it\n",
|
||||
"----------------------------------------------------------------------\n",
|
||||
"X shape: (164078, 24)\n",
|
||||
"y shape: (164078,)\n"
|
||||
"y shape: (164078,)\n",
|
||||
"Labels distribution\n",
|
||||
"===================\n",
|
||||
"Movement\n",
|
||||
"none 44.742744\n",
|
||||
"i 30.171016\n",
|
||||
"d 25.086239\n",
|
||||
"Name: proportion, dtype: float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(os.path.join(\"csv\", 'openbci.csv'))\n",
|
||||
"# Clean the dataset\n",
|
||||
"print(\"Column Timestamp is not useful, dropping it\")\n",
|
||||
"dataset = dataset.drop(columns=['Timestamp'])\n",
|
||||
"for column in dataset.columns:\n",
|
||||
" if dataset[column].unique().size == 1:\n",
|
||||
" print(f\"Column {column} has only one unique value, dropping it\")\n",
|
||||
" dataset = dataset.drop(columns=[column])\n",
|
||||
"print(\"-\"*70)\n",
|
||||
"# Split the dataset\n",
|
||||
"X = dataset.iloc[:, :-1].values\n",
|
||||
"y = dataset.iloc[:,-1].values\n",
|
||||
"# Show the characteristics of the dataset\n",
|
||||
"print(f\"X shape: {X.shape}\")\n",
|
||||
"print(f\"y shape: {y.shape}\")"
|
||||
"print(f\"y shape: {y.shape}\")\n",
|
||||
"print(\"Labels distribution\")\n",
|
||||
"print(\"===================\")\n",
|
||||
"print(dataset.Movement.value_counts(normalize=True)*100)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -79,7 +94,6 @@
|
||||
],
|
||||
"source": [
|
||||
"clf = RandomForestClassifier(n_estimators=estimators, n_jobs=-1, random_state=random_state)\n",
|
||||
"clf.fit(X,y)\n",
|
||||
"cv = StratifiedKFold(n_splits=splits, shuffle=True, random_state=random_state)\n",
|
||||
"scores = cross_validate(clf, X, y, scoring=\"accuracy\", cv=cv, n_jobs=-1, return_train_score=True)\n",
|
||||
"print(f\"Accuracy: {np.mean(scores['test_score'])} (+/- {np.std(scores['test_score'])})\")"
|
||||
@@ -89,7 +103,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Confusion Matrix"
|
||||
"# Classification Report"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -101,16 +115,45 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy on test set: 0.9939358849341785\n"
|
||||
" precision recall f1-score support\n",
|
||||
"\n",
|
||||
" d 0.9923609 0.9941691 0.9932642 8232\n",
|
||||
" i 0.9961570 0.9948490 0.9955026 9901\n",
|
||||
" none 0.9933247 0.9931894 0.9932570 14683\n",
|
||||
"\n",
|
||||
" accuracy 0.9939359 32816\n",
|
||||
" macro avg 0.9939475 0.9940692 0.9940079 32816\n",
|
||||
"weighted avg 0.9939374 0.9939359 0.9939363 32816\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=random_state, stratify=y)\n",
|
||||
"clf.fit(X_train, y_train)\n",
|
||||
"y_pred = clf.predict(X_test)\n",
|
||||
"print(classification_report(y_test, y_pred, digits=7))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Confusion Matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7f510ffb1dd0>"
|
||||
"<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x7f62df7a5cd0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@@ -126,10 +169,6 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=random_state, stratify=y)\n",
|
||||
"clf.fit(X_train, y_train)\n",
|
||||
"y_pred = clf.predict(X_test)\n",
|
||||
"print(f\"Accuracy on test set: {np.mean(y_test == y_pred)}\")\n",
|
||||
"cm = confusion_matrix(y_test, y_pred)\n",
|
||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)\n",
|
||||
"disp.plot()"
|
||||
|
Reference in New Issue
Block a user