From ee3b3600f67b319db30ff9cbf69045e3aa728726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 18 Feb 2025 00:15:12 +0100 Subject: [PATCH] =?UTF-8?q?A=C3=B1ade=20Classification=20Report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- classifier.ipynb | 69 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/classifier.ipynb b/classifier.ipynb index 68581c6..fa91c7e 100644 --- a/classifier.ipynb +++ b/classifier.ipynb @@ -8,9 +8,8 @@ "source": [ "import os\n", "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.model_selection import cross_validate, StratifiedKFold\n", - "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", - "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import cross_validate, StratifiedKFold, train_test_split\n", + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report\n", "import pandas as pd\n", "import numpy as np\n", "random_state = 19\n", @@ -34,27 +33,43 @@ "name": "stdout", "output_type": "stream", "text": [ + "Column Timestamp is not useful, dropping it\n", "Column Analog Channel 0 has only one unique value, dropping it\n", "Column Analog Channel 1 has only one unique value, dropping it\n", "Column Analog Channel 2 has only one unique value, dropping it\n", "Column Marker Channel has only one unique value, dropping it\n", + "----------------------------------------------------------------------\n", "X shape: (164078, 24)\n", - "y shape: (164078,)\n" + "y shape: (164078,)\n", + "Labels distribution\n", + "===================\n", + "Movement\n", + "none 44.742744\n", + "i 30.171016\n", + "d 25.086239\n", + "Name: proportion, dtype: float64\n" ] } ], "source": [ "dataset = pd.read_csv(os.path.join(\"csv\", 'openbci.csv'))\n", "# Clean the dataset\n", + "print(\"Column Timestamp is not useful, dropping it\")\n", "dataset = dataset.drop(columns=['Timestamp'])\n", "for column in dataset.columns:\n", " if dataset[column].unique().size == 1:\n", " print(f\"Column {column} has only one unique value, dropping it\")\n", " dataset = dataset.drop(columns=[column])\n", + "print(\"-\"*70)\n", + "# Split the dataset\n", "X = dataset.iloc[:, :-1].values\n", "y = dataset.iloc[:,-1].values\n", + "# Show the characteristics of the dataset\n", "print(f\"X shape: {X.shape}\")\n", - "print(f\"y shape: {y.shape}\")" + "print(f\"y shape: {y.shape}\")\n", + "print(\"Labels distribution\")\n", + "print(\"===================\")\n", + "print(dataset.Movement.value_counts(normalize=True)*100)" ] }, { @@ -79,7 +94,6 @@ ], "source": [ "clf = RandomForestClassifier(n_estimators=estimators, n_jobs=-1, random_state=random_state)\n", - "clf.fit(X,y)\n", "cv = StratifiedKFold(n_splits=splits, shuffle=True, random_state=random_state)\n", "scores = cross_validate(clf, X, y, scoring=\"accuracy\", cv=cv, n_jobs=-1, return_train_score=True)\n", "print(f\"Accuracy: {np.mean(scores['test_score'])} (+/- {np.std(scores['test_score'])})\")" @@ -89,7 +103,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Confusion Matrix" + "# Classification Report" ] }, { @@ -101,16 +115,45 @@ "name": "stdout", "output_type": "stream", "text": [ - "Accuracy on test set: 0.9939358849341785\n" + " precision recall f1-score support\n", + "\n", + " d 0.9923609 0.9941691 0.9932642 8232\n", + " i 0.9961570 0.9948490 0.9955026 9901\n", + " none 0.9933247 0.9931894 0.9932570 14683\n", + "\n", + " accuracy 0.9939359 32816\n", + " macro avg 0.9939475 0.9940692 0.9940079 32816\n", + "weighted avg 0.9939374 0.9939359 0.9939363 32816\n", + "\n" ] - }, + } + ], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=random_state, stratify=y)\n", + "clf.fit(X_train, y_train)\n", + "y_pred = clf.predict(X_test)\n", + "print(classification_report(y_test, y_pred, digits=7))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, @@ -126,10 +169,6 @@ } ], "source": [ - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=random_state, stratify=y)\n", - "clf.fit(X_train, y_train)\n", - "y_pred = clf.predict(X_test)\n", - "print(f\"Accuracy on test set: {np.mean(y_test == y_pred)}\")\n", "cm = confusion_matrix(y_test, y_pred)\n", "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)\n", "disp.plot()"