Finaliza generación de dataset y añade eda

This commit is contained in:
2025-02-17 21:40:44 +01:00
parent ebdfd3740b
commit 95c78059f5
5 changed files with 715721 additions and 11 deletions

View File

@@ -16,6 +16,8 @@
},
"outputs": [],
"source": [
"import os\n",
"import matplotlib\n",
"import pandas as pd\n",
"from datetime import datetime, timezone\n",
"import zoneinfo\n"
@@ -184,7 +186,7 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2025-01-27 17:31:42.443204608+01:00</td>\n",
" <td>2025-01-27 17:31:42.443204641+01:00</td>\n",
" <td>0.0</td>\n",
" <td></td>\n",
" </tr>\n",
@@ -208,7 +210,7 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2025-01-27 17:31:42.443369984+01:00</td>\n",
" <td>2025-01-27 17:31:42.443370104+01:00</td>\n",
" <td>0.0</td>\n",
" <td></td>\n",
" </tr>\n",
@@ -232,7 +234,7 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2025-01-27 17:31:42.443432704+01:00</td>\n",
" <td>2025-01-27 17:31:42.443432808+01:00</td>\n",
" <td>0.0</td>\n",
" <td></td>\n",
" </tr>\n",
@@ -256,7 +258,7 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2025-01-27 17:31:42.443464704+01:00</td>\n",
" <td>2025-01-27 17:31:42.443464756+01:00</td>\n",
" <td>0.0</td>\n",
" <td></td>\n",
" </tr>\n",
@@ -280,7 +282,7 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2025-01-27 17:31:42.443489536+01:00</td>\n",
" <td>2025-01-27 17:31:42.443489552+01:00</td>\n",
" <td>0.0</td>\n",
" <td></td>\n",
" </tr>\n",
@@ -319,11 +321,11 @@
"4 112.0 120.0 0.0 \n",
"\n",
" Analog Channel 1 Analog Channel 2 Timestamp \\\n",
"0 0.0 0.0 2025-01-27 17:31:42.443204608+01:00 \n",
"1 0.0 0.0 2025-01-27 17:31:42.443369984+01:00 \n",
"2 0.0 0.0 2025-01-27 17:31:42.443432704+01:00 \n",
"3 0.0 0.0 2025-01-27 17:31:42.443464704+01:00 \n",
"4 0.0 0.0 2025-01-27 17:31:42.443489536+01:00 \n",
"0 0.0 0.0 2025-01-27 17:31:42.443204641+01:00 \n",
"1 0.0 0.0 2025-01-27 17:31:42.443370104+01:00 \n",
"2 0.0 0.0 2025-01-27 17:31:42.443432808+01:00 \n",
"3 0.0 0.0 2025-01-27 17:31:42.443464756+01:00 \n",
"4 0.0 0.0 2025-01-27 17:31:42.443489552+01:00 \n",
"\n",
" Marker Channel Movement \n",
"0 0.0 \n",
@@ -564,6 +566,244 @@
"source": [
"data_df.value_counts(label_name)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "1dfbc859",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Movement\n",
"none 73413\n",
"i 49504\n",
"d 41161\n",
"Name: count, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Clean dataset\n",
"data_df.dropna(inplace=True)\n",
"data_df = data_df[data_df.Movement != \"\"] # Remove empty labels\n",
"data_df = data_df[data_df.Movement != \"c\"] # Remove calibration\n",
"data_df.reset_index(drop=True, inplace=True)\n",
"data_df[\"Movement\"] = data_df[\"Movement\"].astype(\"category\")\n",
"data_df.value_counts(label_name)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "aa4aa1a2",
"metadata": {},
"outputs": [],
"source": [
"data_df.to_csv(os.path.join(\"csv\", \"openbci.csv\"), index=False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ef4a6478",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "20d636799a67496f8cdf8619162fef24",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:57: UserWarning: Discarding nonzero nanoseconds in conversion.\n",
" \"min\": pd.Timestamp.to_pydatetime(series.min()),\n",
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:58: UserWarning: Discarding nonzero nanoseconds in conversion.\n",
" \"max\": pd.Timestamp.to_pydatetime(series.max()),\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "35e53f0f76e14e7f923b1055285670d3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6bbac27856134eef912956682f6cf5f3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Render HTML: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d2697cde02024e359b2e9df92b78179a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Export report to file: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ac17608271ce4757b0156753b390fc28",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:57: UserWarning: Discarding nonzero nanoseconds in conversion.\n",
" \"min\": pd.Timestamp.to_pydatetime(series.min()),\n",
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:58: UserWarning: Discarding nonzero nanoseconds in conversion.\n",
" \"max\": pd.Timestamp.to_pydatetime(series.max()),\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f4a5bd4acb4c406492ad4fab3aed97cd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e8487fd4f12a493497e4b86b567149d8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Render HTML: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2c62e1d308d9498f8bca16b58f44204b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Export report to file: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Generate analysis info into html folder\n",
"from ydata_profiling import ProfileReport\n",
"\n",
"profile = ProfileReport(data_df, title=\"OpenBCI Data Profiling Report\", explorative=True)\n",
"profile.to_file(os.path.join(\"html\", \"openbci_report.html\"))\n",
"report = ProfileReport(\n",
" data_df,\n",
" title=\"origin file OpenBCI\",\n",
" html={\"style\": {\"full_width\": True}},\n",
" missing_diagrams={\n",
" \"heatmap\": False,\n",
" \"dendrogram\": False,\n",
" \"Count\": False,\n",
" \"bar\": False,\n",
" \"matrix\": False,\n",
" \"sparkline\": False,\n",
" \"table\": False,\n",
" },\n",
" )\n",
"report.to_file(os.path.join(\"html\", \"OpenBCI_report.html\"))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "09039cdf",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "985c493925d6403684ddaba1b2280a5e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" | | [ 0%] 00:00 -> (? left)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Report html/OpenBCI_report_sv.html was generated.\n"
]
}
],
"source": [
"import sweetviz as sv\n",
"report = sv.analyze(data_df)\n",
"report.show_html(os.path.join(\"html\", \"OpenBCI_report_sv.html\"), open_browser=False, layout=\"widescreen\")"
]
}
],
"metadata": {
@@ -582,7 +822,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.9"
}
},
"nbformat": 4,

164079
csv/openbci.csv Normal file

File diff suppressed because it is too large Load Diff

266143
html/OpenBCI_report.html Normal file

File diff suppressed because one or more lines are too long

17442
html/OpenBCI_report_sv.html Normal file

File diff suppressed because one or more lines are too long

267806
html/openbci_report.html Normal file

File diff suppressed because one or more lines are too long