This commit is contained in:
2025-02-13 22:12:03 +01:00
parent 87f3c82451
commit ebdfd3740b
3 changed files with 994 additions and 1797 deletions

View File

@@ -0,0 +1,538 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "5fe31b7f-2243-4587-9740-46164846cff3",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:41.300262Z",
"start_time": "2025-02-13T20:09:41.297886Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cbb445af-6351-43b0-8142-f9aa35388e6e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:41.308855Z",
"start_time": "2025-02-13T20:09:41.306360Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"logs_path = \"join/mergedLog_output.txt\"\n",
"openbci_path = \"join/mergedData_output.txt\"\n",
"output_csv_path = \"OpenBCI_with_labels.csv\"\n",
"label_name = \"Movement\"\n",
"timestamp_col=\"Timestamp\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f613abe9-8d5b-444c-af3c-b0ab826ed3f4",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:43.026443Z",
"start_time": "2025-02-13T20:09:41.323419Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# Leer los logs y extraer tiempos y etiquetas\n",
"with open(logs_path, \"r\") as logs_file:\n",
" logs_data = [log.strip().split(\",\") for log in logs_file.readlines()]\n",
"with open(openbci_path, \"r\") as openbci_file:\n",
" lineas = [linea.strip().split(\",\") for linea in openbci_file.readlines() if not linea.strip().startswith('%')]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d8eba4ab-63ee-40f5-b30d-f8ae58d5aa79",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:43.043659Z",
"start_time": "2025-02-13T20:09:43.039923Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# Utility functions\n",
"def remove_columns(df, columns):\n",
" df.drop(columns, axis=\"columns\", inplace=True)\n",
"def set_type_columns(df, types):\n",
" for key, value in types.items():\n",
" df[key].astype(value)\n",
"def label_sample(data, start_time, end_time, label):\n",
" mask = (data[timestamp_col] >= start_time) & (data[timestamp_col] < end_time)\n",
" if data.loc[mask].empty:\n",
" print(f\"*Error, no hay registros en el intervalo [{start_time}, {end_time})\")\n",
" return False\n",
" else:\n",
" data.loc[mask, label_name] = label\n",
" return True"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1ecf8906-828c-4e13-a267-2b96f61e2c2e",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.253744Z",
"start_time": "2025-02-13T20:09:43.057864Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"#Convertir los datos en un DataFrame\n",
"columns = \"Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3, EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7, EXG Channel 8, EXG Channel 9, EXG Channel 10, EXG Channel 11, EXG Channel 12, EXG Channel 13, EXG Channel 14, EXG Channel 15, Accel Channel 0, Accel Channel 1, Accel Channel 2, Not_Used1, Digital Channel 0 (D11), Digital Channel 1 (D12), Digital Channel 2 (D13), Digital Channel 3 (D17), Not_Used2, Digital Channel 4 (D18), Analog Channel 0, Analog Channel 1, Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)\".split(\",\")\n",
"columns = [column.strip() for column in columns]\n",
"types = {}\n",
"\n",
"data_df = pd.DataFrame(lineas, columns=columns)\n",
"exceptions = [\"Timestamp (Formatted)\"]\n",
"\n",
"for column in data_df.columns:\n",
" if column in exceptions:\n",
" continue\n",
" data_df[column] = pd.to_numeric(data_df[column], errors='coerce')\n",
"remove_columns(data_df, [\"Not_Used1\", \"Not_Used2\", \"Timestamp (Formatted)\", \"Sample Index\"])\n",
"data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col], unit=\"s\", utc=True)\n",
"# Create the label column\n",
"data_df[label_name]= \"\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7f4bf8f8-99dd-4f0a-aa20-aa09643d9553",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:12:07.148168Z",
"start_time": "2025-02-13T20:12:07.142272Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Timestamp</th>\n",
" <th>Label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2025-01-27 17:31:40+00:00</td>\n",
" <td>i</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2025-01-27 17:32:13+00:00</td>\n",
" <td>none</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2025-01-27 17:33:08+00:00</td>\n",
" <td>none</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2025-01-27 17:50:46+00:00</td>\n",
" <td>c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2025-01-27 17:51:11+00:00</td>\n",
" <td>i</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Timestamp Label\n",
"0 2025-01-27 17:31:40+00:00 i\n",
"1 2025-01-27 17:32:13+00:00 none\n",
"2 2025-01-27 17:33:08+00:00 none\n",
"3 2025-01-27 17:50:46+00:00 c\n",
"4 2025-01-27 17:51:11+00:00 i"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convertir los logs en un DataFrame\n",
"log_df = pd.DataFrame(logs_data, columns=[\"Timestamp\", \"Label\"])\n",
"log_df[timestamp_col] = pd.to_datetime(log_df[timestamp_col], errors=\"coerce\", utc=True)\n",
"log_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0c8715db-9090-4fd6-9ade-9796c268b0b5",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.288560Z",
"start_time": "2025-02-13T20:09:46.285065Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"def test(data, start_time, end_time):\n",
" mask = (data[timestamp_col] >= start_time) & (data[\"Timestamp\"] < end_time)\n",
" xdata = data[(data[timestamp_col]>=start_time) & (data[\"Timestamp\"] < end_time)]\n",
" print(\"xdata=\")\n",
" print(xdata.empty)\n",
" print(data.loc[mask,timestamp_col])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "90095eb1-91fb-4159-ae70-0ff42cfb2b4b",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.444389Z",
"start_time": "2025-02-13T20:09:46.316312Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*Error, no hay registros en el intervalo [2025-01-27 17:31:40+00:00, 2025-01-27 17:32:13+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:50:46+00:00, 2025-01-27 17:51:11+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:51:11+00:00, 2025-01-27 17:51:53+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:51:53+00:00, 2025-01-27 17:52:28+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:52:28+00:00, 2025-01-27 17:52:43+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:52:43+00:00, 2025-01-27 17:52:56+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:52:56+00:00, 2025-01-27 17:53:58+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:53:58+00:00, 2025-01-27 17:54:14+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 17:54:14+00:00, 2025-01-27 18:32:55.900000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 18:32:55.900000+00:00, 2025-01-27 18:34:18.100000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 18:34:18.100000+00:00, 2025-01-27 18:34:24.800000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 19:34:39.400000+00:00, 2025-01-27 19:35:25.100000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 19:35:25.100000+00:00, 2025-01-27 19:35:44.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 19:35:44.600000+00:00, 2025-01-27 19:36:11+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 19:36:11+00:00, 2025-01-27 19:36:31.100000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-27 19:36:31.100000+00:00, 2025-01-27 19:36:50.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:00:12.700000+00:00, 2025-01-28 18:00:48.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:00:48.200000+00:00, 2025-01-28 18:01:11.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:01:11.500000+00:00, 2025-01-28 18:01:34.800000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:01:34.800000+00:00, 2025-01-28 18:01:50+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:01:50+00:00, 2025-01-28 18:02:23+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:02:23+00:00, 2025-01-28 18:02:26.900000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:44:45.900000+00:00, 2025-01-28 18:45:22.900000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:45:22.900000+00:00, 2025-01-28 18:45:48.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:45:48.600000+00:00, 2025-01-28 18:46:08.900000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:46:08.900000+00:00, 2025-01-28 18:46:34.300000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:46:34.300000+00:00, 2025-01-28 18:46:55.100000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:46:55.100000+00:00, 2025-01-28 18:47:22.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:47:22.500000+00:00, 2025-01-28 18:47:37.700000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 18:47:37.700000+00:00, 2025-01-28 19:11:31.400000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:11:31.400000+00:00, 2025-01-28 19:11:49.100000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:11:49.100000+00:00, 2025-01-28 19:11:55.300000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:11:55.300000+00:00, 2025-01-28 19:12:25+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:12:25+00:00, 2025-01-28 19:12:40.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:12:40.600000+00:00, 2025-01-28 19:13:03+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:13:03+00:00, 2025-01-28 19:13:21.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:13:21.200000+00:00, 2025-01-28 19:13:42+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:13:42+00:00, 2025-01-28 19:31:40.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:31:40.200000+00:00, 2025-01-28 19:32:11.400000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:32:11.400000+00:00, 2025-01-28 19:32:24.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:32:24.500000+00:00, 2025-01-28 19:32:50.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:32:50.500000+00:00, 2025-01-28 19:33:15.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:33:15.600000+00:00, 2025-01-28 19:34:10.700000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:34:10.700000+00:00, 2025-01-28 19:34:39.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:34:39.200000+00:00, 2025-01-28 19:34:58.900000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:34:58.900000+00:00, 2025-01-28 19:35:19.800000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:35:19.800000+00:00, 2025-01-28 19:35:44.700000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:35:44.700000+00:00, 2025-01-28 19:36:02.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:36:02.200000+00:00, 2025-01-28 19:36:30.400000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-01-28 19:36:30.400000+00:00, 2025-01-28 19:36:32+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:08:58.200000+00:00, 2025-02-03 19:11:33.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:11:33.500000+00:00, 2025-02-03 19:12:32.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:12:32.600000+00:00, 2025-02-03 19:13:18.400000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:13:18.400000+00:00, 2025-02-03 19:13:41.700000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:13:41.700000+00:00, 2025-02-03 19:14:21+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:14:21+00:00, 2025-02-03 19:27:37.400000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:27:37.400000+00:00, 2025-02-03 19:28:15.600000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:28:15.600000+00:00, 2025-02-03 19:28:47.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:28:47.200000+00:00, 2025-02-03 19:29:18+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:29:18+00:00, 2025-02-03 19:29:30.200000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:29:30.200000+00:00, 2025-02-03 19:29:52.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:29:52.500000+00:00, 2025-02-03 19:40:28.800000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:40:28.800000+00:00, 2025-02-03 19:40:49.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:40:49.500000+00:00, 2025-02-03 19:40:57.500000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:40:57.500000+00:00, 2025-02-03 19:41:36.800000+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 19:41:36.800000+00:00, 2025-02-03 20:01:52+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 20:01:52+00:00, 2025-02-03 20:02:39+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 20:02:39+00:00, 2025-02-03 20:03:08+00:00)\n",
"*Error, no hay registros en el intervalo [2025-02-03 20:03:08+00:00, 2025-02-03 20:03:30.300000+00:00)\n",
"There were 69 errors in 79 intervals\n"
]
}
],
"source": [
"errors = 0\n",
"for i in range(len(log_df) - 1):\n",
" start_time = log_df.loc[i, timestamp_col]\n",
" end_time = log_df.loc[i + 1, timestamp_col]\n",
" label = log_df.loc[i, \"Label\"]\n",
" success = label_sample(data_df, start_time, end_time, label)\n",
" if not success:\n",
" errors +=1\n",
" #test(data_df, start_time, end_time)\n",
" #break\n",
"print(f\"There were {errors} errors in {len(log_df) - 1} intervals\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "da9ef7e2-5b7b-4505-9d6c-c2221bbd448d",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:13:11.062089Z",
"start_time": "2025-02-13T20:13:11.032296Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"# 2025-01-27 17:31:40+00:00, 2025-01-27 17:32:13+00:00"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7a65b3c9-e0d3-406e-9b35-3be6b54d6d97",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.548106Z",
"start_time": "2025-02-13T20:01:16.083929Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"EXG Channel 0 float64\n",
"EXG Channel 1 float64\n",
"EXG Channel 2 float64\n",
"EXG Channel 3 float64\n",
"EXG Channel 4 float64\n",
"EXG Channel 5 float64\n",
"EXG Channel 6 float64\n",
"EXG Channel 7 float64\n",
"EXG Channel 8 float64\n",
"EXG Channel 9 float64\n",
"EXG Channel 10 float64\n",
"EXG Channel 11 float64\n",
"EXG Channel 12 float64\n",
"EXG Channel 13 float64\n",
"EXG Channel 14 float64\n",
"EXG Channel 15 float64\n",
"Accel Channel 0 float64\n",
"Accel Channel 1 float64\n",
"Accel Channel 2 float64\n",
"Digital Channel 0 (D11) float64\n",
"Digital Channel 1 (D12) float64\n",
"Digital Channel 2 (D13) float64\n",
"Digital Channel 3 (D17) float64\n",
"Digital Channel 4 (D18) float64\n",
"Analog Channel 0 float64\n",
"Analog Channel 1 float64\n",
"Analog Channel 2 float64\n",
"Timestamp datetime64[ns, UTC]\n",
"Marker Channel float64\n",
"Movement object\n",
"dtype: object"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k = data_df[data_df[timestamp_col] > pd.Timestamp(2025, 1, 27, 17, 31, 40, tz=\"UTC\")]\n",
"k.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "a25f656e-7a84-4582-9017-a49ddb9ffe76",
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'minutes' is an invalid keyword argument for this function",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m inicio \u001b[38;5;241m=\u001b[39m datetime(year\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2025\u001b[39m,month\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,day\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m27\u001b[39m,hour\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m17\u001b[39m,minutes\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m31\u001b[39m,seconds\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m40\u001b[39m)\n",
"\u001b[0;31mTypeError\u001b[0m: 'minutes' is an invalid keyword argument for this function"
]
}
],
"source": [
"inicio = datetime(year=2025,month=1,day=27,hour=17,minute=31,second=40)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a3ab74e-89aa-4b8d-881c-ad391680524f",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.536509200Z",
"start_time": "2025-02-13T20:01:15.888660Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"data_df[data_df[timestamp_col] >= inicio]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbcd0c88-45a9-4018-a462-bc9f652f727a",
"metadata": {
"ExecuteTime": {
"end_time": "2025-02-13T20:09:46.536509200Z",
"start_time": "2025-02-13T20:01:15.888660Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"data_df[\"Movement\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e2a79fb-5578-42a8-bcbb-f9ce6c8eae24",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4752c811-aa32-4785-b74f-396c1c309085",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,96 @@
import pandas as pd
from datetime import datetime

# --- Configuración ------------------------------------------------------------
logs_path = "join/mergedLog_output.txt"       # one "timestamp,label" pair per line
openbci_path = "join/mergedData_output.txt"   # raw OpenBCI export ('%' lines are comments)
output_csv_path = "openbci_with_labels_filtered.csv"

# --- Leer los archivos --------------------------------------------------------
with open(logs_path, "r") as logs_file:
    logs_data = [log.strip().split(",") for log in logs_file.readlines()]
with open(openbci_path, "r") as openbci_file:
    lineas = [linea.strip().split(",")
              for linea in openbci_file.readlines()
              if not linea.strip().startswith('%')]

# Convertir los logs en un DataFrame
log_df = pd.DataFrame(logs_data, columns=["Timestamp", "Label"])

# FIX: the original header string listed "Not Used" twice; duplicate column
# names make pandas column selection ambiguous, so the two filler columns get
# distinct names here.  Names are also stripped so they carry no leading
# spaces (the original split left " EXG Channel 0" etc.).
columns = [c.strip() for c in (
    "Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3, "
    "EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7, EXG Channel 8, "
    "EXG Channel 9, EXG Channel 10, EXG Channel 11, EXG Channel 12, EXG Channel 13, "
    "EXG Channel 14, EXG Channel 15, Accel Channel 0, Accel Channel 1, Accel Channel 2, "
    "Not Used 1, Digital Channel 0 (D11), Digital Channel 1 (D12), Digital Channel 2 (D13), "
    "Digital Channel 3 (D17), Not Used 2, Digital Channel 4 (D18), Analog Channel 0, "
    "Analog Channel 1, Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)"
).split(",")]
data_df = pd.DataFrame(lineas, columns=columns)

# Buscar la columna legible de timestamp.
timestamp_col = next((col for col in data_df.columns if "Timestamp (Formatted)" in col), None)
if timestamp_col is None:
    raise ValueError("No se encontró la columna de Timestamp en OpenBCI.")

# Convertir a datetime y redondear a segundos para que ambas fuentes queden a
# la misma resolución.  ("s" replaces the deprecated "S" frequency alias.)
data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col]).dt.floor("s")
log_df["Timestamp"] = pd.to_datetime(log_df["Timestamp"], errors="coerce").dt.floor("s")

# FIX: the original dropped NaT rows without resetting the index; the
# positional .loc[i, ...] accesses in the loop below would then raise KeyError.
n_nat = log_df["Timestamp"].isna().sum()
if n_nat > 0:
    print(f"⚠️ Hay {n_nat} valores NaT en log_df. Eliminando...")
    log_df = log_df.dropna(subset=["Timestamp"]).reset_index(drop=True)

# --- Asignar etiquetas ---------------------------------------------------------
# Cada entrada i del log abre el intervalo [t_i, t_{i+1}) con su etiqueta.
# FIX: the original ran this assignment TWICE — once before and once after the
# numeric mapping — so the second pass overwrote the numeric codes with raw
# string labels again; a single pass keeps the pipeline consistent.
data_df["Movement Label"] = ""
for i in range(len(log_df) - 1):
    start_time = log_df.loc[i, "Timestamp"]
    end_time = log_df.loc[i + 1, "Timestamp"]
    label = log_df.loc[i, "Label"]
    print(f"Asignando etiqueta '{label}' a registros entre {start_time} y {end_time}")
    mask = (data_df[timestamp_col] >= start_time) & (data_df[timestamp_col] < end_time)
    data_df.loc[mask, "Movement Label"] = label

# Normalizar y mapear a códigos numéricos.  'c' (y cualquier etiqueta no
# mapeada, incluida la cadena vacía) se convierte en NaN.
data_df["Movement Label"] = data_df["Movement Label"].str.strip().str.lower()
label_mapping = {"none": 0, "i": 1, "d": 2}
data_df["Movement Label"] = data_df["Movement Label"].map(label_mapping)

print(f"Valores sin etiquetar: {data_df['Movement Label'].isna().sum()} filas con NaN")

# FIX: the original applied .notna() AFTER re-overwriting the column with
# strings, so nothing was ever dropped; filtering here actually removes the
# 'c' / unlabeled rows, as the comments intended.
data_df = data_df[data_df["Movement Label"].notna()]

# Guardar en un archivo CSV (same filename the original script wrote).
data_df.to_csv(output_csv_path, index=False)

File diff suppressed because it is too large Load Diff