# Listing metadata captured from the code viewer: 96 lines, 3.9 KiB, Python.
"""Attach numeric movement labels to OpenBCI samples using a timestamped event log.

The script reads a comma-separated event log (timestamp, label) and a
comma-separated OpenBCI data export, labels every sample with the event whose
time interval contains it (compared at one-second resolution), converts the
labels to numeric codes and writes the labelled samples to a CSV file.
"""

from datetime import datetime

import pandas as pd

# Input files.
logs_path = "../join/new/mergedLog_output.txt"
openbci_path = "../join/new/mergedData_output.txt"

# NOTE(review): this path was never used by the original script; the CSV is
# written to ``filtered_csv_path`` below. Kept so existing references still
# resolve -- confirm which file name is actually wanted.
output_csv_path = "OpenBCI_with_labels.csv"
filtered_csv_path = "openbci_with_labels_filtered.csv"

# Column names for the data file. The names are obtained by splitting on ","
# only, so every name except the first keeps its leading space (for example
# " Timestamp (Formatted)") and " Not Used" appears twice.
columns = (
    "Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3,"
    " EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7,"
    " EXG Channel 8, EXG Channel 9, EXG Channel 10, EXG Channel 11,"
    " EXG Channel 12, EXG Channel 13, EXG Channel 14, EXG Channel 15,"
    " Accel Channel 0, Accel Channel 1, Accel Channel 2, Not Used,"
    " Digital Channel 0 (D11), Digital Channel 1 (D12),"
    " Digital Channel 2 (D13), Digital Channel 3 (D17), Not Used,"
    " Digital Channel 4 (D18), Analog Channel 0, Analog Channel 1,"
    " Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)"
).split(",")

# Numeric code for each normalized (stripped, lower-cased) label. Any label
# missing from this mapping (e.g. "c") yields NaN and the row is discarded.
# NOTE(review): "i"/"d" presumably mean left/right -- confirm with the author.
label_mapping = {"none": 0, "i": 1, "d": 2}


def read_rows(path, skip_prefix=None):
    """Read a comma-separated text file into a list of field lists.

    Args:
        path: File to read.
        skip_prefix: When given, stripped lines starting with this prefix are
            ignored (the data file uses "%" for such lines).

    Returns:
        One list of raw string fields per kept line. Fields are split on ","
        only, so whitespace around individual fields is preserved.
    """
    rows = []
    with open(path, "r") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            # Blank lines would otherwise become bogus one-field rows.
            if not line:
                continue
            if skip_prefix is not None and line.startswith(skip_prefix):
                continue
            rows.append(line.split(","))
    return rows


def find_timestamp_column(data_df):
    """Return the first column whose name contains "Timestamp (Formatted)".

    Raises:
        ValueError: If no such column exists.
    """
    for col in data_df.columns:
        if "Timestamp (Formatted)" in col:
            return col
    raise ValueError("No se encontró la columna de Timestamp en OpenBCI.")


def assign_labels(sample_times, log_df, *, verbose=False):
    """Give each sample the label of the log interval that contains it.

    Consecutive log rows define half-open intervals ``[start, next_start)``;
    the label of the row that opens the interval is applied. The last log row
    only closes the final interval, so its own label is never assigned.

    Args:
        sample_times: Datetime Series with one entry per sample.
        log_df: DataFrame with a datetime "Timestamp" column and a "Label"
            column. Rows whose timestamp is NaT are ignored.
        verbose: Print each interval as it is applied.

    Returns:
        Object Series aligned with ``sample_times`` holding the raw label, or
        "" for samples that fall outside every interval.
    """
    # Re-number after dropping NaT rows: the original label-based lookup
    # (``log_df.loc[i + 1]``) raised KeyError as soon as a row had been dropped.
    events = log_df.dropna(subset=["Timestamp"]).reset_index(drop=True)
    starts = events["Timestamp"]
    labels = pd.Series("", index=sample_times.index, dtype=object)

    intervals = zip(starts.iloc[:-1], starts.iloc[1:], events["Label"].iloc[:-1])
    for start_time, end_time, label in intervals:
        if verbose:
            print(f"Asignando etiqueta '{label}' a registros entre {start_time} y {end_time}")
        in_range = (sample_times >= start_time) & (sample_times < end_time)
        labels.loc[in_range] = label
    return labels


def encode_labels(labels):
    """Map raw labels to numeric codes via ``label_mapping``.

    Labels are stripped and lower-cased first; anything not present in the
    mapping (including the empty "unlabelled" marker) becomes NaN.
    """
    return labels.str.strip().str.lower().map(label_mapping)


def label_samples(data_df, log_df, timestamp_col, *, verbose=False):
    """Return the samples that received a known label, with numeric codes.

    Both the sample timestamps and the log timestamps are floored to whole
    seconds before comparison. The input frames are not modified.

    Args:
        data_df: Sample data; ``data_df[timestamp_col]`` must be datetime-typed.
        log_df: Event log with datetime "Timestamp" and "Label" columns.
        timestamp_col: Name of the sample timestamp column.
        verbose: Forwarded to ``assign_labels``.

    Returns:
        A new DataFrame holding only the rows whose label is in
        ``label_mapping``, with ``timestamp_col`` floored to seconds and an
        integer "Movement Label" column.
    """
    # Lower-case "s": the upper-case "S" alias is deprecated in recent pandas.
    sample_times = data_df[timestamp_col].dt.floor("s")
    events = log_df.assign(Timestamp=log_df["Timestamp"].dt.floor("s"))

    codes = encode_labels(assign_labels(sample_times, events, verbose=verbose))

    # Drop rows without a numeric code: label "c", any other unmapped label,
    # and samples outside every log interval.
    keep = codes.notna()
    labelled = data_df.loc[keep].copy()
    labelled[timestamp_col] = sample_times.loc[keep]
    labelled["Movement Label"] = codes.loc[keep].astype(int)
    return labelled


def main():
    """Load both input files, label the samples and write the filtered CSV."""
    logs_data = read_rows(logs_path)
    lineas = read_rows(openbci_path, skip_prefix="%")

    log_df = pd.DataFrame(logs_data, columns=["Timestamp", "Label"])
    data_df = pd.DataFrame(lineas, columns=columns)

    timestamp_col = find_timestamp_column(data_df)

    # Sample timestamps must all parse; unparseable log timestamps become NaT
    # and are skipped when the intervals are built.
    data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col])
    log_df["Timestamp"] = pd.to_datetime(log_df["Timestamp"], errors="coerce")

    # Diagnostics to compare the time ranges covered by the two files.
    print("🔍 Timestamps en log_df (primeros 5 valores):")
    print(log_df["Timestamp"].head())

    print("\n🔍 Timestamps en data_df (primeros 5 valores):")
    print(data_df[timestamp_col].head())

    print("\n🔍 Últimos valores en log_df:")
    print(log_df["Timestamp"].tail())

    print("\n🔍 Últimos valores en data_df:")
    print(data_df[timestamp_col].tail())

    missing = log_df["Timestamp"].isna().sum()
    if missing > 0:
        print(f"⚠️ Hay {missing} valores NaT en log_df. Eliminando...")

    labelled_df = label_samples(data_df, log_df, timestamp_col, verbose=True)

    unlabelled = len(data_df) - len(labelled_df)
    print(f"Valores sin etiquetar: {unlabelled} filas con NaN")

    labelled_df.to_csv(filtered_csv_path, index=False)


if __name__ == "__main__":
    main()