#%%
import pandas as pd
from datetime import datetime

#%%
# Input locations and the column names this script relies on.
logs_path = "join/old/mergedLog_output.txt"
openbci_path = "join/mergedData_output.txt"
# NOTE(review): output_csv_path is defined but never used below, so the
# labelled DataFrame is not written anywhere by this script. If saving is
# intended, add e.g. `data_df.to_csv(output_csv_path, index=False)` at the end.
output_csv_path = "OpenBCI_with_labels.csv"
label_name = "Movement"
timestamp_col = "Timestamp"

#%%
# Read the event log; each non-blank line is split on commas
# (two fields are expected downstream: timestamp and label).
# Blank lines are skipped: they would otherwise become rows with a NaT
# timestamp and be counted as failed intervals further down.
with open(logs_path, "r") as logs_file:
    logs_data = [log.strip().split(",") for log in logs_file if log.strip()]

# Read the recorded samples, skipping blank lines and '%'-prefixed lines.
with open(openbci_path, "r") as openbci_file:
    lineas = [
        linea.strip().split(",")
        for linea in openbci_file
        if linea.strip() and not linea.strip().startswith("%")
    ]

#%%
# Utility functions
def remove_columns(df, columns):
    """Drop the given columns from ``df`` in place.

    Args:
        df: DataFrame to modify.
        columns: Column label or list of labels to remove.

    Raises:
        KeyError: If any label is not present in ``df`` (raised by pandas).
    """
    df.drop(columns, axis="columns", inplace=True)


def set_type_columns(df, types):
    """Cast columns of ``df`` in place according to ``types``.

    Args:
        df: DataFrame to modify.
        types: Mapping of column name -> dtype accepted by ``Series.astype``.
    """
    for key, value in types.items():
        # ``astype`` returns a new Series; it must be assigned back, otherwise
        # the conversion is silently discarded.
        df[key] = df[key].astype(value)


def label_sample(data, start_time, end_time, label):
    """Label every row whose timestamp lies in ``[start_time, end_time)``.

    Reads the module-level ``timestamp_col`` and ``label_name`` to locate the
    timestamp column and the label column of ``data``.

    Args:
        data: DataFrame to label; modified in place.
        start_time: Inclusive lower bound of the interval.
        end_time: Exclusive upper bound of the interval.
        label: Value written to the label column of the matching rows.

    Returns:
        True if at least one row matched and was labelled; False (after
        printing a message) if the interval contains no rows.
    """
    mask = (data[timestamp_col] >= start_time) & (data[timestamp_col] < end_time)
    if not mask.any():
        print(f"*Error, no hay registros en el intervalo [{start_time}, {end_time})")
        return False
    data.loc[mask, label_name] = label
    return True


#%%
# Convert the recorded samples into a DataFrame.
columns = (
    ["Sample Index"]
    + [f"EXG Channel {i}" for i in range(16)]
    + ["Accel Channel 0", "Accel Channel 1", "Accel Channel 2"]
    + ["Not_Used1"]
    + [
        "Digital Channel 0 (D11)",
        "Digital Channel 1 (D12)",
        "Digital Channel 2 (D13)",
        "Digital Channel 3 (D17)",
    ]
    + ["Not_Used2", "Digital Channel 4 (D18)"]
    + ["Analog Channel 0", "Analog Channel 1", "Analog Channel 2"]
    + ["Timestamp", "Marker Channel", "Timestamp (Formatted)"]
)
types = {}
data_df = pd.DataFrame(lineas, columns=columns)

# Every field was read as text; convert all but the listed exceptions to
# numbers. Values that cannot be parsed become NaN (errors='coerce').
exceptions = ["Timestamp (Formatted)"]
for column in data_df.columns:
    if column in exceptions:
        continue
    data_df[column] = pd.to_numeric(data_df[column], errors="coerce")

remove_columns(
    data_df,
    ["Not_Used1", "Not_Used2", "Timestamp (Formatted)", "Sample Index"],
)
# The numeric timestamp is interpreted as seconds since the Unix epoch.
data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col], unit="s")
# Create the label column
data_df[label_name] = ""
print(data_df.dtypes)

#%%
# Convert the event log into a DataFrame.
log_df = pd.DataFrame(logs_data, columns=["Timestamp", "Label"])
# Unparseable timestamps become NaT; any interval bounded by NaT matches no
# samples and is therefore reported as an error by the labelling loop.
# NOTE(review): these are parsed from strings while the sample timestamps are
# epoch seconds -- confirm both refer to the same clock/timezone.
log_df[timestamp_col] = pd.to_datetime(log_df[timestamp_col], errors="coerce")

#%%
def test(data, start_time, end_time):
    """Debug helper: print which rows of ``data`` fall in an interval.

    Prints whether the selection for ``[start_time, end_time)`` is empty,
    followed by the timestamps of the selected rows.
    """
    mask = (data[timestamp_col] >= start_time) & (data["Timestamp"] < end_time)
    xdata = data[mask]
    print("xdata=")
    print(xdata.empty)
    print(data.loc[mask, timestamp_col])


#%%
# Each pair of consecutive log entries defines one interval: the samples in
# [entry i, entry i+1) receive the label of entry i. The last log entry only
# closes the previous interval, so there are len(log_df) - 1 intervals.
errors = 0
interval_bounds = zip(
    log_df[timestamp_col].iloc[:-1],
    log_df[timestamp_col].iloc[1:],
    log_df["Label"].iloc[:-1],
)
for start_time, end_time, label in interval_bounds:
    if not label_sample(data_df, start_time, end_time, label):
        errors += 1
        # Debugging aid: call test(data_df, start_time, end_time) here to
        # inspect an interval that matched no samples.

n_intervals = max(len(log_df) - 1, 0)
print(f"There were {errors} errors in {n_intervals} intervals")

#%%
data_df

#%%
data_df[label_name].value_counts()

#%%