78 lines
3.1 KiB
Python
78 lines
3.1 KiB
Python
#%%
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
#%%
|
|
# Paths of the two input files read further down
logs_path = "join/mergedLog_output.txt"
openbci_path = "join/mergedData_output.txt"

# Output CSV path (not referenced by the cells visible in this file)
output_csv_path = "OpenBCI_with_labels.csv"

# Names of the label column and of the timestamp column used throughout
label_name = "Movement"
timestamp_col = "Timestamp"
|
|
#%%
|
|
# Read the log file: each line is stripped and split on commas
with open(logs_path, "r") as logs_file:
    logs_data = [entry.strip().split(",") for entry in logs_file]

# Read the OpenBCI file the same way, skipping lines that start with '%'
with open(openbci_path, "r") as openbci_file:
    lineas = [
        row.split(",")
        for row in map(str.strip, openbci_file)
        if not row.startswith('%')
    ]
|
|
#%%
|
|
# Utility functions
|
|
def remove_columns(df, columns):
    """Drop the given columns from ``df`` in place.

    Args:
        df: DataFrame to modify.
        columns: Column label or list of column labels to remove.

    Returns:
        None; ``df`` itself is modified.
    """
    df.drop(columns=columns, inplace=True)
|
|
def set_type_columns(df, types):
    """Cast columns of ``df`` in place to the requested dtypes.

    Args:
        df: DataFrame to modify.
        types: Mapping of column name to a dtype accepted by
            ``Series.astype``. An empty mapping leaves ``df`` untouched.

    Returns:
        None; ``df`` itself is modified.

    Raises:
        KeyError: If a key of ``types`` is not a column of ``df``.
    """
    for key, value in types.items():
        # astype returns a new Series, so the result must be stored back;
        # calling it without assignment leaves the column unchanged.
        df[key] = df[key].astype(value)
|
|
def label_sample(data, start_time, end_time, label):
    """Write ``label`` into the label column for rows in a time window.

    Rows are selected when their ``timestamp_col`` value lies in the
    half-open interval ``[start_time, end_time)``; the label is written to
    the ``label_name`` column of ``data`` in place.

    Args:
        data: DataFrame holding the ``timestamp_col`` column.
        start_time: Inclusive lower bound of the window.
        end_time: Exclusive upper bound of the window.
        label: Value assigned to the selected rows.

    Returns:
        True if at least one row was labelled; False (after printing a
        message) if no row falls inside the window.
    """
    timestamps = data[timestamp_col]
    in_window = (timestamps >= start_time) & (timestamps < end_time)

    # Nothing to label: report it and let the caller count the failure
    if not in_window.any():
        print(f"*Error, no hay registros en el intervalo [{start_time}, {end_time})")
        return False

    data.loc[in_window, label_name] = label
    return True
|
|
#%%
|
|
# Turn the parsed OpenBCI rows into a DataFrame
columns = [
    name.strip()
    for name in "Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3, EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7, EXG Channel 8, EXG Channel 9, EXG Channel 10, EXG Channel 11, EXG Channel 12, EXG Channel 13, EXG Channel 14, EXG Channel 15, Accel Channel 0, Accel Channel 1, Accel Channel 2, Not_Used1, Digital Channel 0 (D11), Digital Channel 1 (D12), Digital Channel 2 (D13), Digital Channel 3 (D17), Not_Used2, Digital Channel 4 (D18), Analog Channel 0, Analog Channel 1, Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)".split(",")
]
types = {}

data_df = pd.DataFrame(lineas, columns=columns)

# Parse every column as numeric except the listed exceptions; values that
# cannot be parsed become NaN (errors='coerce').
exceptions = ["Timestamp (Formatted)"]
for column in data_df.columns:
    if column not in exceptions:
        data_df[column] = pd.to_numeric(data_df[column], errors='coerce')

remove_columns(data_df, ["Not_Used1", "Not_Used2", "Timestamp (Formatted)", "Sample Index"])

# The numeric timestamps are interpreted as seconds (unit="s")
data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col], unit="s")

# Create the label column, initially empty for every row
data_df[label_name] = ""

print(data_df.dtypes)
|
|
#%%
|
|
# Turn the log entries into a DataFrame and parse their timestamps;
# values that cannot be parsed become NaT (errors="coerce").
log_df = pd.DataFrame(logs_data, columns=["Timestamp", "Label"]).assign(
    **{
        timestamp_col: lambda frame: pd.to_datetime(
            frame[timestamp_col], errors="coerce"
        )
    }
)
|
|
#%%
|
|
def test(data, start_time, end_time):
    """Debug helper: print the rows of ``data`` inside a time window.

    Selects rows whose ``timestamp_col`` value lies in the half-open
    interval ``[start_time, end_time)``, then prints whether the selection
    is empty followed by the selected timestamps.

    Args:
        data: DataFrame holding the ``timestamp_col`` column.
        start_time: Inclusive lower bound of the window.
        end_time: Exclusive upper bound of the window.
    """
    # Use timestamp_col for both bounds (same window as label_sample) and
    # build the mask only once.
    mask = (data[timestamp_col] >= start_time) & (data[timestamp_col] < end_time)
    xdata = data[mask]
    print("xdata=")
    print(xdata.empty)
    print(data.loc[mask, timestamp_col])
|
|
#%%
|
|
# Label the OpenBCI samples interval by interval: each log entry opens an
# interval that ends at the next entry's timestamp. The final log entry has
# no following timestamp, so its label is not applied to any sample.
errors = 0
log_times = log_df[timestamp_col].tolist()
log_labels = log_df["Label"].tolist()

# N log entries delimit N - 1 intervals (none when the log is empty)
n_intervals = max(len(log_times) - 1, 0)

# zip stops at the shortest input, which pairs every entry with its successor
for start_time, end_time, label in zip(log_times, log_times[1:], log_labels):
    if not label_sample(data_df, start_time, end_time, label):
        errors += 1

print(f"There were {errors} errors in {n_intervals} intervals")
|
|
#%%
|
|
# Cell output: a bare expression, so the DataFrame is only displayed when
# this is run as a notebook/interactive cell.
data_df
|
|
#%%
|
|
# Cell output: number of samples per label. Uses label_name so this stays in
# sync with the column created when the DataFrame was built.
data_df[label_name].value_counts()
|
|
#%%
|