PythonProject/.ipynb_checkpoints/TestEtiquetado-checkpoint.py

#%%
import pandas as pd
from datetime import datetime
#%%
# Input files: the merged event log and the merged OpenBCI recording.
logs_path = "join/mergedLog_output.txt"
openbci_path = "join/mergedData_output.txt"

# Column names: the shared timestamp column and the label column to be added.
timestamp_col = "Timestamp"
label_name = "Movement"

# Path for the labelled CSV (no write to it is visible in this section).
output_csv_path = "OpenBCI_with_labels.csv"
#%%
# Read the logs (timestamps and labels) and the OpenBCI recording.
# Each line is stripped and split on commas into a list of string fields.
with open(logs_path, "r") as logs_file:
    logs_data = [entry.strip().split(",") for entry in logs_file]
with open(openbci_path, "r") as openbci_file:
    # Lines whose stripped text starts with '%' are skipped
    # (presumably OpenBCI header metadata -- confirm against the file format).
    lineas = [
        row.split(",")
        for row in (raw.strip() for raw in openbci_file)
        if not row.startswith('%')
    ]
#%%
# Utility functions
def remove_columns(df, columns):
    """Drop the given column label(s) from ``df`` in place.

    Args:
        df: DataFrame to modify.
        columns: A column label or list of labels to remove.

    Returns:
        None; ``df`` itself is mutated.
    """
    df.drop(columns=columns, inplace=True)
def set_type_columns(df, types):
    """Cast columns of ``df`` in place to the requested dtypes.

    Args:
        df: DataFrame whose columns are converted.
        types: Mapping of column label -> dtype accepted by ``Series.astype``.

    Returns:
        None; ``df`` itself is mutated.
    """
    for key, value in types.items():
        # astype returns a new Series; without assigning it back the cast is lost.
        df[key] = df[key].astype(value)
def label_sample(data, start_time, end_time, label):
    """Write ``label`` on every row whose timestamp lies in [start_time, end_time).

    The timestamp and label columns are taken from the module-level
    ``timestamp_col`` and ``label_name``. ``data`` is modified in place.

    Args:
        data: DataFrame holding the samples.
        start_time: Inclusive lower bound of the interval.
        end_time: Exclusive upper bound of the interval.
        label: Value written into the label column for matching rows.

    Returns:
        True if at least one row matched and was labelled; False (after
        printing a message) if the interval contains no rows.
    """
    timestamps = data[timestamp_col]
    in_interval = (timestamps >= start_time) & (timestamps < end_time)
    if not data.loc[in_interval].empty:
        data.loc[in_interval, label_name] = label
        return True
    print(f"*Error, no hay registros en el intervalo [{start_time}, {end_time})")
    return False
#%%
# Convert the parsed OpenBCI rows into a DataFrame.
# Column names come from a comma-separated header string, stripped of whitespace.
columns = [name.strip() for name in "Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3, EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7, EXG Channel 8, EXG Channel 9, EXG Channel 10, EXG Channel 11, EXG Channel 12, EXG Channel 13, EXG Channel 14, EXG Channel 15, Accel Channel 0, Accel Channel 1, Accel Channel 2, Not_Used1, Digital Channel 0 (D11), Digital Channel 1 (D12), Digital Channel 2 (D13), Digital Channel 3 (D17), Not_Used2, Digital Channel 4 (D18), Analog Channel 0, Analog Channel 1, Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)".split(",")]
types = {}
# Columns left as raw strings (not passed through to_numeric).
exceptions = ["Timestamp (Formatted)"]

data_df = pd.DataFrame(lineas, columns=columns)

# Parse every other column as numeric; unparseable cells become NaN.
for column in data_df.columns:
    if column not in exceptions:
        data_df[column] = pd.to_numeric(data_df[column], errors='coerce')
remove_columns(data_df, ["Not_Used1", "Not_Used2", "Timestamp (Formatted)", "Sample Index"])
# The numeric timestamp is interpreted as seconds since the Unix epoch.
data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col], unit="s")

# Create the label column; every sample starts with an empty label.
data_df[label_name] = ""
print(data_df.dtypes)
#%%
# Convert the logs into a DataFrame with one row per logged event.
# The timestamp column is named via timestamp_col so it stays consistent with
# the lookup on the next line and with the OpenBCI DataFrame.
log_df = pd.DataFrame(logs_data, columns=[timestamp_col, "Label"])
# Unparseable timestamps become NaT instead of raising.
# NOTE(review): the parsed values are compared against OpenBCI timestamps built
# with unit="s"; confirm both sources use the same timezone.
log_df[timestamp_col] = pd.to_datetime(log_df[timestamp_col], errors="coerce")
#%%
def test(data, start_time, end_time):
    """Debug helper: print which rows fall in [start_time, end_time).

    Prints whether the selection is empty, then the matching timestamps.
    The timestamp column is taken from the module-level ``timestamp_col``.

    Args:
        data: DataFrame holding the samples.
        start_time: Inclusive lower bound of the interval.
        end_time: Exclusive upper bound of the interval.
    """
    # Build the interval filter once and reuse it for both outputs.
    mask = (data[timestamp_col] >= start_time) & (data[timestamp_col] < end_time)
    xdata = data[mask]
    print("xdata=")
    print(xdata.empty)
    print(data.loc[mask, timestamp_col])
#%%
# Label every interval between two consecutive log entries: samples in
# [log[i], log[i + 1]) receive the label of log[i]. The last log entry only
# closes the final interval, so N log rows yield N - 1 intervals.
errors = 0
log_times = log_df[timestamp_col]
interval_count = max(len(log_df) - 1, 0)
for start_time, end_time, label in zip(
    log_times.iloc[:-1], log_times.iloc[1:], log_df["Label"].iloc[:-1]
):
    success = label_sample(data_df, start_time, end_time, label)
    if not success:
        errors += 1
# Report against the number of intervals actually processed, not the number of log rows.
print(f"There were {errors} errors in {interval_count} intervals")
#%%
# Show the labelled samples (only rendered when run as a notebook cell).
data_df
#%%
# Count of samples per label, using the configured label column name.
data_df[label_name].value_counts()
#%%