Correciones para reentrenamiento

This commit is contained in:
2025-05-04 20:58:43 +02:00
parent 56991c9f26
commit 721992d6f3
33 changed files with 740951 additions and 240889 deletions

View File

@@ -0,0 +1,77 @@
#%%
import pandas as pd
from datetime import datetime
#%%
# Input files: the event log and the merged OpenBCI recording.
logs_path = "join/mergedLog_output.txt"
openbci_path = "join/mergedData_output.txt"

# Output CSV path (not written anywhere in this part of the script).
output_csv_path = "OpenBCI_with_labels.csv"

# Column names: the timestamp column used to align both tables, and the
# column that receives the label of each sample.
timestamp_col = "Timestamp"
label_name = "Movement"
#%%
# Read the event log and the OpenBCI recording as lists of comma-separated
# fields. Blank lines are skipped in both files so that a stray empty line
# (e.g. a trailing newline) does not turn into a row with a missing
# timestamp. Lines starting with '%' (presumably the OpenBCI file header)
# are dropped from the recording.
with open(logs_path, "r") as logs_file:
    logs_data = [
        log.split(",")
        for log in map(str.strip, logs_file)
        if log
    ]
with open(openbci_path, "r") as openbci_file:
    lineas = [
        linea.split(",")
        for linea in map(str.strip, openbci_file)
        if linea and not linea.startswith("%")
    ]
#%%
# Utility functions
def remove_columns(df, columns):
    """Drop the given columns from *df* in place.

    Args:
        df: DataFrame to modify.
        columns: Column label or list of column labels to remove.

    Raises:
        KeyError: If any of the labels is not a column of *df*.
    """
    df.drop(columns=columns, inplace=True)
def set_type_columns(df, types):
    """Cast columns of *df* to the given dtypes, in place.

    Args:
        df: DataFrame to modify.
        types: Mapping of column name to target dtype (anything accepted
            by ``Series.astype``).

    Raises:
        KeyError: If a key of *types* is not a column of *df*.
    """
    for key, value in types.items():
        # ``astype`` returns a new Series; assign it back so the cast
        # actually takes effect on the DataFrame.
        df[key] = df[key].astype(value)
def label_sample(data, start_time, end_time, label):
    """Write *label* on every row of *data* inside ``[start_time, end_time)``.

    Rows are selected on the ``timestamp_col`` column and the label is stored
    in the ``label_name`` column; *data* is modified in place.

    Returns:
        True if at least one row fell inside the interval and was labelled,
        False (after printing a message) if the interval matched no rows.
    """
    timestamps = data[timestamp_col]
    in_interval = (timestamps >= start_time) & (timestamps < end_time)
    # Guard clause: report intervals that select nothing and leave data as is.
    if data.loc[in_interval].empty:
        print(f"*Error, no hay registros en el intervalo [{start_time}, {end_time})")
        return False
    data.loc[in_interval, label_name] = label
    return True
#%%
# Build a DataFrame from the parsed OpenBCI rows
columns = [
    name.strip()
    for name in "Sample Index, EXG Channel 0, EXG Channel 1, EXG Channel 2, EXG Channel 3, EXG Channel 4, EXG Channel 5, EXG Channel 6, EXG Channel 7, EXG Channel 8, EXG Channel 9, EXG Channel 10, EXG Channel 11, EXG Channel 12, EXG Channel 13, EXG Channel 14, EXG Channel 15, Accel Channel 0, Accel Channel 1, Accel Channel 2, Not_Used1, Digital Channel 0 (D11), Digital Channel 1 (D12), Digital Channel 2 (D13), Digital Channel 3 (D17), Not_Used2, Digital Channel 4 (D18), Analog Channel 0, Analog Channel 1, Analog Channel 2, Timestamp, Marker Channel, Timestamp (Formatted)".split(",")
]
types = {}
data_df = pd.DataFrame(lineas, columns=columns)
# Every column except the ones listed here is converted to numbers; values
# that cannot be parsed become NaN.
exceptions = ["Timestamp (Formatted)"]
for column in data_df.columns:
    if column not in exceptions:
        data_df[column] = pd.to_numeric(data_df[column], errors="coerce")
remove_columns(data_df, ["Not_Used1", "Not_Used2", "Timestamp (Formatted)", "Sample Index"])
# Interpret the numeric timestamps as seconds since the Unix epoch.
# NOTE(review): this yields timezone-naive values on the UTC clock; confirm
# the log timestamps use the same clock before comparing them.
data_df[timestamp_col] = pd.to_datetime(data_df[timestamp_col], unit="s")
# Create the label column, empty until the log intervals are applied
data_df[label_name] = ""
print(data_df.dtypes)
#%%
# Build a DataFrame from the parsed log entries. The timestamp column is
# named through ``timestamp_col`` (rather than a hard-coded "Timestamp") so
# that it always matches the column converted on the next line.
log_df = pd.DataFrame(logs_data, columns=[timestamp_col, "Label"])
# Timestamps that cannot be parsed become NaT instead of raising.
log_df[timestamp_col] = pd.to_datetime(log_df[timestamp_col], errors="coerce")
#%%
def test(data, start_time, end_time):
    """Print debugging information about the rows in ``[start_time, end_time)``.

    Rows are selected on the ``timestamp_col`` column. Prints whether the
    selection is empty, followed by the timestamps of the selected rows.

    Args:
        data: DataFrame holding a ``timestamp_col`` column.
        start_time: Inclusive lower bound of the interval.
        end_time: Exclusive upper bound of the interval.
    """
    # Compute the interval mask once and use the configured column name on
    # both sides of the comparison.
    mask = (data[timestamp_col] >= start_time) & (data[timestamp_col] < end_time)
    xdata = data[mask]
    print("xdata=")
    print(xdata.empty)
    print(data.loc[mask, timestamp_col])
#%%
# Label every sample that falls between two consecutive log entries with the
# label of the earlier entry. The last entry has no successor, so it only
# closes the final interval.
errors = 0
# N log entries delimit N - 1 intervals (none when the log is empty).
interval_count = max(len(log_df) - 1, 0)
log_times = log_df[timestamp_col]
# Positional slices pair each entry with the next one regardless of the
# DataFrame's index labels.
for start_time, end_time, label in zip(
    log_times.iloc[:-1], log_times.iloc[1:], log_df["Label"].iloc[:-1]
):
    success = label_sample(data_df, start_time, end_time, label)
    if not success:
        errors += 1
        # Debug aid: call test(data_df, start_time, end_time) here to inspect
        # the failing interval.
print(f"There were {errors} errors in {interval_count} intervals")
#%%
# Show the labelled recording (bare expression: only displayed when the cell
# is run interactively)
data_df
#%%
# Count the samples per label, using the configured label column
data_df[label_name].value_counts()
#%%