I am working on my first full machine learning project, and I am now trying to process raw data to transform it into a form suitable for supervised learning.
In one of the steps, in order to resample the data, I used `resample` followed by the `apply` method, passing a sampling dictionary that specifies the aggregation function for each column,
as follows:
import pandas as pd
from glob import glob
# --------------------------------------------------------------
# Turn into function
# --------------------------------------------------------------
# Directory that holds the raw sensor CSV exports (relative to the
# working directory the script is run from).
data_path = "../../data/raw/MetaMotion/MetaMotion"
# Every CSV file sitting directly inside that directory.
files = glob(data_path + "/*.csv")
def _index_by_epoch(frames):
    """Concatenate *frames* and index the result by the 'epoch (ms)' timestamp."""
    if not frames:
        # No matching files: return an empty frame instead of failing on a
        # missing "epoch (ms)" column.
        return pd.DataFrame()
    combined = pd.concat(frames)
    combined.index = pd.to_datetime(combined["epoch (ms)"], unit="ms")
    # The three time columns are redundant once the datetime index is in place.
    return combined.drop(columns=["epoch (ms)", "time (01:00)", "elapsed (s)"])


def read_data_from_files(files):
    """Read sensor CSV files into one accelerometer and one gyroscope frame.

    Metadata is parsed from each file name, which is split on "-":
    the last character of the first part is the participant, the second
    part is the label, and the third part is the category (with a trailing
    "_MetaWear_2019" suffix and trailing digits 1/2/3 removed).

    Args:
        files: Iterable of CSV file paths. Files whose name contains
            "Accelerometer" or "Gyroscope" are collected; others are
            read but ignored.

    Returns:
        A tuple ``(acc_df, gyr_df)``. Each frame is indexed by the
        datetime derived from "epoch (ms)" and carries the added columns
        "participant", "label", "category" and "set", where "set" is a
        1-based counter that increases with every file of that sensor.
        A frame is empty when no file of that sensor was given.
    """
    import os  # local import so the block does not depend on file-level imports

    suffix = "_MetaWear_2019"
    acc_frames = []
    gyr_frames = []
    acc_set = 1
    gyr_set = 1

    for f in files:
        # Parse the base name only, so a "-" in a directory name cannot
        # shift the fields.
        name = os.path.basename(f)
        parts = name.split("-")
        participant = parts[0][-1]
        label = parts[1]

        # str.rstrip(chars) strips a *set of characters*, not a suffix, so
        # remove the literal suffix explicitly before dropping the digits.
        category = parts[2]
        if category.endswith(suffix):
            category = category[: -len(suffix)]
        category = category.rstrip("123")

        df = pd.read_csv(f)
        df["participant"] = participant
        df["label"] = label
        df["category"] = category

        if "Accelerometer" in name:
            acc_frames.append(df.assign(set=acc_set))
            acc_set += 1  # was "=+ 1", which reset the counter to 1
        if "Gyroscope" in name:
            gyr_frames.append(df.assign(set=gyr_set))
            gyr_set += 1  # was "=+ 1", which reset the counter to 1

    # Concatenate once at the end instead of re-copying inside the loop.
    return _index_by_epoch(acc_frames), _index_by_epoch(gyr_frames)
acc_df, gyr_df = read_data_from_files(files)

# --------------------------------------------------------------
# Merging datasets
# --------------------------------------------------------------
# Column-wise concat aligns rows on the datetime index. Only the first three
# accelerometer columns are taken so the metadata columns come once, from
# gyr_df. Rows where only one sensor has a reading contain NaN; they are
# deliberately NOT dropped here, because dropping before resampling would
# discard every timestamp the two sensors do not share.
data_merged = pd.concat([acc_df.iloc[:, :3], gyr_df], axis=1)
data_merged.info()

# Rename columns.
# This must be an ordered list, not a set literal: a set has no defined
# order, so the names would be attached to arbitrary columns. The order
# mirrors how read_data_from_files builds the frames: three axis columns
# per sensor, then participant, label, category, set.
data_merged.columns = [
    "acc_x",
    "acc_y",
    "acc_z",
    "gyr_x",
    "gyr_y",
    "gyr_z",
    "participant",
    "label",
    "category",
    "set",
]
# --------------------------------------------------------------
# Resample data (frequency conversion)
# --------------------------------------------------------------
# Accelerometer: 12.500HZ
# Gyroscope: 25.000Hz
# Per-column aggregation: average the numeric sensor readings within each
# time bin and keep the last value seen for the metadata columns.
sampling = {
    "acc_x": "mean",
    "acc_y": "mean",
    "acc_z": "mean",
    "gyr_x": "mean",
    "gyr_y": "mean",
    "gyr_z": "mean",
    "label": "last",
    "category": "last",
    "participant": "last",
    "set": "last",
}
# Resample the first 1000 rows into 200 ms bins. The dict already selects an
# aggregation per column, so no extra keyword arguments are passed to agg();
# the earlier discarded .apply(sampling) call computed the same thing and
# threw the result away.
resampled_data = data_merged[:1000].resample(rule="200ms").agg(sampling)
New contributor
Abderrahim Laribi is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.