I tried to build a dataset in R based on a publication at this URL: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8799698/#MOESM1
Ultimately, the goal is to build an elastic net model that predicts athletes' performances.
So I have my dataset with predictor values, and I want to implement the g_imp and g_ser response functions and then convolve these variables over each Sunday-to-Sunday period. Sunday is the day when I test my athletes' true performance.
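To be explicit about what I mean by g_imp and g_ser (this is my reading of the paper's supplementary material, with the decay onset at T_D = 4 * tau_G, so please correct me if I misread it):

g_imp(t) = exp(-t / tau_I)
g_ser(t) = 1 - exp(-t / tau_G)        if t < T_D
g_ser(t) = exp(-(t - T_D) / tau_D)    if t >= T_D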
Here's my code:
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(lubridate)
library(readxl)
library(caret)
library(glmnet)
library(Ecdat)
library(corrplot)
Dataset <- read_excel("Dataset_modelization.xlsx")
tau_I <- 3 # 3 days for the impulse response
tau_G <- 1 # 1 day for the growth phase
tau_D <- 7 # 7 days for the decay phase
TD <- 4 * tau_G # Delay before the decay phase starts
# Assume 'Dataset' is already loaded in your R environment
# Step 1: Sort the dataset
Dataset <- Dataset %>%
  arrange(Athlete_ID, Date)
# Step 2: Identify the performance days
# Assume performance is recorded every Sunday
Dataset <- Dataset %>%
  mutate(Performance_Day = wday(Date) == 1) # 1 is Sunday in lubridate
# Step 3: Group the training data
# Function to group the training data for each performance day
group_training_data <- function(df) {
  df %>%
    group_by(Athlete_ID) %>%
    mutate(Group_ID = cumsum(Performance_Day)) %>%
    ungroup()
}
# Corrected impulse response function
g_imp <- function(t) {
  w <- exp(-t / tau_I)
  return(w)
}
# Corrected serial (cumulative) response function
g_ser <- function(t) {
  U <- ifelse(t < TD, 1, 0)
  w1 <- (1 - exp(-t / tau_G)) * U
  w2 <- exp(-(t - TD) / tau_D) * (1 - U)
  w <- w1 + w2
  return(w)
}
# Apply the grouping function
Dataset <- group_training_data(Dataset)
Dataset <- Dataset |>
  select(!Performance_Day)
# Function to apply the convolution
convoluer_variables <- function(variable_brute, g_func) {
  # Compute the response over a time range covering the length of the raw variable
  t <- seq_along(variable_brute) - 1
  reponse <- g_func(t)
  # Apply the discrete convolution
  variable_conv <- stats::convolve_filter(reponse, 1, variable_brute)
  return(variable_conv)
}
# Function to apply the convolution to all rows of the data frame
convolve_training_data <- function(df, variable, g_func) {
  df %>%
    group_by(Group_ID) %>%
    mutate(!!paste0("Conv_", variable) := convoluer_variables(df[[variable]], g_func)) %>%
    ungroup()
}
# Apply the convolution to each predictor variable
Dataset <- convolve_training_data(Dataset, "CDE", g_imp)
Dataset <- convolve_training_data(Dataset, "CDE", g_ser)
Here's my Dataset:
dput(Dataset)
structure(list(Athlete_ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), Date = structure(c(1710201600,
1710288000, 1710374400, 1710460800, 1710547200, 1710720000, 1710806400,
1710892800, 1710979200, 1711065600, 1711152000, 1711324800, 1711411200,
1711497600, 1711584000, 1711670400, 1711756800, 1711929600, 1712016000,
1712102400, 1712188800, 1712275200, 1712361600, 1712534400, 1712620800,
1712707200, 1712793600, 1712880000, 1712966400, 1713139200, 1710201600,
1710288000, 1710374400, 1710460800, 1710547200, 1710720000, 1710806400,
1710892800, 1710979200, 1711065600, 1711152000, 1711324800, 1711411200,
1711497600, 1711584000, 1711670400, 1711756800, 1711929600, 1712016000,
1712102400, 1712188800, 1712275200, 1712361600, 1712534400, 1712620800,
1712707200, 1712793600, 1712880000, 1712966400, 1713139200, 1710201600,
1710288000, 1710374400, 1710460800, 1710547200, 1710720000, 1710806400,
1710892800, 1710979200, 1711065600, 1711152000, 1711324800, 1711411200,
1711497600, 1711584000, 1711670400, 1711756800, 1711929600, 1712016000,
1712102400, 1712188800, 1712275200, 1712361600, 1712534400, 1712620800,
1712707200, 1712793600, 1712880000, 1712966400, 1713139200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), CDE = c(398, 231, 140, 176, 186, 402, 406, 387, 326,
101, 174, 204, 484, 315, 405, 294, 295, 155, 119, 219, 331, 305,
426, 351, 135, 400, 424, 246, 408, 264, 494, 349, 161, 418, 423,
240, 466, 281, 358, 371, 316, 208, 448, 484, 180, 320, 341, 385,
272, 141, 244, 279, 116, 136, 159, 193, 427, 320, 267, 165, 281,
286, 118, 259, 451, 154, 424, 161, 336, 132, 380, 482, 272, 182,
144, 495, 295, 304, 459, 371, 264, 282, 384, 300, 153, 300, 441,
212, 244, 180), Sleep = c(5, 4, 4, 3, 1, 1, 3, 5, 2, 2, 4, 4,
3, 2, 1, 1, 1, 2, 5, 2, 4, 4, 3, 3, 2, 1, 2, 2, 3, 5, 1, 1, 1,
2, 1, 2, 1, 2, 3, 1, 2, 1, 1, 3, 3, 2, 3, 2, 1, 4, 4, 5, 2, 1,
3, 4, 1, 4, 1, 1, 3, 2, 3, 4, 1, 5, 5, 3, 4, 2, 3, 5, 3, 3, 3,
3, 3, 1, 4, 2, 1, 5, 1, 5, 3, 5, 3, 2, 4, 5), Lowerbody = c(2,
3, 5, 2, 5, 3, 2, 2, 5, 2, 4, 2, 3, 2, 2, 1, 1, 4, 2, 1, 1, 1,
1, 2, 3, 1, 3, 4, 2, 1, 5, 5, 5, 2, 5, 3, 5, 3, 3, 4, 3, 1, 1,
1, 5, 1, 1, 3, 4, 1, 2, 4, 3, 5, 4, 4, 5, 5, 3, 1, 5, 5, 3, 2,
3, 2, 1, 3, 3, 1, 2, 4, 3, 1, 1, 2, 1, 5, 3, 3, 2, 4, 3, 4, 3,
5, 5, 1, 2, 5), Upperbody = c(1, 1, 4, 1, 1, 4, 5, 3, 2, 2, 1,
5, 2, 2, 1, 2, 5, 2, 4, 1, 3, 1, 2, 5, 3, 3, 2, 5, 2, 4, 2, 4,
5, 4, 4, 2, 5, 4, 1, 3, 4, 4, 2, 1, 2, 5, 3, 3, 4, 1, 2, 4, 1,
5, 5, 2, 5, 4, 4, 5, 4, 5, 4, 4, 4, 4, 1, 2, 1, 5, 4, 3, 5, 2,
2, 1, 4, 3, 4, 4, 1, 3, 3, 3, 2, 2, 5, 2, 1, 1), Mood = c(3,
4, 5, 1, 2, 1, 5, 5, 1, 1, 3, 1, 2, 5, 1, 5, 3, 2, 4, 3, 5, 4,
3, 4, 4, 5, 1, 3, 4, 3, 2, 4, 1, 1, 4, 1, 1, 4, 1, 4, 3, 4, 3,
3, 2, 1, 3, 3, 1, 1, 3, 1, 2, 1, 5, 2, 3, 1, 4, 2, 5, 1, 2, 2,
1, 5, 3, 4, 5, 5, 3, 5, 4, 2, 1, 3, 5, 3, 3, 2, 5, 5, 1, 2, 4,
2, 3, 4, 1, 4), Stress = c(4, 2, 1, 1, 4, 3, 5, 2, 2, 1, 2, 3,
3, 4, 5, 4, 1, 4, 4, 1, 4, 3, 1, 5, 3, 1, 4, 4, 5, 3, 2, 3, 5,
5, 5, 5, 3, 2, 1, 1, 5, 3, 4, 1, 3, 3, 5, 2, 4, 4, 3, 1, 1, 5,
2, 5, 1, 1, 1, 5, 1, 5, 2, 2, 1, 1, 4, 1, 5, 5, 2, 2, 5, 1, 1,
3, 3, 1, 3, 1, 4, 5, 1, 1, 4, 4, 3, 2, 1, 1), Rest = c(3, 3,
3, 2, 1, 1, 3, 1, 2, 1, 2, 2, 2, 1, 3, 3, 2, 1, 2, 3, 2, 2, 1,
1, 2, 1, 2, 2, 2, 2, 3, 3, 1, 3, 3, 1, 1, 2, 1, 2, 1, 3, 2, 2,
3, 1, 1, 3, 2, 1, 1, 2, 2, 1, 2, 2, 1, 3, 3, 1, 2, 3, 2, 2, 3,
2, 2, 1, 1, 3, 1, 1, 2, 1, 3, 1, 2, 3, 2, 3, 2, 1, 2, 1, 1, 3,
3, 3, 2, 1), Distance = c(3146, 818, 3450, 3391, 1252, 1584,
2671, 3590, 2851, 3012, 1530, 3307, 3588, 3240, 2734, 834, 1854,
1940, 3317, 1427, 3560, 879, 2361, 1435, 3600, 3628, 2226, 2124,
2605, 804, 3082, 1302, 1871, 1556, 2427, 3521, 3466, 928, 1949,
2481, 3140, 3468, 1116, 2337, 2298, 1510, 1728, 2939, 1464, 3177,
1158, 1871, 3298, 1767, 3987, 3920, 2953, 2597, 1821, 2437, 2456,
1790, 2217, 1314, 2443, 3906, 1714, 3923, 2618, 3513, 875, 3135,
1613, 2928, 968, 1453, 1515, 2009, 3990, 980, 3829, 2427, 1820,
2066, 3322, 1646, 3363, 3322, 2127, 1910), Group_ID = c(0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L)), row.names = c(NA, -90L), class = c("tbl_df",
"tbl", "data.frame"))
But when I run the code, I get this error:
# Apply the convolution to each predictor variable
> Dataset <- convolve_training_data(Dataset, "CDE", g_imp)
Error in `mutate()`:
ℹ In argument: `Conv_CDE = convoluer_variables(df[[variable]], g_func)`.
ℹ In group 1: `Group_ID = 0`.
Caused by error:
! 'convolve_filter' is not an exported object from 'namespace:stats'
Run `rlang::last_trace()` to see where the error occurred.
Called from: signal_abort(cnd, .file)
I suspect this is related to the stats::filter function, since filter is also masked by the tidyverse (dplyr) package, but despite that I don't know what to do. I'd be grateful for your help, mates!
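In case it clarifies what I am trying to compute, here is a manual sketch of the convolution I have in mind (convolve_manual is a hypothetical helper I wrote only for illustration, and I am not 100% sure it matches the paper's convolution exactly):

# Hypothetical helper, for illustration only: within each Sunday-to-Sunday block,
# the most recent day's load gets weight g(0), the previous day's gets g(1), etc.
convolve_manual <- function(load, g_func) {
  n <- length(load)
  sapply(seq_len(n), function(t) sum(load[seq_len(t)] * g_func((t - 1):0)))
}
# e.g. convolve_manual(c(398, 231, 140, 176, 186), g_imp)

Is stats::filter (called explicitly to avoid the dplyr masking), stats::convolve, or something like this manual version the right way to get that?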