I am trying to cross-validate a model with exogenous variables, however R shows this error:
Error in window.default(x, …) : ‘start’ cannot be after ‘end’ Also: Warning messages: 1: In window.default(x, …) : ‘end’ value not changed 2: In window.default(x, …) : ‘end’ value not changed
The code in R is the folloging:
install.packages("forecast")
library(forecast)
# Create an example data set with series A, EX1 y EX2
set.seed(123)
n <- 100
period <- 24 # Define the period of the time series
A <- ts(sin(1:n) + rnorm(n, sd = 0.1), frequency = period, start = c(1, 1))
EX1 <- ts(rnorm(n), frequency = period, start = c(1, 1))
EX2 <- ts(rnorm(n), frequency = period, start = c(1, 1))
# Define the length of the training and test sets
train_len <- round(length(A) * 0.7)
test_len <- length(A) - train_len
# Function to fit and predict the ARIMA model with exogenous and harmonic regressors
train_and_forecast <- function(train_A, test_EX1, test_EX2, K, h) {
harmonics <- fourier(train_A, K = K)
xreg_train <- cbind(harmonics, as.matrix(window(EX1, end = length(train_A))), as.matrix(window(EX2, end = length(train_A))))
model <- auto.arima(train_A, xreg = xreg_train)
harmonics_test <- fourier(train_A, K = K, h = h)
xreg_test <- cbind(harmonics_test, as.matrix(test_EX1), as.matrix(test_EX2))
forecast_result <- forecast(model, xreg = xreg_test)
return(forecast_result)
}
# Manual implementation of cross validation
tsCV_manual <- function(A, EX1, EX2, K, h) {
errors <- ts(numeric(length(A) - h), start = start(A))
for (i in seq(h, length(A) - 1)) {
train_A <- window(A, end = i)
test_A <- window(A, start = i + 1, end = i + h)
train_EX1 <- window(EX1, end = i)
test_EX1 <- window(EX1, start = i + 1, end = i + h)
train_EX2 <- window(EX2, end = i)
test_EX2 <- window(EX2, start = i + 1, end = i + h)
if (length(train_A) > K * 2) {
forecast_result <- train_and_forecast(train_A, test_EX1, test_EX2, K, h)
forecast_mean <- as.numeric(forecast_result$mean)
errors[(i + 1):(i + h)] <- test_A - forecast_mean
}
}
return(errors)
}
K <- 6 # Maximum number of harmonics must be <= period / 2
h <- 12 # Prediction horizon
# Calculate cross-validation errors
errors <- tsCV_manual(A, EX1, EX2, K, h)
# Calculate the mean square error (RMSE)
rmse <- sqrt(mean(errors^2, na.rm = TRUE))
print(paste("RMSE: ", round(rmse, 2)))
# Plot prediction errors
plot(errors, main = "Prediction errors in cross validation", ylab = "Error", xlab = "time index")
I have not been able to solve the problem, could you help me?