I am using PyCharm to run VARIMA, a machine-learning algorithm included in the darts framework for Python, which I chose because my data is multivariate. What I am trying to do is: given a time step, the model predicts the next 10 time steps based on the 30 previous ones. In any case, I believe the issue comes from the DataFrame I am processing (a table with 90 entries), because of the type of error I am getting.
Part of the code I use for processing and modeling the data:
def rolling_predict(p=3, d=0, q=2):
    """Run a rolling-window VARIMA forecast over the simulation.

    For each rolling index: rebuild the timestamped DataFrame/TimeSeries from
    the current window, split train/validation slices out of the global
    plotting series, evaluate the model, slide the window forward by one step
    (dropping the oldest row and appending the forecast sample), and save a
    plot of the window.

    Parameters
    ----------
    p, d, q : int
        VARIMA orders, forwarded unchanged to ``evaluate_model``.

    Returns
    -------
    tuple
        ``(rolling_mean_errors, forecasted_values)`` — the per-step mean
        errors and the stacked best forecast samples (one row per step).
    """
    global series, colums, df, dataset, dataset_plot_series
    rolling_mean_errors = []
    best_trajectory, series, forecasted_values = None, None, None
    for rolling_index in range(num_steps_simulation - first_mandatory_steps - horizon):
        print('INDEX')
        print(rolling_index)
        df = pd.DataFrame(dataset, columns=colums)
        df, start_time_str_datetime = time_stamps_extraction(df, dataset, rolling_index)
        series = TimeSeries.from_dataframe(df, 'Timestamp', colums)
        # BUG FIX: the original iterated `for i in range(...)` only to keep the
        # timedelta computed for the LAST i of each range. Compute that final
        # offset directly — identical result (both ranges are never empty),
        # without the wasted loop passes.
        val_start_offset = rolling_index + dataset.shape[0]
        val_start_time_str = start_time_str_datetime + datetime.timedelta(seconds=val_start_offset)
        # train, val = series.split_before(pd.Timestamp('19580101'))
        train, to_split = dataset_plot_series.split_before(pd.Timestamp(val_start_time_str))
        # Validation window ends 10 seconds after it starts (the original
        # range's last element was `val_start_offset + 10`).
        val_finish_time_str = start_time_str_datetime + datetime.timedelta(seconds=val_start_offset + 10)
        val, _ = to_split.split_before(pd.Timestamp(val_finish_time_str))
        best_sample, _rolling_mean_error = evaluate_model(p=p, d=d, q=q, train=train, h=horizon, val=val)
        rolling_mean_errors.append(_rolling_mean_error)
        # Slide the window: drop the oldest observation, append the forecast
        # sample as the newest row (so later windows are fed by predictions).
        dataset = dataset[1:, :]
        best_sample = np.reshape(best_sample, (1, best_sample.shape[0]))
        dataset = np.append(dataset, best_sample, axis=0)
        if rolling_index == 0:
            forecasted_values = best_sample
        else:
            forecasted_values = np.append(forecasted_values, best_sample, axis=0)
        plt.clf()
        series.plot(label='actual')
        # best_prediction.plot(label='forecast', lw=3)
        plt.legend()
        plt.savefig('./timeseriesimages/3agentsVARIMA_window_instants/3agentsVARIMA_' + str(rolling_index) + '.png')
        # plt.close()
    return rolling_mean_errors, forecasted_values
def process_file():
    """Load the first simulation from the HDF5 archive and populate the
    global ``dataset`` (first ``first_mandatory_steps`` rows),
    ``dataset_ground_truth_plot`` (full array), and ``dataset_plot_series``
    (darts TimeSeries built from the timestamped full array)."""
    global dataset, dataset_ground_truth_plot, dataset_plot_series
    with h5py.File('./archive/dataset_' + str(num_agents) + 'UAV.hdf5', 'r') as f:
        # Access the dataset
        base_data = f['data'][0]  # with '...[0]' access to the first simulation
        # Flatten the trailing two axes so each time step is a single flat row.
        base_data = np.reshape(base_data, (base_data.shape[0], base_data.shape[1] * base_data.shape[2]))
        aux_actions_colums = []
        for i in range(num_agents):
            # Per-agent action column sits at every other position (offset 1).
            actions_pos = (i * 2) + 1
            cos_sin_columns = np.array([action_to_cos_sin(action) for action in base_data[:, actions_pos]])
            aux_actions_colums.append(cos_sin_columns)
        for i in range(num_agents):
            # The extra `+ i` compensates for the columns already inserted by
            # previous iterations: each pass deletes one action column and
            # splices in its 2-column cos/sin replacement, shifting later
            # positions right by one net column.
            # NOTE(review): assumes action_to_cos_sin returns 2 values per
            # action — TODO confirm against its definition.
            actions_pos = (i * 2) + 1 + i
            base_data = np.delete(base_data, actions_pos, axis=1)
            base_data_part1 = base_data[:, :actions_pos]
            base_data_part2 = base_data[:, actions_pos:]
            base_data = np.concatenate((base_data_part1, aux_actions_colums[i], base_data_part2), axis=1)
        dataset = base_data[:first_mandatory_steps]
        dataset_ground_truth_plot = base_data
        print(dataset.shape)
        df = pd.DataFrame(dataset_ground_truth_plot, columns=colums)
        df, _ = time_stamps_extraction(df, dataset_ground_truth_plot)
        dataset_plot_series = TimeSeries.from_dataframe(df, 'Timestamp', colums)
        f.close()  # redundant: the `with` block already closes the file
def main():
    """Grid-search VARIMA orders (p, d, q), keep the combination with the
    lowest rolling mean error, and append each iteration's result to a
    parameter log file on disk."""
    # NOTE(review): range(1, 0) is EMPTY, so the p and q loops never execute
    # and no combination is ever tried — presumably these were meant to be
    # ascending ranges (e.g. range(1, 4)); confirm the intended upper bounds.
    p_values = range(1, 0)
    d_values = range(0, 1)
    q_values = range(1, 0)
    iters_counter = 0
    f_params_name = 'params_log.txt'
    pathlib.Path(f_params_name).unlink(missing_ok=True)
    global best_score, best_params, best_forecasted_values
    best_score, best_params, best_forecasted_values = float("inf"), None, None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                with open(f_params_name, 'a') as f_params:
                    print("ITER: ", iters_counter)
                    iters_counter += 1
                    params = (p, d, q)
                    try:
                        process_file()
                        rolling_error, forecasted_values = rolling_predict(p, d, q)
                        rolling_error = sum(rolling_error) / len(rolling_error)
                        if rolling_error < best_score:
                            best_score, best_params, best_forecasted_values = rolling_error, params, forecasted_values
                        # BUG FIX: the original ended both writes with a
                        # literal 'n' instead of the newline escape '\n', so
                        # the whole log collapsed onto one line.
                        f_params.write('Number of possible combinations: ' +
                                       str(len(p_values) * len(d_values) * len(q_values)) + '\n')
                        f_params.write(str(iters_counter) + ' ' + str(best_score) + ' ' +
                                       str(p) + ' ' + str(d) + ' ' + str(q) + '\n')
                    except Exception:
                        # Narrowed from a bare `except:` so KeyboardInterrupt
                        # and SystemExit are no longer swallowed.
                        traceback.print_exc()
                        continue
    # BUG FIX: removed the trailing `f_params.close()` — the with-statement
    # already closes the file each iteration, and because the loops above can
    # run zero times, `f_params` could be unbound there and raise NameError.
The error traceback plus a bunch of warnings:
Traceback (most recent call last):
File "/home/envilk/Documentos/Timeseries/darts/dartstestARIMA.py", line 246, in main
rolling_error, forecasted_values = rolling_predict(p, d, q)
File "/home/envilk/Documentos/Timeseries/darts/dartstestARIMA.py", line 150, in rolling_predict
best_sample, _rolling_mean_error = evaluate_model(p=p, d=d, q=q, train=train, h=horizon, val=val)
File "/home/envilk/Documentos/Timeseries/darts/dartstestARIMA.py", line 41, in evaluate_model
model.fit(train)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/darts/models/forecasting/varima.py", line 130, in fit
super().fit(series, future_covariates)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/darts/models/forecasting/forecasting_model.py", line 2788, in fit
return self._fit(series, future_covariates=future_covariates)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/darts/models/forecasting/varima.py", line 151, in _fit
self.model = m.fit(disp=0)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/mlemodel.py", line 703, in fit
mlefit = super().fit(start_params, method=method,
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/base/model.py", line 566, in fit
xopt, retvals, optim_settings = optimizer._fit(f, score, start_params,
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/base/optimizer.py", line 243, in _fit
xopt, retvals = func(objective, gradient, start_params, fargs, kwargs,
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/base/optimizer.py", line 660, in _fit_lbfgs
retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter,
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py", line 237, in fmin_l_bfgs_b
res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_lbfgsb_py.py", line 407, in _minimize_lbfgsb
f, g = func_and_grad(x)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py", line 296, in fun_and_grad
self._update_fun()
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
self._update_fun_impl()
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
self.f = fun_wrapped(self.x)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
fx = fun(np.copy(x), *args)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/base/model.py", line 534, in f
return -self.loglike(params, *args) / nobs
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/mlemodel.py", line 938, in loglike
loglike = self.ssm.loglike(complex_step=complex_step, **kwargs)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/kalman_filter.py", line 1001, in loglike
kfilter = self._filter(**kwargs)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/kalman_filter.py", line 921, in _filter
self._initialize_state(prefix=prefix, complex_step=complex_step)
File "/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/representation.py", line 1058, in _initialize_state
self._statespaces[prefix].initialize(self.initialization,
File "statsmodels/tsa/statespace/_representation.pyx", line 1362, in statsmodels.tsa.statespace._representation.dStatespace.initialize
File "statsmodels/tsa/statespace/_initialization.pyx", line 288, in statsmodels.tsa.statespace._initialization.dInitialization.initialize
File "statsmodels/tsa/statespace/_initialization.pyx", line 406, in statsmodels.tsa.statespace._initialization.dInitialization.initialize_stationary_stationary_cov
File "statsmodels/tsa/statespace/_tools.pyx", line 1626, in statsmodels.tsa.statespace._tools._dsolve_discrete_lyapunov
numpy.linalg.LinAlgError: Schur decomposition solver error.
/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/tsa/statespace/varmax.py:160: EstimationWarning: Estimation of VARMA(p,q) models is not generically robust, due especially to identification issues.
warn('Estimation of VARMA(p,q) models is not generically robust,'
/home/envilk/Documentos/Timeseries/darts/venv/lib/python3.10/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
Trends are not well supported yet for getting probabilistic forecasts with ARIMA. If you run into issues, try calling fit() with num_samples=1 or removing the trend from your model.
I also have to mention that I have experienced other, similar errors, such as `numpy.linalg.LinAlgError: Singular matrix`, `numpy.linalg.LinAlgError: LU decomposition error`, and `numpy.linalg.LinAlgError: Matrix is not positive definite`.