I’m using VAR to forecast the spy using oil. I’m noticing that applying an oil shock does not affect pricing forcasting
I defined a function to apply an oil shock to the end of the data for simplicity
import yfinance as yf
import pandas_datareader.data as web
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
import matplotlib.pyplot as plt
def apply_shock(data, shock_key, total_shock_percentage, shock_periods):
if shock_key in data.columns and shock_periods > 0:
increment = total_shock_percentage / 100
print(f"Applying a total shock of {total_shock_percentage}% over {shock_periods} periods")
for i in range(shock_periods):
period_index = -(shock_periods - i)
date = data.index[period_index]
original_value = data[shock_key].iloc[period_index]
shock_increment = original_value * ((i + 1) / shock_periods * increment)
data[shock_key].iloc[period_index] += shock_increment
print(f"Date {date}: Original {shock_key} value = {original_value:.4f}, Increment = {shock_increment:.4f}, Shocked {shock_key} value = {data[shock_key].iloc[period_index]:.4f}")
return data
Then I fetch the data and apply the VAR model
# Define the variables
startDate = '2000-01-01'
endDate = '2024-07-01'
periodsInMonths = 12
# Shock variables
total_shock_percentage = 30 # Total shock percentage
shock_periods = 5 # Number of periods to spread the shock over
shock_key = 'Oil' # The key in the data to which the shock should be applied
maxlags = 6
# Fetch data
data = pd.DataFrame()
# Fetch SPY data
spy = yf.download('SPY', start=startDate, end=endDate)
spy = spy['Adj Close']
spy = spy.tz_localize(None) # Remove timezone information
spy = spy.resample('M').ffill()
data['SPY'] = spy
# Fetch Oil data
oil = web.DataReader('DCOILWTICO', 'fred', start=startDate, end=endDate)
oil = oil.tz_localize(None) # Remove timezone information
oil = oil.resample('M').ffill()
data['Oil'] = oil
# Forward fill the missing data
data.ffill(inplace=True)
# Ensure the index has the correct frequency
data.index = pd.to_datetime(data.index)
data = data.asfreq('M')
# Drop any remaining NaN values
data.dropna(inplace=True)
# Apply the shock to the specified key in the data
data = apply_shock(data, shock_key, total_shock_percentage, shock_periods)
# Fit the VAR model on monthly data
model = VAR(data)
lag_order = model.select_order(maxlags)
print(f"Selected lag order: {lag_order.summary()}")
results = model.fit(lag_order.aic)
# Forecasting
forecast_input = data.values[-results.k_ar:]
forecast = results.forecast(y=forecast_input, steps=periodsInMonths)
forecast_df = pd.DataFrame(forecast, index=pd.date_range(start=data.index[-1], periods=periodsInMonths, freq='M'), columns=data.columns)
# Append forecast to original data for plotting
combined = pd.concat([data, forecast_df])
# Normalize the data for plotting
data_normalized = data / data.iloc[0] * 100
forecast_df_normalized = forecast_df / data.iloc[0] * 100
combined_normalized = pd.concat([data_normalized, forecast_df_normalized])
When I apply even a 100% oil shock this does not reflect in the any way similar to how it has historically for the SPY.
Here’s the plot code for more clarity
# Plot the original data and forecast with shock
plt.figure(figsize=(12, 6))
plt.plot(data_normalized['SPY'], label='SPY (Original)', color='blue')
plt.plot(forecast_df_normalized['SPY'], label='SPY (Forecast)', linestyle='--', color='blue')
plt.plot(data_normalized['Oil'], label='Oil (Original)', color='green')
plt.plot(forecast_df_normalized['Oil'], label='Oil (Forecast)', linestyle='--', color='green')
plt.axvline(x=data.index[-1], linestyle='--', color='grey') # Mark the forecast start
plt.legend()
plt.xlabel('Time')
plt.ylabel('Normalized Values (Base 100)')
plt.title(f'VAR Model: Original Data and Forecast with Incremental {shock_key} Shock')
plt.show()
I’ve also tried increasing the max lags, however the forecasting seems to still ignore the shock.
How can I improve forecasting in the VAR model?