Hello everybody i am beginner with Python and i would like to use ARIMA to make prediction of Microsoft stock. Unfortunately i have an error that i do not understand.
When i run my code the last plot about forecast.screen capture does not appear.I made research on internet but i did not find any solutions about my problem. I tried to use ‘tuple’ with the variable ‘conf’ at lines 137 and 138 but there was no change. That is why i would like someone to help me to solve my problem. Thank you in advance.
Here is my code:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from datetime import datetime
from matplotlib.pylab import rcParams
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import kpss
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from statsmodels.tools.eval_measures import rmse
from pandas.plotting import autocorrelation_plot
from pandas.tseries.offsets import DateOffset
from pandas.plotting import register_matplotlib_converters
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
msft_data = yf.download("MSFT", period='1d', start="2010-01-01", end="2024-08-07")
df = msft_data[['Close']].copy()
print(df.describe())
print()
#Plot MSFT
msft_data['Close'].plot()
plt.title("Microsoft")
plt.xlabel('Date')
plt.ylabel('Close')
plt.show()
#ADF Test Original Data
adf_test = adfuller(msft_data['Close'].dropna())
print('ADF Statistics : {}'.format(adf_test[0]))
print('p-value : {}'.format(adf_test[1]))
if adf_test[1] <= 0.05:
print("Data are Stationary")
else:
print("Data are not Stationary")
print('Critical values :')
for key, value in adf_test[4].items():
print('t{}: {}'.format(key, value))
print()
#Differencing Data
f = plt.figure()
ax1 = f.add_subplot(121)
ax1.set_title('Original Data')
ax1.plot(msft_data['Close'])
ax2 = f.add_subplot(122)
plot_acf(msft_data['Close'].dropna(), ax=ax2)
plt.show()
f = plt.figure()
ax1 = f.add_subplot(121)
ax1.set_title('First order Differencing')
ax1.plot(msft_data['Close'].diff())
ax2 = f.add_subplot(122)
plot_acf(msft_data['Close'].diff().dropna(), ax=ax2)
plt.show()
f = plt.figure()
ax1 = f.add_subplot(121)
ax1.set_title('Second order Differencing')
ax1.plot(msft_data['Close'].diff().diff())
ax2 = f.add_subplot(122)
plot_acf(msft_data['Close'].diff().diff().dropna(), ax=ax2)
plt.show()
#ADF Test Differencing Series
print("ADF Test Differencing Series:")
adf_test = adfuller(msft_data['Close'].diff().dropna())
print('p-value First Differencing:' '%.40f' % adf_test[1])
if adf_test[1] <= 0.05:
print("Data is Stationary")
else:
print("Data is not Stationary")
adf_test = adfuller(msft_data['Close'].diff().diff().dropna())
print('p-value Second Differencing:' '%.40f' % adf_test[1])
if adf_test[1] <= 0.05:
print("Data is Stationary")
else:
print("Data is not Stationary")
print()
#Autocorelation
autocorrelation_plot(msft_data['Close'])
plt.show()
#ACF
plot_acf(msft_data['Close'].diff().dropna(), alpha=0.05)
plt.show()
#Partial autocorelation
plot_pacf(msft_data['Close'].diff().dropna(), lags=20, alpha=0.05)
plt.show()
#ARIMA
model = ARIMA(msft_data['Close'], order=(2, 1, 2))
model_fit = model.fit()
print(model_fit.summary())
#Residues and Density
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1,2, figsize=(12, 6))
residuals.plot(title="Residuals", ax=ax[0])
residuals.plot(kind='kde', title='Density', ax=ax[1])
plt.show()
#Train Test Split
nbr = int(len(df) * 0.8)
train_data = df['Close'][:nbr]
test_data = df['Close'][nbr:]
model = ARIMA(train_data, order=(2, 1, 2))
result = model.fit()
print(result.summary())
steps = 14
fc = result.forecast(steps)
conf = result.forecast(steps)
fc = pd.Series(fc, index=test_data[:steps].index)
lower = pd.Series(conf[:,0], index=test_data[:nbr].index)
upper = pd.Series(conf[:,1], index=test_data[:nbr].index)
plt.figure(figsize=(16, 8))
plt.plot(test_data[:nbr], label='Actual')
plt.plot(fc, label='Forecast')
plt.fill_between(lower.index, lower, upper, color='k', alpha=0.1)
plt.title("Forecast vs Actual")
plt.legend(loc="upper left")
FredM5 is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.