I’m encountering a ValueError in my Python code when trying to perform time series prediction using an LSTM model. The error specifically states:
ValueError: Found input variables with inconsistent numbers of samples: [741, 2223]
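For context, scikit-learn raises this error whenever the two arrays passed to a metric have different lengths. A minimal two-line reproduction (dummy numbers, not my data) is:

from sklearn.metrics import r2_score
r2_score([1, 2, 3], [1, 2, 3, 4])  # ValueError: Found input variables with inconsistent numbers of samples: [3, 4]

So in my case one array ends up with 741 samples and the other with 2223.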
Here’s a simplified version of my code structure:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.sequence import TimeseriesGenerator
import dash
from dash import dcc
from dash import html
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler(feature_range=(0,1))
colors = {
    'background': '#111111',
    'text': '#7FDBFF'
}
tab_selected_style = {
    'borderTop': '1px solid #111111',
    'borderBottom': '1px solid #111111',
    'backgroundColor': 'hotpink',
    'color': '#111111',
}
tab_style = {
    'fontWeight': 'bold',
    'backgroundColor': '#111111',
    'color': 'hotpink',
}
def download_and_process_data(stock_name):
    import yfinance
    import pandas as pd
    df = yfinance.download(stock_name, period="15y")
    df.reset_index(inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_axis(df['Date'], inplace=True)
    close_data = df['Close'].values
    close_data = close_data.reshape((-1, 1))
    info = yfinance.Ticker(stock_name)
    return df, close_data, info
def split_data(close_data, df):
    split_percent = 80/100
    split = int(split_percent * len(close_data))
    close_train = close_data[:split]
    close_test = close_data[split:]
    date_train = df['Date'][:split]
    date_test = df['Date'][split:]
    return close_train, close_test, date_train, date_test
def scale_data(close_train, close_test):
    close_train = scaler.fit_transform(close_train)
    close_test = scaler.transform(close_test)
    return close_train, close_test
def sequence_to_supervised(look_back, close_train, close_test):
    from keras.preprocessing.sequence import TimeseriesGenerator
    train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=20)
    test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)
    return train_generator, test_generator
def train_model(look_back, train_generator, epochs):
    from keras.models import Sequential
    from keras.layers import LSTM, Dense
    from keras.optimizers import Adagrad
    lstm_model = Sequential()
    lstm_model.add(
        LSTM(10,
             activation='relu',
             input_shape=(look_back, 1))
    )
    lstm_model.add(Dense(1, activation='relu'))
    lstm_model.add(Dense(2, activation='relu'))
    lstm_model.add(Dense(3))
    opt = Adagrad(lr=0.001)
    lstm_model.compile(optimizer=opt, loss='mse')
    lstm_model.fit(train_generator, epochs=epochs)
    return lstm_model
def plot_train_test_graph(stock, model, test_generator, close_train, close_test, date_train, date_test):
    from plotly import graph_objs as go
    prediction = model.predict(test_generator)
    # close_test = scaler.inverse_transform(close_test)
    # close_train = scaler.inverse_transform(close_train)
    # print(close_train, close_test, prediction)
    close_train = close_train.reshape((-1))
    close_test = close_test.reshape((-1))
    prediction = prediction.reshape((-1))
    trace1 = go.Scatter(
        x = date_train,
        y = close_train,
        mode = 'lines',
        name = 'Data'
    )
    trace2 = go.Scatter(
        x = date_test,
        y = prediction,
        mode = 'lines',
        name = 'Prediction',
        line = dict(color='red')
    )
    trace3 = go.Scatter(
        x = date_test,
        y = close_test,
        mode = 'lines',
        name = 'Ground Truth'
    )
    layout = go.Layout(
        title = stock,
        xaxis = {'title': "Date"},
        yaxis = {'title': "Close"}
    )
    figure = go.Figure(data=[trace1, trace2, trace3], layout=layout)
    from sklearn.metrics import r2_score
    score = r2_score(close_test[:-15], prediction)
    figure.update_layout(
        paper_bgcolor=colors['background'],
        plot_bgcolor=colors["background"],
        font_color=colors['text'])
    return figure, score
def predict(num_prediction, model, close_data, look_back):
    prediction_list = close_data[-look_back:]
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
    return prediction_list
def predict_dates(num_prediction, df):
    last_date = df['Date'].values[-1]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
    return prediction_dates
def predicting(close_data, model, look_back, df):
    close_data = close_data.reshape((-1))
    num_prediction = 30
    forecast = predict(num_prediction, model, close_data, look_back)
    forecast_dates = predict_dates(num_prediction, df)
    return close_data, forecast, forecast_dates
def plot_future_prediction(model, test_generator, close_train, close_test, df, forecast_dates, forecast):
    from plotly import graph_objs as go
    prediction = model.predict(test_generator)
    # prediction = scaler.inverse_transform(prediction)
    # close_test = scaler.inverse_transform(close_test)
    # close_train = scaler.inverse_transform(close_train)
    close_train = close_train.reshape((-1))
    close_test = close_test.reshape((-1))
    prediction = prediction.reshape((-1))
    trace1 = go.Scatter(
        x = df['Date'],
        y = df['Close'],
        mode = 'lines',
        name = 'Data'
    )
    trace2 = go.Scatter(
        x = forecast_dates,
        y = forecast,
        mode = 'lines',
        name = 'Prediction'
    )
    layout = go.Layout(
        title = "FUTURE PREDICTION",
        xaxis = {'title': "Date"},
        yaxis = {'title': "Close"}
    )
    figure = go.Figure(data=[trace1, trace2], layout=layout)
    figure.update_layout(
        paper_bgcolor=colors['background'],
        plot_bgcolor=colors["background"],
        font_color=colors['text'])
    return figure
'''
Main Variables :
stock_name, df, close_data, look_back, close_train, close_test, train_generator, epochs
model, test_generator, date_train, date_test, num_predictions
'''
'''
1. download & process data
2. split the data
3. convert sequenced data to supervised data.
4. train the model
5. plot the training prediction
6. predict future rates
'''
def return_empty_graph():
    empty = {
        "layout": {
            "xaxis": {
                "visible": False
            },
            "yaxis": {
                "visible": False
            },
            "annotations": [
                {
                    "text": "No matching data found\nOr\nYou entered incorrect stock name...",
                    "xref": "paper",
                    "yref": "paper",
                    "showarrow": False,
                    "font": {
                        "size": 28
                    }
                }
            ],
            'plot_bgcolor': colors['background'],
            "paper_bgcolor": "#111111",
            "font_color": colors["text"]
        }
    }
    return empty
def update_graph(value):
    stock = value
    df, close_data, info = download_and_process_data(stock)
    empty = return_empty_graph()
    close_train, close_test, date_train, date_test = split_data(close_data, df)
    close_train, close_test = scale_data(close_train, close_test)
    train_generator, test_generator = sequence_to_supervised(15, close_train, close_test)
    lstm_model = train_model(15, train_generator, 20)
    figure_1, r2_score = plot_train_test_graph(stock, lstm_model, test_generator, close_train, close_test, date_train, date_test)
    close_data, forecast, forecast_dates = predicting(close_data, lstm_model, 15, df)
    figure_2 = plot_future_prediction(lstm_model, test_generator, close_train, close_test, df, forecast_dates, forecast)
    r2_score = "R2 Score : {}".format(r2_score)
    return figure_1, figure_2, r2_score, info.info['longBusinessSummary']
if __name__ == '__main__':
    update_graph('AAPL')
The error is raised inside the plot_train_test_graph function, specifically at the r2_score call. I suspect the close_test and prediction arrays have inconsistent lengths at that point, which leads to the ValueError.
I've already checked the data splitting and sequence generation functions to ensure the lengths line up, but I'm still hitting this error. Could someone please help me identify where the inconsistency comes from and how I can resolve it?
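For reference, here is a small diagnostic (not part of my original script; it only prints shapes) that I can drop in just before the score = r2_score(...) line inside plot_train_test_graph, in case the shapes help pinpoint the mismatch:

    # Temporary diagnostic: shapes of the arrays that feed into r2_score.
    # The test generator yields len(close_test) - look_back samples, and
    # prediction is whatever model.predict returns, flattened by reshape((-1)).
    print("close_test:", close_test.shape)
    print("prediction:", prediction.shape)
    print("y_true passed to r2_score:", close_test[:-15].shape)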