I want to perform time-series analysis on the given data, where the model predicts the next 24 timesteps by looking at the last n datapoints (the window size). I wrote the code and ran it without any problems; however, when I increase the window size from 12 to 120, I no longer get any predictions — my loss functions return only nan values. I would appreciate help with this issue.
This is my code (I am open to any suggestions for improving my code and model):
import pandas as pd
import numpy as np

def get_dataset(x):
    dataset = pd.read_csv(x, sep=',')
    # Convert the 'starttime' column to datetime
    dataset['starttime'] = pd.to_datetime(dataset['starttime'])
    # Set the 'starttime' column as the index of the DataFrame
    dataset.set_index('starttime', inplace=True)
    # Drop the first three rows of the DataFrame
    dataset.drop(dataset.head(3).index, inplace=True)
    return dataset
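# A sanity check I would add here (a sketch; check_dataset is a hypothetical
# helper, and it assumes nothing about the CSV beyond the datetime index set
# above): NaNs or gaps in the raw series end up inside more training windows
# as window_size grows, which could explain the loss turning nan only at
# larger window sizes.
def check_dataset(dataset):
    print(dataset.isna().sum())  # missing values per column
    print(dataset.index.to_series().diff().value_counts())  # gaps in the time index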
def mldatasets(X, y):
    train_size = int(len(X) * 0.8)
    val_size = int(len(X) * 0.1) + train_size
    # Preparing the training, validation and test datasets
    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:val_size], y[train_size:val_size]
    X_test, y_test = X[val_size:], y[val_size:]
    return X_train, y_train, X_val, y_val, X_test, y_test
def df_to_X_y_horizon(df, window_size, forecast_horizon):
    X, y = [], []
    # Slide a window of length window_size over the series; the following
    # forecast_horizon rows form the target
    for i in range(len(df) - window_size - forecast_horizon + 1):
        X.append(df.iloc[i:i+window_size].values)
        y.append(df.iloc[i+window_size:i+window_size+forecast_horizon].values)
    return np.array(X), np.array(y)
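# A toy check of the windowing logic (a sketch with made-up data): with 200
# rows, window_size=120 and forecast_horizon=24 there should be
# 200 - 120 - 24 + 1 = 57 samples. The __main__ guard keeps this module
# importable from the training script.
if __name__ == '__main__':
    toy = pd.DataFrame({'value': np.arange(200, dtype=float)})
    X_toy, y_toy = df_to_X_y_horizon(toy, window_size=120, forecast_horizon=24)
    print(X_toy.shape, y_toy.shape)  # (57, 120, 1) (57, 24, 1)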
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, InputLayer
from tensorflow.keras.callbacks import ModelCheckpoint
from dataset_modifications import df_to_X_y_horizon, get_dataset, mldatasets
import random
# Setting random seed for reproducibility
tf.random.set_seed(0)
# Loading and preprocessing the dataset
dataset_path = "C:/Users/Yoked/Desktop/Internship/Forecasting/datasets/residential_load_h_2023.csv"
tm_df = get_dataset(x=dataset_path)
# Defining the window size and horizon
window_size = 24 * 5
forecast_horizon = 24
# Converting the DataFrame to feature and target arrays
X, y = df_to_X_y_horizon(tm_df, window_size, forecast_horizon)
# Scaling the data using StandardScaler
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X = scaler_X.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
y = scaler_y.fit_transform(y.reshape(-1, 1)).reshape(-1, forecast_horizon)
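# A side note on the scaling (a suggestion, unrelated to the nan): fitting the
# scalers on the full arrays leaks statistics from the validation and test
# periods into training. A leakage-free sketch would fit on the training slice
# only, e.g.:
#   train_size = int(len(X) * 0.8)
#   scaler_X.fit(X[:train_size].reshape(-1, X.shape[-1]))
#   X = scaler_X.transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
# (and likewise for scaler_y).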
# Preparing the training, validation, and test datasets
X_train, y_train, X_val, y_val, X_test, y_test = mldatasets(X, y)
# Defining the LSTM model with the correct input shape
model = Sequential()
model.add(InputLayer(input_shape=(window_size, X.shape[-1])))
model.add(LSTM(64, activation='relu', dropout=0.02))
model.add(Dense(forecast_horizon, activation='linear'))
# Displaying the model summary
model.summary()
# Setting up model checkpointing
cp = ModelCheckpoint('model1/', save_best_only=True)
# Compiling the model
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
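# One mitigation I am considering (a sketch; that exploding gradients are the
# cause here is an assumption): 'relu' inside an LSTM can blow up over long
# sequences, so clipping gradients, or reverting to the LSTM's default 'tanh'
# activation, are common remedies for nan losses that appear only at larger
# window sizes:
# model.compile(loss='mse',
#               optimizer=tf.keras.optimizers.Adam(clipnorm=1.0),
#               metrics=['mae'])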
# Training the model and capturing the history
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, callbacks=[cp])
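# A debugging aid (sketch): TerminateOnNaN stops training at the first nan
# batch loss, which shows whether the nan appears on the very first step
# (suspect inputs) or only after some steps (suspect diverging gradients):
# history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
#                     epochs=10, callbacks=[cp, tf.keras.callbacks.TerminateOnNaN()])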
# Plotting the learning curve
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Learning Curve')
plt.legend()
plt.show()
# Making predictions on the test set
y_pred = model.predict(X_test)
# Reshape y_test and y_pred to 2D for inverse transformation
y_test_flat = y_test.reshape(-1, forecast_horizon)
y_pred_flat = y_pred.reshape(-1, forecast_horizon)
# Inverse transform the predictions and actual values
y_pred_inv = scaler_y.inverse_transform(y_pred_flat)
y_test_inv = scaler_y.inverse_transform(y_test_flat)
# Reshape back to original 3D shape
y_pred_inv = y_pred_inv.reshape(y_pred.shape)
y_test_inv = y_test_inv.reshape(y_test.shape)
# Plot predictions vs actual values
plt.figure(figsize=(12, 6))
plt.plot(y_test_inv.flatten(), alpha=0.5, label='Actual')
plt.plot(y_pred_inv.flatten(), alpha=0.5, label='Predicted')
plt.legend()
plt.show()
# Plot random predictions vs actual values
for _ in range(5):
    random_idx = random.randint(0, len(y_test_inv) - 1)
    plt.figure(figsize=(12, 6))
    plt.plot(y_test_inv[random_idx], color='blue', label='Actual')
    plt.plot(y_pred_inv[random_idx], color='red', label='Predicted')
    plt.legend()
    plt.show()
After running this code, I got results that make no sense. The loss values after each epoch are shown below:
Epoch 1/10
216/216 [==============================] - 10s 39ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 2/10
216/216 [==============================] - 8s 35ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 3/10
216/216 [==============================] - 8s 35ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 4/10
216/216 [==============================] - 9s 40ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 5/10
216/216 [==============================] - 9s 40ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 6/10
216/216 [==============================] - 9s 40ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 7/10
216/216 [==============================] - 8s 37ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 8/10
216/216 [==============================] - 7s 34ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 9/10
216/216 [==============================] - 7s 34ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 10/10
216/216 [==============================] - 8s 35ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
I get sensible results when I use a window size between 12 and 48.