So I'm doing a time series forecasting project using an LSTM. The data pre-processing and training seem fine to me, but my test MAE and RMSE come out well below my training MAE and RMSE, which makes no sense to me. I've checked for data leakage but found nothing, and I can't locate the problem. Please help :/
My time series dataset has shape (samples = 141681, features = 14) for X and (141681, 1) for y. The 14 features are different time lags. I then split the data into training and test sets and normalised it:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = df_lagged.iloc[:, :-1]
y = df_lagged.iloc[:, -1:]

X_values = X.values
X_values = np.flip(X_values, axis=1)  # flip the lag columns so each row runs in temporal order (oldest -> newest)
y_values = y.values

# Split the data into training and testing sets (time-ordered, no shuffling)
split_index = 113664
X_train, X_test = X_values[:split_index], X_values[split_index:]
y_train, y_test = y_values[:split_index], y_values[split_index:]

# Fit on the training data only, then apply the same transform to the test data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
y_train_scaled = scaler.fit_transform(y_train)  # the same scaler object is re-fitted on the target
y_test_scaled = scaler.transform(y_test)
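For context, df_lagged isn't constructed in the snippet above; a lagged frame of this shape can be built with pandas.shift roughly as follows (an illustrative sketch with make_lagged_frame and raw_series as placeholder names, not the exact code from my project):

import pandas as pd

def make_lagged_frame(series: pd.Series, n_lags: int = 14) -> pd.DataFrame:
    # columns lag_1 (t-1) ... lag_14 (t-14), most recent lag first,
    # which is why the lag columns get flipped above to restore temporal order
    df = pd.DataFrame({f"lag_{k}": series.shift(k) for k in range(1, n_lags + 1)})
    df["target"] = series
    return df.dropna()

# df_lagged = make_lagged_frame(raw_series, n_lags=14)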
Then I put it into a (batch_size, seq_length, features) shape using a custom Dataset and a DataLoader:
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, data, target, seq_length):
        self.data = data
        self.target = target
        self.seq_length = seq_length

    def __len__(self):
        # one sample per possible window of length seq_length
        return len(self.data) - self.seq_length

    def __getitem__(self, index):
        # window of seq_length consecutive rows; target taken at the last step of the window
        x = self.data[index:index + self.seq_length]
        y = self.target[index + self.seq_length - 1]
        return x, y
import torch

X_train_tensors = torch.from_numpy(X_train_scaled).float().to(device)
y_train_tensors = torch.from_numpy(y_train_scaled).float().to(device)
X_test_tensors = torch.from_numpy(X_test_scaled).float().to(device)
y_test_tensors = torch.from_numpy(y_test_scaled).float().to(device)

seq_length = 48
batch_size = 64

train_dataset = TimeSeriesDataset(X_train_tensors, y_train_tensors, seq_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataset = TimeSeriesDataset(X_test_tensors, y_test_tensors, seq_length)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
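A quick sanity check on the loader confirms the shape the model sees:

xb, yb = next(iter(train_loader))
print(xb.shape)  # torch.Size([64, 48, 14]) -> (batch_size, seq_length, features)
print(yb.shape)  # torch.Size([64, 1])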
These are the hyperparameters I used and how I ran the training epochs:
import torch.nn as nn

input_size = X_train_tensors.shape[1]  # 14 lag features
hidden_size = 128
num_layers = 3
output_size = 1
num_epochs = 20
learning_rate = 0.001
batch_size = 64

model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

epoch_losses = []
for epoch in range(num_epochs):
    model.train()
    losses = []
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    epoch_losses.append(np.mean(losses))
    print(f'Epoch {epoch + 1}/{num_epochs}. Loss: {epoch_losses[-1]:.5f}.')
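The LSTM class itself isn't shown above; a model matching that constructor and the (batch, seq_length, features) batches would look roughly like this (a sketch, not necessarily my exact implementation):

import torch
import torch.nn as nn

class LSTM(nn.Module):
    # Sketch of an LSTM regressor matching the constructor used above;
    # the actual model in the project may differ in details.
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_length, input_size)
        out, _ = self.lstm(x)          # out: (batch, seq_length, hidden_size)
        return self.fc(out[:, -1, :])  # predict from the last time step: (batch, output_size)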
Then how I made predictions on the training data:
from sklearn.metrics import mean_absolute_error, mean_squared_error

model.eval()
train_predictions = []
with torch.no_grad():
    for batch_x, _ in train_loader:
        batch_x = batch_x.to(device)
        preds = model(batch_x)
        train_predictions.append(preds.cpu().numpy())

train_predictions = np.concatenate(train_predictions, axis=0)
# `scaler` was last fitted on y_train, so this undoes the target scaling
train_predictions_inverse = scaler.inverse_transform(train_predictions)

# the dataset yields len(data) - seq_length windows: 113664 - 48 = 113616 predictions
mae_train = mean_absolute_error(y_train[:113616], train_predictions_inverse)
rmse_train = np.sqrt(mean_squared_error(y_train[:113616], train_predictions_inverse))
print(f'Training Mean Absolute Error: {mae_train}, Training Root Mean Squared Error: {rmse_train}')
And on the test data, where I predicted the next 336 steps, i.e. the week ahead:
def predict_n_steps(model, test_loader, n_steps):
    model.eval()
    predictions = []
    with torch.no_grad():
        total_steps = 0
        for batch_x, _ in test_loader:
            if total_steps >= n_steps:
                break
            current_data = batch_x
            # Predict the target for this window (batch_size is 1 here, so one window per step)
            next_pred = model(current_data)
            # Append the prediction to the list of predictions
            predictions.append(next_pred.squeeze().cpu().detach().numpy())
            total_steps += 1  # increment the step counter
    test_predictions = np.array(predictions)
    return torch.from_numpy(test_predictions).float()

n_steps = 336  # number of steps to predict
test_predictions = predict_n_steps(model, test_loader, n_steps)

# undo the target scaling (the scaler currently holds the fit on y_train)
predictions_inverse = scaler.inverse_transform(test_predictions.unsqueeze(0))

mae = mean_absolute_error(y_test[:n_steps], predictions_inverse[0])
rmse = np.sqrt(mean_squared_error(y_test[:n_steps], predictions_inverse[0]))
print(f'Mean Absolute Error: {mae}, Root Mean Squared Error: {rmse}')
The results I have are:
- Training MAE: 2088.61, Training RMSE: 2777.73
- Test MAE: 1340.74, Test RMSE: 1904.74
And this is the loss curve (with MSE as the loss criterion): [loss curve plot]
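For reference, since MinMaxScaler is just a linear rescaling, the scaled-space training loss can be translated back into original units using the target's range (scaler.data_range_ holds the y range because the scaler was last fitted on y_train); roughly:

# RMSE in original units = RMSE in scaled units * (y_max - y_min) for a MinMax-scaled target
scaled_rmse = np.sqrt(epoch_losses[-1])             # final-epoch training MSE (scaled space)
approx_rmse_original = scaled_rmse * scaler.data_range_[0]
print(approx_rmse_original)                         # rough training RMSE in original units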
I tried changing the hyperparameters and so on, but it made no real difference.