Thiết kế website giá rẻ

Question

i am trying to build a model Gru forecasting multivariate for my AI Assigment. And I chosen Gru for predict number of profile will arise in the future (about 1 month). And i have dataset like this:

| date | number_profile | average resolution period | average attachment| is_weekend | is_holiday |
| —- | ———— | ———————— | —————- | ——— | ——— |
| 01/01/2019 | 0 | 0 0 | 1 | 1 |
| 02/01/2019 | 15 | 3.4 | 2.7 | 0 | 0 |
………………………
| date | number_profile | average resolution period | average attachment| is_weekend | is_holiday |
| —- | ———— | ———————— | —————- | ——— | ——— |
| 28/12/2023 | 190 | 1.6 2.1 | 0 | 0 |
| 29/12/2023 | 350 | 1.4 | 1.5 | 0 | 0 |

from 01/01/2019 to 31/12/2023 about 1,800 rows

And my target predictin is number_profile . And heer is my code:

import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")

from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.layers import Dense, GRU, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Load data
data_dir = '/kaggle/input/datatest/final_fantasy_new.csv'
df = pd.read_csv(data_dir)

# Select features
features = df[['number_profile','average_resolution_period','average_attachment','is_weekend', 'is_holiday']]

# Scale data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Create training and testing sets
train_size = int(len(scaled_features) * 0.90)
test_size = len(scaled_features) - train_size

train_data = scaled_features[:train_size]
test_data = scaled_features[train_size:]

# Create dataset with time_steps
def create_dataset(data, time_steps=1):
    X, y = [], []
    for i in range(time_steps, len(data)):
        X.append(data[i-time_steps:i, :])
        y.append(data[i, 0]) 
    return np.array(X), np.array(y)

time_steps = 7
X_train, y_train = create_dataset(train_data, time_steps)
X_test, y_test = create_dataset(test_data, time_steps)


# Build GRU model
model = Sequential([
    GRU(100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    GRU(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='tanh'),
    Dense(1)  # Output layer cho dự đoán hs_online_phatsinh
])

model.compile(optimizer='adam', loss='mse', metrics=['mean_absolute_error'])
model.summary()

# Add early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)

# Fit model
history = model.fit(X_train, y_train, epochs=200, batch_size=106, validation_data=(X_test, y_test), callbacks=[early_stopping])

# Plot loss
plt.figure(figsize=(12, 8))
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.legend(['Training Loss', 'Validation Loss'])
plt.title("Losses")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()

# Predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(np.concatenate((predictions, np.zeros((predictions.shape[0], scaled_features.shape[1] - 1))), axis=1))[:, 0]
y_test = scaler.inverse_transform(np.concatenate((y_test.reshape(-1, 1), np.zeros((y_test.shape[0], scaled_features.shape[1] - 1))), axis=1))[:, 0]

# Evaluate model
RMSE = np.sqrt(mean_squared_error(y_test, predictions)).round(2)
MSE = mean_squared_error(y_test, predictions).round(2)
MAE = mean_absolute_error(y_test, predictions).round(2)
SMAPE = (100 * np.mean(np.abs(predictions - y_test) / ((np.abs(y_test) + np.abs(predictions)) / 2))).round(2)

print(f'RMSE: {RMSE}')
print(f'MSE: {MSE}')
print(f'MAE: {MAE}')
print(f'SMAPE: {SMAPE}')

# Adjust test set size for plotting
test = df.iloc[train_size+time_steps:, :]  # start from train_size + time_steps to match prediction length

# Plot predictions vs actuals
plt.figure(figsize=(16, 6))
plt.plot(y_test, label='Actuals')
plt.plot(predictions, label='Predictions')
plt.legend()
plt.show()

# Plot train and test
train = df.iloc[:train_size, :]
test = df.iloc[train_size:, :].copy()
test = test.iloc[time_steps:]  # Skip initial time_steps to match the length of predictions
test['Predictions'] = predictions

plt.figure(figsize=(16, 8))
plt.title('number_profile Prediction', fontsize=18)
plt.xlabel('Date', fontsize=18)
plt.ylabel('number_profile', fontsize=18)
plt.plot(train['number_profile'], linewidth=3)
plt.plot(test['number_profile'], linewidth=3)
plt.plot(test['Predictions'], linewidth=3)
plt.legend(['Train', 'Test', 'Predictions'])
plt.show()

And this is my evaluate model:
RMSE: 41.73
MSE: 1741.44
MAE: 26.31
SMAPE: 78.89

Where did i make a mistake? 🙁

I want to increase the prediction rate closer to the actual value

Thiết kế website giá rẻ

Danh mục

Why is my model LSTM predict not close actual value?