I have a future forecast built with SVR. The forecasting code runs without any errors, but the visualization seems a little off: the forecast looks very different from the actual dataset used for training.
Here’s my full code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
# Load the data
df = pd.read_csv('covid1.csv')
print(df.head())

# Express every observation date as whole days elapsed since the earliest date.
df['ObservationDate'] = pd.to_datetime(df['ObservationDate'])
start_date = df['ObservationDate'].min()
df['Days'] = (df['ObservationDate'] - start_date).dt.days

# Daily Confirm column
# Sort chronologically and renumber the rows so that label 0 is guaranteed to
# be the earliest day. Without the reset, `df.loc[0, ...]` addresses whichever
# row came first in the CSV, which is only the earliest day if the file was
# already in date order.
df = df.sort_values('Days').reset_index(drop=True)
df['Daily Confirmed'] = df['Confirmed'].diff()
# diff() leaves NaN in the first row; reuse the second day's increment there.
df.loc[0, 'Daily Confirmed'] = df['Daily Confirmed'].iloc[1]
# Reshape 'Days' into 2D array for SVR
# The single feature is the day index as an (n_samples, 1) column; each target
# is the matching column of the frame.
X = df['Days'].to_numpy().reshape(-1, 1)
y_deaths, y_recovered = df['Deaths'], df['Recovered']
y_confirmed, y_daily_confirmed = df['Confirmed'], df['Daily Confirmed']
# Split the data into training and testing sets
# Every target uses the same test fraction (40%) and the same seed.
split_options = {'test_size': 0.4, 'random_state': 1}
X_train_deaths, X_test_deaths, y_train_deaths, y_test_deaths = train_test_split(
    X, y_deaths, **split_options
)
X_train_recovered, X_test_recovered, y_train_recovered, y_test_recovered = train_test_split(
    X, y_recovered, **split_options
)
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    X, y_confirmed, **split_options
)
X_train_daily_confirmed, X_test_daily_confirmed, y_train_daily_confirmed, y_test_daily_confirmed = train_test_split(
    X, y_daily_confirmed, **split_options
)
# Scale the data
# A single StandardScaler instance is re-fitted for each target in turn, so
# once this section has run `scaler` holds the statistics of
# X_train_daily_confirmed (the last fit). That is the state used later to
# transform the future days.
scaler = StandardScaler()

X_train_scaled_deaths = scaler.fit(X_train_deaths).transform(X_train_deaths)
X_test_scaled_deaths = scaler.transform(X_test_deaths)

X_train_scaled_recovered = scaler.fit(X_train_recovered).transform(X_train_recovered)
X_test_scaled_recovered = scaler.transform(X_test_recovered)

X_train_scaled_confirmed = scaler.fit(X_train_confirmed).transform(X_train_confirmed)
X_test_scaled_confirmed = scaler.transform(X_test_confirmed)

X_train_scaled_daily_confirmed = scaler.fit(X_train_daily_confirmed).transform(X_train_daily_confirmed)
X_test_scaled_daily_confirmed = scaler.transform(X_test_daily_confirmed)
# Define the Support Vector Regression models with the provided reference hyperparameters
# Everything except the regularisation strength C is shared by the four models.
svr_shared_params = dict(
    cache_size=200,
    coef0=0.0,
    degree=3,
    epsilon=0.1,
    gamma='auto',
    kernel='rbf',
    max_iter=-1,
    shrinking=True,
    tol=0.001,
    verbose=False,
)
regressor_deaths = SVR(C=100000, **svr_shared_params)
regressor_recovered = SVR(C=500000, **svr_shared_params)
regressor_confirmed = SVR(C=500000, **svr_shared_params)
regressor_daily_confirmed = SVR(C=100000, **svr_shared_params)
# Train the models
# Each regressor is fitted on the scaled training days for its own target.
for model, train_days, train_target in (
    (regressor_deaths, X_train_scaled_deaths, y_train_deaths),
    (regressor_recovered, X_train_scaled_recovered, y_train_recovered),
    (regressor_confirmed, X_train_scaled_confirmed, y_train_confirmed),
    (regressor_daily_confirmed, X_train_scaled_daily_confirmed, y_train_daily_confirmed),
):
    model.fit(train_days, train_target)
# Make predictions
# Predict each target on the scaled held-out days, in the order
# deaths, recovered, confirmed, daily confirmed.
y_pred_deaths, y_pred_recovered, y_pred_confirmed, y_pred_daily_confirmed = (
    model.predict(held_out_days)
    for model, held_out_days in (
        (regressor_deaths, X_test_scaled_deaths),
        (regressor_recovered, X_test_scaled_recovered),
        (regressor_confirmed, X_test_scaled_confirmed),
        (regressor_daily_confirmed, X_test_scaled_daily_confirmed),
    )
)
# Calculate performance metrics
def calculate_metrics(y_true, y_pred):
    """Return the error metrics for one set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.

    Returns:
        A ``(mse, rmse, r2)`` tuple: the mean squared error, its square
        root, and the R^2 score.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    return mse, rmse, r2
# Score each model on its held-out days and report the result straight away.
mse_deaths, rmse_deaths, r2_deaths = calculate_metrics(y_test_deaths, y_pred_deaths)
print(f"Deaths - MSE: {mse_deaths}, RMSE: {rmse_deaths}, R2: {r2_deaths}")

mse_recovered, rmse_recovered, r2_recovered = calculate_metrics(y_test_recovered, y_pred_recovered)
print(f"Recovered - MSE: {mse_recovered}, RMSE: {rmse_recovered}, R2: {r2_recovered}")

mse_confirmed, rmse_confirmed, r2_confirmed = calculate_metrics(y_test_confirmed, y_pred_confirmed)
print(f"Confirmed - MSE: {mse_confirmed}, RMSE: {rmse_confirmed}, R2: {r2_confirmed}")

mse_daily_confirmed, rmse_daily_confirmed, r2_daily_confirmed = calculate_metrics(
    y_test_daily_confirmed, y_pred_daily_confirmed
)
print(f"Daily Confirmed - MSE: {mse_daily_confirmed}, RMSE: {rmse_daily_confirmed}, R2: {r2_daily_confirmed}")
# Plot the regression results
# One 2x2 figure: for every target, all observed values (magenta) are overlaid
# with the model's predictions on the held-out days (green).
plt.figure(figsize=(10, 6))
test_panels = (
    (221, 'Deaths', y_deaths, X_test_deaths, y_pred_deaths),
    (222, 'Recovered', y_recovered, X_test_recovered, y_pred_recovered),
    (223, 'Confirmed', y_confirmed, X_test_confirmed, y_pred_confirmed),
    (224, 'Daily Confirmed', y_daily_confirmed, X_test_daily_confirmed, y_pred_daily_confirmed),
)
for position, target_name, observed, held_out_days, predicted in test_panels:
    plt.subplot(position)
    plt.scatter(X, observed, color='magenta', label='Original Data')
    plt.scatter(held_out_days, predicted, color='green', label='Test Data')
    plt.title(f'Covid19 {target_name} (Support Vector Regression Model)')
    plt.xlabel('Days')
    plt.ylabel(target_name)
    plt.legend()
plt.tight_layout()
plt.show()
# Predict for future 30 days
future_days = 30
last_observed_day = df['Days'].max()
# Day indices immediately after the last observation, as an (n, 1) column.
future_X = np.arange(last_observed_day + 1, last_observed_day + future_days + 1).reshape(-1, 1)
future_X_scaled = scaler.transform(future_X)
# NOTE(review): these days lie beyond every day the models were fitted on.
# An RBF-kernel SVR's output presumably drifts toward its intercept as inputs
# move away from the training range, which would bend the forecast away from
# the observed trend -- confirm before trusting the extrapolation.
future_pred_deaths = regressor_deaths.predict(future_X_scaled)
future_pred_recovered = regressor_recovered.predict(future_X_scaled)
future_pred_confirmed = regressor_confirmed.predict(future_X_scaled)
future_pred_daily_confirmed = regressor_daily_confirmed.predict(future_X_scaled)
# Combine original data with future predictions
forecast_columns = {
    'Days': future_X.ravel(),
    'Deaths': future_pred_deaths,
    'Recovered': future_pred_recovered,
    'Confirmed': future_pred_confirmed,
    'Daily Confirmed': future_pred_daily_confirmed,
}
future_days_df = pd.DataFrame(forecast_columns)
combined_df = pd.concat([df, future_days_df], ignore_index=True)
# Plot the regression results with future predictions
# One 2x2 figure: observed history (magenta) next to the 30-day forecast (green).
# The "Original Data" series is drawn from `df` only. `combined_df` already
# contains the forecast rows, so plotting it under that label made the
# observed curve run straight through the predicted values.
plt.figure(figsize=(10, 6))
forecast_panels = (
    (221, 'Deaths'),
    (222, 'Recovered'),
    (223, 'Confirmed'),
    (224, 'Daily Confirmed'),
)
for position, column in forecast_panels:
    plt.subplot(position)
    plt.plot(df['Days'], df[column], color='magenta', label='Original Data')
    plt.plot(future_days_df['Days'], future_days_df[column], color='green', label='Future Predictions')
    plt.title(f'Covid19 {column} (Support Vector Regression Model)')
    plt.xlabel('Days')
    plt.ylabel(column)
    plt.legend()
plt.tight_layout()
plt.show()
Here’s my visualization of the future forecast:
Plot for future forecasting
Maybe there’s something wrong with my model. I am not allowed to use any model other than SVR.
New contributor
lil Biggas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.