Thiết kế website giá rẻ

Question

I have recently trained a predictive model using historical climate data from the file ClimateFlow_Data1950_2021_Daily_spider.xlsx, specifically focusing on the ‘RCP26’ sheet. My aim is to utilize this model to forecast Flow values using data from another file, RCPs_2025_2100.xlsx, with the target sheet being ‘RCPs_2025_2100’.

To provide some context, the predictors in this scenario are Precipitation (PPT) and Maximum Temperature (Tmax), while the target variable I am seeking to predict is Flow.

However, I’ve encountered an issue: the Flow column in the second data file (RCPs_2025_2100.xlsx) is empty, and I need to fill it with the predicted values generated by my model.

Could someone kindly guide me on how to proceed with this task? I’d greatly appreciate any insights or suggestions you might have.

Thank you in advance!



df.head()
Out[6]: 
                 Date       PPT      Tmax  Flow
0 1950-01-01 12:00:00 -0.000092 -3.384451  58.0
1 1950-01-02 12:00:00  2.026306 -4.189279  61.2
2 1950-01-03 12:00:00 -0.000092 -2.875478  63.4
3 1950-01-04 12:00:00 -0.000092 -3.103854  49.0
4 1950-01-05 12:00:00  1.196228 -2.425506  60.9

df2=df = pd.read_excel('C:/LSTM_Python_Climate_streamflow/RCPs_2025_2100.xlsx', sheet_name='RCPs_2025_2100')

df2.head()
Out[8]: 
        Date   PPT  Tmax  Flow
0 2025-01-01  3.96 -0.87   NaN
1 2025-01-02  2.93 -2.05   NaN
2 2025-01-03  3.01 -1.32   NaN
3 2025-01-04  4.57 -1.67   NaN
4 2025-01-05  2.16 -1.89   NaN


import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

df = pd.read_excel('C:/LSTM_Python_Climate_streamflow/ClimateFlow_Data1950_2021_Daily_spider.xlsx')

df.head()

# Drop rows with missing values

df.dropna(inplace=True)



#Find the correlation

import seaborn as sn

sn.heatmap(df.corr())



training_set = df.iloc[:26298, 1:5].values

test_set = df.iloc[26298:, 1:5].values



from sklearn.preprocessing import MinMaxScaler



sc = MinMaxScaler(feature_range = (0,1))



training_set_scaled = sc.fit_transform(training_set)

test_set_scaled = sc.fit_transform(test_set)



test_set_scaled = test_set_scaled[:, 0:3]



X_train = []

y_train = []

WS = 24



for i in range(WS, len(training_set_scaled)):

    X_train.append(training_set_scaled[i-WS:i, 0:4])

    y_train.append(training_set_scaled[i,3])

   

X_train, y_train = np.array(X_train), np.array(y_train)



#X_train = np.reshape(X_train,(X_train.shape[0], X_train.shape[3], 4))



X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))

from keras.models import Sequential

from keras.layers import LSTM

from keras.layers import Dense

from keras.layers import Dropout

Model = Sequential()

Model.add(LSTM(units = 70, return_sequences = True, input_shape = (X_train.shape[1], 4)))

Model.add(Dropout(0.2))

Model.add(LSTM(units = 70, return_sequences = True))

Model.add(Dropout(0.2))

Model.add(LSTM(units = 70, return_sequences = True))

Model.add(Dropout(0.2))

Model.add(LSTM(units = 70))

Model.add(Dropout(0.2))

Model.add(Dense(units = 1))

Model.compile(optimizer = 'adam', loss = 'mean_squared_error')

Model.fit(X_train,y_train, epochs = 5, batch_size = 32)



plt.plot(range(len(Model.history.history['loss'])), Model.history.history['loss'])

plt.xlabel('Epoch Number')

plt.ylabel('Loss')

plt.show()



Model.save('LSTM_ClimateChange_RCP85_24.h5')



from keras.models import load_model

Model = load_model('LSTM_ClimateChange_RCP85_24.h5')



prediction_test = []



Batch_one = training_set_scaled[-24:]

Batch_New = Batch_one.reshape((1,24,4))



for i in range(48):

    First_pred = Model.predict(Batch_New)[0]

    prediction_test.append(First_pred)   

    New_var = test_set_scaled[i,:]

    New_var = New_var.reshape(1,3)

    New_test = np.insert(New_var, 3, [First_pred], axis =1)   

    New_test = New_test.reshape(1,1,4)   

    Batch_New = np.append(Batch_New[:,1:,:], New_test, axis=1)

   

prediction_test = np.array(prediction_test)



SI = MinMaxScaler(feature_range = (0,3))

y_Scale = training_set[:,3:4]

SI.fit_transform(y_Scale)



predictions = SI.inverse_transform(prediction_test)

real_values = test_set[:, 3]



# Plot the first 100 values

plt.plot(real_values[:100], color='black', label='Actual Flow')

plt.plot(predictions[:100], color='blue', label='Predicted Flow')

plt.title('Humber River Flow Prediction')

plt.xlabel('Time (Day)')

plt.ylabel('Flow (M3/s)')

plt.legend()

plt.show()


#### Future data prediction from second data file

df2=df = pd.read_excel('C:/LSTM_Python_Climate_streamflow/RCPs_2025_2100.xlsx', sheet_name='RCPs_2025_2100')
df2.head()
df2.columns = df.columns.str.strip()
df2.index = pd.to_datetime(df['Date'], format='%Y.%m.%d')
df2.head()

print(df2.columns)

flow = df2['Flow']


def df_to_X_y(df2, window_size=5):
  df_as_np = df.to_numpy()
  X = []
  y = []
  for i in range(len(df_as_np)-window_size):
    row = [[a] for a in df_as_np[i:i+window_size]]
    X.append(row)
    label = df_as_np[i+window_size]
    y.append(label)
  return np.array(X), np.array(y)

WINDOW_SIZE = 5
X1, y1 = df_to_X_y(flow, WINDOW_SIZE)
X1.shape, y1.shape

X_pred, y_pred = X1[:20000], y1[:20000]

X_pred.shape, y_pred.shape, 

predictions = model1.predict(X_pred)


df2.index = pd.to_datetime(df2['Date'])


df2.drop(columns=['Date'], inplace=True)

def df_to_sequences(df2, window_size=5):
    sequences = []
    for i in range(len(df) - window_size):
        sequence = df.iloc[i:i + window_size]
        sequences.append(sequence)
    return sequences


sequences = df_to_sequences(df2)

X_new = np.array(sequences)

#X_new = X_new.reshape(X_new.shape[0], X_new.shape[1], X_new.shape[2])

predictions = model1.predict(X_new)

print("X_new shape:", X_new.shape)  # Debugging statement
predictions = model1.predict(X_new)

[enter image description here](https://i.sstatic.net/vQzzPSo7.jpg)

Thiết kế website giá rẻ

Danh mục

LSTM Prediction difficulties – Help Needed with Using Trained Model to Predict Future Flow Values