In my code, I plant the label as the first feature of the first timestep, and the LSTM is unable to learn that the answer sits right there in the first timestep; it is almost as if the network is blind to it.
I ran this test because, on my real data, my LSTM treats some variables in the last timestep as having no importance for the prediction, yet a gradient-boosted tree finds that pattern on the same data (see the sketch after the code below).
What’s up with my LSTMs? They seem so poor.
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
test_length = 0.15
#features = np.random.rand(1000000, 20, 20)
# 100k samples, 40 timesteps, 40 features; everything is zero except the planted signal
features = np.zeros((100000, 40, 40))
labels = np.random.rand(100000)
# plant the label as feature 0 of timestep 0
features[:, 0, 0] = labels.copy()
# hold out the last 15% of the samples for validation
features_train = features[0:int(len(features)*(1-test_length))]
labels_train = labels[0:int(len(labels)*(1-test_length))]
features_test = features[int(len(features)*(1-test_length)):]
labels_test = labels[int(len(labels)*(1-test_length)):]
# many-to-one setup: return_sequences=False keeps only the last timestep's output
model = Sequential([
    LSTM(100, return_sequences=False),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(30, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear')
])
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(
    features_train, labels_train,
    epochs=1,
    batch_size=40,
    validation_data=(features_test, labels_test),
    verbose=1
)
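For reference, the tree baseline looks roughly like this; a minimal sketch assuming scikit-learn's GradientBoostingRegressor and a subsample for speed (my real pipeline is different, but the idea is the same):

# sketch of the tree comparison, assuming scikit-learn's GradientBoostingRegressor
# (not my exact model); flattening the sequences makes features[:, 0, 0] become column 0
from sklearn.ensemble import GradientBoostingRegressor

flat_train = features_train.reshape(len(features_train), -1)  # (n, 40*40)
tree = GradientBoostingRegressor(n_estimators=50, max_depth=3)
tree.fit(flat_train[:5000], labels_train[:5000])  # subsample to keep it fast
# column 0 holds the planted label, so its importance should be close to 1.0
print(tree.feature_importances_[0])

The tree picks out the planted column immediately, which is exactly the asymmetry I am asking about.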