Hi everyone, I have a problem training a model that contains an LSTM layer.
I suspect the issue may be related to adding a DataLoader, because when I trained on the full dataset in a single batch, the loss decreased (a rough sketch of that earlier loop is right below).
I would be very grateful for any help.
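For context, this is roughly what the earlier full-dataset version looked like (a sketch from memory, no DataLoader, same variable names as in the snippets below), and there the loss went down:

# Rough sketch of the earlier full-dataset training loop (no batching).
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = lstm1(X_train_t_final)       # whole training set as one batch
    loss = criterion(outputs, y_train_t)
    loss.backward()
    optimizer.step()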
Config:
num_epochs = 200
learning_rate = 1e-1
input_size = 599
hidden_size = 100
num_layers = 2
num_classes = 1
batch_size = 32
Data preparation:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

X_train_t = Variable(torch.Tensor(X_train.values))
y_train_t = Variable(torch.Tensor(y_train))
X_test_t = Variable(torch.Tensor(X_test.values))
y_test_t = Variable(torch.Tensor(y_test))
X_train_t_final = torch.reshape(X_train_t, (X_train_t.shape[0], 1, X_train_t.shape[1]))
X_test_t_final = torch.reshape(X_test_t, (X_test_t.shape[0], 1, X_test_t.shape[1]))
print("Training Shape", X_train_t_final.shape, y_train_t.shape)
print("Testing Shape", X_test_t_final.shape, y_test_t.shape)
train_dataset = TensorDataset(X_train_t_final, y_train_t)
test_dataset = TensorDataset(X_test_t_final, y_test_t)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
Output:
Training Shape torch.Size([8000, 1, 599]) torch.Size([8000, 1])
Testing Shape torch.Size([998, 1, 599]) torch.Size([998, 1])
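One batch from the loader has the shape I would expect (quick check, not part of the training script):

xb, yb = next(iter(train_loader))
print("Batch shape", xb.shape, yb.shape)
# Batch shape torch.Size([32, 1, 599]) torch.Size([32, 1])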
Model:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, 128)
        self.fc = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        # x: (batch, seq_len=1, input_size) because of batch_first=True
        h_0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size)
        c_0 = torch.zeros(self.num_layers, x.shape[0], self.hidden_size)
        output, (hn, cn) = self.lstm(x, (h_0, c_0))
        hn = hn[-1]  # hidden state of the last LSTM layer, shape (batch, hidden_size)
        out = self.relu(hn)
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc(out)
        return out
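A dummy forward pass on a batch-shaped input returns one value per sample, as intended (again just a shape check, not part of the training script):

dummy = torch.zeros(batch_size, 1, input_size)           # (batch, seq_len=1, features)
model_check = LSTM1(num_classes, input_size, hidden_size, num_layers)
print(model_check(dummy).shape)                          # torch.Size([32, 1])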
Training:
lstm1 = LSTM1(num_classes, input_size, hidden_size, num_layers)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 90, 120, 150, 180], gamma=0.2)

loss_train, loss_test = [], []
mean_loss_train, mean_loss_test = 0, 0

for epoch in tqdm(range(num_epochs)):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        outputs = lstm1(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()
        mean_loss_train += loss.item()
    loss_train.append(mean_loss_train / len(train_loader))
    scheduler.step()

    with torch.no_grad():
        for xb, yb in test_loader:
            outputs = lstm1(xb)
            loss = criterion(outputs, yb)
            mean_loss_test += loss.item()
        loss_test.append(mean_loss_test / len(test_loader))

    if (epoch + 1) % 20 == 0:
        print(f"Epoch: {epoch+1}, loss_train: {round(loss_train[-1], 5)}, loss_test: {round(loss_test[-1], 5)}, lr: {round(scheduler.get_last_lr()[0], 8)}")
Training output:
Epoch: 20, loss_train: 82.76209, loss_test: 15.35703, lr: 0.1
Epoch: 40, loss_train: 85.63364, loss_test: 18.2109, lr: 0.02
Epoch: 60, loss_train: 85.87259, loss_test: 18.30187, lr: 0.004
Epoch: 80, loss_train: 85.95106, loss_test: 18.39842, lr: 0.004
Epoch: 100, loss_train: 86.0281, loss_test: 18.49544, lr: 0.0008
Epoch: 120, loss_train: 86.10304, loss_test: 18.58921, lr: 0.00016
Epoch: 140, loss_train: 86.21459, loss_test: 18.65351, lr: 0.00016
Epoch: 160, loss_train: 86.32599, loss_test: 18.80259, lr: 3.2e-05
Epoch: 180, loss_train: 86.43954, loss_test: 19.03469, lr: 6.4e-06
Epoch: 200, loss_train: 86.58578, loss_test: 19.16584, lr: 6.4e-06