Recently I’m working on a binary sentiment analysis task using Pytorch. The task has offered train set, valid set, test set and a preprocessed word2vec binary file, named as train.txt, valid.txt, test.txt, wiki_word2vec_50.bin respectively. The corpus format is as follows:
1 I’m sorry to hear all of this, ……
Here “1” represents negative, while “0” represents positive.
I’ve built a LSTM model and trained it. After debugging, the program run well. But I found that the train and valid accuracy hardly improved in the early 30 epochs. And sudden rise and fall of the curve also confused me. In other LSTM result of this task, it seems the accuracy should rise smoothly from the 1st epoch, and peaked at about the 50th epoch.
In my limited experience with Pytorch, I wrote this code with the assistance of copilot, and debug it manually. But copilot could not help with the above problem. I doubted if I had built the model wrong, or I have messed up the training pipeline.
Before I write this post, I have turned to several people for advice, and they suggested that I should adjust hyperparameters, but it didn’t work. Please help or try to give some ideas how to deal with this problem.
My code and accuracy curve is showed below. For simplicity, I have delete some irrelevant parts.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from gensim.models import KeyedVectors
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from tensorboardX import SummaryWriter
VOCAB_SIZE = 426677
EMBEDDING_DIM = 50
HIDDEN_DIM = 128
BATCH_SIZE = 16
LEARNING_RATE = 0.001
NUM_EPOCHS = 20
if (torch.cuda.is_available()):
print('GPU is available')
device = torch.device('cuda')
else:
device = torch.device('cpu')
# load word vector dict
word_vectors = KeyedVectors.load_word2vec_format('./Dataset/wiki_word2vec_50.bin', binary=True)
VOCAB_SIZE = word_vectors.vectors.shape[0]
# preprocess data
class SentimentDataset(Dataset):
def __init__(self, filename, word_vectors, max_sequence_length=120):
self.data = []
self.labels = []
self.max_sequence_length = max_sequence_length
with open(filename, 'r', encoding='utf-8') as f:
for line in f:
parts = line.strip().split()
label = int(parts[0])
sentence = parts[1:max_sequence_length]
# convert words to vectors
vector = [word_vectors[word] if word in word_vectors else np.zeros(EMBEDDING_DIM) for word in sentence]
# add zero vectors for padding
if len(vector) < max_sequence_length:
vector += [np.zeros(EMBEDDING_DIM)] * (max_sequence_length - len(vector))
self.data.append(vector)
self.labels.append(label)
def __len__(self):
return len(self.labels)
def __getitem__(self, index):
return torch.tensor(self.data[index], dtype=torch.float), torch.tensor(self.labels[index], dtype=torch.long)
# define model
class LSTMClassifier(nn.Module):
def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size, batch_size):
super(LSTMClassifier, self).__init__()
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
self.hidden2label = nn.Linear(hidden_dim, label_size)
self.hidden = self.init_hidden()
def init_hidden(self):
return (torch.zeros(1, self.batch_size, self.hidden_dim, device=device),
torch.zeros(1, self.batch_size, self.hidden_dim, device=device))
def forward(self, sentence):
lstm_out, self.hidden = self.lstm(sentence.view(len(sentence), self.batch_size, -1), self.hidden)
label_space = self.hidden2label(lstm_out[-1])
label_scores = torch.log_softmax(label_space, dim=1)
return label_scores
# train model
def train_model(model, dataloader, criterion, optimizer, writer, epoch):
model.train()
total_loss = 0
all_predictions = []
all_labels = []
for sentences, labels in dataloader:
sentences = sentences.to(device)
labels = labels.to(device)
optimizer.zero_grad()
model.hidden = model.init_hidden()
if sentences.size(0) != BATCH_SIZE:
continue
output = model(sentences.permute(1, 0, 2))
loss = criterion(output, labels)
loss.backward()
optimizer.step()
total_loss += loss.item()
predictions = output.argmax(dim=1)
all_predictions.extend(predictions.tolist())
all_labels.extend(labels.tolist())
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='binary')
writer.add_scalar('train/loss', total_loss, epoch)
writer.add_scalar('train/accuracy', accuracy, epoch)
writer.add_scalar('train/f1_score', f1, epoch)
# assess model
def evaluate_model(model, dataloader, criterion, writer, epoch):
model.eval()
total_loss = 0
all_predictions = []
all_labels = []
with torch.no_grad():
for sentences, labels in dataloader:
sentences = sentences.to(device)
labels = labels.to(device)
model.hidden = model.init_hidden()
if sentences.size(0) != BATCH_SIZE:
continue
output = model(sentences.permute(1, 0, 2))
loss = criterion(output, labels)
total_loss += loss.item()
predictions = output.argmax(dim=1)
all_predictions.extend(predictions.tolist())
all_labels.extend(labels.tolist())
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='binary')
writer.add_scalar('validation/loss', total_loss, epoch)
writer.add_scalar('validation/accuracy', accuracy, epoch)
writer.add_scalar('validation/f1_score', f1, epoch)
def main():
# initialize TensorBoard
writer = SummaryWriter()
# load dataset
train_dataset = SentimentDataset('./Dataset/train.txt', word_vectors)
validation_dataset = SentimentDataset('./Dataset/validation.txt', word_vectors)
test_dataset = SentimentDataset('./Dataset/test.txt', word_vectors)
# create data loader
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
# define model, loss function and optimizer
model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, 2, BATCH_SIZE)
model.to(device)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# train model
for epoch in range(NUM_EPOCHS):
train_model(model, train_loader, criterion, optimizer, writer, epoch)
evaluate_model(model, validation_loader, criterion, writer, epoch)
# evaluate model on test set
evaluate_model(model, test_loader, criterion, writer, NUM_EPOCHS)
# save model
torch.save(model.state_dict(), 'sentiment_analysis_model.pth')
# close TensorBoard
writer.close()
if __name__ == '__main__':
main()
train acc
valid acc
I’ve tried adjusting hyperparameters like learning rate and batch size, but no use. In detail, if I tried to decrease the learning rate to 1e-4 and add weight_decay=1e-5 in Adam optimizer, it will even have no improvement in 100 epochs.
I’m expecting a smooth curve, peaked before about the 50th epoch, which has been proved possible in others’ model.
If you have figured out what I have coded wrong, please point it out. Else, please offer me some feasible suggestions that I could try to implement.
user24874436 is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.