Thiết kế website giá rẻ

Question

Recently I’m working on a binary sentiment analysis task using Pytorch. The task has offered train set, valid set, test set and a preprocessed word2vec binary file, named as train.txt, valid.txt, test.txt, wiki_word2vec_50.bin respectively. The corpus format is as follows:

1 I’m sorry to hear all of this, ……

Here “1” represents negative, while “0” represents positive.

I’ve built a LSTM model and trained it. After debugging, the program run well. But I found that the train and valid accuracy hardly improved in the early 30 epochs. And sudden rise and fall of the curve also confused me. In other LSTM result of this task, it seems the accuracy should rise smoothly from the 1st epoch, and peaked at about the 50th epoch.

In my limited experience with Pytorch, I wrote this code with the assistance of copilot, and debug it manually. But copilot could not help with the above problem. I doubted if I had built the model wrong, or I have messed up the training pipeline.

Before I write this post, I have turned to several people for advice, and they suggested that I should adjust hyperparameters, but it didn’t work. Please help or try to give some ideas how to deal with this problem.

My code and accuracy curve is showed below. For simplicity, I have delete some irrelevant parts.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from gensim.models import KeyedVectors
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from tensorboardX import SummaryWriter

VOCAB_SIZE = 426677  
EMBEDDING_DIM = 50  
HIDDEN_DIM = 128   
BATCH_SIZE = 16    
LEARNING_RATE = 0.001
NUM_EPOCHS = 20 

if (torch.cuda.is_available()):
    print('GPU is available')
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# load word vector dict
word_vectors = KeyedVectors.load_word2vec_format('./Dataset/wiki_word2vec_50.bin', binary=True)
VOCAB_SIZE = word_vectors.vectors.shape[0]

# preprocess data
class SentimentDataset(Dataset):
    def __init__(self, filename, word_vectors, max_sequence_length=120):
        self.data = []
        self.labels = []
        self.max_sequence_length = max_sequence_length
        with open(filename, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split()
                label = int(parts[0])
                sentence = parts[1:max_sequence_length]
                # convert words to vectors
                vector = [word_vectors[word] if word in word_vectors else np.zeros(EMBEDDING_DIM) for word in sentence]
                # add zero vectors for padding
                if len(vector) < max_sequence_length:
                    vector += [np.zeros(EMBEDDING_DIM)] * (max_sequence_length - len(vector))
                self.data.append(vector)
                self.labels.append(label)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        return torch.tensor(self.data[index], dtype=torch.float), torch.tensor(self.labels[index], dtype=torch.long)

# define model
class LSTMClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, label_size, batch_size):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (torch.zeros(1, self.batch_size, self.hidden_dim, device=device),
                torch.zeros(1, self.batch_size, self.hidden_dim, device=device))

    def forward(self, sentence):
        lstm_out, self.hidden = self.lstm(sentence.view(len(sentence), self.batch_size, -1), self.hidden)
        label_space = self.hidden2label(lstm_out[-1])
        label_scores = torch.log_softmax(label_space, dim=1)
        return label_scores

# train model
def train_model(model, dataloader, criterion, optimizer, writer, epoch):
    model.train()
    total_loss = 0
    all_predictions = []
    all_labels = []
    for sentences, labels in dataloader:
        sentences = sentences.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        model.hidden = model.init_hidden()
        if sentences.size(0) != BATCH_SIZE:
            continue
        output = model(sentences.permute(1, 0, 2))
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        predictions = output.argmax(dim=1)
        all_predictions.extend(predictions.tolist())
        all_labels.extend(labels.tolist())

    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='binary')
    writer.add_scalar('train/loss', total_loss, epoch)
    writer.add_scalar('train/accuracy', accuracy, epoch)
    writer.add_scalar('train/f1_score', f1, epoch)

# assess model
def evaluate_model(model, dataloader, criterion, writer, epoch):
    model.eval()
    total_loss = 0
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for sentences, labels in dataloader:
            sentences = sentences.to(device)
            labels = labels.to(device)
            model.hidden = model.init_hidden()
            if sentences.size(0) != BATCH_SIZE:
                continue
            output = model(sentences.permute(1, 0, 2))
            loss = criterion(output, labels)
            total_loss += loss.item()
            predictions = output.argmax(dim=1)
            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.tolist())

    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='binary')
    writer.add_scalar('validation/loss', total_loss, epoch)
    writer.add_scalar('validation/accuracy', accuracy, epoch)
    writer.add_scalar('validation/f1_score', f1, epoch)

def main():
    # initialize TensorBoard
    writer = SummaryWriter()

    # load dataset
    train_dataset = SentimentDataset('./Dataset/train.txt', word_vectors)
    validation_dataset = SentimentDataset('./Dataset/validation.txt', word_vectors)
    test_dataset = SentimentDataset('./Dataset/test.txt', word_vectors)

    # create data loader
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # define model, loss function and optimizer
    model = LSTMClassifier(EMBEDDING_DIM, HIDDEN_DIM, VOCAB_SIZE, 2, BATCH_SIZE)
    model.to(device)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # train model
    for epoch in range(NUM_EPOCHS):
        train_model(model, train_loader, criterion, optimizer, writer, epoch)
        evaluate_model(model, validation_loader, criterion, writer, epoch)

    # evaluate model on test set
    evaluate_model(model, test_loader, criterion, writer, NUM_EPOCHS)

    # save model
    torch.save(model.state_dict(), 'sentiment_analysis_model.pth')

    # close TensorBoard
    writer.close()

if __name__ == '__main__':
    main()

train acc
valid acc

I’ve tried adjusting hyperparameters like learning rate and batch size, but no use. In detail, if I tried to decrease the learning rate to 1e-4 and add weight_decay=1e-5 in Adam optimizer, it will even have no improvement in 100 epochs.

I’m expecting a smooth curve, peaked before about the 50th epoch, which has been proved possible in others’ model.

If you have figured out what I have coded wrong, please point it out. Else, please offer me some feasible suggestions that I could try to implement.

Thiết kế website giá rẻ

Danh mục

Accuracy improves little when training LSTM. Have I coded wrong?