I am working with a news dataset (train.csv, test.csv, and submit.csv for the test labels) and trying to do binary text classification with a Recurrent Neural Network. The issue is that even after about 15 epochs the model does not seem to be learning: accuracy stays at around 50%, which is no better than guessing. I have included the code for preprocessing, the RNN, and the training loop below:
Libraries used:
<code>import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import warnings
import nltk
from google.colab import drive
import os
from torch.utils.data import Subset
from nltk.corpus import stopwords
</code>
Data preprocessing:
<code>train_data = pd.read_csv('train.csv', on_bad_lines='skip')
test_data = pd.read_csv('test.csv', on_bad_lines='skip')
test_labels_raw = pd.read_csv('submit.csv', on_bad_lines='skip')

train_data.dropna(subset=['text'], inplace=True)
test_data.dropna(subset=['text'], inplace=True)

# Combining title and text, then removing the now-redundant columns
train_data['news'] = train_data['title'] + ' ' + train_data['text']
test_data['news'] = test_data['title'] + ' ' + test_data['text']
delete_columns = ['title', 'text', 'author']
train_data = train_data.drop(delete_columns, axis=1)
test_data = test_data.drop(delete_columns, axis=1)

# Filtering out special characters, newlines, and extra spaces
train_data['news'] = train_data['news'].str.replace(r"[^a-zA-Z0-9\s]", "", regex=True)
train_data['news'] = train_data['news'].str.replace("\n", "")
train_data['news'] = train_data['news'].str.replace(r'\s+', ' ', regex=True)  # collapse runs of whitespace into single spaces
test_data['news'] = test_data['news'].str.replace(r"[^a-zA-Z0-9\s]", "", regex=True)
test_data['news'] = test_data['news'].str.replace("\n", "")
test_data['news'] = test_data['news'].str.replace(r"\s+", " ", regex=True)
train_data['news'] = train_data['news'].str.lower()
test_data['news'] = test_data['news'].str.lower()

# Cast to str (removes float NaN leftovers)
train_data['news'] = train_data['news'].astype(str)
test_data['news'] = test_data['news'].astype(str)

# Removing stopwords
nltk.download('stopwords')
stop = stopwords.words('english')
train_data['news'] = train_data['news'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))
test_data['news'] = test_data['news'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop]))

batchsize = 64

class NewsDataset(Dataset):
    def __init__(self, padded_sequences, labels):
        self.padded_sequences = padded_sequences
        if isinstance(labels, torch.Tensor):
            self.labels = labels.unsqueeze(1) if len(labels.shape) == 1 else labels
        else:
            self.labels = torch.tensor(labels.values, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return len(self.padded_sequences)

    def __getitem__(self, idx):
        return self.padded_sequences[idx], self.labels[idx]

# Tokenizing training data
tokenizer = get_tokenizer("basic_english")
train_tokens = train_data['news'].apply(tokenizer)

# Build vocabulary
train_vocab = build_vocab_from_iterator(train_tokens, specials=["<unk>"])
train_vocab.set_default_index(train_vocab["<unk>"])

def numerical_encoding(token_list):
    return train_vocab(token_list)

train_sequences = train_tokens.apply(numerical_encoding)
print(type(train_sequences))

# Pad sequences
train_padded_sequences = torch.nn.utils.rnn.pad_sequence(
    [torch.tensor(seq) for seq in train_sequences], batch_first=True
)

# Convert labels to tensor
train_labels = torch.tensor(train_data["label"].values, dtype=torch.float32)

# Create dataset and dataloader (truncated to 2000 rows / 1000 tokens)
train_padded_sequences = train_padded_sequences[:2000, :1000]
print(f'train_padded_sequences shape: {train_padded_sequences.shape}')
train_dataset = NewsDataset(train_padded_sequences, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True, num_workers=4)

# Tokenizing testing data
test_tokens = test_data['news'].apply(tokenizer)

# Build vocabulary (ensure it matches with training vocabulary or use the same vocab)
test_vocab = build_vocab_from_iterator(test_tokens, specials=["<unk>"])
test_vocab.set_default_index(test_vocab["<unk>"])
test_sequences = test_tokens.apply(numerical_encoding)

# Pad sequences
test_padded_sequences = torch.nn.utils.rnn.pad_sequence(
    [torch.tensor(seq) for seq in test_sequences], batch_first=True
)

# Test labels come from submit.csv; NewsDataset converts the Series to a tensor
test_labels = test_labels_raw['label']

# Prepare test dataset and dataloader
test_padded_sequences = test_padded_sequences[:2000, :1000]
test_dataset = NewsDataset(test_padded_sequences, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False, num_workers=4)
</code>
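Note that `test_sequences` is produced by `numerical_encoding`, which indexes into `train_vocab`, so the separately built `test_vocab` is never actually used for encoding; as far as I understand that is what I want, since both splits must share one index space. A quick way to check that unseen test tokens fall back to the training vocabulary's `<unk>` index (the sample sentence is made up):
<code># made-up sample text, just to confirm both splits share the training index space
sample_tokens = tokenizer("some unseen madeupword headline")
print(train_vocab(sample_tokens))  # unknown tokens map to the <unk> index
print(train_vocab["<unk>"])        # default index set via set_default_index
</code>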
Here is the code for the RNN:
<code>class RNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, output_size, num_layers):
        super(RNNModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # Define the embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Define the RNN layer
        self.rnn = nn.RNN(input_size=embedding_dim,  # embedding_dim is the input size
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True,
                          nonlinearity='relu')  # Use ReLU activation function
        # Define the output layer
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()  # unused in forward; BCEWithLogitsLoss applies the sigmoid itself

    def forward(self, x):
        # Convert token indices to embeddings
        x = self.embedding(x)
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)  # .to(x.device)
        # Pass through RNN
        out, _ = self.rnn(x, h0)
        # Take the last time step and pass through the output layer (raw logits)
        out = self.fc(out[:, -1, :])
        return out
</code>
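As a shape sanity check, here is a minimal sketch with dummy data (batch and sequence sizes are made up; the hyperparameters match the ones used below):
<code>dummy = torch.randint(0, len(train_vocab), (4, 100))  # 4 fake sequences of 100 token ids
check_model = RNNModel(len(train_vocab), 64, 64, 1, 6)
print(check_model(dummy).shape)  # torch.Size([4, 1]) -- one raw logit per example
</code>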
Here is the code that builds the model and runs the training epochs:
<code># Declaring first preliminary model
embedding_dim = 64
hidden_size = 64
output_size = 1
num_layers = 6

first_model = RNNModel(len(train_vocab), embedding_dim, hidden_size, output_size, num_layers)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(first_model.parameters(), lr=0.001)

num_epochs = 10
first_train_acc = []
first_train_loss = []
first_val_acc = []
first_val_loss = []

for epoch in range(num_epochs):
    # Training phase
    first_model.train()
    train_loss = 0.0
    correct_train = 0
    total_train = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = first_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)
        # Threshold the raw logits at 0.5 after a sigmoid
        predicted = torch.round(torch.sigmoid(outputs))
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
    train_loss = train_loss / total_train
    train_accuracy = correct_train / total_train
    first_train_loss.append(train_loss)
    first_train_acc.append(train_accuracy)

    # Validation phase
    first_model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = first_model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            predicted = torch.round(torch.sigmoid(outputs))
            predicted = predicted.view(-1, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
    val_loss = val_loss / total_val
    val_accuracy = correct_val / total_val
    first_val_loss.append(val_loss)
    first_val_acc.append(val_accuracy)

    print(f'Epoch {epoch+1}/{num_epochs}, '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, '
          f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')
</code>
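One variant I am considering but have not shown above is gradient clipping, since deep vanilla ReLU RNNs are prone to exploding gradients; a sketch of how it would slot in between `backward()` and `step()` (max_norm=1.0 is an arbitrary choice):
<code>loss.backward()
# hypothetical addition: cap the global gradient norm before stepping
torch.nn.utils.clip_grad_norm_(first_model.parameters(), max_norm=1.0)
optimizer.step()
</code>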
I have no idea why this isn't working; I've looked through it many times. Any help would be greatly appreciated!
So far, I have tried changing the model (number of layers, layer sizes, different activation functions: tanh, sigmoid, ReLU). I've also tried both adding and removing the sigmoid activation at the last layer (removed because BCEWithLogitsLoss applies the sigmoid itself; see the small check below). I have also tried truncating the data at different lengths and verified that it is padded correctly.
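To illustrate why I removed the final sigmoid: as far as I understand, `BCEWithLogitsLoss` on raw logits computes the same value as `BCELoss` on sigmoid outputs, so keeping a sigmoid in the model would apply it twice. A small check with made-up numbers:
<code>logits = torch.tensor([[0.3], [-1.2]])
targets = torch.tensor([[1.0], [0.0]])
print(F.binary_cross_entropy_with_logits(logits, targets))     # loss on raw logits
print(F.binary_cross_entropy(torch.sigmoid(logits), targets))  # same value
</code>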
I expected the model to learn, but it does not improve at all even though the weights are changing (I can see slight fluctuations in accuracy and loss, just no improvement).
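For what it's worth, this is the kind of diagnostic sketch I can run inside the training loop, after `loss.backward()`, to confirm that gradients are actually flowing:
<code># diagnostic sketch: inspect per-parameter gradient magnitudes
for name, param in first_model.named_parameters():
    if param.grad is not None:
        print(name, param.grad.norm().item())
</code>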
Here are the results I got:
<code>Epoch 1/10, Train Loss: 0.6604, Train Accuracy: 0.5360, Val Loss: 1.4992, Val Accuracy: 0.5565
Epoch 2/10, Train Loss: 0.6523, Train Accuracy: 0.5340, Val Loss: 1.3877, Val Accuracy: 0.5550
Epoch 3/10, Train Loss: 0.6522, Train Accuracy: 0.5155, Val Loss: 1.3744, Val Accuracy: 0.4425
Epoch 4/10, Train Loss: 0.6515, Train Accuracy: 0.5280, Val Loss: 1.3799, Val Accuracy: 0.5570
Epoch 5/10, Train Loss: 0.6513, Train Accuracy: 0.5355, Val Loss: 1.4947, Val Accuracy: 0.5570
Epoch 6/10, Train Loss: 0.6509, Train Accuracy: 0.5355, Val Loss: 1.6039, Val Accuracy: 0.5575
Epoch 7/10, Train Loss: 0.6510, Train Accuracy: 0.5165, Val Loss: 1.6807, Val Accuracy: 0.5575
Epoch 8/10, Train Loss: 0.6509, Train Accuracy: 0.5355, Val Loss: 1.6828, Val Accuracy: 0.5585
Epoch 9/10, Train Loss: 0.6509, Train Accuracy: 0.5355, Val Loss: 1.7836, Val Accuracy: 0.5580
Epoch 10/10, Train Loss: 0.6509, Train Accuracy: 0.5355, Val Loss: 1.8336, Val Accuracy: 0.5595
</code>