I am creating a CNN model to detect emotions (I am very new to creating neural networks). The dataset I used has this structure:
DatasetName ->
train ->
0
1
2
3
4
5
6
7
where each number is a folder containing the images for a single emotion. The images are in color and come in a variety of sizes (all resized to 48×48 in the dataset class). There are 37,553 images in total.
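(For reference, this is also the layout that torchvision.datasets.ImageFolder expects, so the folder structure itself can be sanity-checked with something like the snippet below. I am not using ImageFolder in my actual script; the class names '0'–'7' are just the folder names.)

import os
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Layout sanity check only (not part of my training script):
# ImageFolder treats each numbered folder as one class.
layout_check = datasets.ImageFolder(root=os.path.join("DatasetName", "train"),
                                    transform=transforms.ToTensor())
print(layout_check.classes)  # expected: ['0', '1', '2', '3', '4', '5', '6', '7']
print(len(layout_check))     # expected: total number of training images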
This is the code:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
import torch.nn.functional as F
import os
from skimage import io
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
# Custom dataset
class EmotionDataset(Dataset):
    def __init__(self, root_dir, transform, train):
        self.root_dir = root_dir
        self.transform = transform
        self.train = train
        self.zero = sorted(os.listdir(os.path.join(self.root_dir, str(0))))
        self.one = sorted(os.listdir(os.path.join(self.root_dir, str(1))))
        self.two = sorted(os.listdir(os.path.join(self.root_dir, str(2))))
        self.three = sorted(os.listdir(os.path.join(self.root_dir, str(3))))
        self.four = sorted(os.listdir(os.path.join(self.root_dir, str(4))))
        self.five = sorted(os.listdir(os.path.join(self.root_dir, str(5))))
        self.six = sorted(os.listdir(os.path.join(self.root_dir, str(6))))
        self.seven = sorted(os.listdir(os.path.join(self.root_dir, str(7))))
        self.zerol = len(self.zero)
        self.onel = len(self.one)
        self.twol = len(self.two)
        self.threel = len(self.three)
        self.fourl = len(self.four)
        self.fivel = len(self.five)
        self.sixl = len(self.six)
        self.sevenl = len(self.seven)
        self.data = [self.zero, self.one, self.two, self.three, self.four, self.five, self.six, self.seven]
        self.length = self.zerol + self.onel + self.twol + self.threel + self.fourl + self.fivel + self.sixl + self.sevenl

    def __len__(self):
        return self.length

    def sub_folder(self, index):
        if index < self.zerol:
            return 0, index
        elif index < self.zerol + self.onel:
            return 1, index - self.zerol
        elif index < self.zerol + self.onel + self.twol:
            return 2, index - (self.zerol + self.onel)
        elif index < self.zerol + self.onel + self.twol + self.threel:
            return 3, index - (self.zerol + self.onel + self.twol)
        elif index < self.zerol + self.onel + self.twol + self.threel + self.fourl:
            return 4, index - (self.zerol + self.onel + self.twol + self.threel)
        elif index < self.zerol + self.onel + self.twol + self.threel + self.fourl + self.fivel:
            return 5, index - (self.zerol + self.onel + self.twol + self.threel + self.fourl)
        elif index < self.zerol + self.onel + self.twol + self.threel + self.fourl + self.fivel + self.sixl:
            return 6, index - (self.zerol + self.onel + self.twol + self.threel + self.fourl + self.fivel)
        else:
            return 7, index - (self.zerol + self.onel + self.twol + self.threel + self.fourl + self.fivel + self.sixl)

    def __getitem__(self, index):
        subfolder, mod_index = self.sub_folder(index)
        img_path = os.path.join(self.root_dir, str(subfolder), self.data[subfolder][mod_index])
        image = io.imread(img_path)
        target = torch.tensor(int(subfolder))
        if self.transform:
            image = self.transform(image)
        t = torchvision.transforms.Resize((48, 48))
        image = t(image)
        # image = image.expand(3, -1, -1)
        return image, target
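# (Note: the dataset treats the eight class folders as one flat list of images;
#  sub_folder() maps a global index to (class folder, index within that folder),
#  and the folder number is used directly as the label.)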
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Convolutional Neural Network
class ConvolutionalNeuralNetwork(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(ConvolutionalNeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  # keeps size
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))  # cuts size in half
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  # keeps size
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  # keeps size
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))  # keeps size
        self.fc1 = nn.Linear(64 * 6 * 6, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x
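# (Shape trace for a 3x48x48 input: conv1 -> 8x48x48, pool -> 8x24x24,
#  conv2 -> 16x24x24, conv3 -> 32x24x24, pool -> 32x12x12, conv4 -> 64x12x12,
#  pool -> 64x6x6, flatten -> 2304 = 64*6*6, fc1 -> num_classes.)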
# Saving model
def save_model(state, filename="saved_model1.pth.tar"):
    print("Saving model")
    torch.save(state, filename)
# Hyperparameters
in_channels = 3
num_classes = 8
learning_rate = 0.01
batch_size = 64
num_epochs = 11
# Load Data
train_set = EmotionDataset(root_dir = os.path.join("DatasetName", "train"), transform = transforms.ToTensor(), train=True)
test_set = EmotionDataset(root_dir = os.path.join("DatasetName", "test"), transform = transforms.ToTensor(), train=False)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
# Init network
model = ConvolutionalNeuralNetwork(in_channels=in_channels, num_classes=num_classes).to(device)
# Loss - cost function
criterion = nn.CrossEntropyLoss()
# Learning algorithm
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# Training
def train_model():
    print("Training Model")
    for epoch in range(num_epochs):
        losses = []
        for batch_idx, (data, targets) in enumerate(train_loader):
            # move data to the device (CPU or GPU)
            data = data.to(device=device)
            targets = targets.to(device=device)
            # forward
            scores = model(data)
            loss = criterion(scores, targets)
            losses.append(loss.item())
            # backward
            optimizer.zero_grad()
            loss.backward()
            # gradient descent or adam step
            optimizer.step()
        print(f"Cost at epoch {epoch} is {sum(losses) / len(losses)}")
# Testing
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        if loader.dataset.train:
            print("Checking accuracy on training data")
        else:
            print("Checking accuracy on test data")
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            # x = x.reshape(x.shape[0], -1)  # for NN
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')
    model.train()
if __name__ == "__main__":
    # train and save the model
    train_model()
    saved_model = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
    save_model(saved_model)
    check_accuracy(train_loader, model)
    check_accuracy(test_loader, model)
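For reference, this is the kind of sanity check I would run on one batch (a minimal sketch; the expected shapes follow from the transforms and model above):

images, labels = next(iter(train_loader))
print(images.shape)   # expected: torch.Size([64, 3, 48, 48])
print(labels.shape)   # expected: torch.Size([64])
print(labels[:10])    # labels should be in the range 0-7
with torch.no_grad():
    out = model(images.to(device))
print(out.shape)      # expected: torch.Size([64, 8])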
I have tried adjusting the learning rate, batch_size, and num_epochs, but when I begin the training process, the model has an initial loss and then every subsequent epoch has a very similar loss value (it does not decrease at all).
I have also tried other datasets; with those, the loss does decrease, but the model is very inaccurate. What could I do to improve this network so that the loss decreases?