I need to recognize 35 classes (9 numbers and 26 English letters). I have 440 images, example. I split it 80% – train, 20% – validation.
My model is:
import torch
from torch.nn import Module, Conv2d, MaxPool2d, ReLU, BatchNorm2d, Linear, BatchNorm1d, Softmax, Dropout, LeakyReLU
class CNNBlock(Module):
def __init__(self, in_channels, out_channels):
super().__init__()
self.conv = Conv2d(
in_channels = in_channels,
out_channels = out_channels,
kernel_size = 3,
padding = "same"
)
self.batchnorm = BatchNorm2d(num_features = out_channels)
self.act = ReLU()
self.maxpool = MaxPool2d(
kernel_size = 2,
stride = 2
)
def forward(self, x):
return self.maxpool(self.act(self.batchnorm(self.conv(x))))
class RecognitionModel(Module):
def __init__(self):
super().__init__()
# Input 1x80x180
self.block1 = CNNBlock(1, 32) # Block1 32x40x90
self.block2 = CNNBlock(32, 64) # Block2 64x20x45
self.block3 = CNNBlock(64, 128) # Block3 128x10x22
self.block4 = CNNBlock(128, 256) # Block4 256x5x11
self.conv1 = Conv2d( # Conv1 256x3x9
in_channels = 256,
out_channels = 256,
kernel_size = 3
)
self.batchnorm1 = BatchNorm2d(num_features = 256)
self.act1 = ReLU()
self.conv2 = Conv2d( # Conv2 256x1x7
in_channels = 256,
out_channels = 256,
kernel_size = 3
)
self.batchnorm2 = BatchNorm2d(num_features = 256)
self.act2 = ReLU()
self.rshp = lambda x: x.reshape(-1, 1792) # Reshape 1792
self.linear1 = Linear(
in_features = 1792,
out_features = 128
)
self.batchnorm3 = BatchNorm1d(128)
self.act3 = LeakyReLU()
self.dropout1 = Dropout(0.2)
self.linear2 = Linear(
in_features = 128,
out_features = 35
)
self.act4 = Softmax(dim = 1)
def forward(self, x):
x = self.block1(x)
x = self.block2(x)
x = self.block3(x)
x = self.block4(x)
x = self.act1(self.batchnorm1(self.conv1(x)))
x = self.act2(self.batchnorm2(self.conv2(x)))
x = self.rshp(x)
x = self.dropout1(self.act3(self.batchnorm3(self.linear1(x))))
y = self.act4(self.linear2(x))
return y
def save(self):
torch.save(self.state_dict(), './trainedmodel.pt')
def load(self):
self.load_state_dict(torch.load('./trainedmodel.pt', weights_only = True))
Also I have augmentations:
transform = transforms.Compose([
transforms.Resize((80, 180)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(20),
transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
transforms.Grayscale(),
transforms.ToTensor(),
])
I also divide image by 255.
Train code:
from torch.utils.data import DataLoader, random_split
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from torchmetrics import functional
from dataset.dataset import CellsDataset
from model import RecognitionModel
def set_seed(seed: int = 42) -> None:
import numpy as np
import random
import torch
import os
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# When running on the CuDNN backend, two further options must be set
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Set a fixed value for the hash seed
os.environ["PYTHONHASHSEED"] = str(seed)
print(f"Random seed set as {seed}")
batch_size = 32
epochs = 100
lr = 0.005
set_seed(512)
transform = transforms.Compose([
transforms.Resize((80, 180)),
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(20),
transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
transforms.Grayscale(),
transforms.ToTensor(),
])
dataset = CellsDataset(transform)
train_dataset, valid_dataset = random_split(dataset, [0.8, 0.2])
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = True)
model = RecognitionModel()
loss_func = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr = lr)
train_losses = []
valid_losses = []
train_accurs = []
valid_accurs = []
for epoch in range(epochs):
total_loss = 0
total_accuracy_valid = []
total_accuracy_train = []
num_batches = 0
samples = train_dataloader
bar = tqdm(total = len(samples), desc=f"Epoch {epoch + 1}/{epochs}")
for image, label in samples:
# Train
optimizer.zero_grad()
pred = model(image)
total_accuracy_train.append(functional.accuracy(pred, label, task = "multiclass", num_classes = 35))
loss = loss_func(pred, label)
total_loss += loss.item()
num_batches += 1
# Backward
loss.backward()
optimizer.step()
bar.set_description(f"Epoch: {epoch + 1}/{epochs} Train loss: {round(total_loss / num_batches, 3)} Train accuracy: {round(np.mean(total_accuracy_train).item(), 2)}")
bar.update()
train_accurs.append(round(np.mean(total_accuracy_train).item(), 2))
train_losses.append(total_loss / num_batches)
# Validation
model.eval()
valid_temp = []
for image, label in valid_dataloader:
pred = model(image)
total_accuracy_valid.append(functional.accuracy(pred, label, task = "multiclass", num_classes = 35))
loss_ = loss_func(pred, label)
valid_temp.append(loss_.item())
valid_accurs.append(round(np.mean(total_accuracy_valid).item(), 2))
valid_losses.append(np.mean(valid_temp))
model.train()
bar.set_description(f"Epoch: {epoch + 1}/{epochs} Train loss: {round(total_loss / num_batches, 3)} Validation loss: {round(valid_losses[-1], 3)} Train accuracy: {train_accurs[-1]} Validation ccuracy: {valid_accurs[-1]}")
bar.refresh()
When I’m trying to teach it, it learns only on train dataset, not on validation: blue – train loss, yellow – validation loss. I had a bigger model earlier, and it was learning like this. Also I want to add a plot of accuracy metric blue – train, yellow – validaion I don’t know what i need to do, help me please 🙁
10