Problem
I wrote a program to train LeNet. However, the training results (train_l, train_acc, and test_acc) remain essentially unchanged across epochs. I've checked the code and I think it should be correct. I don't know what the problem is or how to fix it.
Code
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms

class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
            nn.Linear(120, 84), nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, X):
        return self.net(X)

class Accumulator:
    """Accumulate sums over n running variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def accuracy(y_hat, y):
    """Return the number of correct predictions in the batch."""
    y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.sum())

batch_size = 256
lr = 0.9
num_epochs = 10

trans = transforms.Compose([transforms.ToTensor()])
mnist_train = torchvision.datasets.MNIST('./LeNet/dataset', train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.MNIST('./LeNet/dataset', train=False, transform=trans, download=True)
train_iter = DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
test_iter = DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)

model = LeNet()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.net.parameters(), lr=lr)
num_batches = len(train_iter)

for epoch in range(num_epochs):
    # sum of loss, sum of correct predictions, number of examples
    train_metric = Accumulator(3)
    model.net.train()
    for i, (X, y) in enumerate(train_iter):
        optimizer.zero_grad()
        y_hat = model(X)
        l = loss(y_hat, y)
        l.backward()
        optimizer.step()
        train_metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
        if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
            print(f'epoch {epoch + 1} batch {i}\t train_l: {train_metric[0] / train_metric[2]:.3f} ',
                  f'train_acc: {train_metric[1] / train_metric[2]:.3f}')
    with torch.no_grad():
        # sum of correct predictions, number of examples
        test_metric = Accumulator(2)
        for X, y in test_iter:
            test_metric.add(accuracy(model(X), y), X.shape[0])
        print(f'epoch {epoch + 1}\t test_acc: {test_metric[0] / test_metric[1]:.3f}', end='\n')
Output
epoch 1 batch 46 train_l: 2.343 train_acc: 0.101
epoch 1 batch 93 train_l: 2.328 train_acc: 0.103
epoch 1 batch 140 train_l: 2.321 train_acc: 0.106
epoch 1 batch 187 train_l: 2.317 train_acc: 0.106
epoch 1 batch 234 train_l: 2.315 train_acc: 0.106
epoch 1 test_acc: 0.114
epoch 2 batch 46 train_l: 2.305 train_acc: 0.104
epoch 2 batch 93 train_l: 2.304 train_acc: 0.105
epoch 2 batch 140 train_l: 2.304 train_acc: 0.106
epoch 2 batch 187 train_l: 2.304 train_acc: 0.106
epoch 2 batch 234 train_l: 2.304 train_acc: 0.107
epoch 2 test_acc: 0.096
epoch 3 batch 46 train_l: 2.303 train_acc: 0.108
epoch 3 batch 93 train_l: 2.303 train_acc: 0.109
epoch 3 batch 140 train_l: 2.303 train_acc: 0.108
epoch 3 batch 187 train_l: 2.303 train_acc: 0.109
epoch 3 batch 234 train_l: 2.303 train_acc: 0.109
epoch 3 test_acc: 0.114
epoch 4 batch 46 train_l: 2.303 train_acc: 0.109
epoch 4 batch 93 train_l: 2.303 train_acc: 0.111
epoch 4 batch 140 train_l: 2.302 train_acc: 0.111
epoch 4 batch 187 train_l: 2.303 train_acc: 0.110
epoch 4 batch 234 train_l: 2.302 train_acc: 0.110
epoch 4 test_acc: 0.114
epoch 5 batch 46 train_l: 2.303 train_acc: 0.106
epoch 5 batch 93 train_l: 2.302 train_acc: 0.111
epoch 5 batch 140 train_l: 2.302 train_acc: 0.110
epoch 5 batch 187 train_l: 2.302 train_acc: 0.111
epoch 5 batch 234 train_l: 2.302 train_acc: 0.109
epoch 5 test_acc: 0.114
epoch 6 batch 46 train_l: 2.303 train_acc: 0.110
epoch 6 batch 93 train_l: 2.302 train_acc: 0.112
epoch 6 batch 140 train_l: 2.302 train_acc: 0.111
epoch 6 batch 187 train_l: 2.302 train_acc: 0.112
epoch 6 batch 234 train_l: 2.302 train_acc: 0.111
epoch 6 test_acc: 0.114
epoch 7 batch 46 train_l: 2.302 train_acc: 0.113
epoch 7 batch 93 train_l: 2.302 train_acc: 0.111
epoch 7 batch 140 train_l: 2.302 train_acc: 0.112
epoch 7 batch 187 train_l: 2.302 train_acc: 0.111
epoch 7 batch 234 train_l: 2.302 train_acc: 0.110
epoch 7 test_acc: 0.101
epoch 8 batch 46 train_l: 2.303 train_acc: 0.110
epoch 8 batch 93 train_l: 2.302 train_acc: 0.111
epoch 8 batch 140 train_l: 2.302 train_acc: 0.109
epoch 8 batch 187 train_l: 2.302 train_acc: 0.109
epoch 8 batch 234 train_l: 2.302 train_acc: 0.109
epoch 8 test_acc: 0.114
epoch 9 batch 46 train_l: 2.302 train_acc: 0.108
epoch 9 batch 93 train_l: 2.302 train_acc: 0.112
epoch 9 batch 140 train_l: 2.302 train_acc: 0.111
epoch 9 batch 187 train_l: 2.302 train_acc: 0.112
epoch 9 batch 234 train_l: 2.302 train_acc: 0.111
epoch 9 test_acc: 0.114
epoch 10 batch 46 train_l: 2.302 train_acc: 0.111
epoch 10 batch 93 train_l: 2.302 train_acc: 0.113
epoch 10 batch 140 train_l: 2.302 train_acc: 0.110
epoch 10 batch 187 train_l: 2.302 train_acc: 0.110
epoch 10 batch 234 train_l: 2.302 train_acc: 0.110
epoch 10 test_acc: 0.114
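Since the loss barely moves, I also ran a quick gradient check separately from the script above (just my own sanity-check sketch; it reuses model, loss, optimizer, and train_iter from the code):

# My own sanity check, not part of the training script: run one batch
# and print per-parameter gradient norms to see whether gradients vanish.
X, y = next(iter(train_iter))
optimizer.zero_grad()
l = loss(model(X), y)
l.backward()
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f'{name}: grad norm = {param.grad.norm():.6f}')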
Expectation
For a correct result, I expect the training metrics to improve: train_l should decrease, train_acc and test_acc should both increase, and the gap between train_acc and test_acc should not be large.
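For reference, the loss is stuck near ln(10) ≈ 2.303, which is exactly the cross-entropy of a uniform guess over 10 classes, so it looks like the network never starts learning. One thing I considered trying, though I'm not sure it's the right fix, is explicit Xavier initialization plus a smaller learning rate, since sigmoid layers saturate easily:

# A guess, not a verified fix: Xavier-initialize conv/linear weights and
# lower the learning rate (0.1 here is my own choice, not from any reference).
def init_weights(m):
    if type(m) in (nn.Conv2d, nn.Linear):
        nn.init.xavier_uniform_(m.weight)

model.net.apply(init_weights)
optimizer = torch.optim.SGD(model.net.parameters(), lr=0.1)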