I have implemented a deep learning model that uses VGG19 as the backbone with an FPN (Feature Pyramid Network) architecture for an image segmentation task. My images are of size (256, 256). However, I am getting this error:

Given groups=1, weight of size [64, 3, 3, 3], expected input[32, 256, 256, 3] to have 3 channels, but got 256 channels instead

I am unable to resolve this error. Here is my code; please suggest a possible solution.
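From what I can tell, nn.Conv2d expects its input as (N, C, H, W), while the shape in the error message, [32, 256, 256, 3], looks channels-last (N, H, W, C), so the first conv layer reads 256 as the channel count instead of 3. I suspect my DataLoader (not shown below) yields batches in that channels-last layout. As a quick check I permuted a dummy tensor that stands in for one batch from train_loader:

import torch

dummy = torch.randn(32, 256, 256, 3)    # same layout as in the error message
dummy_nchw = dummy.permute(0, 3, 1, 2)  # move channels to dim 1
print(dummy_nchw.shape)                 # torch.Size([32, 3, 256, 256])

Is permuting each batch like this (in the training loop or in the Dataset) the right way to handle it, or should the images be converted earlier, e.g. with transforms.ToTensor()?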
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models
from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class VGG19Backbone(nn.Module):
    def __init__(self):
        super(VGG19Backbone, self).__init__()
        vgg19 = models.vgg19(pretrained=True)
        # Split vgg19.features into five stages; each stage ends with a MaxPool.
        self.stage1 = nn.Sequential(*vgg19.features[:5])      # out: 64 channels
        self.stage2 = nn.Sequential(*vgg19.features[5:10])    # out: 128 channels
        self.stage3 = nn.Sequential(*vgg19.features[10:19])   # out: 256 channels
        self.stage4 = nn.Sequential(*vgg19.features[19:28])   # out: 512 channels
        self.stage5 = nn.Sequential(*vgg19.features[28:])     # out: 512 channels

    def forward(self, x):
        c1 = self.stage1(x)
        c2 = self.stage2(c1)
        c3 = self.stage3(c2)
        c4 = self.stage4(c3)
        c5 = self.stage5(c4)
        return c2, c3, c4, c5
class FPN(nn.Module):
    def __init__(self, num_classes):
        super(FPN, self).__init__()
        self.backbone = VGG19Backbone().to(device)
        self.num_features_out = 256
        # Lateral 1x1 convs project each backbone stage to a common channel count.
        self.lateral_c2 = nn.Conv2d(in_channels=128, out_channels=self.num_features_out, kernel_size=1)
        self.lateral_c3 = nn.Conv2d(in_channels=256, out_channels=self.num_features_out, kernel_size=1)
        self.lateral_c4 = nn.Conv2d(in_channels=512, out_channels=self.num_features_out, kernel_size=1)
        self.lateral_c5 = nn.Conv2d(in_channels=512, out_channels=self.num_features_out, kernel_size=1)
        # 3x3 convs smooth the merged maps after upsampling.
        self.dealiasing_p2 = nn.Conv2d(in_channels=self.num_features_out, out_channels=self.num_features_out, kernel_size=3, padding=1)
        self.dealiasing_p3 = nn.Conv2d(in_channels=self.num_features_out, out_channels=self.num_features_out, kernel_size=3, padding=1)
        self.dealiasing_p4 = nn.Conv2d(in_channels=self.num_features_out, out_channels=self.num_features_out, kernel_size=3, padding=1)
        self.segmentation = nn.Conv2d(self.num_features_out, num_classes, kernel_size=1)

    def forward(self, image):
        image = image.to(device)
        c2, c3, c4, c5 = self.backbone(image)
        # Top-down pathway: upsample the coarser map and add the lateral projection.
        p5 = self.lateral_c5(c5)
        p4 = self.lateral_c4(c4) + F.interpolate(p5, size=(c4.shape[2], c4.shape[3]), mode='nearest')
        p3 = self.lateral_c3(c3) + F.interpolate(p4, size=(c3.shape[2], c3.shape[3]), mode='nearest')
        p2 = self.lateral_c2(c2) + F.interpolate(p3, size=(c2.shape[2], c2.shape[3]), mode='nearest')
        p4 = self.dealiasing_p4(p4)
        p3 = self.dealiasing_p3(p3)
        p2 = self.dealiasing_p2(p2)
        print(f'c2 shape: {c2.shape}')
        print(f'c3 shape: {c3.shape}')
        print(f'p2 shape after lateral_c2: {p2.shape}')
        segmentation_map = self.segmentation(p2)
        return segmentation_map
fpn_segmentation = FPN(num_classes=3).to(device)
summary(fpn_segmentation, (3, 256, 256))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(fpn_segmentation.parameters(), lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
    fpn_segmentation.train()  # Set model to training mode
    running_loss = 0.0
    for images, masks in train_loader:
        optimizer.zero_grad()
        outputs = fpn_segmentation(images)
        masks = masks.long().to(device)  # keep targets on the same device as the outputs
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")