I am building a custom model for an instance segmentation task that takes in 2D images, plus a target for training. I think there is an issue with the model architecture, but I am not sure where it is. I have tried many things and used different ways to extract features, but I keep getting stuck on errors similar to the one above.
I’m new to deep learning and not sure what is going on here, but the output shapes look suspicious to me, and I think they are why I am getting this error. Any help is appreciated. Thanks in advance!
Module
class CustomModel(nn.Module):
    """Instance-segmentation model built from torchvision detection parts.

    Pipeline: ResNet-50 FPN backbone -> region proposal network (RPN) ->
    multi-scale RoI align -> mask predictor. A ``Bbox3DPredictor`` is also
    constructed as ``self.bbox3d`` but is not used by ``forward`` yet.
    """

    def __init__(self):
        super().__init__()
        self.backbone = backbone_utils.resnet_fpn_backbone(
            backbone_name='resnet50',
            weights=models.ResNet50_Weights.DEFAULT,
        )

        # One (sizes, aspect_ratios) entry per feature map handed to the RPN;
        # five identical entries are configured here.
        num_levels = 5
        anchor_sizes = ((6, 9, 16),) * num_levels
        anchor_ratios = ((1.0, 1.25, 1.5),) * num_levels
        # The RPN head must predict one score/box per anchor at each location,
        # i.e. sizes x aspect ratios (3 * 3 = 9). Deriving it keeps the head in
        # sync with the anchor generator if either tuple is edited.
        anchors_per_location = len(anchor_sizes[0]) * len(anchor_ratios[0])

        self.rpn_layer = rpn.RegionProposalNetwork(
            anchor_generator=anchor_utils.AnchorGenerator(
                sizes=anchor_sizes,
                aspect_ratios=anchor_ratios,
            ),
            batch_size_per_image=4,
            bg_iou_thresh=0.3,
            fg_iou_thresh=0.7,
            head=rpn.RPNHead(in_channels=256, num_anchors=anchors_per_location),
            nms_thresh=0.7,
            positive_fraction=0.5,
            post_nms_top_n={'training': 2000, 'testing': 300},
            pre_nms_top_n={'training': 12000, 'testing': 6000},
            score_thresh=0.0,
        )
        self.roi_align = ops.MultiScaleRoIAlign(
            output_size=7,
            sampling_ratio=2,
            featmap_names=['0', '1', '2', '3'],
            canonical_scale=224,
            canonical_level=4,
        )
        self.mask_head = mask_rcnn.MaskRCNNPredictor(
            in_channels=256,
            dim_reduced=10,
            num_classes=10,
        )
        self.bbox3d = Bbox3DPredictor(
            features=256,
            bins=2,
            w=0.4,
        )

    def forward(self, x, targets=None):
        """Run backbone, RPN, RoI align and the mask head on a batch.

        Args:
            x: Batched image tensor; the last two dimensions of each image
                are used as its size.
            targets: One dict per image. ``targets`` is passed to the RPN as
                is, and ``target["mask"]`` is read for the mask loss. It may
                be omitted outside training.

        Returns:
            In training mode, a dict holding the RPN losses plus
            ``'mask_loss'``. Otherwise the raw mask logits for every
            proposal of every image, concatenated along dimension 0.
        """
        backbone_features = self.backbone(x)
        print(backbone_features)
        for feature in backbone_features.values():
            print("Backbone features shapes", feature.shape)

        img_list = image_list.ImageList(x, [img.shape[-2:] for img in x])
        proposals, proposals_losses = self.rpn_layer(
            img_list,
            backbone_features,
            targets,
        )
        for proposal in proposals:
            print("Proposals shapes", proposal.shape)

        roi_features = self.roi_align(
            backbone_features,
            proposals,
            img_list.image_sizes,
        )
        print(f"ROI Align shape: {roi_features.shape}")

        # The mask head does not depend on the targets, so run it once for
        # the whole batch instead of once per target.
        mask_logits = self.mask_head(roi_features)
        print(f"Mask logits: {mask_logits}")

        if not self.training:
            return mask_logits

        # Keep the RPN losses; previously they were computed and discarded.
        losses = dict(proposals_losses)

        # RoI features (and therefore the logits) are the per-image proposals
        # concatenated along dim 0, so split them back to pair image i's
        # logits with image i's target.
        logits_per_image = mask_logits.split([len(boxes) for boxes in proposals])

        # NOTE(review): binary_cross_entropy_with_logits needs input and
        # target of identical shape. Each logits chunk has one row per
        # proposal and one channel per class (presumably
        # (num_proposals, 10, 14, 14) with this mask head -- TODO confirm),
        # whereas target["mask"] presumably holds one full-resolution mask per
        # ground-truth instance. Before this loss can run, each proposal has
        # to be matched to a ground-truth instance, the logit channel of that
        # instance's class selected, and the ground-truth mask cropped and
        # resized to the proposal box (torchvision's
        # roi_heads.maskrcnn_loss / project_masks_on_boxes do exactly this).
        # That needs per-instance labels, which are not visible here.
        per_image_losses = [
            fnc.binary_cross_entropy_with_logits(logits, target["mask"])
            for logits, target in zip(logits_per_image, targets)
        ]
        if per_image_losses:
            # Average over images rather than keeping only the last one.
            losses['mask_loss'] = sum(per_image_losses) / len(per_image_losses)
        return losses
Output
Backbone features shapes torch.Size([4, 256, 64, 64])
Backbone features shapes torch.Size([4, 256, 32, 32])
Backbone features shapes torch.Size([4, 256, 16, 16])
Backbone features shapes torch.Size([4, 256, 8, 8])
Backbone features shapes torch.Size([4, 256, 4, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
ROI Align shape: torch.Size([8000, 256, 7, 7])