I am building a custom model for an instance segmentation task; it takes 2D images plus per-image target dicts for training. I think there is an issue with the model architecture, but I'm not sure where it is. I have tried many things and different ways of extracting features, but I keep getting stuck on errors like the one above.
I'm new to deep learning and not sure what is going on here, but something about the output numbers looks suspicious to me, and I suspect that is why I'm getting this error. Any help is appreciated. Thanks in advance!
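For reference, this is roughly how I call the model defined below during training. The tensors here are dummies with the same shapes as my real batch (four 3x256x256 images), and the target key names match what my forward() reads; the per-key shapes are illustrative:
<code>import torch

model = CustomModel()
model.train()

# Dummy batch standing in for my real data: four 3x256x256 images.
images = torch.randn(4, 3, 256, 256)

# One target dict per image; shapes are illustrative placeholders.
targets = [
    {
        "boxes": torch.tensor([[10.0, 10.0, 50.0, 50.0]]),  # per-instance 2D boxes
        "mask": torch.zeros(1, 256, 256),                    # per-instance binary masks
        "pc": torch.zeros(1, 3),                             # per-instance point targets
    }
    for _ in range(4)
]

model(images, targets)
</code>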
Module
<code>import torch.nn as nn
import torch.nn.functional as fnc
from torchvision import models, ops
from torchvision.models.detection import (
    anchor_utils,
    backbone_utils,
    image_list,
    mask_rcnn,
    rpn,
)


class CustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        # ResNet-50 + FPN backbone; returns an OrderedDict of 256-channel feature maps.
        self.backbone = backbone_utils.resnet_fpn_backbone(
            backbone_name='resnet50',
            weights=models.ResNet50_Weights.DEFAULT
        )
        self.rpn_layer = rpn.RegionProposalNetwork(
            anchor_generator=anchor_utils.AnchorGenerator(
                # One sizes/aspect_ratios tuple per FPN level (5 levels);
                # 3 sizes x 3 ratios = 9 anchors per location.
                sizes=((6, 9, 16),) * 5,
                aspect_ratios=((1.0, 1.25, 1.5),) * 5
            ),
            batch_size_per_image=4,
            bg_iou_thresh=0.3,
            fg_iou_thresh=0.7,
            head=rpn.RPNHead(in_channels=256, num_anchors=9),
            nms_thresh=0.7,
            positive_fraction=0.5,
            post_nms_top_n={'training': 2000, 'testing': 300},
            pre_nms_top_n={'training': 12000, 'testing': 6000},
            score_thresh=0.0
        )
        self.roi_align = ops.MultiScaleRoIAlign(
            output_size=7,
            sampling_ratio=2,
            featmap_names=['0', '1', '2', '3'],
            canonical_scale=224,
            canonical_level=4
        )
        self.mask_head = mask_rcnn.MaskRCNNPredictor(
            in_channels=256,
            dim_reduced=10,
            num_classes=10
        )
        # 3D box head; Bbox3DPredictor is a separate module defined elsewhere in my code.
        self.bbox3d = Bbox3DPredictor(
            features=256,
            bins=2,
            w=0.4
        )

    def forward(self, x, targets):
        backbone_features = self.backbone(x)
        for name, feature in backbone_features.items():
            print("Backbone features shapes", feature.shape)
        img_list = image_list.ImageList(x, [img.shape[-2:] for img in x])
        proposals, proposals_losses = self.rpn_layer(
            img_list,
            backbone_features,
            targets
        )
        for proposal in proposals:
            print("Proposals shapes", proposal.shape)
        image_shapes = img_list.image_sizes
        # Pool every proposal from every image into a fixed 7x7 feature.
        roi_features = self.roi_align(
            backbone_features,
            proposals,
            image_shapes
        )
        print(f"ROI Align shape: {roi_features.shape}")
        if self.training:
            losses = {}
            for target in targets:
                masks = target["mask"]
                bbox3ds = target["boxes"]
                pcs = target["pc"]
                mask_logits = self.mask_head(roi_features)
                mask_loss = fnc.binary_cross_entropy_with_logits(mask_logits, masks)
                losses['mask_loss'] = mask_loss
                print(f"Mask logits: {mask_logits}")
</code>
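Since the numbers that look suspicious to me show up around the mask head, I also traced it on its own. This is a minimal sketch using the same constructor arguments as in the module above, fed with the shape that ROI Align prints in the output below:
<code>import torch
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

head = MaskRCNNPredictor(in_channels=256, dim_reduced=10, num_classes=10)
roi_features = torch.randn(8000, 256, 7, 7)  # ROI Align output shape from below
logits = head(roi_features)
print(logits.shape)
# torch.Size([8000, 10, 14, 14]) -- the transposed conv upsamples 7x7 to 14x14
</code>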
Output
<code>Backbone features shapes torch.Size([4, 256, 64, 64])
Backbone features shapes torch.Size([4, 256, 32, 32])
Backbone features shapes torch.Size([4, 256, 16, 16])
Backbone features shapes torch.Size([4, 256, 8, 8])
Backbone features shapes torch.Size([4, 256, 4, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
Proposals shapes torch.Size([2000, 4])
ROI Align shape: torch.Size([8000, 256, 7, 7])
</code>
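The part that looks suspicious, written out as a sanity check (numbers taken straight from the printout above):
<code># 4 images x 2000 post-NMS training proposals = 8000 pooled ROIs,
# which matches the ROI Align output [8000, 256, 7, 7].
batch_size = 4
post_nms_top_n_train = 2000  # from the RPN config
assert batch_size * post_nms_top_n_train == 8000

# mask_logits therefore has 8000 entries (one per proposal), while each
# target's "mask" tensor only holds that image's few ground-truth masks,
# so binary_cross_entropy_with_logits sees two tensors with different
# first dimensions -- which seems to be where my error comes from.
</code>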