I am working on a Mask R-CNN model that uses RGB aerial imagery to detect two kinds of roofs. I had to adjust many pieces of the Mask R-CNN model to get what I want, but everything works now. I am now implementing a validation loop at the end of each epoch, and have already set that up. But I now also want to calculate the mAP, precision, recall, F1 and IoU of the model at each epoch. How do I do that? When my model is put into .eval(), it predicts these things for each object it finds: bounding boxes, labels, instance masks and confidence scores. But sometimes it detects the wrong object or too many objects. How do I alter my code so that I can calculate these scores? Also, a bonus question: how do I calculate the score for the predicted objects? Here is my code if you want to take a look. I'll first send a few functions and then the main code. I'm still in the development stage, so not everything will be perfect:
# Batch size shared by the training, validation and test loaders.
batch_size = 15

# Training batches are reshuffled every epoch.
train_ds = DataLoader(
    train_dict,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=train_dict.collate_fn,
)

# The validation loader is only built when validation samples exist. The
# fallback is an empty list so that `valid_ds` is always defined: a later
# `for ... in valid_ds` then runs zero times instead of raising NameError.
valid_ds = []
if valid_dict.len_dset > 0:
    valid_ds = DataLoader(
        valid_dict,
        batch_size=batch_size,
        shuffle=False,  # evaluation results do not depend on sample order
        collate_fn=valid_dict.collate_fn,
    )

# The test loader collates with the test dataset's own collate_fn (it
# previously borrowed valid_dict.collate_fn).
# NOTE(review): assumes test_dict exposes collate_fn like train_dict and
# valid_dict do - confirm.
test_ds = DataLoader(
    test_dict,
    batch_size=batch_size,
    shuffle=False,  # evaluation results do not depend on sample order
    collate_fn=test_dict.collate_fn,
)
# Forward hook used to capture a module's output.
def hook_fn(module, input, output):
    """Store the hooked module's forward output in a module-level global.

    Args:
        module: The module the hook is attached to (unused).
        input: The inputs passed to the module's forward call (unused).
        output: The value produced by the forward call; kept as-is.

    Returns:
        None. The output is only recorded in ``backbone_output``.
    """
    global backbone_output
    backbone_output = output
from torchvision.models.detection.transform import GeneralizedRCNNTransform
class CustomGeneralizedRCNNTransform(GeneralizedRCNNTransform):
    """GeneralizedRCNNTransform variant whose ``normalize`` is a no-op.

    The parent is initialised with four-entry zero-mean / unit-std
    statistics, and ``normalize`` returns its input untouched.
    """

    def __init__(self, min_size, max_size, size_divisible=32, fixed_size=None, **kwargs):
        """Forward the resize settings to the parent with neutral statistics.

        Args:
            min_size: Passed through to the parent transform.
            max_size: Passed through to the parent transform.
            size_divisible: Passed through to the parent transform.
            fixed_size: Passed through to the parent transform.
            **kwargs: Any further keyword arguments for the parent.
        """
        # Neutral statistics (four entries each); normalize() below does not
        # read them.
        neutral_mean = [0.0, 0.0, 0.0, 0.0]
        neutral_std = [1.0, 1.0, 1.0, 1.0]
        super().__init__(
            min_size,
            max_size,
            image_mean=neutral_mean,
            image_std=neutral_std,
            size_divisible=size_divisible,
            fixed_size=fixed_size,
            **kwargs,
        )

    def normalize(self, image):
        """Return ``image`` unchanged."""
        # Normalisation is skipped because the images are already normalised.
        return image
from torchvision.models.detection import MaskRCNN
# Custom Mask R-CNN model that swaps in the non-normalising transform.
class CustomMaskRCNN(MaskRCNN):
    """MaskRCNN subclass that uses CustomGeneralizedRCNNTransform.

    Args:
        backbone: Feature extractor handed to ``MaskRCNN``.
        num_classes: Number of classes handed to ``MaskRCNN``.
        rpn_anchor_generator: Anchor generator handed to ``MaskRCNN``.
        size: Used as both ``min_size`` and ``max_size`` of the transform.
    """

    def __init__(self, backbone, num_classes, rpn_anchor_generator, size):
        super().__init__(
            backbone,
            num_classes,
            rpn_anchor_generator=rpn_anchor_generator,
        )
        # Replace the transform created by the parent constructor; min and
        # max size are both set to `size`.
        custom_transform = CustomGeneralizedRCNNTransform(min_size=size, max_size=size)
        self.transform = custom_transform
from torchvision.models import resnet50
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
# Initialize the ResNet-101 backbone (plain ResNet, no FPN).
# resnet101 is imported here because only resnet50 is imported above.
from torchvision.models import resnet101

# FPN alternative, kept for reference:
# resnet = resnet_fpn_backbone('resnet101', weights=None)
resnet = resnet101(weights=None)

# Keep only the convolutional stages. The last two children of a torchvision
# ResNet are the global average pool and the fully connected classifier;
# dropping only the classifier ([:-1]) would leave the pooling layer in place
# and collapse every image to a 1x1 feature map, which gives the RPN and the
# RoI heads no spatial information to work with.
backbone = nn.Sequential(*list(resnet.children())[:-2])

# Channel count of the last ResNet stage; the detection model reads this
# attribute from the backbone.
backbone.out_channels = 2048

# roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)

# Anchor sizes and aspect ratios. Each is wrapped in a one-element tuple
# because the backbone produces a single feature map.
anchor_sizes = (64, 128, 256, 512)
aspect_ratios = (0.5, 1.0, 2.0, 3.0)

# Define the anchor generator.
anchor_generator = AnchorGenerator(
    sizes=(anchor_sizes,),
    aspect_ratios=(aspect_ratios,),
)
And here is the main code for my training loop:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
# Make CUDA kernel launches synchronous and release cached memory before
# the model is built.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
gc.collect()
torch.cuda.empty_cache()

# Build the model from the configured backbone and anchor generator.
model = CustomMaskRCNN(
    backbone=backbone,
    num_classes=3,
    rpn_anchor_generator=anchor_generator,
    size=256,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Separate learning rates for the backbone and for the rest of the model
# (currently set to the same value).
backbone_lr = 0.00025
other_lr = 0.00025

# Split the trainable parameters into "backbone" and "everything else";
# identity (id) is used so that each parameter lands in exactly one group.
backbone_parameters = [p for p in model.backbone.parameters() if p.requires_grad]
backbone_ids = {id(p) for p in backbone_parameters}
other_parameters = [
    p for p in model.parameters()
    if p.requires_grad and id(p) not in backbone_ids
]

param_groups = [
    {'params': backbone_parameters, 'lr': backbone_lr},
    {'params': other_parameters, 'lr': other_lr},
]
optimizer = torch.optim.Adam(param_groups)

# Multiply the learning rate by 0.99 every 5 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.99)
scaler = GradScaler()

# Register the forward hook on the backbone.
hook = model.backbone.register_forward_hook(hook_fn)

print('Begin training backbone model')

# Best-checkpoint bookkeeping.
best_loss = 10
best_state = None
backbone_states = []  # list for storing the states of the backbone model

# Per-epoch histories.
loss_training = []
loss_validation = []
individual_losses_history = []
individual_validation_losses_history = []
lr_history = []  # learning rates recorded during training

# Category definitions (replace with your actual categories).
categories = [{'id': 1, 'name': 'flat_roof'}, {'id': 2, 'name': 'slanted_roof'}]
categories_names = ['background', 'flat_roof', 'slanted_roof']
# Train the model for a fixed number of epochs. Each epoch runs one pass over
# the training loader, one loss-only pass over the validation loader, records
# the averaged losses, and keeps a snapshot of the best weights seen so far.
num_epochs = 100

# Keys of the loss dict tracked individually during training and validation.
loss_keys = ('loss_classifier', 'loss_box_reg', 'loss_mask', 'loss_objectness', 'loss_rpn_box_reg')

for epoch in range(num_epochs):
    model.train()
    epoch_loss = []
    epoch_val_loss = []
    # Placeholders for true and predicted labels (not filled in yet).
    y_true = []
    y_pred = []
    individual_losses = {key: [] for key in loss_keys}

    for counter, (images, heights, targets, names) in enumerate(train_ds, start=1):
        print(counter)
        # if counter == 10:
        #     break
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # Only the forward pass and the loss sum run under autocast; the
        # backward pass goes through the gradient scaler below.
        with torch.cuda.amp.autocast():
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())
        epoch_loss.append(losses.item())

        # Save individual losses.
        for key in loss_dict:
            individual_losses[key].append(loss_dict[key].item())

        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()

    # Save the current learning rate of every parameter group.
    for param_group in optimizer.param_groups:
        lr_history.append(param_group['lr'])

    # Validation loop. The model is left in train mode on purpose: in eval
    # mode it returns predictions (boxes, labels, masks, scores) rather than
    # a loss dict.
    # NOTE(review): train mode under no_grad still lets layers such as
    # BatchNorm update their running statistics on validation data - confirm
    # this is acceptable.
    with torch.no_grad():
        individual_validation_losses = {key: [] for key in loss_keys}
        for valid_images, valid_heights, valid_targets, valid_names in valid_ds:
            valid_images = [image.to(device) for image in valid_images]
            valid_targets = [{k: v.to(device) for k, v in t.items()} for t in valid_targets]

            # Compute validation loss.
            loss_dict = model(valid_images, valid_targets)
            losses = sum(loss_dict.values())
            epoch_val_loss.append(losses.item())

            # Save individual validation losses.
            for key in loss_dict:
                individual_validation_losses[key].append(loss_dict[key].item())

    # Averages for this epoch; an empty list yields NaN directly so that
    # np.nanmean is never called on empty input (which emits a warning).
    average = np.nanmean(epoch_loss) if epoch_loss else float('nan')
    average_val = np.nanmean(epoch_val_loss) if epoch_val_loss else float('nan')
    loss_training.append(average)
    loss_validation.append(average_val)
    individual_losses_history.append(individual_losses)
    individual_validation_losses_history.append(individual_validation_losses)

    print(len(epoch_loss))
    print(f'Epoch {epoch} finished with average training loss: {average} and validation loss: {average_val}')
    print(individual_losses)

    # Print average individual losses.
    for key, values in individual_losses.items():
        key_average = np.nanmean(values) if values else float('nan')
        print(f'Average {key}: {key_average}')

    # Keep the weights of the epoch with the LOWEST average training loss
    # (a NaN average never compares as lower, so it is never stored).
    # state_dict() hands back references to the live tensors, so each tensor
    # is cloned; a shallow dict copy would keep changing as training goes on.
    if average < best_loss:
        best_loss = average
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    lr_scheduler.step()

# Print learning rate history.
print("Learning rate history:", lr_history)
I tried to solve it myself, but even with the help of Copilot, I couldn't solve it yet. Does anyone have any advice or ideas?