I am currently training a deep learning model using PyTorch and Faster-RCNN on the Mapillary Traffic Sign Dataset. However, I am encountering an issue with the high initial loss value during training.
Problem
- The initial loss is extremely high, specifically ‘42.0230’.The initial loss is extremely high, specifically ‘42.0230’, and I am unsure why this is happening or how to address it.
- The learning process plateaus at a loss of around ’33’ and does not improve further.
Context
- Framework: PyTorch
- Model: Faster-RCNN
- Dataset: Mapillary Traffic Sign Dataset
What I Have Tried
- Learning Rate Adjustment: I have tried adjusting the learning rate but the initial loss remains high.
- Normalization: Ensured that the dataset is properly normalized.
- Data Augmentation: Applied data augmentation techniques to the training dataset.
Code Snippet
Here is the relevant part of my custom dataset code:
import json
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
class MapillaryDataset(Dataset):
def __init__(self, img_dir, annotations_dirs, split_files, transform=None):
self.img_dir = img_dir
self.transform = transform
# Initialize an empty set to store all image keys
self.image_keys = set()
# Read all split files and combine the image keys
for split_file in split_files:
with open(split_file, 'r') as f:
keys = [line.strip() for line in f.readlines()]
self.image_keys.update(keys)
print(f"Read {len(keys)} keys from {split_file}")
# Initialize an empty list to store all annotations
self.data = []
# Read annotations from all annotation directories
for ann_dir in annotations_dirs:
for file_name in os.listdir(ann_dir):
if file_name.endswith('.json'):
file_path = os.path.join(ann_dir, file_name)
with open(file_path, 'r') as f:
data = json.load(f)
# Assuming 'objects' field contains the annotations
if 'objects' in data:
for obj in data['objects']:
if 'key' in obj:
obj['image_id'] = file_name.split('.')[0]
self.data.append(obj)
else:
print(f"Object in {file_name} does not contain 'key'")
else:
print(f"Annotation file {file_name} does not contain 'objects'")
# Filter objects based on the combined image keys and check for the 'key' field
self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys]
print(f"Total objects after filtering: {len(self.objects)}")
# Extract unique labels
self.labels = {obj['label'] for obj in self.objects}
self.label_to_index = {label: index for index, label in enumerate(self.labels)}
def __len__(self):
return len(self.objects)
def __getitem__(self, idx):
obj = self.objects[idx]
img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg")
image = Image.open(img_path).convert("RGB")
bbox = obj['bbox']
label = self.label_to_index[obj['label']]
# Convert bbox to the format (xmin, ymin, xmax, ymax)
bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32)
# Create a target dictionary
target = {
'boxes': bbox_tensor.unsqueeze(0), # Add an extra dimension for single bbox
'labels': torch.tensor([label])
}
if self.transform:
image = self.transform(image)
return image, target
def get_num_classes(self):
return len(self.labels)
# Usage example
img_dir = 'path/to/images' # Replace with the actual path to your images
annotations_dirs = [
'path/to/annotations',
'/path/to/annotations'
] # Replace with the actual paths
split_files = [
'path/to/splits',
'path/to/splits',
'path/to/splits',
'path/to/splits'
] # Replace with the actual path
Here is the relevant part of my train code:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from data import MapillaryDataset
from torch import nn
from torch.nn import init
import random
import logging
# Configure logging to log training progress and details
logging.basicConfig(filename='training.log', level=logging.INFO,
format='%(asctime)s %(levelname)s %(message)s')
# Define a function to get the training transformations
def get_train_transform():
return v2.Compose([
# Randomly resize and crop the image
v2.RandomResizedCrop(640, scale=(0.8, 1.0)),
# Randomly flip the image horizontally
v2.RandomHorizontalFlip(),
# Randomly flip the image vertically
v2.RandomVerticalFlip(),
# Apply random affine transformations
v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
# Apply random changes in brightness, contrast, saturation, and hue
v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
# Randomly rotate the image
v2.RandomRotation(degrees=10),
# Randomly convert the image to grayscale
v2.RandomGrayscale(p=0.1),
# Apply Gaussian blur
v2.GaussianBlur(3),
# Convert the image to a tensor
v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
# Normalize the image with mean and std
v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]),
# Randomly erase parts of the image
v2.RandomErasing(p=0.5)
])
# Define a Trainer class to handle training
class Trainer:
def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None):
logging.info("Initializing trainer...")
# Set the class attributes for the trainer
self.img_dir = img_dir
self.annotations_dirs = annotations_dirs
self.split_files = split_files
self.batch_size = batch_size
self.num_workers = num_workers
self.learning_rate = learning_rate
self.weight_decay = weight_decay
self.num_epochs = num_epochs
self.dropout_prob = dropout_prob
self.subset_size = subset_size
logging.info("Loading dataset...")
# Load the dataset with the specified transformations
dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform())
if self.subset_size:
# Select a subset of the dataset if subset_size is specified
indices = list(range(len(dataset)))
random.shuffle(indices)
subset_indices = indices[:self.subset_size]
dataset = Subset(dataset, subset_indices)
# Determine the number of classes in the dataset
self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1
logging.info(f"Number of classes: {self.num_classes}")
logging.info("Initializing model...")
# Load the pre-trained Faster R-CNN model with ResNet50 backbone
self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights)
# Get the number of input features for the classifier
in_features = self.model.roi_heads.box_predictor.cls_score.in_features
# Replace the classifier head with a new one
self.model.roi_heads.box_predictor = nn.Sequential(
# Add dropout for regularization
nn.Dropout(p=dropout_prob),
# Add new predictor with the correct number of classes
FastRCNNPredictor(in_features, self.num_classes)
)
# Initialize the weights of the new classifier head
self._initialize_weights()
self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
self.model.to(self.device)
logging.info(f"Using device: {self.device}")
logging.info("Initializing optimizer...")
# Initialize the optimizer
params = [p for p in self.model.parameters() if p.requires_grad]
self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay)
# Initialize the learning rate scheduler
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)
logging.info("Initializing data loaders...")
# Initialize the data loader
self.data_loader = DataLoader(
dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=self.num_workers,
pin_memory=True,
collate_fn=lambda x: tuple(zip(*x))
)
logging.info("Trainer initialized successfully.")
def _initialize_weights(self):
# Initialize the weights of the classifier head
for m in self.model.roi_heads.box_predictor.modules():
if isinstance(m, nn.Linear):
init.xavier_uniform_(m.weight)
if m.bias is not None:
m.bias.data.fill_(0.01)
def train(self):
logging.info("Starting training...")
for epoch in range(self.num_epochs):
# Set the model to training mode
self.model.train()
# Initialize running loss
running_loss = 0.0
# Initialize number of batches
num_batches = 0
logging.info(f"Starting epoch {epoch + 1}...")
for _, (images, targets) in enumerate(self.data_loader):
# Move images to the device
images = list(image.to(self.device) for image in images)
# Move targets to the device
targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
# Zero the gradients
self.optimizer.zero_grad()
# Compute the losses
loss_dict = self.model(images, targets)
# Sum the losses
losses = sum(loss for loss in loss_dict.values())
# Backpropagate the losses
losses.backward()
# Clip the gradients
torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2)
# Update the parameters
self.optimizer.step()
# Update the running loss
running_loss += losses.item()
# Update the number of batches
num_batches += 1
# Step the learning rate scheduler
self.lr_scheduler.step()
logging.info("Calculating loss...")
# Calculate the average loss
avg_loss = running_loss / num_batches
# Log the average loss
logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}')
def save_model(self, path):
# Log the model saving
logging.info(f"Saving model to {path}...")
# Save the model state dictionary
torch.save(self.model.state_dict(), path)
# Log that the model has been saved
logging.info("Model saved.")
def main():
# Define the parameters for the training
img_dir = 'path/to/images'
annotations_dirs = [
'path/to/annotations',
'/path/to/annotations'
]
split_files = [
'path/to/splits',
'path/to/splits',
'path/to/splits',
'path/to/splits'
]
batch_size = 8
num_epochs = 200
num_workers = 4
learning_rate = 0.0001
weight_decay = 0.0005
dropout_prob = 0.5
subset_size = 10000
# Initialize the trainer
trainer = Trainer(img_dir=img_dir,
annotations_dirs=annotations_dirs,
split_files=split_files,
batch_size=batch_size,
num_workers=num_workers,
learning_rate=learning_rate,
weight_decay=weight_decay,
num_epochs=num_epochs,
dropout_prob=dropout_prob,
subset_size=subset_size)
trainer.train() # Start training
trainer.save_model('Some.pth') # Save the trained model
if __name__ == "__main__":
main() # Run the main function
Error/Unexpected Behavior
The initial loss at the start of the first epoch is consistently around 42.0230, which seems unusually high.
The loss plateaus at approximately 33 and does not decrease further, indicating a learning stagnation.
Questions
-
What could be causing this high initial loss?
-
Are there any common strategies to reduce the initial loss in training Faster-RCNN models?
-
What could be causing the learning plateau, and how can I address it?
-
Is there something I might be missing in my dataset preparation or model setup?
-
Any insights or suggestions would be greatly appreciated.