High Initial Loss and Learning Plateau When Training Faster-RCNN on Mapillary Traffic Sign Dataset Using PyTorch

I am currently training a deep learning model using PyTorch and Faster-RCNN on the Mapillary Traffic Sign Dataset. However, I am encountering an issue with the high initial loss value during training.

Problem

  1. The initial loss is extremely high, specifically ‘42.0230’.The initial loss is extremely high, specifically ‘42.0230’, and I am unsure why this is happening or how to address it.
  2. The learning process plateaus at a loss of around ’33’ and does not improve further.

Context

  • Framework: PyTorch
  • Model: Faster-RCNN
  • Dataset: Mapillary Traffic Sign Dataset

What I Have Tried

  1. Learning Rate Adjustment: I have tried adjusting the learning rate but the initial loss remains high.
  2. Normalization: Ensured that the dataset is properly normalized.
  3. Data Augmentation: Applied data augmentation techniques to the training dataset.

Code Snippet

Here is the relevant part of my custom dataset code:

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code>import json
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
class MapillaryDataset(Dataset):
def __init__(self, img_dir, annotations_dirs, split_files, transform=None):
self.img_dir = img_dir
self.transform = transform
# Initialize an empty set to store all image keys
self.image_keys = set()
# Read all split files and combine the image keys
for split_file in split_files:
with open(split_file, 'r') as f:
keys = [line.strip() for line in f.readlines()]
self.image_keys.update(keys)
print(f"Read {len(keys)} keys from {split_file}")
# Initialize an empty list to store all annotations
self.data = []
# Read annotations from all annotation directories
for ann_dir in annotations_dirs:
for file_name in os.listdir(ann_dir):
if file_name.endswith('.json'):
file_path = os.path.join(ann_dir, file_name)
with open(file_path, 'r') as f:
data = json.load(f)
# Assuming 'objects' field contains the annotations
if 'objects' in data:
for obj in data['objects']:
if 'key' in obj:
obj['image_id'] = file_name.split('.')[0]
self.data.append(obj)
else:
print(f"Object in {file_name} does not contain 'key'")
else:
print(f"Annotation file {file_name} does not contain 'objects'")
# Filter objects based on the combined image keys and check for the 'key' field
self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys]
print(f"Total objects after filtering: {len(self.objects)}")
# Extract unique labels
self.labels = {obj['label'] for obj in self.objects}
self.label_to_index = {label: index for index, label in enumerate(self.labels)}
def __len__(self):
return len(self.objects)
def __getitem__(self, idx):
obj = self.objects[idx]
img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg")
image = Image.open(img_path).convert("RGB")
bbox = obj['bbox']
label = self.label_to_index[obj['label']]
# Convert bbox to the format (xmin, ymin, xmax, ymax)
bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32)
# Create a target dictionary
target = {
'boxes': bbox_tensor.unsqueeze(0), # Add an extra dimension for single bbox
'labels': torch.tensor([label])
}
if self.transform:
image = self.transform(image)
return image, target
def get_num_classes(self):
return len(self.labels)
# Usage example
img_dir = 'path/to/images' # Replace with the actual path to your images
annotations_dirs = [
'path/to/annotations',
'/path/to/annotations'
] # Replace with the actual paths
split_files = [
'path/to/splits',
'path/to/splits',
'path/to/splits',
'path/to/splits'
] # Replace with the actual path
</code>
<code>import json import os from PIL import Image import torch from torch.utils.data import Dataset class MapillaryDataset(Dataset): def __init__(self, img_dir, annotations_dirs, split_files, transform=None): self.img_dir = img_dir self.transform = transform # Initialize an empty set to store all image keys self.image_keys = set() # Read all split files and combine the image keys for split_file in split_files: with open(split_file, 'r') as f: keys = [line.strip() for line in f.readlines()] self.image_keys.update(keys) print(f"Read {len(keys)} keys from {split_file}") # Initialize an empty list to store all annotations self.data = [] # Read annotations from all annotation directories for ann_dir in annotations_dirs: for file_name in os.listdir(ann_dir): if file_name.endswith('.json'): file_path = os.path.join(ann_dir, file_name) with open(file_path, 'r') as f: data = json.load(f) # Assuming 'objects' field contains the annotations if 'objects' in data: for obj in data['objects']: if 'key' in obj: obj['image_id'] = file_name.split('.')[0] self.data.append(obj) else: print(f"Object in {file_name} does not contain 'key'") else: print(f"Annotation file {file_name} does not contain 'objects'") # Filter objects based on the combined image keys and check for the 'key' field self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys] print(f"Total objects after filtering: {len(self.objects)}") # Extract unique labels self.labels = {obj['label'] for obj in self.objects} self.label_to_index = {label: index for index, label in enumerate(self.labels)} def __len__(self): return len(self.objects) def __getitem__(self, idx): obj = self.objects[idx] img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg") image = Image.open(img_path).convert("RGB") bbox = obj['bbox'] label = self.label_to_index[obj['label']] # Convert bbox to the format (xmin, ymin, xmax, ymax) bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32) # Create a target dictionary target = { 'boxes': bbox_tensor.unsqueeze(0), # Add an extra dimension for single bbox 'labels': torch.tensor([label]) } if self.transform: image = self.transform(image) return image, target def get_num_classes(self): return len(self.labels) # Usage example img_dir = 'path/to/images' # Replace with the actual path to your images annotations_dirs = [ 'path/to/annotations', '/path/to/annotations' ] # Replace with the actual paths split_files = [ 'path/to/splits', 'path/to/splits', 'path/to/splits', 'path/to/splits' ] # Replace with the actual path </code>
import json
import os
from PIL import Image
import torch
from torch.utils.data import Dataset

class MapillaryDataset(Dataset):
    def __init__(self, img_dir, annotations_dirs, split_files, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        
        # Initialize an empty set to store all image keys
        self.image_keys = set()

        # Read all split files and combine the image keys
        for split_file in split_files:
            with open(split_file, 'r') as f:
                keys = [line.strip() for line in f.readlines()]
                self.image_keys.update(keys)
                print(f"Read {len(keys)} keys from {split_file}")

        # Initialize an empty list to store all annotations
        self.data = []

        # Read annotations from all annotation directories
        for ann_dir in annotations_dirs:
            for file_name in os.listdir(ann_dir):
                if file_name.endswith('.json'):
                    file_path = os.path.join(ann_dir, file_name)
                    with open(file_path, 'r') as f:
                        data = json.load(f)
                        # Assuming 'objects' field contains the annotations
                        if 'objects' in data:
                            for obj in data['objects']:
                                if 'key' in obj:
                                    obj['image_id'] = file_name.split('.')[0]
                                    self.data.append(obj)
                                else:
                                    print(f"Object in {file_name} does not contain 'key'")
                        else:
                            print(f"Annotation file {file_name} does not contain 'objects'")

        # Filter objects based on the combined image keys and check for the 'key' field
        self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys]
        print(f"Total objects after filtering: {len(self.objects)}")
        
        # Extract unique labels
        self.labels = {obj['label'] for obj in self.objects}
        self.label_to_index = {label: index for index, label in enumerate(self.labels)}

    def __len__(self):
        return len(self.objects)
    
    def __getitem__(self, idx):
        obj = self.objects[idx]
        img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg")
        image = Image.open(img_path).convert("RGB")
        
        bbox = obj['bbox']
        label = self.label_to_index[obj['label']]
        
        # Convert bbox to the format (xmin, ymin, xmax, ymax)
        bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32)
        
        # Create a target dictionary
        target = {
            'boxes': bbox_tensor.unsqueeze(0),  # Add an extra dimension for single bbox
            'labels': torch.tensor([label])
        }
        
        if self.transform:
            image = self.transform(image)
        
        return image, target
    
    def get_num_classes(self):
        return len(self.labels)

# Usage example
img_dir = 'path/to/images'  # Replace with the actual path to your images

annotations_dirs = [
    'path/to/annotations',
    '/path/to/annotations'
]  # Replace with the actual paths

split_files = [
    'path/to/splits',
    'path/to/splits',
    'path/to/splits',
    'path/to/splits'
]  # Replace with the actual path

Here is the relevant part of my train code:

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code>import torch
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from data import MapillaryDataset
from torch import nn
from torch.nn import init
import random
import logging
# Configure logging to log training progress and details
logging.basicConfig(filename='training.log', level=logging.INFO,
format='%(asctime)s %(levelname)s %(message)s')
# Define a function to get the training transformations
def get_train_transform():
return v2.Compose([
# Randomly resize and crop the image
v2.RandomResizedCrop(640, scale=(0.8, 1.0)),
# Randomly flip the image horizontally
v2.RandomHorizontalFlip(),
# Randomly flip the image vertically
v2.RandomVerticalFlip(),
# Apply random affine transformations
v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
# Apply random changes in brightness, contrast, saturation, and hue
v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
# Randomly rotate the image
v2.RandomRotation(degrees=10),
# Randomly convert the image to grayscale
v2.RandomGrayscale(p=0.1),
# Apply Gaussian blur
v2.GaussianBlur(3),
# Convert the image to a tensor
v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
# Normalize the image with mean and std
v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]),
# Randomly erase parts of the image
v2.RandomErasing(p=0.5)
])
# Define a Trainer class to handle training
class Trainer:
def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None):
logging.info("Initializing trainer...")
# Set the class attributes for the trainer
self.img_dir = img_dir
self.annotations_dirs = annotations_dirs
self.split_files = split_files
self.batch_size = batch_size
self.num_workers = num_workers
self.learning_rate = learning_rate
self.weight_decay = weight_decay
self.num_epochs = num_epochs
self.dropout_prob = dropout_prob
self.subset_size = subset_size
logging.info("Loading dataset...")
# Load the dataset with the specified transformations
dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform())
if self.subset_size:
# Select a subset of the dataset if subset_size is specified
indices = list(range(len(dataset)))
random.shuffle(indices)
subset_indices = indices[:self.subset_size]
dataset = Subset(dataset, subset_indices)
# Determine the number of classes in the dataset
self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1
logging.info(f"Number of classes: {self.num_classes}")
logging.info("Initializing model...")
# Load the pre-trained Faster R-CNN model with ResNet50 backbone
self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights)
# Get the number of input features for the classifier
in_features = self.model.roi_heads.box_predictor.cls_score.in_features
# Replace the classifier head with a new one
self.model.roi_heads.box_predictor = nn.Sequential(
# Add dropout for regularization
nn.Dropout(p=dropout_prob),
# Add new predictor with the correct number of classes
FastRCNNPredictor(in_features, self.num_classes)
)
# Initialize the weights of the new classifier head
self._initialize_weights()
self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
self.model.to(self.device)
logging.info(f"Using device: {self.device}")
logging.info("Initializing optimizer...")
# Initialize the optimizer
params = [p for p in self.model.parameters() if p.requires_grad]
self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay)
# Initialize the learning rate scheduler
self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)
logging.info("Initializing data loaders...")
# Initialize the data loader
self.data_loader = DataLoader(
dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=self.num_workers,
pin_memory=True,
collate_fn=lambda x: tuple(zip(*x))
)
logging.info("Trainer initialized successfully.")
def _initialize_weights(self):
# Initialize the weights of the classifier head
for m in self.model.roi_heads.box_predictor.modules():
if isinstance(m, nn.Linear):
init.xavier_uniform_(m.weight)
if m.bias is not None:
m.bias.data.fill_(0.01)
def train(self):
logging.info("Starting training...")
for epoch in range(self.num_epochs):
# Set the model to training mode
self.model.train()
# Initialize running loss
running_loss = 0.0
# Initialize number of batches
num_batches = 0
logging.info(f"Starting epoch {epoch + 1}...")
for _, (images, targets) in enumerate(self.data_loader):
# Move images to the device
images = list(image.to(self.device) for image in images)
# Move targets to the device
targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
# Zero the gradients
self.optimizer.zero_grad()
# Compute the losses
loss_dict = self.model(images, targets)
# Sum the losses
losses = sum(loss for loss in loss_dict.values())
# Backpropagate the losses
losses.backward()
# Clip the gradients
torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2)
# Update the parameters
self.optimizer.step()
# Update the running loss
running_loss += losses.item()
# Update the number of batches
num_batches += 1
# Step the learning rate scheduler
self.lr_scheduler.step()
logging.info("Calculating loss...")
# Calculate the average loss
avg_loss = running_loss / num_batches
# Log the average loss
logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}')
def save_model(self, path):
# Log the model saving
logging.info(f"Saving model to {path}...")
# Save the model state dictionary
torch.save(self.model.state_dict(), path)
# Log that the model has been saved
logging.info("Model saved.")
def main():
# Define the parameters for the training
img_dir = 'path/to/images'
annotations_dirs = [
'path/to/annotations',
'/path/to/annotations'
]
split_files = [
'path/to/splits',
'path/to/splits',
'path/to/splits',
'path/to/splits'
]
batch_size = 8
num_epochs = 200
num_workers = 4
learning_rate = 0.0001
weight_decay = 0.0005
dropout_prob = 0.5
subset_size = 10000
# Initialize the trainer
trainer = Trainer(img_dir=img_dir,
annotations_dirs=annotations_dirs,
split_files=split_files,
batch_size=batch_size,
num_workers=num_workers,
learning_rate=learning_rate,
weight_decay=weight_decay,
num_epochs=num_epochs,
dropout_prob=dropout_prob,
subset_size=subset_size)
trainer.train() # Start training
trainer.save_model('Some.pth') # Save the trained model
if __name__ == "__main__":
main() # Run the main function
</code>
<code>import torch from torch.utils.data import DataLoader, Subset from torchvision.transforms import v2 from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights from torchvision.models.detection.faster_rcnn import FastRCNNPredictor from data import MapillaryDataset from torch import nn from torch.nn import init import random import logging # Configure logging to log training progress and details logging.basicConfig(filename='training.log', level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') # Define a function to get the training transformations def get_train_transform(): return v2.Compose([ # Randomly resize and crop the image v2.RandomResizedCrop(640, scale=(0.8, 1.0)), # Randomly flip the image horizontally v2.RandomHorizontalFlip(), # Randomly flip the image vertically v2.RandomVerticalFlip(), # Apply random affine transformations v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10), # Apply random changes in brightness, contrast, saturation, and hue v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), # Randomly rotate the image v2.RandomRotation(degrees=10), # Randomly convert the image to grayscale v2.RandomGrayscale(p=0.1), # Apply Gaussian blur v2.GaussianBlur(3), # Convert the image to a tensor v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]), # Normalize the image with mean and std v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]), # Randomly erase parts of the image v2.RandomErasing(p=0.5) ]) # Define a Trainer class to handle training class Trainer: def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None): logging.info("Initializing trainer...") # Set the class attributes for the trainer self.img_dir = img_dir self.annotations_dirs = annotations_dirs self.split_files = split_files self.batch_size = batch_size self.num_workers = num_workers self.learning_rate = learning_rate self.weight_decay = weight_decay self.num_epochs = num_epochs self.dropout_prob = dropout_prob self.subset_size = subset_size logging.info("Loading dataset...") # Load the dataset with the specified transformations dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform()) if self.subset_size: # Select a subset of the dataset if subset_size is specified indices = list(range(len(dataset))) random.shuffle(indices) subset_indices = indices[:self.subset_size] dataset = Subset(dataset, subset_indices) # Determine the number of classes in the dataset self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1 logging.info(f"Number of classes: {self.num_classes}") logging.info("Initializing model...") # Load the pre-trained Faster R-CNN model with ResNet50 backbone self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights) # Get the number of input features for the classifier in_features = self.model.roi_heads.box_predictor.cls_score.in_features # Replace the classifier head with a new one self.model.roi_heads.box_predictor = nn.Sequential( # Add dropout for regularization nn.Dropout(p=dropout_prob), # Add new predictor with the correct number of classes FastRCNNPredictor(in_features, self.num_classes) ) # Initialize the weights of the new classifier head self._initialize_weights() self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') self.model.to(self.device) logging.info(f"Using device: {self.device}") logging.info("Initializing optimizer...") # Initialize the optimizer params = [p for p in self.model.parameters() if p.requires_grad] self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay) # Initialize the learning rate scheduler self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1) logging.info("Initializing data loaders...") # Initialize the data loader self.data_loader = DataLoader( dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, pin_memory=True, collate_fn=lambda x: tuple(zip(*x)) ) logging.info("Trainer initialized successfully.") def _initialize_weights(self): # Initialize the weights of the classifier head for m in self.model.roi_heads.box_predictor.modules(): if isinstance(m, nn.Linear): init.xavier_uniform_(m.weight) if m.bias is not None: m.bias.data.fill_(0.01) def train(self): logging.info("Starting training...") for epoch in range(self.num_epochs): # Set the model to training mode self.model.train() # Initialize running loss running_loss = 0.0 # Initialize number of batches num_batches = 0 logging.info(f"Starting epoch {epoch + 1}...") for _, (images, targets) in enumerate(self.data_loader): # Move images to the device images = list(image.to(self.device) for image in images) # Move targets to the device targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets] # Zero the gradients self.optimizer.zero_grad() # Compute the losses loss_dict = self.model(images, targets) # Sum the losses losses = sum(loss for loss in loss_dict.values()) # Backpropagate the losses losses.backward() # Clip the gradients torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2) # Update the parameters self.optimizer.step() # Update the running loss running_loss += losses.item() # Update the number of batches num_batches += 1 # Step the learning rate scheduler self.lr_scheduler.step() logging.info("Calculating loss...") # Calculate the average loss avg_loss = running_loss / num_batches # Log the average loss logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}') def save_model(self, path): # Log the model saving logging.info(f"Saving model to {path}...") # Save the model state dictionary torch.save(self.model.state_dict(), path) # Log that the model has been saved logging.info("Model saved.") def main(): # Define the parameters for the training img_dir = 'path/to/images' annotations_dirs = [ 'path/to/annotations', '/path/to/annotations' ] split_files = [ 'path/to/splits', 'path/to/splits', 'path/to/splits', 'path/to/splits' ] batch_size = 8 num_epochs = 200 num_workers = 4 learning_rate = 0.0001 weight_decay = 0.0005 dropout_prob = 0.5 subset_size = 10000 # Initialize the trainer trainer = Trainer(img_dir=img_dir, annotations_dirs=annotations_dirs, split_files=split_files, batch_size=batch_size, num_workers=num_workers, learning_rate=learning_rate, weight_decay=weight_decay, num_epochs=num_epochs, dropout_prob=dropout_prob, subset_size=subset_size) trainer.train() # Start training trainer.save_model('Some.pth') # Save the trained model if __name__ == "__main__": main() # Run the main function </code>
import torch
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from data import MapillaryDataset
from torch import nn
from torch.nn import init
import random
import logging

# Configure logging to log training progress and details
logging.basicConfig(filename='training.log', level=logging.INFO, 
                    format='%(asctime)s %(levelname)s %(message)s')

# Define a function to get the training transformations
def get_train_transform():
    return v2.Compose([
        # Randomly resize and crop the image
        v2.RandomResizedCrop(640, scale=(0.8, 1.0)), 
        # Randomly flip the image horizontally
        v2.RandomHorizontalFlip(),
        # Randomly flip the image vertically
        v2.RandomVerticalFlip(),
        # Apply random affine transformations
        v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
        # Apply random changes in brightness, contrast, saturation, and hue
        v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        # Randomly rotate the image
        v2.RandomRotation(degrees=10),
        # Randomly convert the image to grayscale
        v2.RandomGrayscale(p=0.1), 
        # Apply Gaussian blur
        v2.GaussianBlur(3),
        # Convert the image to a tensor
        v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
        # Normalize the image with mean and std
        v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]), 
        # Randomly erase parts of the image
        v2.RandomErasing(p=0.5)  
    ])

# Define a Trainer class to handle training
class Trainer:
    def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None):
        logging.info("Initializing trainer...")

        # Set the class attributes for the trainer
        self.img_dir = img_dir
        self.annotations_dirs = annotations_dirs
        self.split_files = split_files
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.num_epochs = num_epochs
        self.dropout_prob = dropout_prob
        self.subset_size = subset_size
        
        logging.info("Loading dataset...")
        # Load the dataset with the specified transformations
        dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform())
        
        if self.subset_size:
            # Select a subset of the dataset if subset_size is specified
            indices = list(range(len(dataset)))
            random.shuffle(indices)
            subset_indices = indices[:self.subset_size]
            dataset = Subset(dataset, subset_indices)

        # Determine the number of classes in the dataset
        self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1
        logging.info(f"Number of classes: {self.num_classes}")

        logging.info("Initializing model...")
        # Load the pre-trained Faster R-CNN model with ResNet50 backbone
        self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
        self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights)

        # Get the number of input features for the classifier
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features

        # Replace the classifier head with a new one
        self.model.roi_heads.box_predictor = nn.Sequential(
            # Add dropout for regularization
            nn.Dropout(p=dropout_prob),

            # Add new predictor with the correct number of classes
            FastRCNNPredictor(in_features, self.num_classes)
        )

        # Initialize the weights of the new classifier head
        self._initialize_weights()
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(self.device)
        logging.info(f"Using device: {self.device}")
        
        logging.info("Initializing optimizer...")
        # Initialize the optimizer
        params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        
        # Initialize the learning rate scheduler
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)
        
        logging.info("Initializing data loaders...")
        # Initialize the data loader
        self.data_loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=lambda x: tuple(zip(*x))
        )
        
        logging.info("Trainer initialized successfully.")

    def _initialize_weights(self):
        # Initialize the weights of the classifier head
        for m in self.model.roi_heads.box_predictor.modules():
            if isinstance(m, nn.Linear):
                init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

    def train(self):
        logging.info("Starting training...")
        for epoch in range(self.num_epochs):

            # Set the model to training mode
            self.model.train()

            # Initialize running loss  
            running_loss = 0.0  

            # Initialize number of batches
            num_batches = 0  
            
            logging.info(f"Starting epoch {epoch + 1}...")
            for _, (images, targets) in enumerate(self.data_loader):
                # Move images to the device
                images = list(image.to(self.device) for image in images)

                # Move targets to the device
                targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]

                # Zero the gradients
                self.optimizer.zero_grad()  

                # Compute the losses
                loss_dict = self.model(images, targets) 

                # Sum the losses
                losses = sum(loss for loss in loss_dict.values())

                # Backpropagate the losses
                losses.backward()  

                # Clip the gradients
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2)  

                # Update the parameters
                self.optimizer.step()  

                # Update the running loss
                running_loss += losses.item()  

                # Update the number of batches
                num_batches += 1  
                
            # Step the learning rate scheduler
            self.lr_scheduler.step()  

            logging.info("Calculating loss...")

            # Calculate the average loss
            avg_loss = running_loss / num_batches  

            # Log the average loss
            logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}')  

    def save_model(self, path):
        # Log the model saving
        logging.info(f"Saving model to {path}...") 

        # Save the model state dictionary
        torch.save(self.model.state_dict(), path)  

        # Log that the model has been saved
        logging.info("Model saved.")  

def main():
    # Define the parameters for the training
    img_dir = 'path/to/images'
    annotations_dirs = [
    'path/to/annotations',
    '/path/to/annotations'
]
    split_files = [
    'path/to/splits',
    'path/to/splits',
    'path/to/splits',
    'path/to/splits'
]
    batch_size = 8
    num_epochs = 200
    num_workers = 4
    learning_rate = 0.0001
    weight_decay = 0.0005
    dropout_prob = 0.5
    subset_size = 10000

    # Initialize the trainer
    trainer = Trainer(img_dir=img_dir,
                      annotations_dirs=annotations_dirs,
                      split_files=split_files,
                      batch_size=batch_size,
                      num_workers=num_workers,
                      learning_rate=learning_rate,
                      weight_decay=weight_decay,
                      num_epochs=num_epochs,
                      dropout_prob=dropout_prob,
                      subset_size=subset_size)
    trainer.train()  # Start training
    trainer.save_model('Some.pth')  # Save the trained model

if __name__ == "__main__":
    main()  # Run the main function

Error/Unexpected Behavior

The initial loss at the start of the first epoch is consistently around 42.0230, which seems unusually high.

The loss plateaus at approximately 33 and does not decrease further, indicating a learning stagnation.

Questions

  1. What could be causing this high initial loss?

  2. Are there any common strategies to reduce the initial loss in training Faster-RCNN models?

  3. What could be causing the learning plateau, and how can I address it?

  4. Is there something I might be missing in my dataset preparation or model setup?

  5. Any insights or suggestions would be greatly appreciated.

Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa Dịch vụ tổ chức sự kiện 5 sao Thông tin về chúng tôi Dịch vụ sinh nhật bé trai Dịch vụ sinh nhật bé gái Sự kiện trọn gói Các tiết mục giải trí Dịch vụ bổ trợ Tiệc cưới sang trọng Dịch vụ khai trương Tư vấn tổ chức sự kiện Hình ảnh sự kiện Cập nhật tin tức Liên hệ ngay Thuê chú hề chuyên nghiệp Tiệc tất niên cho công ty Trang trí tiệc cuối năm Tiệc tất niên độc đáo Sinh nhật bé Hải Đăng Sinh nhật đáng yêu bé Khánh Vân Sinh nhật sang trọng Bích Ngân Tiệc sinh nhật bé Thanh Trang Dịch vụ ông già Noel Xiếc thú vui nhộn Biểu diễn xiếc quay đĩa Dịch vụ tổ chức tiệc uy tín Khám phá dịch vụ của chúng tôi Tiệc sinh nhật cho bé trai Trang trí tiệc cho bé gái Gói sự kiện chuyên nghiệp Chương trình giải trí hấp dẫn Dịch vụ hỗ trợ sự kiện Trang trí tiệc cưới đẹp Khởi đầu thành công với khai trương Chuyên gia tư vấn sự kiện Xem ảnh các sự kiện đẹp Tin mới về sự kiện Kết nối với đội ngũ chuyên gia Chú hề vui nhộn cho tiệc sinh nhật Ý tưởng tiệc cuối năm Tất niên độc đáo Trang trí tiệc hiện đại Tổ chức sinh nhật cho Hải Đăng Sinh nhật độc quyền Khánh Vân Phong cách tiệc Bích Ngân Trang trí tiệc bé Thanh Trang Thuê dịch vụ ông già Noel chuyên nghiệp Xem xiếc khỉ đặc sắc Xiếc quay đĩa thú vị
Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa
Thiết kế website Thiết kế website Thiết kế website Cách kháng tài khoản quảng cáo Mua bán Fanpage Facebook Dịch vụ SEO Tổ chức sinh nhật