Thiết kế website giá rẻ

Question

I am currently training a deep learning model using PyTorch and Faster-RCNN on the Mapillary Traffic Sign Dataset. However, I am encountering an issue with the high initial loss value during training.

Problem

The initial loss is extremely high, specifically ‘42.0230’.The initial loss is extremely high, specifically ‘42.0230’, and I am unsure why this is happening or how to address it.
The learning process plateaus at a loss of around ’33’ and does not improve further.

Context

Framework: PyTorch
Model: Faster-RCNN
Dataset: Mapillary Traffic Sign Dataset

What I Have Tried

Learning Rate Adjustment: I have tried adjusting the learning rate but the initial loss remains high.
Normalization: Ensured that the dataset is properly normalized.
Data Augmentation: Applied data augmentation techniques to the training dataset.

Code Snippet

Here is the relevant part of my custom dataset code:

<code>import json

import os

from PIL import Image

import torch

from torch.utils.data import Dataset

class MapillaryDataset(Dataset):

def __init__(self, img_dir, annotations_dirs, split_files, transform=None):

self.img_dir = img_dir

self.transform = transform

# Initialize an empty set to store all image keys

self.image_keys = set()

# Read all split files and combine the image keys

for split_file in split_files:

with open(split_file, 'r') as f:

keys = [line.strip() for line in f.readlines()]

self.image_keys.update(keys)

print(f"Read {len(keys)} keys from {split_file}")

# Initialize an empty list to store all annotations

self.data = []

# Read annotations from all annotation directories

for ann_dir in annotations_dirs:

for file_name in os.listdir(ann_dir):

if file_name.endswith('.json'):

file_path = os.path.join(ann_dir, file_name)

with open(file_path, 'r') as f:

data = json.load(f)

# Assuming 'objects' field contains the annotations

if 'objects' in data:

for obj in data['objects']:

if 'key' in obj:

obj['image_id'] = file_name.split('.')[0]

self.data.append(obj)

else:

print(f"Object in {file_name} does not contain 'key'")

else:

print(f"Annotation file {file_name} does not contain 'objects'")

# Filter objects based on the combined image keys and check for the 'key' field

self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys]

print(f"Total objects after filtering: {len(self.objects)}")

# Extract unique labels

self.labels = {obj['label'] for obj in self.objects}

self.label_to_index = {label: index for index, label in enumerate(self.labels)}

def __len__(self):

return len(self.objects)

def __getitem__(self, idx):

obj = self.objects[idx]

img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg")

image = Image.open(img_path).convert("RGB")

bbox = obj['bbox']

label = self.label_to_index[obj['label']]

# Convert bbox to the format (xmin, ymin, xmax, ymax)

bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32)

# Create a target dictionary

target = {

'boxes': bbox_tensor.unsqueeze(0), # Add an extra dimension for single bbox

'labels': torch.tensor([label])

}

if self.transform:

image = self.transform(image)

return image, target

def get_num_classes(self):

return len(self.labels)

# Usage example

img_dir = 'path/to/images' # Replace with the actual path to your images

annotations_dirs = [

'path/to/annotations',

'/path/to/annotations'

] # Replace with the actual paths

split_files = [

'path/to/splits',

'path/to/splits'

] # Replace with the actual path

</code>

<code>import json import os from PIL import Image import torch from torch.utils.data import Dataset class MapillaryDataset(Dataset): def __init__(self, img_dir, annotations_dirs, split_files, transform=None): self.img_dir = img_dir self.transform = transform # Initialize an empty set to store all image keys self.image_keys = set() # Read all split files and combine the image keys for split_file in split_files: with open(split_file, 'r') as f: keys = [line.strip() for line in f.readlines()] self.image_keys.update(keys) print(f"Read {len(keys)} keys from {split_file}") # Initialize an empty list to store all annotations self.data = [] # Read annotations from all annotation directories for ann_dir in annotations_dirs: for file_name in os.listdir(ann_dir): if file_name.endswith('.json'): file_path = os.path.join(ann_dir, file_name) with open(file_path, 'r') as f: data = json.load(f) # Assuming 'objects' field contains the annotations if 'objects' in data: for obj in data['objects']: if 'key' in obj: obj['image_id'] = file_name.split('.')[0] self.data.append(obj) else: print(f"Object in {file_name} does not contain 'key'") else: print(f"Annotation file {file_name} does not contain 'objects'") # Filter objects based on the combined image keys and check for the 'key' field self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys] print(f"Total objects after filtering: {len(self.objects)}") # Extract unique labels self.labels = {obj['label'] for obj in self.objects} self.label_to_index = {label: index for index, label in enumerate(self.labels)} def __len__(self): return len(self.objects) def __getitem__(self, idx): obj = self.objects[idx] img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg") image = Image.open(img_path).convert("RGB") bbox = obj['bbox'] label = self.label_to_index[obj['label']] # Convert bbox to the format (xmin, ymin, xmax, ymax) bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32) # Create a target dictionary target = { 'boxes': bbox_tensor.unsqueeze(0), # Add an extra dimension for single bbox 'labels': torch.tensor([label]) } if self.transform: image = self.transform(image) return image, target def get_num_classes(self): return len(self.labels) # Usage example img_dir = 'path/to/images' # Replace with the actual path to your images annotations_dirs = [ 'path/to/annotations', '/path/to/annotations' ] # Replace with the actual paths split_files = [ 'path/to/splits', 'path/to/splits', 'path/to/splits', 'path/to/splits' ] # Replace with the actual path </code>

import json
import os
from PIL import Image
import torch
from torch.utils.data import Dataset

class MapillaryDataset(Dataset):
    def __init__(self, img_dir, annotations_dirs, split_files, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        
        # Initialize an empty set to store all image keys
        self.image_keys = set()

        # Read all split files and combine the image keys
        for split_file in split_files:
            with open(split_file, 'r') as f:
                keys = [line.strip() for line in f.readlines()]
                self.image_keys.update(keys)
                print(f"Read {len(keys)} keys from {split_file}")

        # Initialize an empty list to store all annotations
        self.data = []

        # Read annotations from all annotation directories
        for ann_dir in annotations_dirs:
            for file_name in os.listdir(ann_dir):
                if file_name.endswith('.json'):
                    file_path = os.path.join(ann_dir, file_name)
                    with open(file_path, 'r') as f:
                        data = json.load(f)
                        # Assuming 'objects' field contains the annotations
                        if 'objects' in data:
                            for obj in data['objects']:
                                if 'key' in obj:
                                    obj['image_id'] = file_name.split('.')[0]
                                    self.data.append(obj)
                                else:
                                    print(f"Object in {file_name} does not contain 'key'")
                        else:
                            print(f"Annotation file {file_name} does not contain 'objects'")

        # Filter objects based on the combined image keys and check for the 'key' field
        self.objects = [obj for obj in self.data if 'image_id' in obj and obj['image_id'] in self.image_keys]
        print(f"Total objects after filtering: {len(self.objects)}")
        
        # Extract unique labels
        self.labels = {obj['label'] for obj in self.objects}
        self.label_to_index = {label: index for index, label in enumerate(self.labels)}

    def __len__(self):
        return len(self.objects)
    
    def __getitem__(self, idx):
        obj = self.objects[idx]
        img_path = os.path.join(self.img_dir, obj['image_id'] + ".jpg")
        image = Image.open(img_path).convert("RGB")
        
        bbox = obj['bbox']
        label = self.label_to_index[obj['label']]
        
        # Convert bbox to the format (xmin, ymin, xmax, ymax)
        bbox_tensor = torch.tensor([bbox['xmin'], bbox['ymin'], bbox['xmax'], bbox['ymax']], dtype=torch.float32)
        
        # Create a target dictionary
        target = {
            'boxes': bbox_tensor.unsqueeze(0),  # Add an extra dimension for single bbox
            'labels': torch.tensor([label])
        }
        
        if self.transform:
            image = self.transform(image)
        
        return image, target
    
    def get_num_classes(self):
        return len(self.labels)

# Usage example
img_dir = 'path/to/images'  # Replace with the actual path to your images

annotations_dirs = [
    'path/to/annotations',
    '/path/to/annotations'
]  # Replace with the actual paths

split_files = [
    'path/to/splits',
    'path/to/splits',
    'path/to/splits',
    'path/to/splits'
]  # Replace with the actual path

Here is the relevant part of my train code:

<code>import torch

from torch.utils.data import DataLoader, Subset

from torchvision.transforms import v2

from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from data import MapillaryDataset

from torch import nn

from torch.nn import init

import random

import logging

# Configure logging to log training progress and details

logging.basicConfig(filename='training.log', level=logging.INFO,

format='%(asctime)s %(levelname)s %(message)s')

# Define a function to get the training transformations

def get_train_transform():

return v2.Compose([

# Randomly resize and crop the image

v2.RandomResizedCrop(640, scale=(0.8, 1.0)),

# Randomly flip the image horizontally

v2.RandomHorizontalFlip(),

# Randomly flip the image vertically

v2.RandomVerticalFlip(),

# Apply random affine transformations

v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),

# Apply random changes in brightness, contrast, saturation, and hue

v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),

# Randomly rotate the image

v2.RandomRotation(degrees=10),

# Randomly convert the image to grayscale

v2.RandomGrayscale(p=0.1),

# Apply Gaussian blur

v2.GaussianBlur(3),

# Convert the image to a tensor

v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),

# Normalize the image with mean and std

v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]),

# Randomly erase parts of the image

v2.RandomErasing(p=0.5)

])

# Define a Trainer class to handle training

class Trainer:

def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None):

logging.info("Initializing trainer...")

# Set the class attributes for the trainer

self.img_dir = img_dir

self.annotations_dirs = annotations_dirs

self.split_files = split_files

self.batch_size = batch_size

self.num_workers = num_workers

self.learning_rate = learning_rate

self.weight_decay = weight_decay

self.num_epochs = num_epochs

self.dropout_prob = dropout_prob

self.subset_size = subset_size

logging.info("Loading dataset...")

# Load the dataset with the specified transformations

dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform())

if self.subset_size:

# Select a subset of the dataset if subset_size is specified

indices = list(range(len(dataset)))

random.shuffle(indices)

subset_indices = indices[:self.subset_size]

dataset = Subset(dataset, subset_indices)

# Determine the number of classes in the dataset

self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1

logging.info(f"Number of classes: {self.num_classes}")

logging.info("Initializing model...")

# Load the pre-trained Faster R-CNN model with ResNet50 backbone

self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT

self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights)

# Get the number of input features for the classifier

in_features = self.model.roi_heads.box_predictor.cls_score.in_features

# Replace the classifier head with a new one

self.model.roi_heads.box_predictor = nn.Sequential(

# Add dropout for regularization

nn.Dropout(p=dropout_prob),

# Add new predictor with the correct number of classes

FastRCNNPredictor(in_features, self.num_classes)

)

# Initialize the weights of the new classifier head

self._initialize_weights()

self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

self.model.to(self.device)

logging.info(f"Using device: {self.device}")

logging.info("Initializing optimizer...")

# Initialize the optimizer

params = [p for p in self.model.parameters() if p.requires_grad]

self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay)

# Initialize the learning rate scheduler

self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)

logging.info("Initializing data loaders...")

# Initialize the data loader

self.data_loader = DataLoader(

dataset,

batch_size=self.batch_size,

shuffle=True,

num_workers=self.num_workers,

pin_memory=True,

collate_fn=lambda x: tuple(zip(*x))

)

logging.info("Trainer initialized successfully.")

def _initialize_weights(self):

# Initialize the weights of the classifier head

for m in self.model.roi_heads.box_predictor.modules():

if isinstance(m, nn.Linear):

init.xavier_uniform_(m.weight)

if m.bias is not None:

m.bias.data.fill_(0.01)

def train(self):

logging.info("Starting training...")

for epoch in range(self.num_epochs):

# Set the model to training mode

self.model.train()

# Initialize running loss

running_loss = 0.0

# Initialize number of batches

num_batches = 0

logging.info(f"Starting epoch {epoch + 1}...")

for _, (images, targets) in enumerate(self.data_loader):

# Move images to the device

images = list(image.to(self.device) for image in images)

# Move targets to the device

targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]

# Zero the gradients

self.optimizer.zero_grad()

# Compute the losses

loss_dict = self.model(images, targets)

# Sum the losses

losses = sum(loss for loss in loss_dict.values())

# Backpropagate the losses

losses.backward()

# Clip the gradients

torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2)

# Update the parameters

self.optimizer.step()

# Update the running loss

running_loss += losses.item()

# Update the number of batches

num_batches += 1

# Step the learning rate scheduler

self.lr_scheduler.step()

logging.info("Calculating loss...")

# Calculate the average loss

avg_loss = running_loss / num_batches

# Log the average loss

logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}')

def save_model(self, path):

# Log the model saving

logging.info(f"Saving model to {path}...")

# Save the model state dictionary

torch.save(self.model.state_dict(), path)

# Log that the model has been saved

logging.info("Model saved.")

def main():

# Define the parameters for the training

img_dir = 'path/to/images'

annotations_dirs = [

'path/to/annotations',

'/path/to/annotations'

]

split_files = [

'path/to/splits',

'path/to/splits'

]

batch_size = 8

num_epochs = 200

num_workers = 4

learning_rate = 0.0001

weight_decay = 0.0005

dropout_prob = 0.5

subset_size = 10000

# Initialize the trainer

trainer = Trainer(img_dir=img_dir,

annotations_dirs=annotations_dirs,

split_files=split_files,

batch_size=batch_size,

num_workers=num_workers,

learning_rate=learning_rate,

weight_decay=weight_decay,

num_epochs=num_epochs,

dropout_prob=dropout_prob,

subset_size=subset_size)

trainer.train() # Start training

trainer.save_model('Some.pth') # Save the trained model

if __name__ == "__main__":

main() # Run the main function

</code>

<code>import torch from torch.utils.data import DataLoader, Subset from torchvision.transforms import v2 from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights from torchvision.models.detection.faster_rcnn import FastRCNNPredictor from data import MapillaryDataset from torch import nn from torch.nn import init import random import logging # Configure logging to log training progress and details logging.basicConfig(filename='training.log', level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') # Define a function to get the training transformations def get_train_transform(): return v2.Compose([ # Randomly resize and crop the image v2.RandomResizedCrop(640, scale=(0.8, 1.0)), # Randomly flip the image horizontally v2.RandomHorizontalFlip(), # Randomly flip the image vertically v2.RandomVerticalFlip(), # Apply random affine transformations v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10), # Apply random changes in brightness, contrast, saturation, and hue v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), # Randomly rotate the image v2.RandomRotation(degrees=10), # Randomly convert the image to grayscale v2.RandomGrayscale(p=0.1), # Apply Gaussian blur v2.GaussianBlur(3), # Convert the image to a tensor v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]), # Normalize the image with mean and std v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]), # Randomly erase parts of the image v2.RandomErasing(p=0.5) ]) # Define a Trainer class to handle training class Trainer: def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None): logging.info("Initializing trainer...") # Set the class attributes for the trainer self.img_dir = img_dir self.annotations_dirs = annotations_dirs self.split_files = split_files self.batch_size = batch_size self.num_workers = num_workers self.learning_rate = learning_rate self.weight_decay = weight_decay self.num_epochs = num_epochs self.dropout_prob = dropout_prob self.subset_size = subset_size logging.info("Loading dataset...") # Load the dataset with the specified transformations dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform()) if self.subset_size: # Select a subset of the dataset if subset_size is specified indices = list(range(len(dataset))) random.shuffle(indices) subset_indices = indices[:self.subset_size] dataset = Subset(dataset, subset_indices) # Determine the number of classes in the dataset self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1 logging.info(f"Number of classes: {self.num_classes}") logging.info("Initializing model...") # Load the pre-trained Faster R-CNN model with ResNet50 backbone self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights) # Get the number of input features for the classifier in_features = self.model.roi_heads.box_predictor.cls_score.in_features # Replace the classifier head with a new one self.model.roi_heads.box_predictor = nn.Sequential( # Add dropout for regularization nn.Dropout(p=dropout_prob), # Add new predictor with the correct number of classes FastRCNNPredictor(in_features, self.num_classes) ) # Initialize the weights of the new classifier head self._initialize_weights() self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') self.model.to(self.device) logging.info(f"Using device: {self.device}") logging.info("Initializing optimizer...") # Initialize the optimizer params = [p for p in self.model.parameters() if p.requires_grad] self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay) # Initialize the learning rate scheduler self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1) logging.info("Initializing data loaders...") # Initialize the data loader self.data_loader = DataLoader( dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, pin_memory=True, collate_fn=lambda x: tuple(zip(*x)) ) logging.info("Trainer initialized successfully.") def _initialize_weights(self): # Initialize the weights of the classifier head for m in self.model.roi_heads.box_predictor.modules(): if isinstance(m, nn.Linear): init.xavier_uniform_(m.weight) if m.bias is not None: m.bias.data.fill_(0.01) def train(self): logging.info("Starting training...") for epoch in range(self.num_epochs): # Set the model to training mode self.model.train() # Initialize running loss running_loss = 0.0 # Initialize number of batches num_batches = 0 logging.info(f"Starting epoch {epoch + 1}...") for _, (images, targets) in enumerate(self.data_loader): # Move images to the device images = list(image.to(self.device) for image in images) # Move targets to the device targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets] # Zero the gradients self.optimizer.zero_grad() # Compute the losses loss_dict = self.model(images, targets) # Sum the losses losses = sum(loss for loss in loss_dict.values()) # Backpropagate the losses losses.backward() # Clip the gradients torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2) # Update the parameters self.optimizer.step() # Update the running loss running_loss += losses.item() # Update the number of batches num_batches += 1 # Step the learning rate scheduler self.lr_scheduler.step() logging.info("Calculating loss...") # Calculate the average loss avg_loss = running_loss / num_batches # Log the average loss logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}') def save_model(self, path): # Log the model saving logging.info(f"Saving model to {path}...") # Save the model state dictionary torch.save(self.model.state_dict(), path) # Log that the model has been saved logging.info("Model saved.") def main(): # Define the parameters for the training img_dir = 'path/to/images' annotations_dirs = [ 'path/to/annotations', '/path/to/annotations' ] split_files = [ 'path/to/splits', 'path/to/splits', 'path/to/splits', 'path/to/splits' ] batch_size = 8 num_epochs = 200 num_workers = 4 learning_rate = 0.0001 weight_decay = 0.0005 dropout_prob = 0.5 subset_size = 10000 # Initialize the trainer trainer = Trainer(img_dir=img_dir, annotations_dirs=annotations_dirs, split_files=split_files, batch_size=batch_size, num_workers=num_workers, learning_rate=learning_rate, weight_decay=weight_decay, num_epochs=num_epochs, dropout_prob=dropout_prob, subset_size=subset_size) trainer.train() # Start training trainer.save_model('Some.pth') # Save the trained model if __name__ == "__main__": main() # Run the main function </code>

import torch
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from data import MapillaryDataset
from torch import nn
from torch.nn import init
import random
import logging

# Configure logging to log training progress and details
logging.basicConfig(filename='training.log', level=logging.INFO, 
                    format='%(asctime)s %(levelname)s %(message)s')

# Define a function to get the training transformations
def get_train_transform():
    return v2.Compose([
        # Randomly resize and crop the image
        v2.RandomResizedCrop(640, scale=(0.8, 1.0)), 
        # Randomly flip the image horizontally
        v2.RandomHorizontalFlip(),
        # Randomly flip the image vertically
        v2.RandomVerticalFlip(),
        # Apply random affine transformations
        v2.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
        # Apply random changes in brightness, contrast, saturation, and hue
        v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        # Randomly rotate the image
        v2.RandomRotation(degrees=10),
        # Randomly convert the image to grayscale
        v2.RandomGrayscale(p=0.1), 
        # Apply Gaussian blur
        v2.GaussianBlur(3),
        # Convert the image to a tensor
        v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
        # Normalize the image with mean and std
        v2.Normalize(mean=[0.3403, 0.3121, 0.3214], std=[0.2724, 0.2608, 0.2669]), 
        # Randomly erase parts of the image
        v2.RandomErasing(p=0.5)  
    ])

# Define a Trainer class to handle training
class Trainer:
    def __init__(self, img_dir, annotations_dirs, split_files, batch_size, num_workers, learning_rate, weight_decay, num_epochs, dropout_prob, subset_size=None):
        logging.info("Initializing trainer...")

        # Set the class attributes for the trainer
        self.img_dir = img_dir
        self.annotations_dirs = annotations_dirs
        self.split_files = split_files
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.num_epochs = num_epochs
        self.dropout_prob = dropout_prob
        self.subset_size = subset_size
        
        logging.info("Loading dataset...")
        # Load the dataset with the specified transformations
        dataset = MapillaryDataset(self.img_dir, self.annotations_dirs, self.split_files, transform=get_train_transform())
        
        if self.subset_size:
            # Select a subset of the dataset if subset_size is specified
            indices = list(range(len(dataset)))
            random.shuffle(indices)
            subset_indices = indices[:self.subset_size]
            dataset = Subset(dataset, subset_indices)

        # Determine the number of classes in the dataset
        self.num_classes = dataset.dataset.get_num_classes() + 1 if isinstance(dataset, Subset) else dataset.get_num_classes() + 1
        logging.info(f"Number of classes: {self.num_classes}")

        logging.info("Initializing model...")
        # Load the pre-trained Faster R-CNN model with ResNet50 backbone
        self.weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
        self.model = fasterrcnn_resnet50_fpn_v2(weights=self.weights)

        # Get the number of input features for the classifier
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features

        # Replace the classifier head with a new one
        self.model.roi_heads.box_predictor = nn.Sequential(
            # Add dropout for regularization
            nn.Dropout(p=dropout_prob),

            # Add new predictor with the correct number of classes
            FastRCNNPredictor(in_features, self.num_classes)
        )

        # Initialize the weights of the new classifier head
        self._initialize_weights()
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(self.device)
        logging.info(f"Using device: {self.device}")
        
        logging.info("Initializing optimizer...")
        # Initialize the optimizer
        params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.AdamW(params, lr=self.learning_rate, weight_decay=self.weight_decay)
        
        # Initialize the learning rate scheduler
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.1)
        
        logging.info("Initializing data loaders...")
        # Initialize the data loader
        self.data_loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=lambda x: tuple(zip(*x))
        )
        
        logging.info("Trainer initialized successfully.")

    def _initialize_weights(self):
        # Initialize the weights of the classifier head
        for m in self.model.roi_heads.box_predictor.modules():
            if isinstance(m, nn.Linear):
                init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.fill_(0.01)

    def train(self):
        logging.info("Starting training...")
        for epoch in range(self.num_epochs):

            # Set the model to training mode
            self.model.train()

            # Initialize running loss  
            running_loss = 0.0  

            # Initialize number of batches
            num_batches = 0  
            
            logging.info(f"Starting epoch {epoch + 1}...")
            for _, (images, targets) in enumerate(self.data_loader):
                # Move images to the device
                images = list(image.to(self.device) for image in images)

                # Move targets to the device
                targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]

                # Zero the gradients
                self.optimizer.zero_grad()  

                # Compute the losses
                loss_dict = self.model(images, targets) 

                # Sum the losses
                losses = sum(loss for loss in loss_dict.values())

                # Backpropagate the losses
                losses.backward()  

                # Clip the gradients
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=2.0, norm_type=2)  

                # Update the parameters
                self.optimizer.step()  

                # Update the running loss
                running_loss += losses.item()  

                # Update the number of batches
                num_batches += 1  
                
            # Step the learning rate scheduler
            self.lr_scheduler.step()  

            logging.info("Calculating loss...")

            # Calculate the average loss
            avg_loss = running_loss / num_batches  

            # Log the average loss
            logging.info(f'Epoch [{epoch+1}/{self.num_epochs}] Loss: {avg_loss:.4f}')  

    def save_model(self, path):
        # Log the model saving
        logging.info(f"Saving model to {path}...") 

        # Save the model state dictionary
        torch.save(self.model.state_dict(), path)  

        # Log that the model has been saved
        logging.info("Model saved.")  

def main():
    # Define the parameters for the training
    img_dir = 'path/to/images'
    annotations_dirs = [
    'path/to/annotations',
    '/path/to/annotations'
]
    split_files = [
    'path/to/splits',
    'path/to/splits',
    'path/to/splits',
    'path/to/splits'
]
    batch_size = 8
    num_epochs = 200
    num_workers = 4
    learning_rate = 0.0001
    weight_decay = 0.0005
    dropout_prob = 0.5
    subset_size = 10000

    # Initialize the trainer
    trainer = Trainer(img_dir=img_dir,
                      annotations_dirs=annotations_dirs,
                      split_files=split_files,
                      batch_size=batch_size,
                      num_workers=num_workers,
                      learning_rate=learning_rate,
                      weight_decay=weight_decay,
                      num_epochs=num_epochs,
                      dropout_prob=dropout_prob,
                      subset_size=subset_size)
    trainer.train()  # Start training
    trainer.save_model('Some.pth')  # Save the trained model

if __name__ == "__main__":
    main()  # Run the main function

Error/Unexpected Behavior

The initial loss at the start of the first epoch is consistently around 42.0230, which seems unusually high.

The loss plateaus at approximately 33 and does not decrease further, indicating a learning stagnation.

Questions

What could be causing this high initial loss?
Are there any common strategies to reduce the initial loss in training Faster-RCNN models?
What could be causing the learning plateau, and how can I address it?
Is there something I might be missing in my dataset preparation or model setup?
Any insights or suggestions would be greatly appreciated.