I am using PyTorch's FCN-ResNet50 for the first time, loaded with `model = models.segmentation.fcn_resnet50(pretrained=True)`,
to classify the pixels of satellite images of land. My goal is to get the percentage of the land that is covered by forest. As input I have satellite images, each with an associated color-coded mask (forest, farmland, urban area, water, …).
I wrote my own PyTorch Dataset class and then call the model, but somewhere it is going wrong. I think it is because I am not passing in the color coding, but I'm not sure.
Here is what it should look like:
Expected
And here is what happens:
What I get
Here is the code for the data:
# RGB color assigned to each land-cover class name.
# Insertion order is significant: class_to_rgb() enumerates the values, so the
# i-th entry is the color painted for class index i.
label_colors = dict(
    urban_land=(0, 255, 255),
    agriculture_land=(255, 255, 0),
    rangeland=(255, 0, 255),
    forest_land=(0, 255, 0),
    water=(0, 0, 255),
    barren_land=(255, 255, 255),
    unknown=(0, 0, 0),
)
class ImageMaskDataset(Dataset):
    """Dataset that pairs each ``*_sat.jpg`` image with a ``*_mask.png`` mask.

    Args:
        image_dir: Directory scanned for files whose name ends in ``_sat.jpg``.
        mask_dir: Directory the derived mask filenames are resolved against.
        transform: Optional callable applied to the loaded image.
        mask_transform: Optional callable applied to the loaded mask.
    """

    def __init__(self, image_dir, mask_dir, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mask_transform = mask_transform
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping reproducible across runs and machines.
        self.images = sorted(
            name for name in os.listdir(image_dir) if name.endswith('_sat.jpg')
        )
        self.masks = [self._find_mask(name) for name in self.images]

    def _find_mask(self, image_filename):
        """Return the mask filename derived from *image_filename*.

        Everything from the last ``_sat`` onwards is replaced by ``_mask.png``.
        Splitting from the right keeps stems that themselves contain ``_sat``
        intact (``tile_sat_7_sat.jpg`` -> ``tile_sat_7_mask.png``).
        """
        base = image_filename.rsplit('_sat', 1)[0]
        return f"{base}_mask.png"

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair at position *idx*.

        The image and the mask are transformed independently: ``transform`` is
        applied only to the image and ``mask_transform`` only to the mask.
        """
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        image = io.imread(img_path)
        # NOTE(review): the mask is returned as read from disk (presumably an
        # RGB color-coded array); it is not converted to class indices here.
        mask = io.imread(mask_path)

        if self.transform:
            image = self.transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)
        return image, mask
And here is the code for running on the model:
def main():
    """Run the pretrained FCN-ResNet50 on one dataset sample and plot it.

    Builds the model and the dataset/dataloader, runs inference on the first
    batch only, prints the raw output, its shape and the color-mapped
    prediction, then shows the input image next to the predicted class-index
    mask.
    """
    # NOTE(review): the stock pretrained head predicts torchvision's own
    # segmentation categories, which presumably do not correspond to the
    # land-cover classes in label_colors. Until the classifier is replaced and
    # trained on the masks, the predicted indices will not line up with
    # label_colors - confirm against the torchvision documentation.
    model = models.segmentation.fcn_resnet50(pretrained=True)
    model.eval()  # inference only: fixes batch-norm statistics

    # Dataset and DataLoader
    image_directory = '/Users/archive/train3/images'
    mask_directory = '/Users/archive/train3/masks'
    transform = Compose([
        ToTensor(),  # Converts numpy array to tensor
        Resize((256, 256)),  # Resize the tensor
    ])
    # Only the image transform is supplied; masks are returned untransformed
    # and are not used below.
    dataset = ImageMaskDataset(image_directory, mask_directory, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    with torch.no_grad():
        for images, _ in dataloader:
            images = images.to(device)
            output = model(images)['out']
            print(output)
            print(output.shape)

            # Most likely class per pixel. Softmax is monotonic, so taking the
            # arg-max of the raw scores selects the same classes.
            predicted_masks = output.argmax(dim=1)
            # Bring the result back to the CPU before colorizing and plotting:
            # matplotlib cannot convert CUDA tensors.
            predicted_mask = predicted_masks.squeeze(0).cpu()
            color_mask = class_to_rgb(predicted_mask, label_colors)
            print(color_mask)

            # Visualization
            plt.figure(figsize=(12, 6))
            plt.subplot(1, 2, 1)
            # CHW tensor -> HWC layout expected by imshow
            plt.imshow(images[0].cpu().permute(1, 2, 0))
            plt.title('Original Image')
            plt.axis('off')
            plt.subplot(1, 2, 2)
            plt.imshow(predicted_mask)
            plt.title('Segmentation Output')
            plt.axis('off')
            plt.show()

            # Only the first batch is processed for demonstration
            break
def class_to_rgb(prediction, label_colors):
    """Convert class index predictions to RGB color image for visualization.

    Args:
        prediction: Integer tensor of class indices.
        label_colors: Mapping whose values are ``(r, g, b)`` tuples; the i-th
            value (in iteration order) is the color used for class index i.

    Returns:
        A ``uint8`` tensor of shape ``prediction.shape + (3,)`` on the same
        device as *prediction*. Indices with no entry in *label_colors* stay
        black ``(0, 0, 0)``.
    """
    device = prediction.device
    # Allocate on the prediction's device so the masked assignment below does
    # not fail with a device mismatch when the prediction lives on the GPU.
    color_mask = torch.zeros(
        (*prediction.shape, 3), dtype=torch.uint8, device=device
    )
    for class_idx, color in enumerate(label_colors.values()):
        color_mask[prediction == class_idx] = torch.tensor(
            color, dtype=torch.uint8, device=device
        )
    return color_mask
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Can someone let me know where I went wrong? Or if there is a crucial thing that I need to change?
I tried googling, asking ChatGPT, and asking peers, but none of that helped. I expected it to be a simple bug that I would find quickly.
Jake is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.