Can someone please help me understand what I'm doing wrong? This is my first time trying to get this to work and I'm having some trouble.
This is the error I'm getting:
error: stack expects each tensor to be equal size, but got [3, 4] at entry 0 and [1, 4] at entry 1 (object detection)
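For what it's worth, I can reproduce the same failure outside of my project with just the two box shapes from the message, so my guess (not something I've confirmed) is that it comes from the batching step trying to torch.stack per-image "boxes" tensors of different sizes:

import torch

# Two images with different numbers of boxes, as in the error message:
# entry 0 has 3 boxes (shape [3, 4]), entry 1 has 1 box (shape [1, 4]).
boxes_a = torch.zeros(3, 4)
boxes_b = torch.zeros(1, 4)

try:
    torch.stack([boxes_a, boxes_b])
except RuntimeError as e:
    print(e)  # stack expects each tensor to be equal size, but got [3, 4] ... and [1, 4] ...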
train.py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
from torch.utils.data import DataLoader
from dataset import CustomDataset
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyperparameters
in_channel = 3
num_classes = 10
learning_rate = 1e-3
batch_size = 32
num_epochs = 1
# Load Data
dataset_root = "/Users/amandanassar/Desktop/WindTurbineProject/"
annotation_file = "/Users/amandanassar/Desktop/WindTurbineProject/annotations.json"
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
dataset = CustomDataset(dataset_root, annotation_file, transform=transform)
# Split dataset into training and testing sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)
# Model
model = torchvision.models.googlenet(pretrained=True)
model.to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
    losses = []
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Cost at epoch {epoch} is {sum(losses) / len(losses)}')
# Check accuracy on training and test set
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum().item()
            num_samples += predictions.size(0)
    accuracy = float(num_correct) / float(num_samples) * 100
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}%')
    model.train()
print("Checking accuracy on Training Set")
check_accuracy(train_loader, model)
print("Checking accuracy on Test Set")
check_accuracy(test_loader, model)
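As far as I can tell, the train_loader and test_loader above use the DataLoader's default collate. From the torchvision detection examples, my understanding is that variable-size targets are usually kept as a list instead of being stacked; the sketch below shows that idea (detection_collate is a name I made up, and I haven't verified it works with my training loop, since the loss line presumably expects stacked class targets):

def detection_collate(batch):
    # Keep images and targets as plain lists instead of stacking them,
    # so per-image "boxes" tensors with different lengths are allowed.
    images, targets = zip(*batch)
    return list(images), list(targets)

# Hypothetical usage:
# train_loader = DataLoader(dataset=train_set, batch_size=batch_size,
#                           shuffle=True, collate_fn=detection_collate)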
dataset.py
import json
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch
class CustomDataset(Dataset):
    def __init__(self, dataset_root, annotation_file, transform=None):
        self.dataset_root = dataset_root
        self.transform = transform
        with open(annotation_file) as f:
            self.annotations = json.load(f)["images"]
        self.skipped_images = []

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotation = self.annotations[idx]
        img_name = annotation["image"]
        # Attempt to find the correct subfolder for the image
        subfolders = ["cracks", "LE Erosion", "Lightening", "VG Panel"]
        img_path = None
        for subfolder in subfolders:
            potential_path = os.path.join(self.dataset_root, "Data images", subfolder, img_name)
            if os.path.exists(potential_path):
                img_path = potential_path
                break
        if img_path is None:
            print(f"Image {img_name} not found in any subfolder. Skipping...")
            self.skipped_images.append(img_name)
            return None
        print(f"Loading image from: {img_path}")  # Print the image path being loaded
        try:
            image = Image.open(img_path).convert("RGB")
        except FileNotFoundError:
            print(f"File not found: {img_path}")
            self.skipped_images.append(img_name)
            return None
        # Apply transformations if specified
        if self.transform:
            try:
                image = self.transform(image)
            except Exception as e:
                print(f"Error processing image {img_name}: {e}")
                self.skipped_images.append(img_name)
                return None
        boxes = []
        labels = []
        for anno in annotation["annotations"]:
            label = anno["label"]
            if label == "crack":
                label_id = 0
            elif label == "erosion":
                label_id = 1
            elif label == "lightening":
                label_id = 2
            elif label == "vg panel":
                label_id = 3
            else:
                continue  # Skip this annotation if label is not recognized
            labels.append(label_id)
            x = anno["coordinates"]["x"]
            y = anno["coordinates"]["y"]
            width = anno["coordinates"]["width"]
            height = anno["coordinates"]["height"]
            boxes.append([x, y, x + width, y + height])
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.int64)
        }
        return image, target

    def get_skipped_images(self):
        return self.skipped_images
# Custom collate function to filter out None entries
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)
if __name__ == "__main__":
    dataset_root = "/Users/amandanassar/Desktop/WindTurbineProject/"
    annotation_file = "/Users/amandanassar/Desktop/WindTurbineProject/annotations.json"
    # Define your transformation pipeline, including resizing
    transform = transforms.Compose([
        transforms.Resize((256, 256)),  # Resize images to a fixed size
        transforms.ToTensor(),
    ])
    train_dataset = CustomDataset(dataset_root, annotation_file, transform=transform)
    # Define DataLoader with custom collate_fn
    train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
    # Print the length of the dataset
    print(f"Dataset length: {len(train_dataset)}")
    # Iterate through the dataset
    for i in range(len(train_dataset)):
        result = train_dataset[i]
        if result is not None:
            img, target = result
            print(f"Index: {i}")
            print(f"Image size: {img.size()}")
            print(f"Target annotations: {target}")
            print("------------------------------------")
    # Print skipped images
    skipped_images = train_dataset.get_skipped_images()
    if skipped_images:
        print("Skipped images:")
        for img_name in skipped_images:
            print(img_name)
    else:
        print("No images were skipped.")
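One thing I noticed while testing: even with my collate_fn, default_collate still tries to stack the "boxes" tensors inside the target dicts, so filtering out None entries alone doesn't seem to avoid the error. This snippet reproduces it with two fake samples shaped like my __getitem__ output:

import torch
from torch.utils.data.dataloader import default_collate

sample_a = (torch.zeros(3, 256, 256),
            {"boxes": torch.zeros(3, 4), "labels": torch.zeros(3, dtype=torch.int64)})
sample_b = (torch.zeros(3, 256, 256),
            {"boxes": torch.zeros(1, 4), "labels": torch.zeros(1, dtype=torch.int64)})

try:
    default_collate([sample_a, sample_b])
except RuntimeError as e:
    print(e)  # same "stack expects each tensor to be equal size" error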
I tried resizing the images and making it a tuple. I also tried skipping images with more than one annotation inside, but then I got a new error when I ran my model, which was:
TypeError: object of type 'NoneType' has no len()
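I'm not sure exactly where the len() call happens, but since my __getitem__ returns None for skipped images, anything downstream that calls len() on such a sample fails with exactly this message:

sample = None  # what __getitem__ returns for a skipped image
try:
    len(sample)
except TypeError as e:
    print(e)  # object of type 'NoneType' has no len()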