I’m trying to use PyTorch to create an app that will read temperature values off an analog gauge using a webcam.
The gauge has markers that are 10 degrees apart.
gauge image
I have captured close to 400 images of the gauge at various readings, with the same frame of capture. In some intervals, I took close to 10 different captures that are approximately 1 degree apart.
When I use the training images to test, I get fairly good results with differences of around 3-4 degrees and worst case around 5 degrees, which for my purposes is acceptable.
However, when I tried to read new values off the gauge based on the trained model, the predicted values were almost 30-40 degrees off, even going beyond the values that I trained the model on.
On different days the lighting varies, and some things may get reflected off the steel parts in the background when there is movement. Also, my webcam is on a tripod that may move slightly on a day-to-day basis, but I have a reference square within which I position my gauge to maximise the similarity of the images.
I would like to know if there is anything I can do to preprocess the images so that I get good values or if there are suggestions on how to go about addressing my task.
FWIW, here’s my training code:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class GaugeDataset(Dataset):
    """Dataset pairing gauge images (``*.jpg``) with scalar temperature labels.

    Every ``name.jpg`` in ``image_dir`` must have a matching ``name.txt`` in
    ``label_dir`` containing a single float reading.

    Parameters
    ----------
    image_dir : str -- directory holding the ``.jpg`` gauge photos.
    label_dir : str -- directory holding the matching ``.txt`` label files.
    transform : callable or None -- optional preprocessing applied to each
        PIL image (e.g. a torchvision ``Compose``).
    """

    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        # Sort for a deterministic ordering: os.listdir order is arbitrary
        # and platform-dependent, which makes runs hard to reproduce.
        self.image_names = sorted(
            img for img in os.listdir(image_dir) if img.endswith('.jpg')
        )

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        image_name = self.image_names[idx]
        image_path = os.path.join(self.image_dir, image_name)
        label_path = os.path.join(self.label_dir, image_name.replace('.jpg', '.txt'))
        # Force 3-channel RGB: a grayscale or CMYK JPEG would otherwise yield
        # a tensor whose channel count does not match the model's conv1.
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        with open(label_path, 'r') as file:
            label = float(file.read().strip())
        label = torch.tensor(label, dtype=torch.float32)  # MSELoss needs float32
        return image, label
# Training-time preprocessing: resize to the 224x224 input the network
# expects, apply light photometric augmentation, then normalize with the
# standard ImageNet channel statistics.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    # Random brightness/contrast jitter to simulate day-to-day lighting
    # changes (train-time augmentation only; the evaluation pipeline
    # should omit this step).
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
class GaugeNet(nn.Module):
    """Small CNN regressor: 224x224 RGB image -> one scalar gauge reading.

    Two conv+pool stages (spatial 224 -> 112 -> 56) feed three fully
    connected layers ending in a single linear output with no activation,
    suitable for MSE regression.
    """

    def __init__(self):
        super(GaugeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 32 channels x 56 x 56 after two 2x2 poolings of a 224x224 input.
        self.fc1 = nn.Linear(32 * 56 * 56, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. torch.flatten(x, 1) preserves the batch
        # dimension, whereas view(-1, 32*56*56) would silently fold a shape
        # mismatch into a wrong batch size instead of raising.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Load the dataset.
# NOTE(review): image_dir and label_dir point at the same folder, so every
# gauge photo must have its .txt label file sitting beside it.
image_dir = 'workspace/AnalogGaugeReader_MLAI/src/root/capturegaugefromwebcam/WIP'
label_dir = 'workspace/AnalogGaugeReader_MLAI/src/root/capturegaugefromwebcam/WIP'
dataset = GaugeDataset(image_dir=image_dir, label_dir=label_dir, transform=data_transforms)
# shuffle=True re-randomizes batch composition every epoch.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Initialize the model, loss function, and optimizer
model = GaugeNet()
criterion = nn.MSELoss()  # squared error on the scalar gauge reading
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 300
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # squeeze() drops the trailing size-1 dim so the output matches the
        # (batch,) label tensor.
        # NOTE(review): if the final batch has exactly 1 sample this squeezes
        # to a 0-d tensor; MSELoss still broadcasts, but shapes then disagree.
        loss = criterion(outputs.squeeze(), labels)  # Labels should now be float32
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(dataloader)}')
# Save the trained model
torch.save(model.state_dict(), 'workspace/AnalogGaugeReader_MLAI/src/root/capturegaugefromwebcam/WIP/ET_gaugemodel_300epochs.pth')
# Validation loop
# NOTE(review): this iterates the SAME dataloader used for training, so the
# number printed is training-set loss, not generalization. Hold out a
# separate validation split (images never seen in training) to detect the
# overfitting that shows up on new captures.
model.eval()
total_loss = 0.0
with torch.no_grad():
    for inputs, labels in dataloader:
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)  # Labels should now be float32
        total_loss += loss.item()
print(f'Validation Loss: {total_loss / len(dataloader)}')
And my evaluation code:
from PIL import Image
import torch
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
from collections import deque
from scipy import stats
import numpy as np
# Define the model structure
class GaugeNet(nn.Module):
    """Small CNN regressor: 224x224 RGB image -> one scalar gauge reading.

    Must mirror the architecture used at training time so the saved
    state_dict loads cleanly. Two conv+pool stages (spatial 224 -> 112 ->
    56) feed three fully connected layers ending in one linear output.
    """

    def __init__(self):
        super(GaugeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 32 channels x 56 x 56 after two 2x2 poolings of a 224x224 input.
        self.fc1 = nn.Linear(32 * 56 * 56, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. torch.flatten(x, 1) preserves the batch
        # dimension, whereas view(-1, 32*56*56) would silently fold a shape
        # mismatch into a wrong batch size instead of raising.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Load the saved model weights into a fresh GaugeNet instance.
model = GaugeNet()
model.load_state_dict(torch.load('workspace/AnalogGaugeReader_MLAI/src/root/capturegaugefromwebcam/WIP/ET_gaugemodel_300epochs.pth'))
model.eval()  # Set the model to evaluation mode (a no-op for this
              # architecture, which has no dropout/batchnorm, but good practice)
# Inference-time preprocessing: same resize + ImageNet normalization as
# training, but correctly WITHOUT the ColorJitter augmentation.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Function to predict gauge value from an image
def predict_gauge_value(image_path, model, transform):
    """Return the model's scalar gauge reading for the image at *image_path*.

    Parameters
    ----------
    image_path : str -- path to a gauge photo on disk.
    model : nn.Module -- trained regressor producing a (1, 1) output.
    transform : callable -- preprocessing pipeline mapping a PIL image to a
        CHW tensor (must match the training-time preprocessing).
    """
    # Force 3-channel RGB so grayscale/CMYK files still produce the channel
    # count the network's first conv layer expects.
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # Add batch dimension
    model.eval()  # ensure inference mode even if the caller forgot
    with torch.no_grad():
        output = model(image)
    return output.item()
# Predict value for a new image
base_path = 'workspace/AnalogGaugeReader_MLAI/src/root/capturegaugefromwebcam/WIP'
# NOTE(review): data_points and errors are never appended to below, and the
# scatter plot is never updated or shown -- this plotting scaffolding is
# inert as written.
data_points = deque(maxlen=282)
errors = deque(maxlen=282)
fig, ax = plt.subplots()
# Fix both axes to the 0-230 gauge range so points are directly comparable.
ax.set_xlim(0, 230)
ax.set_ylim(0, 230)
# Create a scatter plot to visualize the data points
scatter = plt.scatter([], [])
# Run inference over gauge-501.jpg .. gauge-969.jpg.
for i in range(501,970):
    # {i:03d} pads to 3 digits; every index in this range already has 3+
    # digits, so the padding is effectively a no-op here.
    img_file_name = f'gauge-{i:03d}.jpg' # Format the file name with leading zeros
    img_file_path = os.path.join(base_path, img_file_name)
    predicted_value = predict_gauge_value(img_file_path, model, data_transforms)
    print(f'Predicted Gauge Value: {predicted_value}', 'File:', f'gauge-{i:03d}.jpg')
Zaph Brox is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.