I am here for some assistance or suggestions.
My main task is to detect objects in a 2D grayscale image, extract the detected bounding boxes, and filter layer images.
I have a layer image for each object from the main image (the image where I detect objects).
I can't run the same model on the layer images to detect objects because of model performance, and retraining is too costly at the moment.
So my main goal is to detect objects in the main image, extract the bounding boxes, and draw those boxes on each layer. After that, I compare the objects of the main image and the layers by ROI or pixel values. If they match, I select those layers and move them to another folder; otherwise I ignore them.
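For reference, this is a rough sketch of the move step I have in mind (not part of my current code; the output folder and threshold are placeholders):

import shutil
from pathlib import Path

def move_layer_if_match(layer_path, avg_similarity_pct, out_dir='./selected_layers', threshold=90.0):
    # Hypothetical helper: keep a layer whose average ROI similarity
    # (in percent) clears the threshold; folder and threshold are assumptions.
    if avg_similarity_pct >= threshold:
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        shutil.move(layer_path, str(Path(out_dir) / Path(layer_path).name))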
Currently, my code's comparison logic doesn't work as expected. Code and results are attached below:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity as ssim
from ultralytics import YOLO
# Initialize YOLO model
model_path = './models/best_augmented.pt' # model path
img_path = './original_img/main.png' # main image
layer_path = './layer_images/layer-1.png' # test in only one for now
desired_class = ['door_normal']
# Load the images
original_img = cv2.imread(img_path)
layer_img = cv2.imread(layer_path)
# Predict using the model
model = YOLO(model_path)
results = model.predict(img_path, imgsz=1024)
def calculate_similarity(original_roi, layer_roi):
    # Ensure the ROIs are the same size for SSIM comparison
    if original_roi.shape != layer_roi.shape:
        layer_roi = cv2.resize(layer_roi, (original_roi.shape[1], original_roi.shape[0]))
    # Convert to grayscale
    original_gray = cv2.cvtColor(original_roi, cv2.COLOR_BGR2GRAY)
    layer_gray = cv2.cvtColor(layer_roi, cv2.COLOR_BGR2GRAY)
    # Compute SSIM (inputs are single-channel, so the deprecated
    # multichannel= flag is not needed)
    ssim_value = ssim(original_gray, layer_gray, win_size=7)
    percentage_difference = (1 - ssim_value) * 100
    return percentage_difference
detection_details = []  # collect (class_name, box) for the desired classes
for result in results:
    for idx, cls in enumerate(result.boxes.cls):
        class_index = int(cls)
        class_name = result.names[class_index]
        if class_name in desired_class:
            box = result.boxes.xyxy[idx].cpu().numpy().astype(np.int32)
            detection_details.append([class_name, (box[0], box[1], box[2], box[3])])
            # Draw bounding box on the original image
            cv2.rectangle(original_img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
            cv2.putText(original_img, class_name, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
# Draw the bounding boxes on the layer image
for detail in detection_details:
    class_name, (x1, y1, x2, y2) = detail
    # Draw bounding box on the layer image
    cv2.rectangle(layer_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(layer_img, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
# Prepare for plotting ROIs (squeeze=False keeps axes 2-D even with one detection,
# so axes[0, idx] / axes[1, idx] indexing works in every case)
fig, axes = plt.subplots(2, len(detection_details), figsize=(15, 10), squeeze=False)
# Extract and plot the ROIs for each detected object
original_rois = []
layer_rois = []
for idx, detail in enumerate(detection_details):
    class_name, (x1, y1, x2, y2) = detail
    # Slightly crop the ROI to exclude the drawn bounding-box border
    crop = 2
    original_roi = original_img[y1+crop:y2-crop, x1+crop:x2-crop]
    layer_roi = layer_img[y1+crop:y2-crop, x1+crop:x2-crop]
    original_rois.append(original_roi)
    layer_rois.append(layer_roi)
    # Plot ROI from the original image
    axes[0, idx].imshow(cv2.cvtColor(original_roi, cv2.COLOR_BGR2RGB))
    axes[0, idx].set_title(f'Original ROI: {class_name}')
    axes[0, idx].axis('off')
    # Plot ROI from the layer image
    axes[1, idx].imshow(cv2.cvtColor(layer_roi, cv2.COLOR_BGR2RGB))
    axes[1, idx].set_title(f'Layer ROI: {class_name}')
    axes[1, idx].axis('off')
# Convert images from BGR to RGB for displaying with matplotlib
original_img_rgb = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
layer_img_rgb = cv2.cvtColor(layer_img, cv2.COLOR_BGR2RGB)
# Plot the images side by side
fig, axes = plt.subplots(1, 2, figsize=(15, 10))
axes[0].imshow(original_img_rgb)
axes[0].set_title('Original Image with Bounding Boxes')
axes[0].axis('off')
axes[1].imshow(layer_img_rgb)
axes[1].set_title('Layer Image with Bounding Boxes')
axes[1].axis('off')
plt.tight_layout()
plt.show()
# Calculate and print the percentage difference between the original and layer ROIs
for idx, (original_roi, layer_roi) in enumerate(zip(original_rois, layer_rois)):
    percentage_difference = calculate_similarity(original_roi, layer_roi)
    class_name = detection_details[idx][0]
    print(f"ROI {idx + 1} (Class: {class_name}) has a {percentage_difference:.2f}% difference from the original image.")
    # Display the ROIs for visual verification
    plt.figure()
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(original_roi, cv2.COLOR_BGR2RGB))
    plt.title(f'Original ROI {idx + 1}')
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(layer_roi, cv2.COLOR_BGR2RGB))
    plt.title(f'Layer ROI {idx + 1}')
    plt.axis('off')
    plt.show()
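Eventually I plan to turn the per-box differences into one average score per layer and feed it into the move step sketched above; roughly like this (not yet in my code):

# Planned aggregation (sketch): one average similarity per layer, reusing
# calculate_similarity() and the hypothetical move_layer_if_match() from above.
differences = [calculate_similarity(o, l) for o, l in zip(original_rois, layer_rois)]
avg_similarity_pct = 100.0 - float(np.mean(differences))
move_layer_if_match(layer_path, avg_similarity_pct)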
When I run my code, the comparison shows a really low percentage difference. For instance,
for the uploaded images and detections,
I compared the percentage for only one box and saw just a 28% difference. It is similar for almost all boxes.
ROI 1 (Class: door_normal) has a 28.16% difference from the original image.
I was expecting around an 80 or 90% difference when the objects inside the boxes don't match or one box is empty. As it is, this will make it challenging to filter the necessary layer images. I want to select layers that have, on average, more than 90% similarity inside the rectangular box objects.
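To make the numbers concrete, here is a minimal synthetic sketch of what I suspect is happening (the patch sizes and values are just placeholders): an empty patch and a patch with a small object still score a high SSIM, because the shared blank background dominates the windowed mean:

import numpy as np
from skimage.metrics import structural_similarity as ssim

# Two mostly white 100x100 patches; only one contains a small dark "object".
blank = np.full((100, 100), 255, dtype=np.uint8)
drawn = blank.copy()
drawn[40:60, 40:60] = 0  # a 20x20 dark square, ~4% of the pixels

ssim_value = ssim(blank, drawn, win_size=7)
print(f"SSIM: {ssim_value:.3f}, difference: {(1 - ssim_value) * 100:.2f}%")
# Most 7x7 windows are identical white, so the mean SSIM stays high and the
# reported "difference" stays low even though one patch is empty.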
I am not sure what I missed.
Can you help?
I hope I explained my question clearly.
Thanks.