I’m using a Basler camera with OpenCV’s DNN module to run an SSD object detection model (mobilenetv2_050_Opset18.onnx) with CUDA support enabled, in C++. The Basler camera captures frames correctly and OpenCV processes the images, but the SSD model doesn’t detect any objects, not even with a confidence threshold as low as 0.0001.
Key Details:
Camera: The Basler camera is configured for 640×480 resolution and the BGR8 pixel format.
Model: The SSD model (mobilenetv2_050_Opset18.onnx) is loaded with OpenCV's DNN module using the CUDA backend (DNN_BACKEND_CUDA).
CUDA Support: OpenCV was compiled with CUDA and cuDNN support via CMake. The backend is set to CUDA (net.setPreferableBackend(DNN_BACKEND_CUDA)) and the target is also set to CUDA (net.setPreferableTarget(DNN_TARGET_CUDA)).
Problem: The model does not detect any objects, even with a confidence threshold as low as 0.0001. I have verified that both the camera and the CUDA environment are working as expected; a rough version of the CUDA check is shown right below.
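The CUDA check is roughly the following (a minimal, self-contained sketch that only queries the devices OpenCV's CUDA module can see):

#include <opencv2/core/cuda.hpp>
#include <iostream>

int main() {
    // Number of CUDA devices OpenCV can see; 0 means the DNN CUDA backend
    // cannot be used and OpenCV typically falls back to the CPU path.
    int devices = cv::cuda::getCudaEnabledDeviceCount();
    std::cout << "CUDA devices visible to OpenCV: " << devices << std::endl;
    if (devices > 0)
        cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
    return 0;
}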
What is the problem in my code?
#include <iostream>
#include <pylon/PylonIncludes.h>
#include <pylon/InstantCamera.h>
#include <pylon/BaslerUniversalInstantCamera.h>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/core/cuda.hpp>
using namespace Pylon;
using namespace GenApi;
using namespace std;
using namespace cv;
using namespace cv::dnn;
int main() {
    PylonInitialize();
    try {
        // Basler camera setup
        CInstantCamera camera(CTlFactory::GetInstance().CreateDevice(CDeviceInfo().SetSerialNumber("40432296")));
        camera.Open();

        // Configure the camera settings
        INodeMap& nodemap = camera.GetNodeMap();
        CIntegerParameter width(nodemap, "Width");
        CIntegerParameter height(nodemap, "Height");
        CEnumParameter pixelFormat(nodemap, "PixelFormat");
        CFloatParameter exposureTime(nodemap, "ExposureTime");
        CFloatParameter acquisitionFrameRate(nodemap, "AcquisitionFrameRate");
        CIntegerParameter xOffset(nodemap, "OffsetX");
        CIntegerParameter yOffset(nodemap, "OffsetY");
        width.SetValue(640, IntegerValueCorrection_Nearest);
        height.SetValue(480, IntegerValueCorrection_Nearest);
        pixelFormat.SetValue("BGR8");
        exposureTime.SetValue(15000.0);
        acquisitionFrameRate.SetValue(5000.0);
        xOffset.SetValue(0);
        yOffset.SetValue(0);

        CGrabResultPtr grabResult;

        // Load SSD model
        Net net = readNetFromONNX("mobilenetv2_050_Opset18.onnx");
        net.setPreferableBackend(DNN_BACKEND_CUDA);
        net.setPreferableTarget(DNN_TARGET_CUDA);

        camera.StartGrabbing();
        Mat frame;
        while (camera.IsGrabbing()) {
            camera.RetrieveResult(5000, grabResult, TimeoutHandling_ThrowException);
            if (grabResult->GrabSucceeded()) {
                uint8_t* buffer = (uint8_t*)grabResult->GetBuffer();
                frame = Mat(grabResult->GetHeight(), grabResult->GetWidth(), CV_8UC3, buffer);

                // Run SSD model
                Mat blob = blobFromImage(frame, 1.0, Size(640, 480), Scalar(), true, false);
                net.setInput(blob);
                Mat detections = net.forward();

                for (int i = 0; i < detections.size[2]; ++i) {
                    float confidence = detections.ptr<float>(0)[i * 7 + 2]; // Access confidence
                    if (confidence > 0.0001) {
                        int xLeftBottom = static_cast<int>(detections.ptr<float>(0)[i * 7 + 3] * frame.cols);
                        int yLeftBottom = static_cast<int>(detections.ptr<float>(0)[i * 7 + 4] * frame.rows);
                        int xRightTop = static_cast<int>(detections.ptr<float>(0)[i * 7 + 5] * frame.cols);
                        int yRightTop = static_cast<int>(detections.ptr<float>(0)[i * 7 + 6] * frame.rows);
                        rectangle(frame, Point(xLeftBottom, yLeftBottom), Point(xRightTop, yRightTop), Scalar(0, 255, 0), 2);
                    }
                }

                imshow("SSD Detection", frame);
                if (waitKey(1) == 27) break; // Press 'ESC' to exit
            }
        }
        camera.Close();
    }
    catch (const GenericException& e) {
        std::cerr << "Error: " << e.GetDescription() << std::endl;
    }
    PylonTerminate();
    return 0;
}
I am new to computer vision and I am aiming for a high-performance (100+ FPS) real-time object detection pipeline working with low-resolution footage such as 640×480. Is OpenCV a good starting point, or should I skip C++ and use Python for better support? Is using C++ advantageous over Python when it comes to object detection performance?
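For context on the FPS target, this is roughly how the setInput/forward pair inside the grab loop above could be timed (a sketch using cv::TickMeter; net and blob are the variables from the code above):

cv::TickMeter tm;
// Time only the DNN part; grabbing and drawing are deliberately excluded.
tm.start();
net.setInput(blob);
Mat detections = net.forward();
tm.stop();
std::cout << "Inference: " << tm.getTimeMilli() << " ms (~"
          << 1000.0 / tm.getTimeMilli() << " FPS)" << std::endl;
tm.reset();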
The preprocessing likely does not match what the network expects: SSD MobileNet models are typically exported for 300×300 inputs scaled to roughly [-1, 1] (mean 127.5, scale 1/127.5), so feeding raw 0–255 pixels at 640×480 can easily push every confidence below even a 0.0001 threshold.
Instead of
blobFromImage(frame, 1.0, Size(640, 480), Scalar(), true, false);
try
Mat blob = blobFromImage(frame, 1 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
net.setInput(blob);
Mat detections = net.forward();
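If the model then starts producing detections, the usual OpenCV SSD output is a 1×1×N×7 blob where each row is (batchId, classId, confidence, left, top, right, bottom) in normalized coordinates. Assuming that layout, the output can be parsed like this (sketch):

// Assumes detections has the standard DetectionOutput shape [1, 1, N, 7].
Mat detMat(detections.size[2], detections.size[3], CV_32F, detections.ptr<float>());
for (int i = 0; i < detMat.rows; ++i) {
    float confidence = detMat.at<float>(i, 2);
    if (confidence > 0.5f) {
        int left   = static_cast<int>(detMat.at<float>(i, 3) * frame.cols);
        int top    = static_cast<int>(detMat.at<float>(i, 4) * frame.rows);
        int right  = static_cast<int>(detMat.at<float>(i, 5) * frame.cols);
        int bottom = static_cast<int>(detMat.at<float>(i, 6) * frame.rows);
        rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0), 2);
    }
}

If detections.dims or detections.size do not look like 1×1×N×7, the exported ONNX does not use this layout and the loop above will not apply.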