I’ve been working on an object detection project for several weeks now. I’ve finally got the model detecting my objects accurately, with confidence scores near 90% for most objects.
When I pull the .mlpackage into my project and open the model’s “Preview” tab, I can drop in photos of the objects taken with my phone, and it correctly identifies them and draws bounding boxes around them.
However, when I use that exact same image in my code and send it to my model, it finds nothing.
I’m not sure what’s going wrong. I tried converting the input image to more closely match the training images that were fed to the model, but even that did nothing, so at this point I’m stumped.
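In case it’s relevant, this is roughly how I’ve been checking what input the model actually expects (a quick sketch using Core ML’s MLModelDescription; MyModelPackage is the class Xcode generates for my .mlpackage):

    import CoreML

    // Sketch: dump the model's declared inputs to confirm the expected
    // image size and pixel format.
    func printModelInputs() {
        guard let mlModel = try? MyModelPackage(configuration: MLModelConfiguration()).model else {
            print("Could not load model")
            return
        }
        for (name, description) in mlModel.modelDescription.inputDescriptionsByName {
            if let constraint = description.imageConstraint {
                // Image inputs report the exact pixel dimensions they expect
                print("\(name): \(constraint.pixelsWide)x\(constraint.pixelsHigh), pixel format \(constraint.pixelFormatType)")
            } else {
                print("\(name): feature type \(description.type.rawValue)")
            }
        }
    }

If the reported size or format differed from what my code produces, that would at least explain the mismatch, but as far as I can tell it matches.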
Here is my full logic for capturing the image and sending it to my model.
import UIKit
import AVFoundation
import Vision
import CoreML
class PhotoViewController: UIViewController, AVCapturePhotoCaptureDelegate {
    private let captureSession = AVCaptureSession()
    private var photoOutput = AVCapturePhotoOutput()
    private var previewLayer: AVCaptureVideoPreviewLayer!
    private var capturedImage: UIImage?
    private var model: VNCoreMLModel?

    private let captureButton: UIButton = {
        let button = UIButton(type: .system)
        button.setTitle("Capture", for: .normal)
        button.backgroundColor = .systemBlue
        button.setTitleColor(.white, for: .normal)
        button.layer.cornerRadius = 10
        button.translatesAutoresizingMaskIntoConstraints = false
        button.addTarget(self, action: #selector(capturePhoto), for: .touchUpInside)
        return button
    }()

    private let resultsLabel: UILabel = {
        let label = UILabel()
        label.textAlignment = .center
        label.numberOfLines = 0
        label.translatesAutoresizingMaskIntoConstraints = false
        return label
    }()
    override func viewDidLoad() {
        super.viewDidLoad()
        setupCamera()
        setupUI()
        loadModel()
    }

    private func loadModel() {
        do {
            model = try VNCoreMLModel(for: MyModelPackage().model)
        } catch {
            print("Failed to load model: \(error)")
        }
    }
    private func setupCamera() {
        captureSession.beginConfiguration()
        guard let videoDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
              let videoDeviceInput = try? AVCaptureDeviceInput(device: videoDevice),
              captureSession.canAddInput(videoDeviceInput) else {
            return
        }
        captureSession.addInput(videoDeviceInput)
        guard captureSession.canAddOutput(photoOutput) else {
            return
        }
        captureSession.addOutput(photoOutput)
        captureSession.commitConfiguration()

        previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        previewLayer.videoGravity = .resizeAspectFill
        previewLayer.frame = view.layer.bounds
        view.layer.addSublayer(previewLayer)

        captureSession.startRunning()
    }
    private func setupUI() {
        view.addSubview(captureButton)
        view.addSubview(resultsLabel)
        NSLayoutConstraint.activate([
            captureButton.bottomAnchor.constraint(equalTo: view.bottomAnchor, constant: -50),
            captureButton.centerXAnchor.constraint(equalTo: view.centerXAnchor),
            captureButton.widthAnchor.constraint(equalToConstant: 100),
            captureButton.heightAnchor.constraint(equalToConstant: 50),
            resultsLabel.bottomAnchor.constraint(equalTo: captureButton.topAnchor, constant: -20),
            resultsLabel.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 20),
            resultsLabel.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -20)
        ])
    }

    @objc private func capturePhoto() {
        let photoSettings = AVCapturePhotoSettings()
        photoOutput.capturePhoto(with: photoSettings, delegate: self)
    }

    func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        guard let imageData = photo.fileDataRepresentation() else { return }
        capturedImage = UIImage(data: imageData)
        processImageWithCoreML()
    }
    private func processImageWithCoreML() {
        guard let model = model else { return }
        guard let capturedImage = capturedImage else { return }

        // Correct image orientation
        let correctedImage = fixOrientation(of: capturedImage)

        // Resize image to model's expected input size (640x640 in this example)
        let resizedImage = resizeImage(image: correctedImage, targetSize: CGSize(width: 640, height: 640))

        // Save the resized and corrected image to the photo library for inspection
        saveImageToPhotoLibrary(image: resizedImage)

        // fixOrientation(of:) redraws the image, so its orientation is now .up.
        // (Building a CGImagePropertyOrientation from imageOrientation.rawValue
        // doesn't work: the two enums use different raw values, and rawValue 0
        // returns nil.)
        guard let ciImage = CIImage(image: resizedImage) else { return }

        let request = VNCoreMLRequest(model: model) { [weak self] request, error in
            if let error = error {
                print("Failed to perform request: \(error)")
                return
            }
            guard let results = request.results as? [VNRecognizedObjectObservation], !results.isEmpty else {
                DispatchQueue.main.async {
                    self?.resultsLabel.text = "No objects recognized."
                }
                return
            }
            let topResults = results.prefix(3).map { observation in
                observation.labels.map { "\($0.identifier): \($0.confidence)" }.joined(separator: "\n")
            }
            DispatchQueue.main.async {
                self?.resultsLabel.text = topResults.joined(separator: "\n\n")
            }
        }
        request.imageCropAndScaleOption = .scaleFit

        let handler = VNImageRequestHandler(ciImage: ciImage, orientation: .up)
        do {
            try handler.perform([request])
        } catch {
            print("Failed to perform request: \(error)")
        }
    }
    private func fixOrientation(of image: UIImage) -> UIImage {
        guard image.imageOrientation != .up else { return image }
        UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
        image.draw(in: CGRect(origin: .zero, size: image.size))
        let normalizedImage = UIGraphicsGetImageFromCurrentImageContext() ?? image
        UIGraphicsEndImageContext()
        return normalizedImage
    }
    private func resizeImage(image: UIImage, targetSize: CGSize) -> UIImage {
        let size = image.size
        // Note: scaling width and height independently stretches the image
        // to exactly targetSize rather than preserving its aspect ratio
        let widthRatio = targetSize.width / size.width
        let heightRatio = targetSize.height / size.height
        let newSize = CGSize(width: size.width * widthRatio, height: size.height * heightRatio)
        UIGraphicsBeginImageContextWithOptions(newSize, false, 1.0)
        image.draw(in: CGRect(origin: .zero, size: newSize))
        let newImage = UIGraphicsGetImageFromCurrentImageContext()
        UIGraphicsEndImageContext()
        return newImage ?? image
    }
    private func saveImageToPhotoLibrary(image: UIImage) {
        UIImageWriteToSavedPhotosAlbum(image, self, #selector(image(_:didFinishSavingWithError:contextInfo:)), nil)
    }

    @objc private func image(_ image: UIImage, didFinishSavingWithError error: Error?, contextInfo: UnsafeRawPointer) {
        if let error = error {
            print("Error saving image: \(error)")
        } else {
            print("Image saved successfully.")
        }
    }
}
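One variant I’ve been considering (not yet verified) is to skip the manual redraw and resize entirely, and instead hand Vision the original CGImage with an explicit orientation, letting imageCropAndScaleOption do the letterboxing. A rough sketch; the cgOrientation(_:) helper is my own mapping, needed because UIImage.Orientation and CGImagePropertyOrientation use different raw values:

    // Sketch: let Vision handle scaling/cropping instead of resizing manually.
    func cgOrientation(_ o: UIImage.Orientation) -> CGImagePropertyOrientation {
        // Explicit mapping, since the two enums' raw values don't line up
        switch o {
        case .up: return .up
        case .down: return .down
        case .left: return .left
        case .right: return .right
        case .upMirrored: return .upMirrored
        case .downMirrored: return .downMirrored
        case .leftMirrored: return .leftMirrored
        case .rightMirrored: return .rightMirrored
        @unknown default: return .up
        }
    }

    func detectObjects(in image: UIImage, using model: VNCoreMLModel) {
        guard let cgImage = image.cgImage else { return }
        let request = VNCoreMLRequest(model: model) { request, _ in
            let results = request.results as? [VNRecognizedObjectObservation] ?? []
            print("Found \(results.count) objects")
        }
        // .scaleFit letterboxes the image down to the model's input size
        request.imageCropAndScaleOption = .scaleFit
        let handler = VNImageRequestHandler(cgImage: cgImage,
                                            orientation: cgOrientation(image.imageOrientation))
        try? handler.perform([request])
    }

The idea there is that Vision does the resize itself, so there’s no chance my manual redraw is mangling the pixels, but I haven’t confirmed whether it actually behaves differently.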
So my question: is there something more I need to do to the image in order for my model to correctly identify the objects it was trained on? I’m feeling out of ideas.