I’m working on an iOS app where I replaced the pre-trained object detection model in Apple’s Breakfast Finder sample with my own model for detecting monuments. I trained the model with Ultralytics YOLOv8 and exported it to the .mlmodel format.
The app detects objects correctly, but only when the phone is in landscape (home button on the right, phone rotated to the left). In portrait or any other orientation, detection does not work correctly.
This is the landscape detection; I want it to detect the same way in portrait.
Here are some details about my setup:
Project: Modified version of Apple’s Breakfast Finder app
Model: Custom-trained YOLOv8 model exported to .mlmodel
Frameworks used: Vision, Core ML, AVFoundation
Here is the relevant code for setting up the camera capture and handling orientation:
func updateLayerGeometry() {
    let bounds = rootLayer.bounds
    var scale: CGFloat

    let xScale: CGFloat = bounds.size.width / bufferSize.height
    let yScale: CGFloat = bounds.size.height / bufferSize.width

    scale = fmax(xScale, yScale)
    if scale.isInfinite {
        scale = 1.0
    }
    CATransaction.begin()
    CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)

    // Rotate the layer into screen orientation and scale and mirror
    detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: scale, y: -scale))
    // Center the layer
    detectionOverlay.position = CGPoint(x: bounds.midX, y: bounds.midY)

    CATransaction.commit()
}
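From reading this, my suspicion is that the hard-coded rotationAngle of .pi / 2.0 is what ties the overlay to a single landscape orientation (and the xScale/yScale lines divide by swapped buffer dimensions, which presumably assumes that same 90° rotation). What I was considering is making the angle depend on the current device orientation, roughly like this sketch (untested; only the landscapeLeft angle is the value that works today, the others are my guesses):

    // Untested sketch: choose the overlay rotation per device orientation
    // instead of hard-coding .pi / 2.0. Only the landscapeLeft case is
    // known-good (it is the current behavior); the other angles are guesses
    // I would still need to verify, along with the matching scale factors.
    func overlayRotationAngle() -> CGFloat {
        switch UIDevice.current.orientation {
        case .portrait:           return 0            // guess: no rotation needed
        case .portraitUpsideDown: return .pi          // guess
        case .landscapeRight:     return -.pi / 2.0   // guess
        default:                  return .pi / 2.0    // landscapeLeft: works today
        }
    }

and then in updateLayerGeometry() the transform line would become:

    detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: overlayRotationAngle()).scaledBy(x: scale, y: -scale))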
public func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
    let curDeviceOrientation = UIDevice.current.orientation
    let exifOrientation: CGImagePropertyOrientation

    switch curDeviceOrientation {
    case UIDeviceOrientation.portraitUpsideDown:  // Device oriented vertically, home button on the top
        exifOrientation = .left
    case UIDeviceOrientation.landscapeLeft:       // Device oriented horizontally, home button on the right
        exifOrientation = .upMirrored
    case UIDeviceOrientation.landscapeRight:      // Device oriented horizontally, home button on the left
        exifOrientation = .down
    case UIDeviceOrientation.portrait:            // Device oriented vertically, home button on the bottom
        exifOrientation = .up
    default:
        exifOrientation = .up
    }
    return exifOrientation
}
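I also wondered whether, instead of compensating with EXIF orientation on every frame, I could ask AVFoundation to deliver the buffers already rotated, so the pixel data itself is portrait. This is the idea (untested; videoDataOutput is the capture output the sample already configures in setupAVCapture()):

    // Untested idea: rotate the capture connection so buffers arrive in
    // portrait; the EXIF mapping could then presumably stay at .up.
    if let connection = videoDataOutput.connection(with: .video),
       connection.isVideoOrientationSupported {
        connection.videoOrientation = .portrait
    }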
And this is the code for the model integration and detection:
/*
See LICENSE folder for this sample’s licensing information.

Abstract:
Contains the object recognition view controller for the Breakfast Finder.
*/

import UIKit
import AVFoundation
import Vision
import CoreML

class VisionObjectRecognitionViewController: ViewController {

    private var detectionOverlay: CALayer! = nil

    // Vision parts
    private var requests = [VNRequest]()
    @discardableResult
    func setupVision() -> NSError? {
        // Setup Vision parts
        let error: NSError! = nil

        guard let modelURL = Bundle.main.url(forResource: "secondbest", withExtension: "mlmodelc") else {
            print("model not loaded")
            return NSError(domain: "VisionObjectRecognitionViewController", code: -1, userInfo: [NSLocalizedDescriptionKey: "Model file is missing"])
        }
        do {
            let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL))
            let objectRecognition = VNCoreMLRequest(model: visionModel, completionHandler: { (request, error) in
                DispatchQueue.main.async(execute: {
                    // Perform all the UI updates on the main queue
                    if let results = request.results {
                        self.drawVisionRequestResults(results)
                    }
                })
            })
            self.requests = [objectRecognition]
        } catch let error as NSError {
            print("Model loading went wrong: \(error)")
        }

        return error
    }
    func drawVisionRequestResults(_ results: [Any]) {
        CATransaction.begin()
        CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
        detectionOverlay.sublayers = nil // remove all the old recognized objects
        for observation in results where observation is VNRecognizedObjectObservation {
            guard let objectObservation = observation as? VNRecognizedObjectObservation else {
                continue
            }
            // Select only the label with the highest confidence.
            let topLabelObservation = objectObservation.labels[0]
            let objectBounds = VNImageRectForNormalizedRect(objectObservation.boundingBox, Int(bufferSize.width), Int(bufferSize.height))

            let shapeLayer = self.createRoundedRectLayerWithBounds(objectBounds)

            let textLayer = self.createTextSubLayerInBounds(objectBounds,
                                                            identifier: topLabelObservation.identifier,
                                                            confidence: topLabelObservation.confidence)
            shapeLayer.addSublayer(textLayer)
            detectionOverlay.addSublayer(shapeLayer)
        }
        self.updateLayerGeometry()
        CATransaction.commit()
    }
    override func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }

        let exifOrientation = exifOrientationFromDeviceOrientation()

        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: exifOrientation, options: [:])
        do {
            try imageRequestHandler.perform(self.requests)
        } catch {
            print(error)
        }
    }
    override func setupAVCapture() {
        super.setupAVCapture()

        // Setup Vision parts
        setupLayers()
        updateLayerGeometry()
        setupVision()

        // Start the capture
        startCaptureSession()
    }

    func setupLayers() {
        detectionOverlay = CALayer() // container layer that has all the renderings of the observations
        detectionOverlay.name = "DetectionOverlay"
        detectionOverlay.bounds = CGRect(x: 0.0,
                                         y: 0.0,
                                         width: bufferSize.width,
                                         height: bufferSize.height)
        detectionOverlay.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
        rootLayer.addSublayer(detectionOverlay)
    }
    func updateLayerGeometry() {
        let bounds = rootLayer.bounds
        var scale: CGFloat

        let xScale: CGFloat = bounds.size.width / bufferSize.height
        let yScale: CGFloat = bounds.size.height / bufferSize.width

        scale = fmax(xScale, yScale)
        if scale.isInfinite {
            scale = 1.0
        }
        CATransaction.begin()
        CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)

        // Rotate the layer into screen orientation and scale and mirror
        detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: scale, y: -scale))
        // Center the layer
        detectionOverlay.position = CGPoint(x: bounds.midX, y: bounds.midY)

        CATransaction.commit()
    }
    func createTextSubLayerInBounds(_ bounds: CGRect, identifier: String, confidence: VNConfidence) -> CATextLayer {
        let textLayer = CATextLayer()
        textLayer.name = "Object Label"
        let formattedString = NSMutableAttributedString(string: String(format: "\(identifier)\nConfidence: %.2f", confidence))
        let largeFont = UIFont(name: "Helvetica", size: 10.0)!
        formattedString.addAttributes([NSAttributedString.Key.font: largeFont], range: NSRange(location: 0, length: identifier.count))
        textLayer.string = formattedString
        textLayer.bounds = CGRect(x: 0, y: 0, width: bounds.size.height - 10, height: bounds.size.width - 10)
        textLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
        textLayer.shadowOpacity = 0.7
        textLayer.shadowOffset = CGSize(width: 2, height: 2)
        textLayer.foregroundColor = CGColor(colorSpace: CGColorSpaceCreateDeviceRGB(), components: [0.0, 0.0, 0.0, 1.0])
        textLayer.contentsScale = 1.0 // retina rendering
        // Rotate the layer into screen orientation and scale and mirror
        textLayer.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: 1.0, y: -1.0))
        return textLayer
    }
    func createRoundedRectLayerWithBounds(_ bounds: CGRect) -> CALayer {
        let shapeLayer = CALayer()
        shapeLayer.bounds = bounds
        shapeLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
        shapeLayer.name = "Found Object"
        shapeLayer.backgroundColor = CGColor(colorSpace: CGColorSpaceCreateDeviceRGB(), components: [1.0, 1.0, 0.2, 0.4])
        shapeLayer.cornerRadius = 7
        return shapeLayer
    }
}
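One thing I noticed while pasting this: updateLayerGeometry() only runs once, from setupAVCapture(), so even if its transform were orientation-aware, nothing would recompute it when the device rotates. I was planning to add something like this (untested):

    // Untested sketch: recompute the overlay geometry after each interface
    // rotation so an orientation-aware updateLayerGeometry() gets re-run.
    override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
        super.viewWillTransition(to: size, with: coordinator)
        coordinator.animate(alongsideTransition: nil) { _ in
            self.updateLayerGeometry()
        }
    }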
I changed the exifOrientation mapping just in case that would work, but now the labels are displayed vertically; I was expecting it to detect in portrait. This is the mapping after my change:
public func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
    let curDeviceOrientation = UIDevice.current.orientation
    let exifOrientation: CGImagePropertyOrientation

    switch curDeviceOrientation {
    case UIDeviceOrientation.portraitUpsideDown:  // Device oriented vertically, home button on the top
        exifOrientation = .up
    case UIDeviceOrientation.landscapeLeft:       // Device oriented horizontally, home button on the right
        exifOrientation = .rightMirrored
    case UIDeviceOrientation.landscapeRight:      // Device oriented horizontally, home button on the left
        exifOrientation = .left
    case UIDeviceOrientation.portrait:            // Device oriented vertically, home button on the bottom
        exifOrientation = .right
    default:
        exifOrientation = .right
    }
    return exifOrientation
}
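My guess is that the vertical labels come from the hard-coded .pi / 2.0 rotation in createTextSubLayerInBounds(), which counter-rotates the text to match the landscape overlay transform. If the overlay rotation becomes orientation-dependent, I assume the text layer needs a matching counter-rotation; for portrait I imagine only the mirror that undoes Vision's bottom-left origin would remain, something like (untested):

    // Untested guess: in portrait, keep only the y-flip that converts
    // Vision's bottom-left origin to Core Animation's top-left origin.
    textLayer.setAffineTransform(CGAffineTransform(scaleX: 1.0, y: -1.0))

What do I need to change so that both the detection and the overlay (boxes and labels) work correctly in portrait?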