I’m trying to detect a QRCode and take a cropped photo of it using Swift. However I can’t transform the coordinates of the detected AVMetadataObject
to the correct image space coordinates. This is my prototyping code:
import SwiftUI
import AVFoundation
class CodeScannerViewController: UIViewController {
var captureSession: AVCaptureSession!
var previewLayer: AVCaptureVideoPreviewLayer!
var photoOutput: AVCapturePhotoOutput!
var supportedCodeTypes: [AVMetadataObject.ObjectType] = [.qr]
var imageView: UIImageView!
var isCapturing = false
var codeCorners = [CGPoint]()
override func viewDidLoad() {
super.viewDidLoad()
captureSession = AVCaptureSession()
guard let videoCaptureDevice = AVCaptureDevice.default(for: .video) else {
print("Failed to get the camera device")
return
}
do {
let input = try AVCaptureDeviceInput(device: videoCaptureDevice)
if captureSession.canAddInput(input) {
captureSession.addInput(input)
} else {
print("Failed to add input to capture session")
return
}
} catch {
print("Failed to create input from video capture device")
return
}
photoOutput = AVCapturePhotoOutput()
captureSession.addOutput(photoOutput)
let metadataOutput = AVCaptureMetadataOutput()
if captureSession.canAddOutput(metadataOutput) {
captureSession.addOutput(metadataOutput)
metadataOutput.setMetadataObjectsDelegate(self, queue: DispatchQueue.main)
metadataOutput.metadataObjectTypes = supportedCodeTypes
} else {
print("Failed to add metadata output to capture session")
return
}
previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer.frame = view.layer.bounds
previewLayer.videoGravity = .resizeAspectFill
view.layer.addSublayer(previewLayer)
captureSession.startRunning()
imageView = UIImageView()
imageView.translatesAutoresizingMaskIntoConstraints = false
imageView.contentMode = .scaleAspectFit
view.addSubview(imageView)
NSLayoutConstraint.activate([
imageView.heightAnchor.constraint(equalToConstant: 300),
imageView.widthAnchor.constraint(equalToConstant: 300),
imageView.centerXAnchor.constraint(equalTo: view.centerXAnchor),
imageView.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor)
])
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
if captureSession.isRunning {
captureSession.stopRunning()
}
}
func drawRectangleOn(image: UIImage, corners: [CGPoint]) -> UIImage {
let imageSize = image.size
let scale: CGFloat = 0
UIGraphicsBeginImageContextWithOptions(imageSize, false, scale)
let context = UIGraphicsGetCurrentContext()!
image.draw(at: CGPoint.zero)
previewLayer.draw(in: context)
let path = CGMutablePath()
path.addLines(between: corners)
path.addLine(to: corners[0])
context.addPath(path)
context.setLineWidth(5)
context.setStrokeColor(UIColor.red.cgColor)
context.drawPath(using: .stroke)
let newImage = UIGraphicsGetImageFromCurrentImageContext()!
UIGraphicsEndImageContext()
return newImage
}
}
extension CodeScannerViewController: AVCaptureMetadataOutputObjectsDelegate {
func metadataOutput(_ output: AVCaptureMetadataOutput, didOutput metadataObjects: [AVMetadataObject], from connection: AVCaptureConnection) {
guard let metadataObject = metadataObjects.first else {
return
}
guard let photoCodeObject = photoOutput.transformedMetadataObject(for: metadataObject, connection: photoOutput.connection(with: .video)!) as? AVMetadataMachineReadableCodeObject else {
return
}
if photoCodeObject.stringValue != nil && !isCapturing {
isCapturing = true
codeCorners = photoCodeObject.corners
let photoSettings = AVCapturePhotoSettings()
photoSettings.photoQualityPrioritization = .speed
photoOutput.capturePhoto(with: photoSettings, delegate: self)
}
}
}
extension CodeScannerViewController: AVCapturePhotoCaptureDelegate {
func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
guard let imageData = photo.fileDataRepresentation() else {
print("Error while generating image from photo capture data.");
return
}
guard let codeImage = UIImage(data: imageData) else {
print("Unable to generate UIImage from image data.");
return
}
imageView.image = drawRectangleOn(image: codeImage, corners: codeCorners)
DispatchQueue.main.asyncAfter(deadline: .now() + 1) {
self.isCapturing = false
}
}
}
struct CodeScannerView: UIViewControllerRepresentable {
func makeUIViewController(context: Context) -> CodeScannerViewController {
return CodeScannerViewController()
}
func updateUIViewController(_ uiViewController: CodeScannerViewController, context: Context) {
// Update the view controller if needed.
}
}
For testing purposes I draw a rectangle around the detected corners in drawRectangleOn(image:)
. Using photoOutput.transformedMetadataObject
doesn’t seem to give the correct result, the rectangle isn’t anywhere close to the QR-Code on the image.
I also tried to use previewLayer.transformedMetadataObject(for: metadataObject)
and converting the coordinates using this function:
func convertToImageCoordinates(points: [CGPoint], image: UIImage, layer: CALayer) -> [CGPoint] {
let scaleFactorX = image.size.width / layer.bounds.size.width
let scaleFactorY = image.size.height / layer.bounds.size.height
return points.map { CGPoint(x: $0.x * scaleFactorX, y: $0.y * scaleFactorY) }
}
Which almost works but I think this fails to take the video gravity into account.
I’m also not happy that the photo output is separate from the detection, which could lead to inconsistencies but I was also unable to get the image data from the previewLayer
.