I’ve been working on an object detection project for several weeks now. I’ve finally got the model detecting my objects accurately, with confidence scores near 90% for most objects.
When I pull the .mlpackage into my project and open the model’s “Preview” tab, I can drop in photos of the objects taken with my phone, and it correctly identifies them and draws bounding boxes around them.
However, when I use that exact same image in my code and send it to my model, it finds nothing.
I’m not sure what’s going wrong. I tried converting the input image to more closely match the training images that were fed to the model, but even that did nothing, so at this point I’m stumped.
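In case it’s relevant, this is roughly how I’ve been checking what input the model actually expects (a quick sketch using Core ML’s MLModelDescription; MyModelPackage is the class Xcode generates for my .mlpackage):

    import CoreML

    // Sketch: dump the model's declared inputs to confirm the expected
    // image size and pixel format.
    func printModelInputs() {
        guard let mlModel = try? MyModelPackage(configuration: MLModelConfiguration()).model else {
            print("Could not load model")
            return
        }
        for (name, description) in mlModel.modelDescription.inputDescriptionsByName {
            if let constraint = description.imageConstraint {
                // Image inputs report the exact pixel dimensions they expect
                print("\(name): \(constraint.pixelsWide)x\(constraint.pixelsHigh), pixel format \(constraint.pixelFormatType)")
            } else {
                print("\(name): feature type \(description.type.rawValue)")
            }
        }
    }

If the reported size or format differed from what my code produces, that would at least explain the mismatch, but as far as I can tell it matches.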
Here is my full logic for capturing the image and sending it to my model.
import UIKit
import AVFoundation
import Vision
import CoreML
class PhotoViewController: UIViewController, AVCapturePhotoCaptureDelegate {
    private let captureSession = AVCaptureSession()
    private var photoOutput = AVCapturePhotoOutput()
    private var previewLayer: AVCaptureVideoPreviewLayer!
    private var capturedImage: UIImage?
    private var model: VNCoreMLModel?

    private let captureButton: UIButton = {
        let button = UIButton(type: .system)
        button.setTitle("Capture", for: .normal)
        button.backgroundColor = .systemBlue
        button.setTitleColor(.white, for: .normal)
        button.layer.cornerRadius = 10
        button.translatesAutoresizingMaskIntoConstraints = false
        button.addTarget(self, action: #selector(capturePhoto), for: .touchUpInside)
        return button
    }()

    private let resultsLabel: UILabel = {
        let label = UILabel()
        label.textAlignment = .center
        label.numberOfLines = 0
        label.translatesAutoresizingMaskIntoConstraints = false
        return label
    }()
    override func viewDidLoad() {
        super.viewDidLoad()
        setupCamera()
        setupUI()
        loadModel()
    }

    private func loadModel() {
        do {
            model = try VNCoreMLModel(for: MyModelPackage().model)
        } catch {
            print("Failed to load model: \(error)")
        }
    }
    private func setupCamera() {
        captureSession.beginConfiguration()
        guard let videoDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
              let videoDeviceInput = try? AVCaptureDeviceInput(device: videoDevice),
              captureSession.canAddInput(videoDeviceInput) else {
            return
        }
        captureSession.addInput(videoDeviceInput)
        guard captureSession.canAddOutput(photoOutput) else {
            return
        }
        captureSession.addOutput(photoOutput)
        captureSession.commitConfiguration()

        previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        previewLayer.videoGravity = .resizeAspectFill
        previewLayer.frame = view.layer.bounds
        view.layer.addSublayer(previewLayer)

        captureSession.startRunning()
    }
    private func setupUI() {
        view.addSubview(captureButton)
        view.addSubview(resultsLabel)
        NSLayoutConstraint.activate([
            captureButton.bottomAnchor.constraint(equalTo: view.bottomAnchor, constant: -50),
            captureButton.centerXAnchor.constraint(equalTo: view.centerXAnchor),
            captureButton.widthAnchor.constraint(equalToConstant: 100),
            captureButton.heightAnchor.constraint(equalToConstant: 50),
            resultsLabel.bottomAnchor.constraint(equalTo: captureButton.topAnchor, constant: -20),
            resultsLabel.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 20),
            resultsLabel.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -20)
        ])
    }

    @objc private func capturePhoto() {
        let photoSettings = AVCapturePhotoSettings()
        photoOutput.capturePhoto(with: photoSettings, delegate: self)
    }

    func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
        guard let imageData = photo.fileDataRepresentation() else { return }
        capturedImage = UIImage(data: imageData)
        processImageWithCoreML()
    }
    private func processImageWithCoreML() {
        guard let model = model else { return }
        guard let capturedImage = capturedImage else { return }

        // Correct image orientation
        let correctedImage = fixOrientation(of: capturedImage)

        // Resize image to model's expected input size (640x640 in this example)
        let resizedImage = resizeImage(image: correctedImage, targetSize: CGSize(width: 640, height: 640))

        // Save the resized and corrected image to the photo library for inspection
        saveImageToPhotoLibrary(image: resizedImage)

        // fixOrientation(of:) redraws the image, so its orientation is now .up.
        // (Building a CGImagePropertyOrientation from imageOrientation.rawValue
        // doesn't work: the two enums use different raw values, and rawValue 0
        // returns nil.)
        guard let ciImage = CIImage(image: resizedImage) else { return }

        let request = VNCoreMLRequest(model: model) { [weak self] request, error in
            if let error = error {
                print("Failed to perform request: \(error)")
                return
            }
            guard let results = request.results as? [VNRecognizedObjectObservation], !results.isEmpty else {
                DispatchQueue.main.async {
                    self?.resultsLabel.text = "No objects recognized."
                }
                return
            }
            let topResults = results.prefix(3).map { observation in
                observation.labels.map { "\($0.identifier): \($0.confidence)" }.joined(separator: "\n")
            }
            DispatchQueue.main.async {
                self?.resultsLabel.text = topResults.joined(separator: "\n\n")
            }
        }
        request.imageCropAndScaleOption = .scaleFit

        let handler = VNImageRequestHandler(ciImage: ciImage, orientation: .up)
        do {
            try handler.perform([request])
        } catch {
            print("Failed to perform request: \(error)")
        }
    }
    private func fixOrientation(of image: UIImage) -> UIImage {
        guard image.imageOrientation != .up else { return image }
        UIGraphicsBeginImageContextWithOptions(image.size, false, image.scale)
        image.draw(in: CGRect(origin: .zero, size: image.size))
        let normalizedImage = UIGraphicsGetImageFromCurrentImageContext() ?? image
        UIGraphicsEndImageContext()
        return normalizedImage
    }
    private func resizeImage(image: UIImage, targetSize: CGSize) -> UIImage {
        let size = image.size
        // Note: scaling width and height independently stretches the image
        // to exactly targetSize rather than preserving its aspect ratio
        let widthRatio = targetSize.width / size.width
        let heightRatio = targetSize.height / size.height
        let newSize = CGSize(width: size.width * widthRatio, height: size.height * heightRatio)
        UIGraphicsBeginImageContextWithOptions(newSize, false, 1.0)
        image.draw(in: CGRect(origin: .zero, size: newSize))
        let newImage = UIGraphicsGetImageFromCurrentImageContext()
        UIGraphicsEndImageContext()
        return newImage ?? image
    }
    private func saveImageToPhotoLibrary(image: UIImage) {
        UIImageWriteToSavedPhotosAlbum(image, self, #selector(image(_:didFinishSavingWithError:contextInfo:)), nil)
    }

    @objc private func image(_ image: UIImage, didFinishSavingWithError error: Error?, contextInfo: UnsafeRawPointer) {
        if let error = error {
            print("Error saving image: \(error)")
        } else {
            print("Image saved successfully.")
        }
    }
}
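One variant I’ve been considering (not yet verified) is to skip the manual redraw and resize entirely, and instead hand Vision the original CGImage with an explicit orientation, letting imageCropAndScaleOption do the letterboxing. A rough sketch; the cgOrientation(_:) helper is my own mapping, needed because UIImage.Orientation and CGImagePropertyOrientation use different raw values:

    // Sketch: let Vision handle scaling/cropping instead of resizing manually.
    func cgOrientation(_ o: UIImage.Orientation) -> CGImagePropertyOrientation {
        // Explicit mapping, since the two enums' raw values don't line up
        switch o {
        case .up: return .up
        case .down: return .down
        case .left: return .left
        case .right: return .right
        case .upMirrored: return .upMirrored
        case .downMirrored: return .downMirrored
        case .leftMirrored: return .leftMirrored
        case .rightMirrored: return .rightMirrored
        @unknown default: return .up
        }
    }

    func detectObjects(in image: UIImage, using model: VNCoreMLModel) {
        guard let cgImage = image.cgImage else { return }
        let request = VNCoreMLRequest(model: model) { request, _ in
            let results = request.results as? [VNRecognizedObjectObservation] ?? []
            print("Found \(results.count) objects")
        }
        // .scaleFit letterboxes the image down to the model's input size
        request.imageCropAndScaleOption = .scaleFit
        let handler = VNImageRequestHandler(cgImage: cgImage,
                                            orientation: cgOrientation(image.imageOrientation))
        try? handler.perform([request])
    }

The idea there is that Vision does the resize itself, so there’s no chance my manual redraw is mangling the pixels, but I haven’t confirmed whether it actually behaves differently.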
So my question: is there something more I need to do to the image in order for my model to correctly identify the objects it was trained on? I’m feeling out of ideas.