Thiết kế website giá rẻ

Question

My Goal: I am trying to create a simple dataset to train a machine learning model. In this dataset, I want to detect the coordinates of points in specific facial regions such as the eyes, eyebrows, and lips.

How It Works/Should Work:

First, I process the frames taken from the webcam using the methods in mediapipe_utils.py to detect face landmarks and obtain face_results.
In the data_capture.py file, I loop through each point on the face with for landmark in face_results.multi_face_landmarks[0].landmark.
I check whether each point obtained from face_results.multi_face_landmarks[0].landmark is one of the points in the eyes, eyebrows, or lips. If the point is from one of these regions, I obtain its coordinates relative to the nose tip (considered as the origin) and save them in JSON format.

Problem: While the landmark content is in the form of x: 0.345954359 y: 0.904962182 z: 0.0607943721, the face_mesh.FACEMESH_LIPS content is in the form of FACEMESH_LIPS = frozenset([(61, 146), (146, 91), ...]). Since these outputs are unrelated, I am unable to filter them.

What I Want: I want to detect the coordinates of only the eyes, eyebrows, and lips points, with the nose tip coordinates as the origin.

Since this is my first time using Mediapipe, I have no idea what to do.

(To keep the question concise, I will only show the relevant code snippets from the necessary files in the project.)

json_utils.py

import json

class JsonUtils:
    def __init__(self):
        pass

    def load_data(self, file_path):
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            if data:
                counter = max(item.get('id', 0) for item in data) + 1
            else:
                counter = 1
        except Exception:
            data = []
            counter = 1
        return data, counter

    def save_data(self, data, file_path):
        with open(file_path, 'w') as f:
            json.dump(data, f, indent=4)

camera_utils.py

import cv2

class CameraUtils:
    def __init__(self, camera_index=0):
        self.cap = cv2.VideoCapture(camera_index)

    def read_frame(self):
        ret, image = self.cap.read()
        if not ret:
            print("Ignoring empty camera frame.")
        return image

    def release_camera(self):
        self.cap.release()

mediapipe_utils.py

import mediapipe as mp
import cv2

class MediaPipeUtils:
    def __init__(self):
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles
        self.mp_face_mesh = mp.solutions.face_mesh
        
        self.face_mesh = self.mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)

    def process(self, image):
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        face_results = self.face_process(image)

        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        return face_results

    def face_process(self, rgb_image):
        face_results = self.face_mesh.process(rgb_image)
        return face_results

data_capture.py

class DataCapture:
    def __init__(self, json_utils):
        self.json_utils = json_utils

    def capture_face_points(self, face_mesh, face_results):
        try:
            nose_tip_landmark = face_results.multi_face_landmarks[0].landmark[1]
            nose_tip_x, nose_tip_y, nose_tip_z = nose_tip_landmark.x, nose_tip_landmark.y, nose_tip_landmark.z
            face_points = []
            for landmark in face_results.multi_face_landmarks[0].landmark:
                if self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LIPS) or 
                self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LEFT_EYE) or 
                self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LEFT_EYEBROW) or 
                self.is_landmark_in_region(landmark, face_mesh.FACEMESH_RIGHT_EYE) or 
                self.is_landmark_in_region(landmark, face_mesh.FACEMESH_RIGHT_EYEBROW):
                    relative_x = landmark.x - nose_tip_x
                    relative_y = landmark.y - nose_tip_y
                    relative_z = landmark.z - nose_tip_z
                    face_points.append({"x": relative_x, "y": relative_y, "z": relative_z})
            return face_points
        except Exception as e:
            print(f"Nose tip not found: {e}")
        return None

    def is_landmark_in_region(self, landmark, region):
        return landmark in region

    def save_data(self, points, data, label, id, file_path):
        data.append({"id": id, "label": label, "landmarks": points})
        self.json_utils.save_data(data, file_path)
        print(f"Captured data saved to {file_path} with label {label} and id {id}")

Thiết kế website giá rẻ

Danh mục

How to filter face landmark coordinates by specific facial regions using Mediapipe?