My Goal: I am trying to create a simple dataset to train a machine learning model. In this dataset, I want to detect the coordinates of points in specific facial regions such as the eyes, eyebrows, and lips.
How It Works/Should Work:
- First, I process the frames taken from the webcam using the methods in
mediapipe_utils.py
to detect face landmarks and obtainface_results
. - In the
data_capture.py
file, I loop through each point on the face withfor landmark in face_results.multi_face_landmarks[0].landmark
. - I check whether each point obtained from
face_results.multi_face_landmarks[0].landmark
is one of the points in the eyes, eyebrows, or lips. If the point is from one of these regions, I obtain its coordinates relative to the nose tip (considered as the origin) and save them in JSON format.
Problem: While the landmark
content is in the form of x: 0.345954359 y: 0.904962182 z: 0.0607943721
, the face_mesh.FACEMESH_LIPS
content is in the form of FACEMESH_LIPS = frozenset([(61, 146), (146, 91), ...])
. Since these outputs are unrelated, I am unable to filter them.
What I Want: I want to detect the coordinates of only the eyes, eyebrows, and lips points, with the nose tip coordinates as the origin.
Since this is my first time using Mediapipe, I have no idea what to do.
(To keep the question concise, I will only show the relevant code snippets from the necessary files in the project.)
json_utils.py
import json
class JsonUtils:
def __init__(self):
pass
def load_data(self, file_path):
try:
with open(file_path, 'r') as f:
data = json.load(f)
if data:
counter = max(item.get('id', 0) for item in data) + 1
else:
counter = 1
except Exception:
data = []
counter = 1
return data, counter
def save_data(self, data, file_path):
with open(file_path, 'w') as f:
json.dump(data, f, indent=4)
camera_utils.py
import cv2
class CameraUtils:
def __init__(self, camera_index=0):
self.cap = cv2.VideoCapture(camera_index)
def read_frame(self):
ret, image = self.cap.read()
if not ret:
print("Ignoring empty camera frame.")
return image
def release_camera(self):
self.cap.release()
mediapipe_utils.py
import mediapipe as mp
import cv2
class MediaPipeUtils:
def __init__(self):
self.mp_drawing = mp.solutions.drawing_utils
self.mp_drawing_styles = mp.solutions.drawing_styles
self.mp_face_mesh = mp.solutions.face_mesh
self.face_mesh = self.mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
def process(self, image):
image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
face_results = self.face_process(image)
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
return face_results
def face_process(self, rgb_image):
face_results = self.face_mesh.process(rgb_image)
return face_results
data_capture.py
class DataCapture:
def __init__(self, json_utils):
self.json_utils = json_utils
def capture_face_points(self, face_mesh, face_results):
try:
nose_tip_landmark = face_results.multi_face_landmarks[0].landmark[1]
nose_tip_x, nose_tip_y, nose_tip_z = nose_tip_landmark.x, nose_tip_landmark.y, nose_tip_landmark.z
face_points = []
for landmark in face_results.multi_face_landmarks[0].landmark:
if self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LIPS) or
self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LEFT_EYE) or
self.is_landmark_in_region(landmark, face_mesh.FACEMESH_LEFT_EYEBROW) or
self.is_landmark_in_region(landmark, face_mesh.FACEMESH_RIGHT_EYE) or
self.is_landmark_in_region(landmark, face_mesh.FACEMESH_RIGHT_EYEBROW):
relative_x = landmark.x - nose_tip_x
relative_y = landmark.y - nose_tip_y
relative_z = landmark.z - nose_tip_z
face_points.append({"x": relative_x, "y": relative_y, "z": relative_z})
return face_points
except Exception as e:
print(f"Nose tip not found: {e}")
return None
def is_landmark_in_region(self, landmark, region):
return landmark in region
def save_data(self, points, data, label, id, file_path):
data.append({"id": id, "label": label, "landmarks": points})
self.json_utils.save_data(data, file_path)
print(f"Captured data saved to {file_path} with label {label} and id {id}")