Thiết kế website giá rẻ

Question

I’m trying to detect multi hand gestures using mediapipe. I want to detect both the gestures of both hands independently. Both hands can have the same gesture or different gestures. I the given code the function print_result is printing the the contents of the object after the inference has been run on the frame. the max_num_hands parameter has been set to 2 here with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5) as hands:

import cv2
import mediapipe as mp
import time

cap = cv2.VideoCapture(1)

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Callback function to print gesture recognition results
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    if result.gestures:
        # Get the category name of the recognized gesture
        category_name = result.gestures[0][0].category_name
        # print(category_name)
        print(result)
    else:
        print("No gestures recognized")

# Initialize MediaPipe drawing utils and hands module
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Configure options for the gesture recognizer
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='C:\Users\golut\OneDrive\Documents\Projects\Virtual Mouse\models\gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result
)

# Create a gesture recognizer instance
with GestureRecognizer.create_from_options(options) as recognizer:
    print('Gesture recognizer created')

    while True:
        success, img = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue

        # Convert BGR image to RGB for MediaPipe processing
        rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Detect hand landmarks using MediaPipe Hands
        with mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.5) as hands:
            results = hands.process(rgb_img)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw hand landmarks on the image with specified color and thickness
                    mp_drawing.draw_landmarks(
                        img, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)
                    )
    

        # Prepare image for gesture recognition
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_img)
        current_time_ms = int(time.time() * 1000)

        # Perform gesture recognition on the processed image
        detected_gestures = recognizer.recognize_async(mp_image, current_time_ms)

        img = cv2.flip(img, 1)  # Flips the image horizontally
        cv2.imshow("Imshow", img)

        if cv2.waitKey(10) == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In the object GestureRecognizerResult we see a list handedness that contains the category_name which is either left or right.

The problem is the gesture recognizer only gives one output either left or right hand in the output depending on which hand got detected first and the latter is ignored. In mediapipe’s given try on example, both hands when shown to the camera with different gesture are recognized independently. Link to mediapipe demo

GestureRecognizerResult(gestures=[[Category(index=-1, score=0.7995390892028809,
 display_name='', category_name='Open_Palm')]], handedness=[[Category(index=0, score=0.9178019165992737, display_name='Right', category_name='Right')]], 
hand_landmarks=[[NormalizedLandmark(x=0.23192565143108368, y=0.8508237600326538, z=3.7175095712882467e-07, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.2964465022087097, y=0.807819128036499, z=-0.02174699306488037, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.3386477530002594, y=0.7381684184074402, z=-0.026875635609030724, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.3652242422103882, y=0.6717657446861267, z=-0.03148443624377251, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.39171433448791504, y=0.627888560295105, z=-0.03597773239016533, visibility=0.0, presence=0.0),
NormalizedLandmark(x=0.30005523562431335, y=0.6441321969032288, z=-0.002747688442468643, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.3194928765296936, y=0.5634738802909851, z=-0.015889683738350868, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.3276906907558441, y=0.5102080702781677, z=-0.0299211535602808, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.33434727787971497, y=0.46343517303466797, z=-0.04088740795850754, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.2615800201892853, y=0.6335919499397278, z=-0.002842121757566929, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.26276978850364685, y=0.5426733493804932, z=-0.014345655217766762, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.2621628940105438, y=0.48378312587738037, z=-0.028536789119243622, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.26235222816467285, y=0.43310630321502686, z=-0.03940063342452049, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.22592493891716003, y=0.6417601108551025, z=-0.006861940026283264, visibility=0.0, presence=0.0), 

NormalizedLandmark(x=0.2230750024318695, y=0.5614591240882874, z=-0.01952073909342289, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.22449643909931183, y=0.5094373822212219, z=-0.029860520735383034, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.229284405708313, y=0.46403464674949646, z=-0.03746004030108452, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.19173786044120789, y=0.663299024105072, z=-0.0136506836861372, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.18222525715827942, y=0.604834794998169, z=-0.025881653651595116, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.18415895104408264, y=0.5673394799232483, z=-0.03144041821360588, visibility=0.0, presence=0.0), 
NormalizedLandmark(x=0.19118154048919678, y=0.5324922800064087, z=-0.034897807985544205, visibility=0.0, presence=0.0)]], 
hand_world_landmarks=[[Landmark(x=-0.012245522812008858, y=0.09203963726758957, z=-0.0038926522247493267, visibility=0.0, presence=0.0), 
Landmark(x=0.021369636058807373, y=0.06962162256240845, z=-0.009559692814946175, visibility=0.0, presence=0.0), 
Landmark(x=0.042654991149902344, y=0.04227661341428757, z=-0.012077674269676208, visibility=0.0, presence=0.0), 
Landmark(x=0.0617685541510582, y=0.014768477529287338, z=-0.011491118930280209, visibility=0.0, presence=0.0), 
Landmark(x=0.07398916780948639, y=-0.012367911636829376, z=-0.0075836945325136185, visibility=0.0, presence=0.0), 
Landmark(x=0.025482138618826866, y=-0.0010876771993935108, z=0.006445789244025946, visibility=0.0, presence=0.0), 
Landmark(x=0.03543740138411522, y=-0.02912675403058529, z=-0.00173004565294832, visibility=0.0, presence=0.0), 
Landmark(x=0.040552493184804916, y=-0.0489623099565506, z=-0.007902431301772594, visibility=0.0, presence=0.0), 
Landmark(x=0.04358145594596863, y=-0.06487865746021271, z=-0.0319957509636879, visibility=0.0, presence=0.0), 
Landmark(x=0.0016808465588837862, y=-0.004498452879488468, z=0.006683729123324156, visibility=0.0, presence=0.0), 
Landmark(x=0.004972374066710472, y=-0.04138147830963135, z=-0.003927251789718866, visibility=0.0, presence=0.0), 
Landmark(x=0.00558849610388279, y=-0.06327502429485321, z=-0.020593348890542984, visibility=0.0, presence=0.0), 
Landmark(x=0.0066368915140628815, y=-0.08291880786418915, z=-0.039193443953990936, visibility=0.0, presence=0.0), 
Landmark(x=-0.018360454589128494, y=-0.0009643810335546732, z=-0.0038148483727127314, visibility=0.0, presence=0.0), 
Landmark(x=-0.015782665461301804, y=-0.03162727132439613, z=-0.013909644447267056, visibility=0.0, presence=0.0), 
Landmark(x=-0.013191262260079384, y=-0.05145301669836044, z=-0.028273196890950203, visibility=0.0, presence=0.0), 
Landmark(x=-0.009723789989948273, y=-0.0685187503695488, z=-0.04024944826960564, visibility=0.0, presence=0.0), 
Landmark(x=-0.035820234566926956, y=0.011946788057684898, z=-0.0120608601719141, visibility=0.0, presence=0.0), 
Landmark(x=-0.03725161403417587, y=-0.009996423497796059, z=-0.017715157940983772, visibility=0.0, presence=0.0), 
Landmark(x=-0.036166295409202576, y=-0.028470497578382492, z=-0.026987750083208084, visibility=0.0, presence=0.0), 
Landmark(x=-0.030654065310955048, y=-0.03972318768501282, z=-0.03699912130832672, visibility=0.0, presence=0.0)]])

I want to achieve the same result as the demo of recognizing both hands individually at once with different gestures.

Thiết kế website giá rẻ

Danh mục

Mediapipe Gesture Recognition Handedness detects both hands but result object has only one