Thiết kế website giá rẻ

Question

I am trying to build a sign language action recognition model. I have frames that I transformed into landmark keypoints using MediaPipe, and they are stored in .npy format.

Below is my model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

def enhanced_cnn_lstm_model(num_frames=30, num_keypoints=1662, num_classes=502):
    """Build and compile a stacked-LSTM classifier for keypoint sequences.

    Despite the name, the network contains no convolutional layers: it is
    three LSTM blocks (each followed by batch normalisation and dropout),
    one L2-regularised dense layer, and a softmax output layer.

    Args:
        num_frames: Number of time steps in each input sequence.
        num_keypoints: Number of features per time step.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    layer_stack = [
        # Recurrent block 1: the only layer that declares the input shape.
        LSTM(units=128, return_sequences=True, input_shape=(num_frames, num_keypoints), recurrent_dropout=0.2),
        BatchNormalization(),
        Dropout(0.5),
        # Recurrent block 2: still emits the full sequence for the next LSTM.
        LSTM(units=256, return_sequences=True),
        BatchNormalization(),
        Dropout(0.5),
        # Recurrent block 3: collapses the sequence to its final state.
        LSTM(units=512, return_sequences=False),
        BatchNormalization(),
        Dropout(0.5),
        # Classification head.
        Dense(1024, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

My data generator

from tensorflow.keras.utils import Sequence
import tensorflow as tf
import numpy as np
import os

# Make every tf.function run eagerly (op by op) instead of as a traced graph.
# NOTE(review): this is normally a debugging aid and slows training — confirm it is still needed.
tf.config.run_functions_eagerly(True)
class KeypointsDataGenerator(Sequence):
    """Keras ``Sequence`` that serves batches of keypoint sequences from ``.npy`` files.

    ``dataset_dir`` must contain one sub-directory per class. The position of
    a sub-directory in the sorted directory listing is its integer label.

    Args:
        dataset_dir: Root directory holding one sub-directory per class.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order in ``on_epoch_end``.
        num_frames: Number of frames every sample is reshaped to.
        num_keypoints: Number of features per frame.
        num_classes: Width of the one-hot label vectors.
        **kwargs: Forwarded unchanged to the base-class constructor.

    Raises:
        ValueError: If a label index found on disk does not fit in
            ``num_classes``.
    """

    def __init__(self, dataset_dir, batch_size=64, shuffle=True, num_frames=30,
                 num_keypoints=1662, num_classes=502, **kwargs):
        super().__init__(**kwargs)
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_frames = num_frames
        self.num_keypoints = num_keypoints
        self.num_classes = num_classes
        self.samples, self.labels = self._load_dataset()
        self.on_epoch_end()

    def _load_dataset(self):
        """Return ``(sample_paths, {sample_path: label_index})`` for the dataset."""
        samples = []
        labels = {}
        # Only directories are classes; a stray file must neither crash the
        # scan nor consume a label index.
        class_dirs = [
            name for name in sorted(os.listdir(self.dataset_dir))
            if os.path.isdir(os.path.join(self.dataset_dir, name))
        ]
        for label_idx, label_dir in enumerate(class_dirs):
            label_path = os.path.join(self.dataset_dir, label_dir)
            for sample_file in sorted(os.listdir(label_path)):
                sample_path = os.path.join(label_path, sample_file)
                if os.path.isfile(sample_path) and sample_path.endswith('.npy'):
                    samples.append(sample_path)
                    labels[sample_path] = label_idx
        if not samples:
            print("No samples found.")
        # Fail early instead of with an IndexError in the middle of training.
        if labels and max(labels.values()) >= self.num_classes:
            raise ValueError(
                f"Found label index {max(labels.values())} but num_classes is "
                f"{self.num_classes}."
            )
        return samples, labels

    def __len__(self):
        """Return the number of full batches per epoch (the remainder is dropped)."""
        return len(self.samples) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        batch_samples = self.samples[index * self.batch_size:(index + 1) * self.batch_size]
        return self._generate_data(batch_samples)

    def _generate_data(self, batch_samples):
        """Load the given sample files and return ``(X, one_hot_y)``."""
        # Size the arrays by the samples actually supplied so that a short
        # batch does not gain all-zero rows labelled as class 0.
        count = len(batch_samples)
        X = np.zeros((count, self.num_frames, self.num_keypoints))
        y = np.zeros(count, dtype=int)

        for i, sample_path in enumerate(batch_samples):
            keypoints = np.load(sample_path)
            X[i] = keypoints.reshape(self.num_frames, self.num_keypoints)
            y[i] = self.labels[sample_path]

        return X, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)

    def on_epoch_end(self):
        """Shuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.samples)

Code used to extract keypoints

import numpy as np

def adjust_sequence_length(features_list, target_length=30):
    """Resize a list of per-frame feature vectors to exactly ``target_length`` frames.

    Every frame vector is first zero-padded at its end to the length of the
    longest vector. The frame axis is then adjusted:

    * more frames than ``target_length``: frames are picked at evenly spaced
      positions from the first to the last frame;
    * fewer frames: the sequence is repeated cyclically from its start until
      ``target_length`` frames are filled;
    * equal: the padded frames are returned unchanged.

    Args:
        features_list: Sequence of 1-D feature vectors, one per frame.
        target_length: Number of frames in the result.

    Returns:
        A 2-D array of shape ``(target_length, longest_vector_length)``. For
        an empty ``features_list`` this is an all-zero array of shape
        ``(target_length, 0)``.
    """
    # Without frames there is nothing to repeat; the cyclic branch below
    # would otherwise divide by zero.
    if len(features_list) == 0:
        return np.zeros((target_length, 0))

    uniform_length = max(len(f) for f in features_list)
    padded = np.array(
        [np.pad(f, (0, uniform_length - len(f)), 'constant') for f in features_list]
    )
    num_frames = len(padded)

    if num_frames > target_length:
        indices = np.round(np.linspace(0, num_frames - 1, target_length)).astype(int)
        return padded[indices]

    if num_frames < target_length:
        # Frame i of the output is frame i modulo num_frames of the input,
        # i.e. the clip is looped. Writing into a zeros array keeps the
        # float64 result type.
        adjusted_features = np.zeros((target_length, uniform_length))
        adjusted_features[:] = padded[np.arange(target_length) % num_frames]
        return adjusted_features

    return padded


# Extract per-frame features for every sample directory and save one
# fixed-length .npy array per sample, mirroring the class directory layout
# under features_root_directory.
mp_hands = mp.solutions.hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)
mp_pose = mp.solutions.pose.Pose(static_image_mode=True, min_detection_confidence=0.5)
mp_face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)

# try/finally guarantees the MediaPipe objects are closed even when an image
# or a save fails part-way through the dataset.
try:
    for label_dir in tqdm(os.listdir(dataset_directory)):
        gesture_path = os.path.join(dataset_directory, label_dir)
        if not os.path.isdir(gesture_path):
            # Stray files next to the class directories are not classes.
            continue
        features_label_dir = os.path.join(features_root_directory, label_dir)
        os.makedirs(features_label_dir, exist_ok=True)
        for sample_dir in os.listdir(gesture_path):
            sample_path = os.path.join(gesture_path, sample_dir)
            if not os.path.isdir(sample_path):
                continue
            features_list = []

            # NOTE(review): sorted() orders names lexicographically, so frame
            # file names need zero-padded numbers to stay in temporal order —
            # confirm against the dataset.
            for frame in sorted(os.listdir(sample_path)):
                image_path = os.path.join(sample_path, frame)
                image = cv2.imread(image_path)
                if image is not None:
                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    features = extract_features(image_rgb, mp_hands, mp_pose, mp_face_mesh)
                    if features.size > 0:
                        features_list.append(features)

            if not features_list:
                # Nothing usable was extracted; saving would produce an array
                # the data generator cannot reshape.
                print(f"Skipping {sample_path}: no features extracted.")
                continue

            adjusted_features = adjust_sequence_length(features_list)

            save_path = os.path.join(features_label_dir, f"{sample_dir}.npy")
            np.save(save_path, adjusted_features)
finally:
    mp_hands.close()
    mp_pose.close()
    mp_face_mesh.close()

I tried a data generator with normalization, but training accuracy gets stuck at 0.2%.

from tensorflow.keras.utils import Sequence
import tensorflow as tf
import numpy as np
import os

# Make every tf.function run eagerly (op by op) instead of as a traced graph.
# NOTE(review): this is normally a debugging aid and slows training — confirm it is still needed.
tf.config.run_functions_eagerly(True)
class KeypointsDataGenerator(Sequence):
    """Keras ``Sequence`` serving min-max-scaled keypoint batches from ``.npy`` files.

    ``dataset_dir`` must contain one sub-directory per class. The position of
    a sub-directory in the sorted directory listing is its integer label.
    Scaling uses a single global minimum and maximum taken over every value
    of every sample, found by loading each file once at construction time.

    Args:
        dataset_dir: Root directory holding one sub-directory per class.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the sample order in ``on_epoch_end``.
        num_frames: Number of frames every sample is reshaped to.
        num_keypoints: Number of features per frame.
        num_classes: Width of the one-hot label vectors.
        **kwargs: Forwarded unchanged to the base-class constructor.

    Raises:
        ValueError: If a label index found on disk does not fit in
            ``num_classes``.
    """

    def __init__(self, dataset_dir, batch_size=32, shuffle=True, num_frames=30,
                 num_keypoints=1662, num_classes=502, **kwargs):
        super().__init__(**kwargs)
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_frames = num_frames
        self.num_keypoints = num_keypoints
        self.num_classes = num_classes
        self.samples, self.labels = self._load_dataset()
        self.min_val, self.max_val = self._find_min_max_values()
        self.on_epoch_end()

    def _load_dataset(self):
        """Return ``(sample_paths, {sample_path: label_index})`` for the dataset."""
        samples = []
        labels = {}
        # Only directories are classes; a stray file must neither crash the
        # scan nor consume a label index.
        class_dirs = [
            name for name in sorted(os.listdir(self.dataset_dir))
            if os.path.isdir(os.path.join(self.dataset_dir, name))
        ]
        for label_idx, label_dir in enumerate(class_dirs):
            print(f'Processing label: {label_dir}')
            label_path = os.path.join(self.dataset_dir, label_dir)
            for sample_file in sorted(os.listdir(label_path)):
                sample_path = os.path.join(label_path, sample_file)
                if os.path.isfile(sample_path) and sample_path.endswith('.npy'):
                    samples.append(sample_path)
                    labels[sample_path] = label_idx
        # Fail early instead of with an IndexError in the middle of training.
        if labels and max(labels.values()) >= self.num_classes:
            raise ValueError(
                f"Found label index {max(labels.values())} but num_classes is "
                f"{self.num_classes}."
            )
        return samples, labels

    def __len__(self):
        """Return the number of full batches per epoch (the remainder is dropped)."""
        return len(self.samples) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``."""
        batch_samples = self.samples[index * self.batch_size:(index + 1) * self.batch_size]
        return self._generate_data(batch_samples)

    def _find_min_max_values(self):
        """Return the global ``(min, max)`` over all values of all samples.

        An empty dataset yields ``(0.0, 1.0)``, which makes the scaling in
        ``_generate_data`` the identity instead of raising on an empty
        reduction.
        """
        if not self.samples:
            return 0.0, 1.0
        min_vals = []
        max_vals = []
        for sample_path in self.samples:
            print(f'Processing sample: {sample_path}')
            keypoints = np.load(sample_path)
            min_vals.append(np.min(keypoints))
            max_vals.append(np.max(keypoints))
        return np.min(min_vals), np.max(max_vals)

    def _generate_data(self, batch_samples):
        """Load, min-max scale and stack the given samples; return ``(X, one_hot_y)``."""
        # Size the arrays by the samples actually supplied so that a short
        # batch does not gain all-zero rows labelled as class 0.
        count = len(batch_samples)
        X = np.zeros((count, self.num_frames, self.num_keypoints))
        y = np.zeros(count, dtype=int)

        # A constant dataset has zero range; dividing by it would fill the
        # batch with NaN/inf, so fall back to a range of 1.
        value_range = self.max_val - self.min_val
        if value_range == 0:
            value_range = 1.0

        for i, sample_path in enumerate(batch_samples):
            keypoints = np.load(sample_path)
            keypoints = keypoints.reshape(self.num_frames, self.num_keypoints)
            X[i] = (keypoints - self.min_val) / value_range  # Apply Min-Max scaling
            y[i] = self.labels[sample_path]
        return X, tf.keras.utils.to_categorical(y, num_classes=self.num_classes)

    def on_epoch_end(self):
        """Shuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.samples)

I tried lower dropout rates and the result was the same. I made the model both less and more complex, and I tried different learning_rate values, but nothing helped.

Thiết kế website giá rẻ

Danh mục

Training Accuracy Raising While Validation Accuracy Fluctuating