I’m building a CNN+LSTM model for video classification, specifically drowsiness classification. I’m using the SUSTDDD dataset, where I split the 19 subjects into training/validation/test sets in a 13:3:3 ratio (roughly 70:15:15), so each subject appears in only one split.
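The split is done at the subject level, roughly like this (a simplified sketch; the subject IDs and copy step are illustrative, not my exact script):

```python
import random

# Illustrative subject-level split: no subject appears in two splits
subject_ids = [f"subject_{i:02d}" for i in range(1, 20)]  # 19 hypothetical IDs
random.seed(42)
random.shuffle(subject_ids)

split_map = {
    "train": subject_ids[:13],    # 13 subjects (~70%)
    "val":   subject_ids[13:16],  # 3 subjects (~15%)
    "test":  subject_ids[16:],    # 3 subjects (~15%)
}
# Each subject's frames then end up under /workspace/SUSTDDD/<split>/<class>/<video>/
```

The model is a pretrained VGG16 with an LSTM layer added on top. Here’s my code: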
```python
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import (Input, Dense, Dropout, LSTM,
                                     GlobalAveragePooling2D, TimeDistributed)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

image_height = 224
image_width = 224
batch_size = 6
n_of_frames = 50
NUM_OF_CLASSES = 2
NUM_OF_CHANNELS = 3

# One sample is a sequence of frames: (frames, height, width, channels)
input_shape = (n_of_frames, image_height, image_width, NUM_OF_CHANNELS)
video = Input(shape=input_shape)

# Frozen VGG16 backbone used as a per-frame feature extractor
cnn_base = VGG16(input_shape=(image_height, image_width, NUM_OF_CHANNELS),
                 weights="imagenet",
                 include_top=False)
cnn_base.trainable = False
cnn_out = GlobalAveragePooling2D()(cnn_base.output)  # 512-d vector per frame
cnn = Model(cnn_base.input, cnn_out)

# Run the CNN on every frame, then aggregate over time with an LSTM
encoded_frames = TimeDistributed(cnn)(video)  # (None, 50, 512)
encoded_sequence1 = LSTM(1024)(encoded_frames)
hidden_layer1 = Dense(512, activation="relu")(encoded_sequence1)
hidden_layer2 = Dropout(0.3)(hidden_layer1)
outputs = Dense(NUM_OF_CLASSES, activation="softmax")(hidden_layer2)

model = Model(video, outputs)
optimizer = Adam(learning_rate=0.0001)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
model.summary()
```
```
Model: "model_1"
_________________________________________________________________
 Layer (type)                        Output Shape               Param #
=================================================================
 input_1 (InputLayer)                [(None, 50, 224, 224, 3)]  0
 time_distributed (TimeDistributed)  (None, 50, 512)            14714688
 lstm (LSTM)                         (None, 1024)               6295552
 dense (Dense)                       (None, 512)                524800
 dropout (Dropout)                   (None, 512)                0
 dense_1 (Dense)                     (None, 2)                  1026
=================================================================
Total params: 21536066 (82.15 MB)
Trainable params: 6821378 (26.02 MB)
Non-trainable params: 14714688 (56.13 MB)
```
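For what it’s worth, the parameter counts line up; e.g. the LSTM entry matches the standard four-gate formula:

```python
units, input_dim = 1024, 512  # LSTM units, pooled VGG16 feature size
# 4 gates x (input weights + recurrent weights + biases)
print(4 * (input_dim * units + units * units + units))  # 6295552
```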
The problem is that the model doesn't seem to train properly: training accuracy fluctuates and won't go up, validation accuracy is stuck on one or two values, and the network predicts only one class all the time.
[Curves](https://i.sstatic.net/6HHG5ysB.png)
[Confusion matrix](https://i.sstatic.net/fzXMbUb6.png)
Here’s my custom data generator:
```python
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical


class CustomSequenceGenerator(tf.keras.utils.Sequence):
    def __init__(self, directory, batch_size, n_of_frames, target_size, shuffle=True):
        self.directory = directory
        self.batch_size = batch_size
        self.n_of_frames = n_of_frames
        self.target_size = target_size
        self.shuffle = shuffle
        self.skip_step = 1
        self.sequences = self.load_filepaths()
        np.random.shuffle(self.sequences)
        # Cap the number of sequences per class so each split is balanced
        if self.directory == '/workspace/SUSTDDD/val' or self.directory == '/workspace/SUSTDDD/test':
            self.sequences = self.balance_dataset(self.sequences, 161)
        if self.directory == '/workspace/SUSTDDD/train':
            self.sequences = self.balance_dataset(self.sequences, 2000)
        print(self.list_sequences(self.sequences))

    def load_filepaths(self):
        # Walk <directory>/<class>/<video>/<sub_video>/ and group frame paths
        # into non-overlapping sequences of n_of_frames
        filepaths = []
        for clas in os.listdir(self.directory):
            for vid in os.listdir(os.path.join(self.directory, clas)):
                if vid.endswith(".zip"):
                    continue
                single_sequence = []
                sub_vids = os.listdir(os.path.join(self.directory, clas, vid))
                for sub_vid in sub_vids:
                    files = os.listdir(os.path.join(self.directory, clas, vid, sub_vid))
                    files.sort()
                    for idx, file in enumerate(files):
                        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                            if idx % self.skip_step == 0:
                                filepath = os.path.join(self.directory, clas, vid, sub_vid, file)
                                single_sequence.append(filepath)
                                if len(single_sequence) == self.n_of_frames:
                                    filepaths.append(single_sequence)
                                    single_sequence = []
        if self.shuffle:
            np.random.shuffle(filepaths)
        return filepaths

    def list_sequences(self, filepaths):
        # Count sequences per class; path index 4 is the class folder,
        # e.g. /workspace/SUSTDDD/train/D/...
        classes = {"D": 0, "ND": 0}
        for filepath in filepaths:
            for k in classes:
                if k == filepath[0].split('/')[4]:
                    classes[k] += 1
        return classes

    def balance_dataset(self, filepaths, max_cap):
        # Keep at most max_cap sequences per class; v is the count before
        # the increment, so exactly max_cap sequences per class survive
        classes = {"D": 0, "ND": 0}
        to_remove = []
        for filepath in filepaths:
            for k, v in classes.items():
                if k == filepath[0].split('/')[4]:
                    classes[k] += 1
                    if v >= max_cap:
                        to_remove.append(filepath)
        for j in to_remove:
            filepaths.remove(j)
        return filepaths

    def __len__(self):
        return len(self.sequences) // self.batch_size

    def __getitem__(self, idx):
        batch_sequences = self.sequences[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_images = []
        batch_labels = []
        for sequence in batch_sequences:
            batch_single_sequence = []
            current_class = -1
            for image_path in sequence:
                tf_image = tf.io.read_file(image_path)
                # Note: with dtype=tf.float32, decode_image scales pixels to [0, 1]
                decoded_image = tf.image.decode_image(tf_image, dtype=tf.float32)
                image_resized = tf.image.resize(decoded_image, self.target_size)
                # VGG16 preprocessing (ImageNet mean subtraction) on top of that
                image_batch = tf.keras.applications.vgg16.preprocess_input(image_resized)
                batch_single_sequence.append(image_batch)
                # Label comes from the class folder in the path ("D" / "ND")
                current_class = image_path.split('/')[4]
                if current_class == "D":
                    current_class = 1
                elif current_class == "ND":
                    current_class = 0
            batch_images.append(batch_single_sequence)
            batch_labels.append(current_class)
        batch_images = np.array(batch_images)
        batch_labels = np.array(batch_labels)
        batch_labels = to_categorical(batch_labels, num_classes=NUM_OF_CLASSES)
        return batch_images, batch_labels

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.sequences)
```
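To rule out an obvious generator bug, I also inspect a batch like this (a quick sanity check; `train_gen` is just a hypothetical instance of the generator above):

```python
# Hypothetical instance; arguments match the constants used above
train_gen = CustomSequenceGenerator('/workspace/SUSTDDD/train',
                                    batch_size=6, n_of_frames=50,
                                    target_size=(224, 224))

x, y = train_gen[0]
print(x.shape, y.shape)  # expect (6, 50, 224, 224, 3) and (6, 2)
print(x.min(), x.max())  # pixel range after decode_image + preprocess_input
print(y.sum(axis=0))     # per-class counts within the batch
```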
So far I’ve tried:
- Reducing model complexity to 3 conv layers and 1 LSTM layer
- Using different pretrained models (VGG16, VGG19, ResNet50, InceptionV3), with frames resized to the input size each of them expects (i.e. 224×224 for VGG16, 299×299 for InceptionV3)
- Sweeping the learning rate from 0.01 down to 1e-6
- Using different optimizers (Adam, SGD, RMSprop)
- Using a different dataset (RLDD; a slightly different approach, but the outcome is the same)
- Adding dropout or recurrent dropout
- Using binary cross-entropy with a sigmoid activation (see the sketch after this list)
- Using TensorFlow's built-in preprocessing utilities, i.e.

  ```python
  decoded_image = tf.image.decode_image(tf_image, dtype=tf.float32)
  image_resized = tf.image.resize(decoded_image, self.target_size)
  image = tf.keras.applications.vgg16.preprocess_input(image_resized)
  ```

- Balancing the training, validation and test datasets
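The binary cross-entropy variant mentioned in the list was roughly this (a minimal sketch of the changed output head; the rest of the model stays as in the code above):

```python
# Variant I tried: single sigmoid logit instead of a 2-way softmax
outputs = Dense(1, activation="sigmoid")(hidden_layer2)
model = Model(video, outputs)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.0001),
              metrics=['accuracy'])
# Labels then stay as plain 0/1 integers instead of going through to_categorical
```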
I’m out of ideas.