I am trying to train a hybrid model with tensorflow and mediapipe. I am using the mediapipe for face detections to get facial landmarks and the tensorflow to train a model with the input landmarks. The mediapipe is to return a shape of 68, 2 for the 68 facial landmarks. I am having an error that shows that the input dimension is different from the expected input shape of the model and the provided input shape during training. Specifically, the model expects inputs with shape (None, 68, 68, 2), but the provided inputs have shape (32, 68, 2).
The code is below…
def load_landmarks(file_path):
if isinstance(file_path, tf.Tensor):
file_path = file_path.numpy() # Convert tensor to numpy array
if isinstance(file_path, bytes):
file_path = file_path.decode('utf-8') # Decode bytes to string
landmarks = np.load(file_path)
if landmarks.shape != (68, 2):
landmarks = landmarks[:68, :2]
return tf.convert_to_tensor(landmarks, dtype=tf.float32)
positives = tf.data.Dataset.zip((anchor_landmark, positive_landmark, tf.data.Dataset.from_tensor_slices(tf.ones(len(anchor_landmark)))))
negatives = tf.data.Dataset.zip((anchor_landmark, negative_landmark, tf.data.Dataset.from_tensor_slices(tf.zeros(len(anchor_landmark)))))
data = positives.concatenate(negatives)
def preprocess(input_img, validation_img, label):
processed_input_img = load_landmarks(input_img)
processed_validation_img = load_landmarks(validation_img)
return (processed_input_img, processed_validation_img, label)
data = data.map(lambda x, y, z: tf.py_function(preprocess, [x, y, z], [tf.float32, tf.float32, tf.float32]))
data = data.map(lambda x, y, z: (tf.ensure_shape(x, [68, 68, 2]), tf.ensure_shape(y, [68, 68, 2]), tf.ensure_shape(z, ())))
data = data.cache().shuffle(buffer_size=10000)
train_data = data.take(round(len(data) * .8)).batch(32).prefetch(8)
test_data = data.skip(round(len(data) * .8))
test_data = test_data.take(round(len(data) * .2)).batch(32).prefetch(8)
def make_embedding():
inp = Input(shape=(68, 68, 2), name='Input_landmarks')
c1 = Conv2D(64, (10, 10), activation='relu')(inp)
m1 = MaxPooling2D(pool_size=(2, 2), padding='same')(c1)
c2 = Conv2D(128, (7, 7), activation='relu')(m1)
m2 = MaxPooling2D(pool_size=(2, 2), padding='same')(c2)
c3 = Conv2D(128, (4, 4), activation='relu')(m2)
m3 = MaxPooling2D(pool_size=(2, 2), padding='same')(c3)
c4 = Conv2D(256, (4, 4), activation='relu')(m3)
f1 = Flatten()(c4)
d1 = Dense(4096, activation='sigmoid')(f1)
return Model(inputs=[inp], outputs=[d1], name='Embedding')
class L1Dist(Layer):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def call(self, input_embedding, validation_embedding):
validation_embedding = tf.convert_to_tensor(validation_embedding)
input_embedding = tf.convert_to_tensor(input_embedding)
return tf.math.abs(input_embedding - validation_embedding)
embedding = make_embedding()
def make_siamese_model():
embedding = make_embedding()
input_landmarks = Input(name='Input_landmarks', shape=(68, 68, 2))
validation_landmarks = Input(name='Validation_landmarks', shape=(68, 68, 2))
input_embedding = embedding(input_landmarks)
validation_embedding = embedding(validation_landmarks)
siamese_layer = L1Dist()
siamese_layer._name = 'distance'
distances = siamese_layer(input_embedding, validation_embedding)
classifier = Dense(1, activation='sigmoid')(distances)
return Model(inputs=[input_landmarks, validation_landmarks], outputs=classifier, name='Siamese_Network')
siamese_model = make_siamese_model()
x = train_data.as_numpy_iterator().next()[:2]
yhat = siamese_model(x, training=True)
yhat = tf.squeeze(yhat, axis=-1)
yhat = siamese_model(x, training=True)
yhat = tf.squeeze(yhat)
y = train_data.as_numpy_iterator().next()[2]
y = tf.reshape(y, (-1, 1))
opt = tf.keras.optimizers.Adam(1e-4)
@tf.function
def train_step(batch, siamese_model, optimizer):
with tf.GradientTape() as tape:
comparison = batch[:2]
labels = batch[2]
predictions = siamese_model(comparison, training=True)
predictions = tf.squeeze(predictions)
loss = binary_cross_loss(labels, predictions)
gradients = tape.gradient(loss, siamese_model.trainable_variables)
optimizer.apply_gradients(zip(gradients, siamese_model.trainable_variables))
return loss, predictions
for idx, batch in enumerate(train_data):
loss, predictions = train_step(batch, siamese_model, opt)
print(idx, loss)
The Error is below…
2024-08-07 17:48:47.296379: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Input to reshape is a tensor with 136 values, but the requested shape has 9248
2024-08-07 17:48:47.297268: W tensorflow/core/framework/op_kernel.cc:1827] UNKNOWN: InvalidArgumentError: {{function_node __wrapped__Reshape_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input to reshape is a tensor with 136 values, but the requested shape has 9248 [Op:Reshape]
File "/tmp/ipykernel_29506/1322324465.py", line 8, in preprocess
input_landmarks_reshaped = tf.reshape(input_landmarks, (68, 68, 2))
ValueError: in user code:
File "/tmp/ipykernel_15710/3552378797.py", line 9, in train_step *
predictions = siamese_model(comparison, training=True)
File "/home/ugochukwu/.local/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler **
raise e.with_traceback(filtered_tb) from None
File "/home/ugochukwu/.local/lib/python3.10/site-packages/keras/src/layers/input_spec.py", line 245, in assert_input_compatibility
raise ValueError(
ValueError: Input 0 of layer "Siamese_Network" is incompatible with the layer: expected shape=(None, 68, 68, 2), found shape=(32, 68, 2)