I’m trying to implement a U-Net model to classify audio. After preprocessing, I’ve converted the data into a dataset of float values. These are the shapes of my X and y:
X_train shape: (3806, 2809)
X_test shape: (952, 2809)
y_train shape: (3806, 1)
y_test shape: (952, 1)
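(For context: 2809 = 53 × 53, so each flat feature vector gets reshaped into a 53 × 53 single-channel "image" before it goes into the network. The exact preprocessing code isn't shown here, so treat this as a rough sketch of that step:)

import numpy as np

# Sketch of the reshape implied by input_size=(53, 53, 1); 2809 = 53 * 53
X_train_img = X_train.reshape(-1, 53, 53, 1).astype(np.float32)
X_test_img = X_test.reshape(-1, 53, 53, 1).astype(np.float32)
print(X_train_img.shape)  # (3806, 53, 53, 1)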
I have tried padding and cropping, yet I still end up with the same issue, just with different dimensions. I have attached the code for my U-Net architecture; any advice would be appreciated. Thank you in advance.
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Dropout, ZeroPadding2D
def unet(input_size=(53, 53, 1), num_classes=2):
    inputs = Input(input_size)
    # Encoder
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    drop5 = Dropout(0.5)(conv5)
    # Decoder
    up6 = UpSampling2D(size=(2, 2))(drop5)
    conv6 = Conv2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
    # Padding
    pad_top = (conv4.shape[1] - conv6.shape[1]) // 2
    pad_bottom = conv4.shape[1] - conv6.shape[1] - pad_top
    pad_left = (conv4.shape[2] - conv6.shape[2]) // 2
    pad_right = conv4.shape[2] - conv6.shape[2] - pad_left
    padded_conv4 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv4)
    merge6 = Concatenate(axis=3)([padded_conv4, conv6])
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    up7 = UpSampling2D(size=(2, 2))(conv6)
    conv7 = Conv2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
    # Padding
    pad_top = (conv3.shape[1] - conv7.shape[1]) // 2
    pad_bottom = conv3.shape[1] - conv7.shape[1] - pad_top
    pad_left = (conv3.shape[2] - conv7.shape[2]) // 2
    pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
    padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
    merge7 = Concatenate(axis=3)([padded_conv3, conv7])
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    up8 = UpSampling2D(size=(2, 2))(conv7)
    conv8 = Conv2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
    # Padding
    pad_top = (conv2.shape[1] - conv8.shape[1]) // 2
    pad_bottom = conv2.shape[1] - conv8.shape[1] - pad_top
    pad_left = (conv2.shape[2] - conv8.shape[2]) // 2
    pad_right = conv2.shape[2] - conv8.shape[2] - pad_left
    padded_conv2 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv2)
    merge8 = Concatenate(axis=3)([padded_conv2, conv8])
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    up9 = UpSampling2D(size=(2, 2))(conv8)
    conv9 = Conv2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
    # Padding
    pad_top = (conv1.shape[1] - conv9.shape[1]) // 2
    pad_bottom = conv1.shape[1] - conv9.shape[1] - pad_top
    pad_left = (conv1.shape[2] - conv9.shape[2]) // 2
    pad_right = conv1.shape[2] - conv9.shape[2] - pad_left
    padded_conv1 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv1)
    merge9 = Concatenate(axis=3)([padded_conv1, conv9])
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Conv2D(num_classes, 1, activation='sigmoid')(conv9)  # Use 'sigmoid' for binary classification
    model = Model(inputs=inputs, outputs=conv10)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
Error:
ValueError Traceback (most recent call last)
Cell In[73], line 1
----> 1 model = unet(input_size=(53, 53, 1))
Cell In[72], line 55, in unet(input_size, num_classes)
52 pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
53 padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
---> 55 merge7 = Concatenate(axis=3)([padded_conv3, conv7])
56 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
57 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
File ~\anaconda3\lib\site-packages\keras\src\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\anaconda3\lib\site-packages\keras\src\layers\merging\concatenate.py:131, in Concatenate.build(self, input_shape)
125 unique_dims = set(
126 shape[axis]
127 for shape in shape_set
128 if shape[axis] is not None
129 )
130 if len(unique_dims) > 1:
--> 131 raise ValueError(err_msg)
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 14, 14, 256), (None, 12, 12, 256)]
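In case it helps, this is the spatial-size bookkeeping I get when I trace the shapes by hand for a 53 × 53 input (I may be mis-tracing it, but it seems to line up with the 14 vs. 12 in the error):

# Hand-trace of the spatial sizes for a 53x53 input.
# MaxPooling2D(pool_size=(2, 2)) floors odd sizes, UpSampling2D(size=(2, 2)) doubles them.
size = 53
encoder_sizes = [size]            # conv1 .. conv5
for _ in range(4):
    size //= 2
    encoder_sizes.append(size)
print(encoder_sizes)              # [53, 26, 13, 6, 3]

decoder_sizes = [encoder_sizes[-1] * 2 ** i for i in range(1, 5)]
print(decoder_sizes)              # [6, 12, 24, 48]

# At the second skip connection conv7 is 12x12 while conv3 is 13x13, and the
# ZeroPadding2D in my code pads conv3 (already the larger tensor) up to 14x14,
# which matches the (None, 14, 14, 256) vs (None, 12, 12, 256) in the traceback.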