I’ve tried to implement a U-Net model to classify audio, and after preprocessing, I’ve converted the data into a dataset of float values. These are the shapes of my X and Y
X_train shape: (3806, 2809)
X_test shape: (952, 2809)
y_train shape: (3806, 1)
y_test shape: (952, 1)
I have tried padding and cropping, yet I still end up with the same issue, just with different dimensions. I have attached the code to my U-net architecture, any advice would be appreciated, Thank you in advance.
<code>import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Dropout, ZeroPadding2D
def unet(input_size=(53, 53, 1), num_classes=2):
inputs = Input(input_size)
# Encoder
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
drop4 = Dropout(0.5)(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
drop5 = Dropout(0.5)(conv5)
# Decoder
up6 = UpSampling2D(size=(2, 2))(drop5)
conv6 = Conv2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
# Padding
pad_top = (conv4.shape[1] - conv6.shape[1]) // 2
pad_bottom = conv4.shape[1] - conv6.shape[1] - pad_top
pad_left = (conv4.shape[2] - conv6.shape[2]) // 2
pad_right = conv4.shape[2] - conv6.shape[2] - pad_left
padded_conv4 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv4)
merge6 = Concatenate(axis=3)([padded_conv4, conv6])
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
up7 = UpSampling2D(size=(2, 2))(conv6)
conv7 = Conv2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
# Padding
pad_top = (conv3.shape[1] - conv7.shape[1]) // 2
pad_bottom = conv3.shape[1] - conv7.shape[1] - pad_top
pad_left = (conv3.shape[2] - conv7.shape[2]) // 2
pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
merge7 = Concatenate(axis=3)([padded_conv3, conv7])
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
up8 = UpSampling2D(size=(2, 2))(conv7)
conv8 = Conv2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
# Padding
pad_top = (conv2.shape[1] - conv8.shape[1]) // 2
pad_bottom = conv2.shape[1] - conv8.shape[1] - pad_top
pad_left = (conv2.shape[2] - conv8.shape[2]) // 2
pad_right = conv2.shape[2] - conv8.shape[2] - pad_left
padded_conv2 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv2)
merge8 = Concatenate(axis=3)([padded_conv2, conv8])
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
up9 = UpSampling2D(size=(2, 2))(conv8)
conv9 = Conv2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
# Padding
pad_top = (conv1.shape[1] - conv9.shape[1]) // 2
pad_bottom = conv1.shape[1] - conv9.shape[1] - pad_top
pad_left = (conv1.shape[2] - conv9.shape[2]) // 2
pad_right = conv1.shape[2] - conv9.shape[2] - pad_left
padded_conv1 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv1)
merge9 = Concatenate(axis=3)([padded_conv1, conv9])
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
conv10 = Conv2D(num_classes, 1, activation='sigmoid')(conv9) # Use 'sigmoid' for binary classification
model = Model(inputs=inputs, outputs=conv10)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
return model
</code>
<code>import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Dropout, ZeroPadding2D
def unet(input_size=(53, 53, 1), num_classes=2):
inputs = Input(input_size)
# Encoder
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
drop4 = Dropout(0.5)(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
drop5 = Dropout(0.5)(conv5)
# Decoder
up6 = UpSampling2D(size=(2, 2))(drop5)
conv6 = Conv2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
# Padding
pad_top = (conv4.shape[1] - conv6.shape[1]) // 2
pad_bottom = conv4.shape[1] - conv6.shape[1] - pad_top
pad_left = (conv4.shape[2] - conv6.shape[2]) // 2
pad_right = conv4.shape[2] - conv6.shape[2] - pad_left
padded_conv4 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv4)
merge6 = Concatenate(axis=3)([padded_conv4, conv6])
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
up7 = UpSampling2D(size=(2, 2))(conv6)
conv7 = Conv2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
# Padding
pad_top = (conv3.shape[1] - conv7.shape[1]) // 2
pad_bottom = conv3.shape[1] - conv7.shape[1] - pad_top
pad_left = (conv3.shape[2] - conv7.shape[2]) // 2
pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
merge7 = Concatenate(axis=3)([padded_conv3, conv7])
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
up8 = UpSampling2D(size=(2, 2))(conv7)
conv8 = Conv2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
# Padding
pad_top = (conv2.shape[1] - conv8.shape[1]) // 2
pad_bottom = conv2.shape[1] - conv8.shape[1] - pad_top
pad_left = (conv2.shape[2] - conv8.shape[2]) // 2
pad_right = conv2.shape[2] - conv8.shape[2] - pad_left
padded_conv2 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv2)
merge8 = Concatenate(axis=3)([padded_conv2, conv8])
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
up9 = UpSampling2D(size=(2, 2))(conv8)
conv9 = Conv2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
# Padding
pad_top = (conv1.shape[1] - conv9.shape[1]) // 2
pad_bottom = conv1.shape[1] - conv9.shape[1] - pad_top
pad_left = (conv1.shape[2] - conv9.shape[2]) // 2
pad_right = conv1.shape[2] - conv9.shape[2] - pad_left
padded_conv1 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv1)
merge9 = Concatenate(axis=3)([padded_conv1, conv9])
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
conv10 = Conv2D(num_classes, 1, activation='sigmoid')(conv9) # Use 'sigmoid' for binary classification
model = Model(inputs=inputs, outputs=conv10)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
return model
</code>
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Dropout, ZeroPadding2D
def unet(input_size=(53, 53, 1), num_classes=2):
inputs = Input(input_size)
# Encoder
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
drop4 = Dropout(0.5)(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
drop5 = Dropout(0.5)(conv5)
# Decoder
up6 = UpSampling2D(size=(2, 2))(drop5)
conv6 = Conv2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
# Padding
pad_top = (conv4.shape[1] - conv6.shape[1]) // 2
pad_bottom = conv4.shape[1] - conv6.shape[1] - pad_top
pad_left = (conv4.shape[2] - conv6.shape[2]) // 2
pad_right = conv4.shape[2] - conv6.shape[2] - pad_left
padded_conv4 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv4)
merge6 = Concatenate(axis=3)([padded_conv4, conv6])
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
up7 = UpSampling2D(size=(2, 2))(conv6)
conv7 = Conv2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
# Padding
pad_top = (conv3.shape[1] - conv7.shape[1]) // 2
pad_bottom = conv3.shape[1] - conv7.shape[1] - pad_top
pad_left = (conv3.shape[2] - conv7.shape[2]) // 2
pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
merge7 = Concatenate(axis=3)([padded_conv3, conv7])
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
up8 = UpSampling2D(size=(2, 2))(conv7)
conv8 = Conv2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
# Padding
pad_top = (conv2.shape[1] - conv8.shape[1]) // 2
pad_bottom = conv2.shape[1] - conv8.shape[1] - pad_top
pad_left = (conv2.shape[2] - conv8.shape[2]) // 2
pad_right = conv2.shape[2] - conv8.shape[2] - pad_left
padded_conv2 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv2)
merge8 = Concatenate(axis=3)([padded_conv2, conv8])
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
up9 = UpSampling2D(size=(2, 2))(conv8)
conv9 = Conv2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
# Padding
pad_top = (conv1.shape[1] - conv9.shape[1]) // 2
pad_bottom = conv1.shape[1] - conv9.shape[1] - pad_top
pad_left = (conv1.shape[2] - conv9.shape[2]) // 2
pad_right = conv1.shape[2] - conv9.shape[2] - pad_left
padded_conv1 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv1)
merge9 = Concatenate(axis=3)([padded_conv1, conv9])
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
conv10 = Conv2D(num_classes, 1, activation='sigmoid')(conv9) # Use 'sigmoid' for binary classification
model = Model(inputs=inputs, outputs=conv10)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
return model
Error:
<code>ValueError Traceback (most recent call last)
Cell In[73], line 1
----> 1 model = unet(input_size=(53, 53, 1))
Cell In[72], line 55, in unet(input_size, num_classes)
52 pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
53 padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
---> 55 merge7 = Concatenate(axis=3)([padded_conv3, conv7])
56 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
57 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
File ~anaconda3libsite-packageskerassrcutilstraceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~anaconda3libsite-packageskerassrclayersmergingconcatenate.py:131, in Concatenate.build(self, input_shape)
125 unique_dims = set(
126 shape[axis]
127 for shape in shape_set
128 if shape[axis] is not None
129 )
130 if len(unique_dims) > 1:
--> 131 raise ValueError(err_msg)
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 14, 14, 256), (None, 12, 12, 256)]
</code>
<code>ValueError Traceback (most recent call last)
Cell In[73], line 1
----> 1 model = unet(input_size=(53, 53, 1))
Cell In[72], line 55, in unet(input_size, num_classes)
52 pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
53 padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
---> 55 merge7 = Concatenate(axis=3)([padded_conv3, conv7])
56 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
57 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
File ~anaconda3libsite-packageskerassrcutilstraceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~anaconda3libsite-packageskerassrclayersmergingconcatenate.py:131, in Concatenate.build(self, input_shape)
125 unique_dims = set(
126 shape[axis]
127 for shape in shape_set
128 if shape[axis] is not None
129 )
130 if len(unique_dims) > 1:
--> 131 raise ValueError(err_msg)
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 14, 14, 256), (None, 12, 12, 256)]
</code>
ValueError Traceback (most recent call last)
Cell In[73], line 1
----> 1 model = unet(input_size=(53, 53, 1))
Cell In[72], line 55, in unet(input_size, num_classes)
52 pad_right = conv3.shape[2] - conv7.shape[2] - pad_left
53 padded_conv3 = ZeroPadding2D(padding=((pad_top, pad_bottom), (pad_left, pad_right)))(conv3)
---> 55 merge7 = Concatenate(axis=3)([padded_conv3, conv7])
56 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
57 conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
File ~anaconda3libsite-packageskerassrcutilstraceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~anaconda3libsite-packageskerassrclayersmergingconcatenate.py:131, in Concatenate.build(self, input_shape)
125 unique_dims = set(
126 shape[axis]
127 for shape in shape_set
128 if shape[axis] is not None
129 )
130 if len(unique_dims) > 1:
--> 131 raise ValueError(err_msg)
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 14, 14, 256), (None, 12, 12, 256)]