I am trying to follow a guide on Kaggle about lung xrays classification here it is
I am stuck with the author’s transfer learning:
This is an old guide that uses VGG16 and I am using VGG19, but I don’t believe that is the issue.
The author transfers weights from the Conv2D
layer into the SeparableConv2D layer in his custom-made model.
I have absolutely no idea how is he doing that.
He is transferring 4 layers – first two I don’t have an issue with this because the type of layer is the same which is Conv2D
and I use this linecustom_model.get_layer('Conv1_1').set_weights(VGG_conv1_weights)
to transfer the first two layers, I believe it works because at the end I check whether the arrays pre-transfer and post-transfer are the same
When I try the same method for 4 and 5th layers I get an error that roughly states that the set_weights
expected 3 values and got 2 which is reasonable, so I tried his approach, I downloaded the weights file he included in his notebook and did the same thing as him, then I checked whether the weights actually changed but they did not.
I am completely dumbfounded because now I don’t know if I am misunderstanding or doing something wrong or if he made a mistake.
I am completely new to the topic of CNN or any machine learning so I might just lack information
Here is the code I am tinkering with:
import h5py
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Input, Conv2D, SeparableConv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
import numpy as np
'''
vgg19_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
vgg_weights = {
'block1_conv1': vgg19_model.get_layer('block1_conv1').get_weights(),
'block1_conv2': vgg19_model.get_layer('block1_conv2').get_weights(),
'block2_conv1': vgg19_model.get_layer('block2_conv1').get_weights(),
'block2_conv2': vgg19_model.get_layer('block2_conv2').get_weights()
}
'''
file_path = r'pathvgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
f = h5py.File(file_path, 'r')
'''
def print_structure(name, obj):
if isinstance(obj, h5py.Group):
print(f"Group: {name}")
elif isinstance(obj, h5py.Dataset):
print(f" Dataset: {name}")
#f.visititems(print_structure)
'''
def build_model():
input_img = Input(shape=(224,224,3), name='ImageInput')
x = Conv2D(64, (3,3), activation='relu', padding='same', name='Conv1_1')(input_img)
x = Conv2D(64, (3,3), activation='relu', padding='same', name='Conv1_2')(x)
x = MaxPooling2D((2,2), name='pool1')(x)
x = SeparableConv2D(128, (3,3), activation='relu', padding='same', name='Conv2_1')(x)
x = SeparableConv2D(128, (3,3), activation='relu', padding='same', name='Conv2_2')(x)
x = MaxPooling2D((2,2), name='pool2')(x)
x = SeparableConv2D(256, (3,3), activation='relu', padding='same', name='Conv3_1')(x)
x = BatchNormalization(name='bn1')(x)
x = SeparableConv2D(256, (3,3), activation='relu', padding='same', name='Conv3_2')(x)
x = BatchNormalization(name='bn2')(x)
x = SeparableConv2D(256, (3,3), activation='relu', padding='same', name='Conv3_3')(x)
x = MaxPooling2D((2,2), name='pool3')(x)
x = SeparableConv2D(512, (3,3), activation='relu', padding='same', name='Conv4_1')(x)
x = BatchNormalization(name='bn3')(x)
x = SeparableConv2D(512, (3,3), activation='relu', padding='same', name='Conv4_2')(x)
x = BatchNormalization(name='bn4')(x)
x = SeparableConv2D(512, (3,3), activation='relu', padding='same', name='Conv4_3')(x)
x = MaxPooling2D((2,2), name='pool4')(x)
x = Flatten(name='flatten')(x)
x = Dense(1024, activation='relu', name='fc1')(x)
x = Dropout(0.7, name='dropout1')(x)
x = Dense(512, activation='relu', name='fc2')(x)
x = Dropout(0.5, name='dropout2')(x)
x = Dense(2, activation='softmax', name='fc3')(x)
model = Model(inputs=input_img, outputs=x)
return model
custom_model = build_model()
#custom_model.summary()
initial_array_Conv1_1 = custom_model.get_layer('Conv1_1').get_weights()
initial_array_Conv1_2 = custom_model.get_layer('Conv1_2').get_weights()
initial_array_Conv2_1 = custom_model.get_layer('Conv2_1').get_weights()
initial_array_Conv2_2 = custom_model.get_layer('Conv2_2').get_weights()
w,b = f['block1_conv1']['block1_conv1_W_1:0'], f['block1_conv1']['block1_conv1_b_1:0']
VGG_conv1_weights = [w,b]
#custom_model.layers[1].set_weights = [w,b]
custom_model.get_layer('Conv1_1').set_weights(VGG_conv1_weights)
w,b = f['block1_conv2']['block1_conv2_W_1:0'], f['block1_conv2']['block1_conv2_b_1:0']
VGG_conv2_weights = [w,b]
#custom_model.layers[2].set_weights = [w,b]
custom_model.get_layer('Conv1_2').set_weights(VGG_conv2_weights)
w,b = f['block2_conv1']['block2_conv1_W_1:0'], f['block2_conv1']['block2_conv1_b_1:0']
custom_model.layers[4].set_weights = [w,b]
w,b = f['block2_conv2']['block2_conv2_W_1:0'], f['block2_conv2']['block2_conv2_b_1:0']
custom_model.layers[5].set_weights = [w,b]
f.close()
custom_model.summary()
updated_array_Conv1_1 = custom_model.get_layer('Conv1_1').get_weights()
updated_array_Conv1_2 = custom_model.get_layer('Conv1_2').get_weights()
updated_array_Conv2_1 = custom_model.get_layer('Conv2_1').get_weights()
updated_array_Conv2_2 = custom_model.get_layer('Conv2_2').get_weights()
print(np.array_equal(initial_array_Conv1_1[0], updated_array_Conv1_1[0]))
print(np.array_equal(initial_array_Conv1_2[0], updated_array_Conv1_2[0]))
print(np.array_equal(initial_array_Conv2_1[0], updated_array_Conv2_1[0]))
print(np.array_equal(initial_array_Conv2_2[0], updated_array_Conv2_2[0]))
print("NEXT")
print(np.array_equal(initial_array_Conv1_1[1], updated_array_Conv1_1[1]))
print(np.array_equal(initial_array_Conv1_2[1], updated_array_Conv1_2[1]))
print(np.array_equal(initial_array_Conv2_1[1], updated_array_Conv2_1[1]))
print(np.array_equal(initial_array_Conv2_2[1], updated_array_Conv2_2[1]))
'''
print("n")
print("shapes for SeparableConv2D 2_1: n")
print(f" 0th element shape: {initial_array_Conv2_1[0].shape}")
print(f" 1st element shape: {initial_array_Conv2_1[1].shape}")
print(f" 2nd element shape: {initial_array_Conv2_1[2].shape}n")
w,b = f['block2_conv1']['block2_conv1_W_1:0'], f['block2_conv1']['block2_conv1_b_1:0']
custom_model.layers[4].set_weights = ([w,b])
initial_array_Conv2_1 = custom_model.get_layer('Conv2_1').get_weights()
print("Printing shapes of block2_conv1 n")
print(f" 0th element shape: {w.shape}")
print(f" 1st element shape: {b.shape}")
print(np.array_equal(w, initial_array_Conv2_1[2]))
'''
I left the comments in the code to maybe save someone some work and also included the VGG19 model if somebody would try to use it instead.
I already check a lot of stuff like tensor shapes, the structural differences between VGG16 vs VGG19, because i thought I just put them in the wrong places and this is the reason they don’t fit, I also learned that this custom_model.layers[4].set_weights = [w,b]
is probably wrong because set weights accepts parameters in brackets ()
not after the =