I was replicating a paper that has a complex model built in PyTorch, and I wanted it in TensorFlow for ease of use (I have been working with TensorFlow for a while and it feels more user-friendly to me, please don't punch me). The subclassed model trains fine, but I have run into problems with saving and loading it, and with the different save formats (.h5, etc.). I would also like to quantize it later on, either with QAT or with post-training quantization, and that seems easier to do with a functional model, without all the errors I have been getting.
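To be concrete about the saving part, the kinds of calls I mean look like this (a minimal sketch, not my exact code; `model` is the subclassed model shown below):

# Saving in the TF SavedModel format (the default in TF 2.x) generally works
# for subclassed models, since it serializes the traced computation graph.
model.save("my_model_savedmodel")

# Whole-model saving to HDF5 is where subclassed models typically fail:
# .h5 needs a serializable architecture (get_config() on the model/layers).
model.save("my_model.h5")

# Weights-only saving in the TF checkpoint format works, but I then have to
# rebuild the model in code before calling load_weights().
model.save_weights("my_checkpoint/my_weights")
model.load_weights("my_checkpoint/my_weights")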
Here is my subclassed model code. Bear in mind I am using fully custom-built layers that consist of call() and __init__() functions; I could provide the layers if needed:
import tensorflow as tf
from tensorflow.keras.activations import softmax

class MyModel(tf.keras.Model):
    def __init__(self, num_classes, embed_dim):  # note: embed_dim is currently unused below
        super().__init__()
        # Instantiate all the layers here
        self.stemApply = StemModule()
        self.convFirst = ConvBlock(outplanes=64, res_conv=True, stride=1)
        self.patching = PatchEmbedding()
        self.trans = TransBlock()
        self.convTrans = ConvTransBlock(inPlain=16, outPlain=64)
        self.convTrans2 = ConvTransBlock(inPlain=16, outPlain=64)
        self.convTrans3 = ConvTransBlock(inPlain=16, outPlain=64)
        self.convTransDown1 = ConvTransBlockDown(inPlain=32, outPlain=128, downSample=2)
        self.convTransDown2 = ConvTransBlockDown(inPlain=32, outPlain=128)
        self.convTransDown3 = ConvTransBlockDown(inPlain=32, outPlain=128)
        self.convTransDown4 = ConvTransBlockDown(inPlain=32, outPlain=128)
        self.convTransDown5 = ConvTransBlockDown(inPlain=64, outPlain=256, downSample=2, dwStride=1)
        self.convTransDown6 = ConvTransBlockDown(inPlain=64, outPlain=256, dwStride=1)
        self.convTransDown7 = ConvTransBlockDown(inPlain=64, outPlain=256, dwStride=1)
        self.Last = ConvTransBlockDown(inPlain=64, outPlain=256, downSample=1, dwStride=1, last_fusion=True)
        # Classifier head
        self.trans_norm = tf.keras.layers.LayerNormalization()
        self.trans_cls_head = tf.keras.layers.Dense(num_classes) if num_classes > 0 else tf.keras.layers.Activation('linear')
        self.pooling = tf.keras.layers.GlobalAveragePooling2D()
        self.conv_cls_head = tf.keras.layers.Dense(num_classes)
    def call(self, inputs):
        # Define the forward pass logic
        x = self.stemApply(inputs)
        convedPhoto = self.convFirst(x, x_t=None, return_x_2=False)
        patchEmbed = self.patching(x)
        transed, _ = self.trans(patchEmbed)
        convo, transor = self.convTrans(convedPhoto, transed)
        convo2, transor2 = self.convTrans2(convo, transor)
        convo3, transor3 = self.convTrans3(convo2, transor2)
        convo4, transor4 = self.convTransDown1(convo3, transor3)
        convo5, transor5 = self.convTransDown2(convo4, transor4)
        convo6, transor6 = self.convTransDown3(convo5, transor5)
        convo7, transor7 = self.convTransDown4(convo6, transor6)
        convo8, transor8 = self.convTransDown5(convo7, transor7)
        convo9, transor9 = self.convTransDown6(convo8, transor8)
        convo10, transor10 = self.convTransDown7(convo9, transor9)
        convo11, transor11 = self.Last(convo10, transor10)
        # Transformer-based classification (CLS token)
        x_t = self.trans_norm(transor11)
        output_1 = softmax(self.trans_cls_head(x_t[:, 0]), axis=-1)  # Apply softmax activation
        # CNN-based classification (global average pooling)
        x_p = self.pooling(convo11)
        output_2 = softmax(self.conv_cls_head(x_p), axis=-1)  # Apply softmax activation
        return output_1, output_2
# Instantiate and build the model
model = MyModel(embed_dim=384, num_classes=6)
model.build(input_shape=(None, 224, 224, 3))
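On the quantization side, the flow I am aiming for is one of the two standard routes; roughly (a sketch, assuming the model traces and converts cleanly):

import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Route 1: post-training quantization through TFLite.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # dynamic-range quantization
tflite_model = converter.convert()

# Route 2: quantization-aware training. Note that quantize_model() only
# supports Sequential and functional models, not subclassed ones, which is
# a big part of why I want a functional version.
qat_model = tfmot.quantization.keras.quantize_model(model)  # fails for a subclassed model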
So if a helpful fellow could help me with my struggle, it would be much appreciated.
I tried turning it into functional code using the same custom layers, but several problems and errors arose from it. I would show you that code, but silly me forgot to save it from Colab before resetting the runtime.
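I can't reproduce that attempt exactly, but the general pattern I was following is the standard functional conversion, something like this (a rough sketch, not the lost code; it assumes every custom layer is a proper tf.keras.layers.Layer so it can be called on symbolic tensors):

import tensorflow as tf

num_classes = 6

inputs = tf.keras.Input(shape=(224, 224, 3))
x = StemModule()(inputs)
conved = ConvBlock(outplanes=64, res_conv=True, stride=1)(x, x_t=None, return_x_2=False)
patches = PatchEmbedding()(x)
transed, _ = TransBlock()(patches)
c, t = ConvTransBlock(inPlain=16, outPlain=64)(conved, transed)
# ... thread (c, t) through the remaining ConvTransBlock / ConvTransBlockDown
# stages exactly as in call() above ...
x_t = tf.keras.layers.LayerNormalization()(t)
out_1 = tf.keras.layers.Dense(num_classes, activation='softmax')(x_t[:, 0])
x_p = tf.keras.layers.GlobalAveragePooling2D()(c)
out_2 = tf.keras.layers.Dense(num_classes, activation='softmax')(x_p)

functional_model = tf.keras.Model(inputs=inputs, outputs=[out_1, out_2])

If that pattern is right, then .h5 saving and QAT should both become reachable, though I gather the custom layers may still need extra work (get_config() for .h5 serialization, quantize annotations for tfmot).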