I am facing an issue performing inference with a pb model that was converted from an ONNX model (the end goal is to convert a PyTorch model to a TFLite model). My original PyTorch model was converted to an ONNX model, which runs inference without any problem. The pb model converted from that ONNX model raises an LSTM-related error during inference. I ran the following inference code:
def load_input_image(image_path='YRBybNcARj_1717501021400.jpg', imgh=120, imgw=360):
    """Load an image from disk and prepare it as a single-item model input.

    The image is read with OpenCV, converted to grayscale, scaled with
    ``pixel / 127.5 - 1.0``, resized to ``imgw`` x ``imgh`` with bicubic
    interpolation, and given two leading singleton axes.

    Args:
        image_path: Path of the image file to read.
        imgh: Target height in pixels.
        imgw: Target width in pixels.

    Returns:
        A float32 NumPy array of shape ``(1, 1, imgh, imgw)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    orig_image = cv2.imread(image_path)
    if orig_image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deep inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2GRAY)
    # Scale first, then resize: the order is kept as-is because swapping it
    # would change the interpolated values fed to the model.
    image = image / 127.5 - 1.0
    image = cv2.resize(image, (imgw, imgh), interpolation=cv2.INTER_CUBIC)
    # (imgh, imgw) -> (1, 1, imgh, imgw)
    image = image[np.newaxis, np.newaxis, ...]
    return image.astype(np.float32)
# Prepare the float32 input that is fed to the serving signature below.
image = load_input_image()

# Load the exported SavedModel and look up its serving signature.
loaded = tf.saved_model.load(f"{repo_dir}/trial.pb")
print(list(loaded.signatures.keys()))  # ["serving_default"]
infer = loaded.signatures['serving_default']
print("output description: ", infer.structured_outputs)
# output description: {'output': TensorSpec(shape=(1, 89, 12), dtype=tf.float32, name='output')}

# Run the signature on the prepared image; this is the call that fails.
infer(tf.constant(image))
#THIS IS THE ERROR:
Detected at node 'LSTM_d2b678f9/bidirectional_rnn/fw/fw/while/fw/multi_rnn_cell/cell_0/lstm_cell/BiasAdd' defined at (most recent call last):
Node: 'LSTM_d2b678f9/bidirectional_rnn/fw/fw/while/fw/multi_rnn_cell/cell_0/lstm_cell/BiasAdd'
Matrix size-incompatible: In[0]: [1,512], In[1]: [768,1024]
[[{{node LSTM_d2b678f9/bidirectional_rnn/fw/fw/while/fw/multi_rnn_cell/cell_0/lstm_cell/BiasAdd}}]] [Op:__inference_signature_wrapper_8182]
My PyTorch model definition is shown below.
# Option values read by Model.__init__ when it builds the network.
# Number of input channels; passed to the feature extractor as its first argument.
opt.input_channel = 1
# Passed to both BidirectionalLSTM layers and used as the input size of the
# prediction layer when the BiLSTM stage is enabled.
opt.hidden_size = 256
# Passed to the feature extractor as its second argument and used as the
# input size of the first BidirectionalLSTM.
opt.output_channel = 512
class Model(nn.Module):
    """Staged model: feature extraction, sequence modeling and prediction.

    The sub-modules are selected by string options on ``opt``. A
    transformation module may be constructed as well, but ``forward`` does
    not apply it.
    """

    def __init__(self, opt):
        """Build the sub-modules selected by ``opt``.

        Args:
            opt: Options object. Reads ``Transformation``,
                ``FeatureExtraction``, ``SequenceModeling``, ``Prediction``,
                ``input_channel``, ``output_channel``, ``hidden_size`` and
                ``num_class``; when ``Transformation == 'TPS'`` it also reads
                ``num_fiducial``, ``imgH`` and ``imgW``.

        Raises:
            ValueError: If ``opt.FeatureExtraction`` or ``opt.Prediction`` is
                not one of the supported names.
        """
        super().__init__()
        self.opt = opt
        # Record the stages that were actually configured so forward() never
        # takes a branch whose module was not built. 'Trans' stays None
        # because forward() does not apply the transformation stage, even
        # when a TPS module is constructed below.
        self.stages = {'Trans': None, 'Feat': opt.FeatureExtraction,
                       'Seq': opt.SequenceModeling, 'Pred': opt.Prediction}

        # --- Transformation ---
        if opt.Transformation == 'TPS':
            self.Transformation = TPS_SpatialTransformerNetwork(
                F=opt.num_fiducial, I_size=(opt.imgH, opt.imgW),
                I_r_size=(opt.imgH, opt.imgW), I_channel_num=opt.input_channel)
        else:
            print('No Transformation module specified')

        # --- Feature extraction ---
        if opt.FeatureExtraction == 'VGG':
            self.FeatureExtraction = VGG_FeatureExtractor(opt.input_channel, opt.output_channel)
        elif opt.FeatureExtraction == 'RCNN':
            self.FeatureExtraction = RCNN_FeatureExtractor(opt.input_channel, opt.output_channel)
        elif opt.FeatureExtraction == 'ResNet':
            self.FeatureExtraction = ResNet_FeatureExtractor(opt.input_channel, opt.output_channel)
        else:
            raise ValueError('No FeatureExtraction module specified')
        self.FeatureExtraction_output = opt.output_channel
        # Not called by forward() (which uses torch.mean instead); kept so the
        # attribute remains available to existing code.
        self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))

        # --- Sequence modeling ---
        if opt.SequenceModeling == 'BiLSTM':
            self.SequenceModeling = nn.Sequential(
                BidirectionalLSTM(self.FeatureExtraction_output, opt.hidden_size, opt.hidden_size),
                BidirectionalLSTM(opt.hidden_size, opt.hidden_size, opt.hidden_size))
            self.SequenceModeling_output = opt.hidden_size
        else:
            print('No SequenceModeling module specified')
            self.SequenceModeling_output = self.FeatureExtraction_output

        # --- Prediction ---
        if opt.Prediction == 'CTC':
            self.Prediction = nn.Linear(self.SequenceModeling_output, opt.num_class)
        elif opt.Prediction == 'Attn':
            self.Prediction = Attention(self.SequenceModeling_output, opt.hidden_size, opt.num_class)
        else:
            raise ValueError('Prediction is neither CTC or Attn')

    def forward(self, input, is_train=False):
        """Run feature extraction, sequence modeling and CTC prediction.

        Args:
            input: Image batch, ``[batch, channel, height, width]``; the
                scripts around this model use ``(1, 1, 120, 360)``.
            is_train: Not used by this method; kept for call compatibility.

        Returns:
            The detached output of the prediction layer. The shape comments
            below follow a ``(1, 1, 120, 360)`` input with the VGG extractor.

        Raises:
            NotImplementedError: If the configured prediction stage is not
                ``'CTC'``; this method takes no target text, so the attention
                head cannot be driven from here.
        """
        input = input.detach()

        # The transformation stage is deliberately not applied here.

        # --- Feature extraction ---
        visual_feature = self.FeatureExtraction(input)  # e.g. [1, 512, 6, 89]
        print("Shape after feature extraction: ", visual_feature.shape)

        # [b, c, h, w] -> [b, w, c, h], then average over h -> [b, w, c].
        # This matches AdaptiveAvgPool2d((None, 1)) followed by squeeze(3).
        visual_feature = torch.mean(visual_feature.permute(0, 3, 1, 2), dim=3)
        print("Shape after avg pooling: ", visual_feature.shape)  # e.g. [1, 89, 512]

        # --- Sequence modeling ---
        if self.stages['Seq'] == 'BiLSTM':
            contextual_feature = self.SequenceModeling(visual_feature)
        else:
            # Pass the visual features through unchanged (no BiLSTM context).
            contextual_feature = visual_feature

        # --- Prediction ---
        if self.stages['Pred'] != 'CTC':
            raise NotImplementedError(
                "forward() only supports the 'CTC' prediction stage")
        prediction = self.Prediction(contextual_feature.contiguous())
        return prediction.detach()
This is how I converted the PyTorch model to ONNX:
def export_onnx_model(destpath, imgh=120, imgw=360):
    """Export the module-level ``model`` to an ONNX file at ``destpath``.

    The model is switched to eval mode and exported with a random
    ``(1, 1, imgh, imgw)`` example input.

    Args:
        destpath: Where the ONNX file is written.
        imgh: Height of the example input.
        imgw: Width of the example input.
    """
    model.eval()
    export_options = dict(
        export_params=True,
        opset_version=11,          # the ONNX version to export the model to
        do_constant_folding=True,  # whether to execute constant folding
        input_names=['input'],     # the model's input names
        output_names=['output'],   # the model's output names
    )
    with torch.no_grad():
        dummy_input = torch.randn(1, 1, imgh, imgw).detach()
        torch.onnx.export(model, dummy_input, destpath, **export_options)
Finally, this is how I am converting the ONNX model to a pb model:
def convert_onnx_to_pb(onnx_model_path, dest_path):
    """Convert the ONNX model at ``onnx_model_path`` and export it to ``dest_path``.

    Args:
        onnx_model_path: Path of the ONNX file to load.
        dest_path: Destination passed to ``export_graph``.
    """
    tf_representation = prepare(onnx.load(onnx_model_path))
    print("Exporting to pb model")
    tf_representation.export_graph(dest_path)
I would appreciate any help in solving this.