I am facing the following error while trying to decode the output of CTC RNN handwritten character recognition model using word beam search decoding algorithm:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[114], line 47
44 if preds.shape[2] != num_classes:
45 raise ValueError(f"Prediction model output shape mismatch. Expected last dimension to be {num_classes}, but got {preds.shape[2]}.")
---> 47 pred_texts = decode_batch_predictions(preds)
49 for i in range(16):
50 img = batch_images[i]
Cell In[114], line 30, in decode_batch_predictions(mat)
27 raise ValueError(f"The number of characters (chars) plus 1 must equal dimension 2 of the input tensor (mat). Expected {num_classes}, but got {mat.shape[2]}.")
29 # Compute label string
---> 30 label_str = wbs.compute(mat)
31 char_str = [] # Decoded texts for batch
32 for curr_label_str in label_str:
ValueError: the number of characters (chars) plus 1 must equal dimension 2 of the input tensor (mat)
This following is my code snippets
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Step 2: Define the file path and read the file
file_path = 'C:/Users/mmm/Desktop/HTR/data/chars.txt'
# Read the file and join the characters
with open(file_path, 'r') as file:
chars = ''.join(file.read().split())
# Add CTC-blank character
ctc_blank = ' ' # or any other character to represent CTC-blank
chars += ctc_blank
num_classes = len(chars)+2
word_chars = open("C:/Users/mmm/Desktop/HTR/data/wordChars.txt").read().splitlines()[0]
corpus = open('C:/Users/mmm/Desktop/HTR/data/corpus.txt').read()
# Initialize WordBeamSearch
wbs = WordBeamSearch(25, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'))
# Utility function to decode the output of the network.
def decode_batch_predictions(mat):
# Verify that mat.shape[2] == num_classes
if mat.shape[2] != num_classes:
raise ValueError(f"The number of characters (chars) plus 1 must equal dimension 2 of the input tensor (mat). Expected {num_classes}, but got {mat.shape[2]}.")
# Compute label string
label_str = wbs.compute(mat)
char_str = [] # Decoded texts for batch
for curr_label_str in label_str:
s = ''.join([chars[label] for label in curr_label_str])
char_str.append(s)
return char_str
# Let's check results on some test samples.
for batch in test_ds.take(1):
batch_images = batch["image"]
_, ax = plt.subplots(4, 4, figsize=(15, 8))
preds = prediction_model.predict(batch_images)
# Ensure preds has the correct shape before decoding
if preds.shape[2] != num_classes:
raise ValueError(f"Prediction model output shape mismatch. Expected last dimension to be {num_classes}, but got {preds.shape[2]}.")
pred_texts = decode_batch_predictions(preds)
for i in range(16):
img = batch_images[i]
img = tf.image.flip_left_right(img)
img = tf.transpose(img, perm=[1, 0, 2])
img = (img * 255.0).numpy().clip(0, 255).astype(np.uint8)
img = img[:, :, 0]
title = f"Prediction: {pred_texts[i]}"
ax[i // 4, i % 4].imshow(img, cmap="gray")
ax[i // 4, i % 4].set_title(title)
ax[i // 4, i % 4].axis("off")
plt.show()
please I have no experience in object detection using deep learning I need your help
I tried to match the length of labels (characters) and model output because of this error by replacing num_classes = len(chars)
by num_classes = len(chars)+2
but this not fully solve the error this “ValueError: the number of characters (chars) plus 1 must equal dimension 2 of the input tensor (mat)” error persist
``---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[116], line 45
43 # Ensure preds has the correct shape before decoding
44 if preds.shape[2] != num_classes:
---> 45 raise ValueError(f"Prediction model output shape mismatch. Expected last dimension to be {num_classes}, but got {preds.shape[2]}.")
47 pred_texts = decode_batch_predictions(preds)
49 for i in range(16):
ValueError: Prediction model output shape mismatch. Expected last dimension to be 79, but got 81.``