I have been trying to train a model to extract handwritten text from images. For this I am using a hybrid model: a ResNet for feature extraction and an LSTM for handling the character sequences. However, I am running into an error that I have not been able to fix.
This is the error I am getting (screenshot):
Here is my full code:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Bidirectional, LSTM, Dropout, Reshape, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.sequence import pad_sequences
import cv2
import os
from sklearn.model_selection import train_test_split
# Step 1: Character Mapping
characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
char_to_num = {char: i for i, char in enumerate(characters)}
num_classes = len(characters) + 1 # Include blank label for CTC
max_text_length = 11 # Adjust based on your dataset
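# For illustration, assuming the mapping above: char_to_num['A'] == 0,
# char_to_num['a'] == 26, char_to_num['0'] == 52; index 62 (num_classes - 1)
# is left free for the CTC blank label, which Keras places last by convention.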
# Step 2: Image Preprocessing Function
def load_and_process_image(image_path, img_height=32, img_width=256):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (img_width, img_height))
    img = img.astype('float32') / 255.0
    img = np.expand_dims(img, axis=-1)  # Add channel dimension
    return img
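# For reference: the returned array has shape (img_height, img_width, 1),
# i.e. (32, 256, 1) with the defaults, and pixel values scaled to [0, 1].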
# Step 3: Convert Text to Sequence
def text_to_sequence(text, char_to_num):
    return [char_to_num[char] for char in text if char in char_to_num]
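# Example (with the mapping from Step 1): text_to_sequence("AB1", char_to_num)
# returns [0, 1, 53]; characters missing from the mapping are silently dropped.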
# Step 4: Load Dataset
def load_dataset(image_dir, label_dir, char_to_num, max_text_length, img_height=32, img_width=256):
    image_data = []
    label_data = []
    label_lengths = []
    for img_file in os.listdir(image_dir):
        if img_file.endswith(".png") or img_file.endswith(".jpg"):
            img_path = os.path.join(image_dir, img_file)
            image = load_and_process_image(img_path, img_height, img_width)
            image_data.append(image)
            label_file = img_file.replace(".png", ".txt").replace(".jpg", ".txt")
            label_path = os.path.join(label_dir, label_file)
            with open(label_path, 'r') as f:
                text = f.readline().strip()
            sequence = text_to_sequence(text, char_to_num)
            label_length = len(sequence)
            padded_sequence = pad_sequences([sequence], maxlen=max_text_length, padding='post')[0]
            label_data.append(padded_sequence)
            label_lengths.append(label_length)  # Store actual label length before padding
    image_data = np.array(image_data, dtype=np.float32)
    label_data = np.array(label_data, dtype=np.int32)
    label_lengths = np.array(label_lengths, dtype=np.int32)
    return image_data, label_data, label_lengths
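# Resulting shapes: image_data (N, 32, 256, 1), label_data (N, max_text_length),
# label_lengths (N,), where N is the number of matched image/label pairs.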
# Step 5: Build the Model
def build_resnet_feature_extractor(input_shape):
    base_model = ResNet50(include_top=False, input_shape=input_shape)
    return Model(inputs=base_model.input, outputs=base_model.output)
def build_bilstm_layer(feature_extractor_output, lstm_units=256):
    lstm = Bidirectional(LSTM(lstm_units, return_sequences=True))(feature_extractor_output)
    lstm = Dropout(0.25)(lstm)
    return lstm
def apply_attention_layer(lstm_output):
    attention = tf.keras.layers.Attention()([lstm_output, lstm_output])
    return attention
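# Note: tf.keras.layers.Attention with [lstm_output, lstm_output] is
# self-attention (query == value), so the output keeps lstm_output's shape.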
def ctc_loss_lambda_func(args):
    labels, y_pred, input_length, label_length = args
    labels = tf.cast(labels, dtype=tf.int32)
    input_length = tf.cast(input_length, dtype=tf.int32)
    label_length = tf.cast(label_length, dtype=tf.int32)
    # Ensure label_length is squeezed to the expected shape
    label_length = tf.squeeze(label_length, axis=-1)  # Squeeze to ensure it's 1D
    return tf.keras.backend.ctc_batch_cost(labels, y_pred, input_length, label_length)
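# Per the Keras docs, ctc_batch_cost expects labels of shape (batch, max_label_len),
# y_pred of shape (batch, time_steps, num_classes), and input_length / label_length
# as (batch, 1) tensors.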
def build_ctc_loss_model(inputs, outputs, max_text_length):
    labels = Input(shape=(max_text_length,), name='labels')
    input_length = Input(shape=(1,), name='input_length')
    label_length = Input(shape=(1,), name='label_length')
    ctc_loss = Lambda(ctc_loss_lambda_func, output_shape=(1,), name='ctc')([labels, outputs, input_length, label_length])
    return Model(inputs=[inputs, labels, input_length, label_length], outputs=ctc_loss)
def build_handwriting_ocr_model(input_shape, max_text_length, lstm_units=256):
    inputs = Input(shape=input_shape, name='image_input')
    # Concatenate the grayscale image to 3 channels
    concatenated = Concatenate()([inputs, inputs, inputs])
    # Feature extraction with ResNet50
    feature_extractor = build_resnet_feature_extractor((input_shape[0], input_shape[1], 3))  # update input to 3 channels
    features = feature_extractor(concatenated)
    # Reshape for LSTM input
    features = Reshape((-1, features.shape[-1]))(features)
    # BiLSTM + Dropout
    lstm_output = build_bilstm_layer(features, lstm_units=lstm_units)
    # Attention Layer
    attention_output = apply_attention_layer(lstm_output)
    # Output Layer
    outputs = Dense(num_classes, activation='softmax', name='output')(attention_output)
    # CTC Loss Model
    ctc_model = build_ctc_loss_model(inputs, outputs, max_text_length)
    return ctc_model
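# Shape walk-through (assuming a (32, 256, 1) input): ResNet50 has a total
# stride of 32, so its feature map is (1, 8, 2048); the Reshape then gives 8
# time steps of 2048 features, and the softmax output is (batch, 8, num_classes).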
# Step 6: Load Data and Split
image_dir = '/content/drive/MyDrive/ifsc_croped'
label_dir = '/content/drive/MyDrive/ifsc_cropped_txt'
train_images, train_labels, train_label_lengths = load_dataset(image_dir, label_dir, char_to_num, max_text_length)
train_images, val_images, train_labels, val_labels, train_label_lengths, val_label_lengths = train_test_split(
    train_images, train_labels, train_label_lengths, test_size=0.2, random_state=42
)
# Step 7: Padding Function
def pad_labels(label_sequences, max_text_length):
    return pad_sequences(label_sequences, maxlen=max_text_length, padding='post')
# Step 8: Data Generator
def data_generator(images, labels, label_lengths, batch_size, max_text_length):
    downsample_factor = 8  # Adjust based on your model
    time_steps = images.shape[2] // downsample_factor
    while True:
        for i in range(0, len(images), batch_size):
            batch_images = images[i:i + batch_size]
            batch_labels = labels[i:i + batch_size]
            batch_label_lengths = label_lengths[i:i + batch_size]
            input_lengths = np.ones((len(batch_images), 1), dtype=np.int32) * time_steps
            label_lengths_batch = np.array(batch_label_lengths, dtype=np.int32).reshape(-1, 1)  # Ensure shape is (batch_size, 1)
            inputs = {
                'image_input': batch_images,
                'labels': batch_labels,
                'input_length': input_lengths,
                'label_length': label_lengths_batch
            }
            outputs = np.zeros((len(batch_images), 1))  # Dummy outputs for CTC
            yield (inputs, outputs)
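# Each yielded batch maps the model's named inputs to arrays of shape
# (batch, 32, 256, 1), (batch, max_text_length), (batch, 1) and (batch, 1),
# plus a dummy all-zeros target for the CTC output.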
# Step 9: Compile and Train the Model
input_shape = (32, 256, 1) # Adjust according to your images
model = build_handwriting_ocr_model(input_shape, max_text_length)
model.compile(optimizer=tf.keras.optimizers.Adam())
# Define training parameters
batch_size = 32
epochs = 20
history = model.fit(
    data_generator(train_images, train_labels, train_label_lengths, batch_size, max_text_length),
    validation_data=data_generator(val_images, val_labels, val_label_lengths, batch_size, max_text_length),
    steps_per_epoch=len(train_images) // batch_size,
    validation_steps=len(val_images) // batch_size,
    epochs=epochs
)