Thiết kế website giá rẻ

Question

I need to separate each character from a label on a potion bottle. The tricky part is that the characters on the label are very close together, so vertical and horizontal projection do not give accurate results.
Initial Image

Below are my attempts and results

Preprocessing: before projection, I apply stronger image preprocessing to reduce noise, including:

Median filtering or bilateral filtering to smooth the image and reduce noise.

Use morphological operations to enhance the text area and remove small noise points.

Sobel edge detection or Canny edge detection can help highlight the edges of text.

Touching-character handling: use a distance transform and the watershed algorithm to separate characters that touch each other.

# Load the image
image_path = "./result/cropped_2.jpg"
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than later on.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert to grayscale, then
# 1. apply a 3x3 median filter to suppress noise.
gray = cv2.medianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 3)

# 2. Sobel gradients along x and y; their magnitude emphasises the edges
#    of the text strokes. The magnitude is converted to 8-bit and saved.
sobelx, sobely = (
    cv2.Sobel(gray, cv2.CV_64F, dx, dy, ksize=3) for dx, dy in ((1, 0), (0, 1))
)
sobel = cv2.convertScaleAbs(np.sqrt(np.square(sobelx) + np.square(sobely)))
cv2.imwrite('slobe.jpg', sobel)

# 3. Binarise the edge image with an Otsu-selected threshold.
binary = cv2.threshold(sobel, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# 4. Morphological closing (dilation followed by erosion) with a 3x3
#    rectangular kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

# 5. Distance transform of the binary image; pixels above 70% of the peak
#    distance are kept as "sure foreground" seeds for touching characters.
dist_transform = cv2.distanceTransform(morph, cv2.DIST_L2, 5)
seed_level = 0.7 * dist_transform.max()
_, sure_fg = cv2.threshold(dist_transform, seed_level, 255, cv2.THRESH_BINARY)

# 6. Watershed segmentation of touching characters.
sure_fg = sure_fg.astype(np.uint8)
# Foreground pixels that are not seeds are the region watershed must decide.
unknown = cv2.subtract(morph, sure_fg)

# Label the seeds; shift labels up by one so that 0 can be reserved for the
# unknown region.
markers = cv2.connectedComponents(sure_fg)[1] + 1
markers[unknown == 255] = 0

# Run watershed and paint the boundaries it marks with -1.
# Note that this draws directly on `image` (BGR order, so (255, 0, 0) is blue).
markers = cv2.watershed(image, markers)
boundary = markers == -1
image[boundary] = (255, 0, 0)

cv2.imwrite('watershed.jpg', image)

# 7. Horizontal projection: sum the binary image along each row. The
#    resulting profile is what the text-line detection below works on.
h_projection = morph.sum(axis=1)

# Plot the horizontal projection profile.
plt.figure(figsize=(10, 6))
plt.title("Horizontal Projection After Improvement")
plt.plot(h_projection)
plt.show()

# Split the image into text lines. A text line is a maximal run of rows whose
# horizontal projection exceeds 20% of the peak value; each entry of `lines`
# is a half-open row range (start, end).
lines = []
thresh = np.max(h_projection) * 0.2
in_line = False
for i, row_sum in enumerate(h_projection):
    if row_sum > thresh and not in_line:
        start = i
        in_line = True
    elif row_sum <= thresh and in_line:
        in_line = False
        lines.append((start, i))
# A run that is still open at the last row never sees a closing row, so close
# it explicitly; otherwise text touching the bottom edge is silently dropped.
if in_line:
    lines.append((start, len(h_projection)))


# For every detected text line, take the vertical projection (per-column sum)
# and draw a box around each column run that passes the width filter.
output_image = image.copy()
min_char_width = 10  # minimum width for a run to count as a character
max_char_width = 90  # runs this wide or wider are not boxed


def _record_run(chars, x_start, x_end):
    """Keep, merge, or discard the column run [x_start, x_end).

    Runs whose width lies in [min_char_width, max_char_width) are appended to
    *chars*. Narrower runs are merged into the previous entry by extending its
    right edge, which avoids splitting a character made of separate left and
    right parts.
    """
    char_width = x_end - x_start
    if min_char_width <= char_width < max_char_width:
        chars.append((x_start, x_end))
    elif char_width < min_char_width and chars:
        chars[-1] = (chars[-1][0], x_end)
    # NOTE(review): runs of max_char_width or more, and narrow runs that have
    # no previous entry to merge into, are still discarded as in the original
    # logic - confirm this is intended.


for (start, end) in lines:
    line_img = morph[start:end, :]
    v_projection = np.sum(line_img, axis=0)

    # A column belongs to a character when its sum exceeds 20% of the peak.
    thresh_v = np.max(v_projection) * 0.2
    in_char = False
    chars = []

    for j, col_sum in enumerate(v_projection):
        if col_sum > thresh_v and not in_char:
            x_start = j
            in_char = True
        elif col_sum <= thresh_v and in_char:
            in_char = False
            _record_run(chars, x_start, j)
    # A run still open at the right edge never sees a closing column; close it
    # here so a character touching the image border is not lost.
    if in_char:
        _record_run(chars, x_start, len(v_projection))

    # Draw the boxes for this text line.
    for (x_start, x_end) in chars:
        cv2.rectangle(output_image, (x_start, start), (x_end, end), (0, 255, 0), 2)

# Display the annotated image (converted from BGR to RGB for plotting) and
# then write the BGR version to disk.
rgb_output = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(10, 6))
plt.imshow(rgb_output)
plt.title("Character Segmentation with Improved Method")
plt.show()
cv2.imwrite('res3.jpg', output_image)

result

But the result is not ideal: in some cases a single character is split into several pieces.

I want to find a better way to separate the characters.

Thiết kế website giá rẻ

Danh mục

Split each character from multiline text