I need to segment each individual character from the label on a potion bottle. The tricky part is that the characters on the label are very close together, so vertical and horizontal projection alone do not give accurate results.
Initial Image
Below are my attempts and results
Preprocessing: Before projection, more powerful image preprocessing is performed to reduce noise, including:
Median filtering or bilateral filtering to smooth the image and reduce noise.
Use morphological operations to enhance the text area and remove small noise points.
Sobel edge detection or Canny edge detection can help highlight the edges of text.
Touching-character handling: use a distance transform and the watershed algorithm to split characters that are stuck together.
# The script uses cv2, numpy and matplotlib throughout; import them up front.
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Load the cropped label image.
image_path = "./result/cropped_2.jpg"
image = cv2.imread(image_path)
# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of a cryptic error inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 1. Median filter (3x3) to suppress noise.
gray = cv2.medianBlur(gray, 3)

# 2. Sobel gradient magnitude to emphasise character edges.
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel = np.sqrt(sobelx**2 + sobely**2)
sobel = cv2.convertScaleAbs(sobel)  # back to 8-bit for thresholding
cv2.imwrite('slobe.jpg', sobel)

# 3. Binarise the edge map with Otsu's automatically chosen threshold.
_, binary = cv2.threshold(sobel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# 4. Morphological closing (dilation followed by erosion) with a 3x3
#    rectangular kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

# 5. Distance transform; pixels at >= 70% of the maximum distance are taken
#    as "sure foreground" seeds for separating touching characters.
dist_transform = cv2.distanceTransform(morph, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(
    dist_transform, 0.7 * dist_transform.max(), 255, cv2.THRESH_BINARY
)

# 6. Watershed segmentation of touching characters.
sure_fg = np.uint8(sure_fg)
# Foreground pixels of `morph` that are not seeds are left undecided.
unknown = cv2.subtract(morph, sure_fg)

# Label the seeds. Shift every label up by one so that label 0 is free to
# mark the undecided region for watershed.
_, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0

# Run watershed; boundary pixels come back labelled -1 and are painted in
# BGR (255, 0, 0). NOTE: this modifies `image` in place.
markers = cv2.watershed(image, markers)
image[markers == -1] = [255, 0, 0]
cv2.imwrite('watershed.jpg', image)
# 7. Horizontal projection: sum each row of the binary image. Rows that
#    contain text produce large sums, blank rows produce small ones.
h_projection = np.sum(morph, axis=1)

# Plot the horizontal projection for inspection.
plt.figure(figsize=(10, 6))
plt.plot(h_projection)
plt.title("Horizontal Projection After Improvement")
plt.show()

# Split the image into text lines. A line is a maximal run of consecutive
# rows whose projection exceeds 20% of the peak; each entry of `lines` is a
# (start_row, end_row) pair with end_row exclusive.
lines = []
thresh = np.max(h_projection) * 0.2
in_line = False
start = 0
for i, row_sum in enumerate(h_projection):
    if row_sum > thresh and not in_line:
        start = i
        in_line = True
    elif row_sum <= thresh and in_line:
        in_line = False
        lines.append((start, i))
# A run still open after the last row means the text touches the bottom edge
# of the image; close it explicitly so that line is not silently lost.
if in_line:
    lines.append((start, len(h_projection)))
# For every detected text line, use a vertical projection (column sums) to
# split the line into characters and draw a box around each one.
output_image = image.copy()
min_char_width = 10  # minimum accepted character width, in pixels
max_char_width = 90  # segments this wide or wider are discarded

for (start, end) in lines:
    line_img = morph[start:end, :]
    v_projection = np.sum(line_img, axis=0)
    # Columns above 20% of this line's peak count as "ink".
    thresh_v = np.max(v_projection) * 0.2

    # Pass 1: collect raw runs of ink columns as (x_start, x_end) pairs,
    # with x_end exclusive.
    segments = []
    in_char = False
    x_start = 0
    for j, col_sum in enumerate(v_projection):
        if col_sum > thresh_v and not in_char:
            x_start = j
            in_char = True
        elif col_sum <= thresh_v and in_char:
            in_char = False
            segments.append((x_start, j))
    # A run still open at the last column touches the right edge of the
    # image; close it so that character is not silently dropped.
    if in_char:
        segments.append((x_start, len(v_projection)))

    # Pass 2: apply the width constraints.
    chars = []
    for x_start, x_end in segments:
        char_width = x_end - x_start
        # The lower bound is inclusive: a segment exactly min_char_width
        # wide is a valid character, not one that falls through both tests.
        if min_char_width <= char_width < max_char_width:
            chars.append((x_start, x_end))
        elif char_width < min_char_width:
            # Too narrow to be a character on its own (e.g. one half of a
            # left-right structured glyph): extend the previous box over it.
            if chars:
                chars[-1] = (chars[-1][0], x_end)

    # Draw the resulting character boxes for this line.
    for (x_start, x_end) in chars:
        cv2.rectangle(output_image, (x_start, start), (x_end, end), (0, 255, 0), 2)

# Show and save the segmentation result.
plt.figure(figsize=(10, 6))
plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
plt.title("Character Segmentation with Improved Method")
plt.show()
cv2.imwrite('res3.jpg', output_image)
result
But the result is not ideal: in some cases a single character is split into several pieces.
I would like to find a better way to separate the characters.
DD_DD is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.