I want to extract data from the table in this image. I am using cv2 and
pytesseract, but I don't get reliable results. Here are my code and my image.
enter image description here
import cv2
import pytesseract
from PIL import Image
def preprocess_image(image_path, *, invert=True):
    """Load an image from disk and binarize it for OCR.

    The image is read as grayscale, thresholded with Otsu's method and,
    by default, color-inverted before being wrapped in a PIL image.

    Args:
        image_path: Path of the image file to load.
        invert: When True (the default, matching the previous behavior),
            the binarized image is inverted with ``cv2.bitwise_not``.
            Pass False to keep the thresholded image as-is.

    Returns:
        A PIL image built from the binarized (and optionally inverted) array.

    Raises:
        FileNotFoundError: If OpenCV could not read the image (missing
            file, unreadable path, or unsupported format).
    """
    # Load the image using OpenCV, directly as a single-channel array
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error inside cv2.threshold.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing file or unsupported format): "
            f"{image_path!r}"
        )

    # Apply thresholding (binarization); Otsu picks the threshold itself,
    # so the explicit threshold value 0 is ignored.
    _, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # NOTE(review): inversion turns dark-on-light text into light-on-dark,
    # which may hurt recognition -- confirm against the input images and
    # try invert=False if results are unreliable.
    binarized = cv2.bitwise_not(thresh) if invert else thresh

    # Convert the processed array back to PIL image format
    return Image.fromarray(binarized)
def image_to_text(path: str, config: str = '--psm 4 --oem 2 ', lang: str = 'rus') -> str:
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        path: Path of the image file to recognize.
        config: Extra command-line options passed to Tesseract. The default
            keeps the options this function has always used.
        lang: Tesseract language code(s) to use for recognition.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # Path to the Tesseract executable. The separators were missing from the
    # previous literal, so it could not point at the install directory.
    pytesseract.pytesseract.tesseract_cmd = (
        r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    )

    # Preprocess the image (grayscale, binarize, invert)
    img = preprocess_image(path)

    # NOTE(review): per the Tesseract docs, --oem 2 combines the legacy and
    # LSTM engines and --psm 4 assumes a single column of text -- confirm
    # both suit the installed language data and the table layout.
    return pytesseract.image_to_string(img, config=config, lang=lang)
# Run OCR on the sample image and print the recognized text
extracted_text = image_to_text('concatenated_image3.jpg')
print(extracted_text)
New contributor
Jephte François is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.