I am trying to extract text from an image, but however I do it, Tesseract gives me seemingly random values, even though I think I have processed the image into a very good format.
import cv2 as cv
import pytesseract
from PIL import Image
# OCR pipeline: load a screenshot, binarize and upscale it, save the
# preprocessed image, then run Tesseract on the saved file and print the text.

# Load the screenshot as a single-channel grayscale image.
image = cv.imread("screenshot.png", cv.IMREAD_GRAYSCALE)
if image is None:
    # cv.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an obscure error inside cv.threshold.
    raise FileNotFoundError("Could not read image file: screenshot.png")

# Inverted binary threshold. With THRESH_OTSU the threshold is computed from
# the image histogram and the explicit threshold argument is ignored, so 0 is
# passed as a placeholder; `ret` receives the threshold Otsu actually chose.
ret, modified_image = cv.threshold(image, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)
# Upscale 2x in both directions with bicubic interpolation.
modified_image = cv.resize(modified_image, None, fx=2, fy=2, interpolation=cv.INTER_CUBIC)
# Uncomment to preview the images while debugging:
#cv.imshow("image", image)
#cv.imshow("modified_image", modified_image)
cv.imwrite("modified_image.png", modified_image)

# Path to the Tesseract executable. The separators must be present: without
# them the string is a drive-relative path to a file that does not exist.
pytesseract.pytesseract.tesseract_cmd = r'C:\Tesseract-OCR\tesseract.exe'

# --psm 6: treat the image as a single uniform block of text.
# --oem 3: use the default OCR engine mode.
# The context manager closes the image file once OCR has finished.
with Image.open('modified_image.png') as ocr_input:
    text = pytesseract.image_to_string(ocr_input, config="--psm 6 --oem 3", lang="eng")
print(f'Text: {text}')
This incorrectly prints “CWS-1Y” instead of “CW9-1Y”. From what I understand, the font in use is Shentox, but from what I could find online, training Tesseract on it seems to be quite a task.