I have some images that I preprocess before extracting digits from them. The problem is that “Tesseract” is not able to extract the digits accurately. The images contain only digits.
The following is my code:
from PIL import Image, ImageEnhance, ImageFilter
import pytesseract
# Input CAPTCHA image to read.
CAPTCHA_PATH = 'captcha_images/captcha8.jpeg'
# Intermediate image written after the non-black pixels are whitened.
# Saved as PNG (lossless): re-encoding a hard black/white image as JPEG
# adds grey compression artifacts around the digit edges, which then get
# amplified by the later contrast and sharpen steps.
RED_REMOVED_PATH = 'processed/red_removed.png'
# Final preprocessed image that is handed to Tesseract.
PROCESSED_IMAGE_PATH = 'processed/processed_image.png'
def change_pixels_except_black(image_path, output_path, threshold=50):
    """
    Whiten every pixel that is not close to black and save the result.
    :param image_path: Path of the image to read.
    :param output_path: Path where the filtered image is written.
    :param threshold: A pixel is kept only when its R, G and B values are all below this number (default is 50).
    """
    white = (255, 255, 255)
    # Work in RGB so that every pixel is an (r, g, b) triple
    picture = Image.open(image_path).convert("RGB")
    canvas = picture.load()
    columns, rows = picture.size
    for row in range(rows):
        for col in range(columns):
            # "All channels below the threshold" fails exactly when the
            # brightest channel reaches it, so one max() test is enough
            if max(canvas[col, row]) >= threshold:
                canvas[col, row] = white
    # Write the filtered picture to disk
    picture.save(output_path)
def preprocess_image(image_path, output_path):
    """
    Prepare an image for OCR: upscale, grayscale, boost contrast, sharpen.
    :param image_path: Path of the image to read.
    :param output_path: Path where the prepared image is written.
    """
    source = Image.open(image_path)
    # Double both dimensions using Lanczos resampling
    doubled_size = (source.width * 2, source.height * 2)
    upscaled = source.resize(doubled_size, Image.LANCZOS)
    # Drop colour information
    gray = upscaled.convert('L')
    # Contrast factor 2, then a sharpening pass
    boosted = ImageEnhance.Contrast(gray).enhance(2)
    sharpened = boosted.filter(ImageFilter.SHARPEN)
    # Write the result to disk
    sharpened.save(output_path)
def extract_text(image_path, psm=7):
    """
    Extract digits from an image using pytesseract.
    :param image_path: Path to the input image.
    :param psm: Tesseract page segmentation mode (default is 7, which treats the image as a single text line).
    :return: Extracted text, exactly as returned by pytesseract.
    """
    # The previous hard-coded mode 10 tells Tesseract that the image holds a
    # single character, which does not fit images containing several digits
    # in a row. Mode 7 (single text line) matches that layout; mode 8
    # (single word) is a reasonable alternative to try via the parameter.
    config = f'--psm {psm} --oem 3 -c tessedit_char_whitelist=0123456789'
    # Open the image in a with-block so the file handle is always released
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, lang='eng', config=config)
# Example usage: whiten everything that is not near-black, preprocess the
# result, then run OCR on it and print what was recognised.
change_pixels_except_black(CAPTCHA_PATH, RED_REMOVED_PATH, threshold=70)
preprocess_image(RED_REMOVED_PATH, PROCESSED_IMAGE_PATH)
text = extract_text(PROCESSED_IMAGE_PATH)
print(f"Extracted Text of {CAPTCHA_PATH}:", text, sep="\n")
These are some examples of images:
Original Image
Removed red lines
After Preprocessing
Output: 0858
Expected Output: 08588
Some other examples
Output: 95415
Expected Output: 92412
Output: 2043
Expected Output: 20413
Output: 61416
Expected Output: 61116
I have also tried different “Tesseract” configurations with no success. Please let me know if further details are required.