import pandas as pd
import numpy as np
import cv2
import glob
import imutils
from imutils import paths
import os
import os.path
import sys
captcha_image = ("C:/Users/xxxxx/Documents/PythonProgramming/IG/nothing.jpg")
# Load the image and convert it to grayscale
image = cv2.imread(captcha_image)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# grab the base filename as the text
filename = os.path.basename(captcha_image)
captcha_text = os.path.splitext(filename)[0]
# Adding some extra padding around the image
gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)
# applying threshold
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV, cv2.THRESH_OTSU)[1]
# creating empty list for holding the coordinates of the letters
letter_image_regions = []
# finding the contours
contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
# Get the rectangle that contains the contour
(x, y, w, h) = cv2.boundingRect(contour)
# checking if any counter is too wide
# if countour is too wide then there could be two letters joined together or are very close to each other
if w / h > 1.25:
# Split it in half into two letter regions
half_width = int(w / 2)
letter_image_regions.append((x, y, half_width, h))
letter_image_regions.append((x + half_width, y, half_width, h))
else:
letter_image_regions.append((x, y, w, h))
letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])
# Save each letter as a single image
for letter_bounding_box, letter_text in zip(letter_image_regions, captcha_text):
# Grab the coordinates of the letter in the image
x, y, w, h = letter_bounding_box
# Extract the letter from the original image with a 2-pixel margin around the edge
letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]
SOLVED_CAPTCHA_FOLDER = "solved-captchas"
OUTPUT_FOLDER = "extracted_letters"
# Get the path of all the solved captcha images
solved_captchas = glob.glob(os.path.join(SOLVED_CAPTCHA_FOLDER, "*"))
counts = {}
# loop over the image paths
for (i, captcha) in enumerate(solved_captchas):
print("processing image {}/{}".format(i + 1, len(solved_captchas)))
# grab the base filename as the text
filename = os.path.basename(captcha)
captcha_text = os.path.splitext(filename)[0]
# Load the image and convert it to grayscale
image = cv2.imread(captcha)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Add some extra padding around the image
gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)
# applying threshold
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV, cv2.THRESH_OTSU)[1]
# finding the contours
contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# creating empty list for holding the coordinates of the letters
letter_image_regions = []
# Now we will loop through each of the contours and extract the letter
for contour in contours:
# Get the rectangle that contains the contour
(x, y, w, h) = cv2.boundingRect(contour)
# checking if any counter is too wide
# if countour is too wide then there could be two letters joined together or are very close to each other
if w / h > 1.25:
# Split it in half into two letter regions
half_width = int(w / 2)
letter_image_regions.append((x, y, half_width, h))
letter_image_regions.append((x + half_width, y, half_width, h))
else:
letter_image_regions.append((x, y, w, h))
# Sort the detected letter images based on the x coordinate to make sure
# we get them from left-to-right so that we match the right image with the right letter
letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])
# Save each letter as a single image
for letter_bounding_box, letter_text in zip(letter_image_regions, captcha_text):
# Grab the coordinates of the letter in the image
x, y, w, h = letter_bounding_box
# Extract the letter from the original image with a 2-pixel margin around the edge
letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]
# Get the folder to save the image in
save_path = os.path.join(OUTPUT_FOLDER, letter_text)
# creating different output folder for storing different letters
if not os.path.exists(save_path):
os.makedirs(save_path)
# write the letter image to a file
count = counts.get(letter_text, 1)
p = os.path.join(save_path, "{}.png".format(str(count)))
cv2.imwrite(p, letter_image)
# increment the count
counts[letter_text] = count + 1
letter_folder = 'extracted_letters'
#creating empty lists for storing image data and labels
data = []
labels = []
for image in paths.list_images(letter_folder):
img = cv2.imread(image)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.resize(img, (30,30))
# adding a 3rd dimension to the image
img = np.expand_dims(img, axis = 2)
#grabing the name of the letter based on the folder it is present in
label = image.split(os.path.sep)[-2]
# appending to the empty lists
data.append(img)
labels.append(label)
#converting data and labels to np array
data = np.array(data, dtype = "float")
labels = np.array(labels)
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense, Dropout
from keras.callbacks import EarlyStopping
#building model
model = Sequential()
model.add(Conv2D(20, (5, 5), padding="same", input_shape=(30, 30, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(9, activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
# using early stoping for avoiding overfitting
estop = EarlyStopping(patience=10, mode='min', min_delta=0.001, monitor='val_loss')
model.fit(train_x, train_y, validation_data=(val_x, val_y), batch_size=32, epochs=50, verbose=1, callbacks = [estop])
When I attempted to run the code with a file named 1.png I received this error
Traceback (most recent call last):
File “C:UsersxxxxxDocumentsPythonProgrammingIGpytesseract_solver.py”, line 14, in
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
cv2.error: OpenCV(4.10.0) D:aopencv-pythonopencv-pythonopencvmodulesimgprocsrccolor.cpp:196: error: (-215:Assertion failed) !_src.empty() in function ‘cv::cvtColor’
I then tried to run the code with a different captcha named nothing.jpg but same error.
Traceback (most recent call last):
File “C:UsersxxxxxDocumentsPythonProgrammingIGpytesseract_solver.py”, line 14, in
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
cv2.error: OpenCV(4.10.0) D:aopencv-pythonopencv-pythonopencvmodulesimgprocsrccolor.cpp:196: error: (-215:Assertion failed) !_src.empty() in function ‘cv::cvtColor’
RonS is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.