I create an algorithm that, after taking a screenshot and finding a sudoku on it, rewrites the numbers into an array and solves that sudoku. The algorithm finds the array and divides it into 81 elements, however, there is a problem when the model understands the numerical values. In short, it gives the value incorrectly.
I tried changing the model, adding depths, changing training sets however none of this helped. I would like the model to guess the numbers correctly. As a test, I recommend going to sudoku.com.
There is code for my model:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import numpy as np
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255
def generate_empty_images(num_images):
return np.random.rand(num_images, 28, 28, 1)
empty_labels = np.zeros((10000,), dtype=int)
non_empty_labels = np.ones((60000,), dtype=int)
mixed_train_images = np.concatenate((train_images, generate_empty_images(60000)))
mixed_train_labels = np.concatenate((train_labels, empty_labels))
mixed_test_images = np.concatenate((test_images, generate_empty_images(10000)))
mixed_test_labels = np.concatenate((test_labels, empty_labels))
shuffle_index = np.random.permutation(len(mixed_train_labels))
mixed_train_images, mixed_train_labels = mixed_train_images[shuffle_index], mixed_train_labels[shuffle_index]
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(mixed_train_images, mixed_train_labels, epochs=10, batch_size=64, validation_split=0.2)
test_loss, test_acc = model.evaluate(mixed_test_images, mixed_test_labels)
print('Test accuracy:', test_acc)
model.save('../Sudoku Solver/resources/myModel.keras')
Helper file:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
def initializeModel():
model = load_model('resources/myModel.keras')
return model
def preProcess(img):
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1)
imgThreshold = cv2.adaptiveThreshold(imgBlur, 255, 1, 1, 11, 2)
return imgThreshold
def biggestContour(contours):
biggest = np.array([])
max_area = 0
for i in contours:
area = cv2.contourArea(i)
if area > 50:
peri = cv2.arcLength(i, True)
approx = cv2.approxPolyDP(i, 0.02 * peri, True)
if area > max_area and len(approx) == 4:
biggest = approx
max_area = area
return biggest, max_area
def reorder(myPoints):
myPoints = myPoints.reshape((4, 2))
myPointsNew = np.zeros((4, 1, 2), np.int32)
add = myPoints.sum(1)
myPointsNew[0] = myPoints[np.argmin(add)]
myPointsNew[3] = myPoints[np.argmax(add)]
diff = np.diff(myPoints, axis=1)
myPointsNew[1] = myPoints[np.argmin(diff)]
myPointsNew[2] = myPoints[np.argmax(diff)]
return myPointsNew
def splitBoxes(img):
rows_dim = img.shape[0] // 9 * 9
cols_dim = img.shape[1] // 9 * 9
img = img[:rows_dim, :cols_dim]
rows = np.vsplit(img, 9)
boxes = []
for r in rows:
cols = np.hsplit(r, 9)
for box in cols:
boxes.append(box)
return boxes
def predictDigits(digits, model):
result = []
for image in digits:
img = np.asarray(image)
img = img[4:img.shape[0] - 4, 4:img.shape[1] - 4]
img = cv2.resize(img, (28, 28))
img = img / 255
img = img.reshape(1, 28, 28, 1)
prediction = model.predict(img)
classIndex = np.argmax(prediction[-1])
probabilityValue = np.amax(prediction)
print(classIndex, probabilityValue)
if probabilityValue > 0.8:
result.append(classIndex)
else:
result.append(0)
return result
And my main file:
import numpy as np
import pyautogui as pg
import tensorflow as tf
import cv2
from helper import biggestContour, reorder, splitBoxes, predictDigits, preProcess, initializeModel
# =================================Start================================= #
# Load the model
model = initializeModel()
# Take a screenshot of the sudoku
sc = pg.screenshot()
sc.save('resources/sudoku.png')
# Open the screenshot
imgOrg = cv2.imread('resources/sudoku.png')
# Get the dimensions of the image
height, width, _ = imgOrg.shape
width = height
imgOrg = cv2.resize(imgOrg, (height, width))
# =================================Start================================= #
# # ----------------- Preparing the Sudoku ----------------- # #
imgBlank = np.zeros((height, width, 3), np.uint8)
imgTreshold = preProcess(imgOrg)
# Find the contours
imgContours = imgOrg.copy()
imgBigContour = imgOrg.copy()
contours, _ = cv2.findContours(imgTreshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 3)
# Find the biggest contour
biggest, maxArea = biggestContour(contours)
if biggest.size != 0:
# Reordering the points
biggest = reorder(biggest)
cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20)
# Extracting the points with perspective transform
p1 = np.float32(biggest)
p2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
matrix = cv2.getPerspectiveTransform(p1, p2)
# Warping the image to get actual sudoku
imgWarpColored = cv2.warpPerspective(imgOrg, matrix, (width, height))
imgDetectedDigits = imgBlank.copy()
imgWarpColored = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
# ----------------- Preprocessing the Sudoku ----------------- #
imgSolvedDigits = imgBlank.copy()
digits = splitBoxes(imgWarpColored)
numbers = predictDigits(digits, model)
print(numbers)