I’m a beginner in machine learning, and I’ve been watching YouTube tutorials to put my code together. Our professor gave us less than a week to learn and complete this without teaching us anything, and I’ve been asking AIs for help because it’s getting harder.
I used both CSV files and images to train and test the model. When I run it, it reports 1.000 accuracy, but it gives wrong predictions when I test it with my camera. For example, it always predicts ‘j’ no matter which sign language letter I show.
I’m using Jupyter Notebook for this.
Here’s my code:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import string
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image
import os
train_data = pd.read_csv('sign_mnist_train/sign_mnist_train.csv')
test_data = pd.read_csv('sign_mnist_test/sign_mnist_test.csv')
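# Split each CSV row into a label (first column) and 784 pixel values,
# scale pixels to [0, 1], reshape to 28x28x1, clip labels into 0-9,
# and one-hot encode them into 10 classes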
def preprocess_data(data):
    y_data = data.iloc[:, 0].values
    x_data = data.iloc[:, 1:].values
    x_data = x_data / 255.0
    x_data = x_data.reshape(-1, 28, 28, 1)
    y_data = np.clip(y_data, 0, 9)
    y_data = tf.keras.utils.to_categorical(y_data, 10)
    return x_data, y_data
x_train_csv, y_train_csv = preprocess_data(train_data)
x_test_csv, y_test_csv = preprocess_data(test_data)
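# Load one image file, convert it to 28x28 grayscale, scale to [0, 1],
# and add channel and batch dimensions so it matches the CSV data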
def load_and_preprocess_image(image_path, label):
    img = Image.open(image_path)
    img = img.resize((28, 28))
    img = img.convert('L')
    img_array = np.array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=-1)
    img_array = np.expand_dims(img_array, axis=0)
    label_array = tf.keras.utils.to_categorical(label, 10)
    return img_array, label_array
image_folder = 'SL_img'
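# Collect every .png/.jpg in the folder, deriving each label from the
# first character of the filename and keeping only labels 0-9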
def load_images_from_folder(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        if filename.endswith('.png') or filename.endswith('.jpg'):
            image_path = os.path.join(folder, filename)
            try:
                label = ord(filename[0]) - ord('a')
                if 0 <= label < 10:
                    img_array, label_array = load_and_preprocess_image(image_path, label)
                    images.append(img_array)
                    labels.append(label_array)
            except Exception as e:
                print(f"Error processing file {filename}: {e}")
    return np.vstack(images), np.vstack(labels)
x_additional_images, y_additional_images = load_images_from_folder(image_folder)
print(f"x_train_csv shape: {x_train_csv.shape}")
print(f"y_train_csv shape: {y_train_csv.shape}")
print(f"x_additional_images shape: {x_additional_images.shape}")
print(f"y_additional_images shape: {y_additional_images.shape}")
x_train_combined = np.concatenate((x_train_csv, x_additional_images), axis=0)
y_train_combined = np.concatenate((y_train_csv, y_additional_images), axis=0)
print(f"x_train_combined shape: {x_train_combined.shape}")
print(f"y_train_combined shape: {y_train_combined.shape}")
if x_train_combined.shape[0] != y_train_combined.shape[0]:
    raise ValueError(f"Inconsistent data sizes: x_train_combined has {x_train_combined.shape[0]} samples, but y_train_combined has {y_train_combined.shape[0]} samples.")
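# Simple CNN: two conv/pool blocks, then a dense layer; the final
# Dense(10) outputs raw logits, one per class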
def conv_neural_network_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (5, 5), activation='relu', padding='same', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        tf.keras.layers.Conv2D(64, (5, 5), activation='relu', padding='same'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, activation='relu'),
        tf.keras.layers.Dense(10)
    ])
    return model
model = conv_neural_network_model()
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
hm_epochs = 10
batch_size = 128
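# Train on the combined CSV + image data, holding out 10% for validation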
history = model.fit(x_train_combined, y_train_combined, epochs=hm_epochs, batch_size=batch_size, validation_split=0.1)
test_loss, test_acc = model.evaluate(x_test_csv, y_test_csv)
print('Test accuracy:', test_acc)
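# Plot training vs. validation accuracy per epoch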
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
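# Live test: grab webcam frames, shrink them to 28x28 grayscale, and
# overlay the predicted letter on each frame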
def test_model_with_opencv(model):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return
    print("Press 'q' to quit the webcam window.")
    letters = list(string.ascii_lowercase)
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break
        resized_image = cv2.resize(frame, (28, 28))
        gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
        normalized_image = gray_image / 255.0
        input_image = np.expand_dims(normalized_image, axis=-1)
        input_image = np.expand_dims(input_image, axis=0)
        prediction = model.predict(input_image)
        predicted_label = np.argmax(prediction)
        predicted_letter = letters[predicted_label]
        annotated_frame = frame.copy()
        cv2.putText(annotated_frame, f'Prediction: {predicted_letter}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('Predicted Image', annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
test_model_with_opencv(model)
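# Run the model on a single static image file and display the prediction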
def preprocess_alphabet_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (28, 28))
    img = img / 255.0
    img = np.expand_dims(img, axis=-1)
    img = np.expand_dims(img, axis=0)
    return img
alphabet_image_path = 'amer_sign2.png'
alphabet_image = preprocess_alphabet_image(alphabet_image_path)
prediction = model.predict(alphabet_image)
predicted_label = np.argmax(prediction)
plt.imshow(cv2.cvtColor(cv2.imread(alphabet_image_path), cv2.COLOR_BGR2RGB))
plt.title(f'Predicted Label: {predicted_label}')
plt.axis('off')
plt.show()
Before, I trained and tested the model using only the CSV files, which gave 0.69 accuracy. Now I’ve added the images to try to raise the accuracy, and it reports 1.000 accuracy, but it still gives wrong predictions. Would adding more epochs solve this?