I have code that works perfectly with the MNIST handwritten digit dataset; when I load it and train on it, everything is fine:
from sklearn.datasets import fetch_openml
import numpy as np

mnist = fetch_openml('mnist_784')
X, Y = mnist["data"], mnist["target"]
X = np.array(X)  # just in case…
Y = np.array(Y)  # just in case…
X = X / 255 - 0.5
Y_new = DLModel.to_one_hot(10,Y)
print(Y_new.shape)
m = 60000
m_test = X.shape[0] - m
X_train, X_test = X[:m].T, X[m:].T
Y_train, Y_test = Y_new[:,:m], Y_new[:,m:]
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)
np.random.seed(111)
shuffle_index = np.random.permutation(m)
X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]
np.random.seed(1)
Hidden = DLNeuronsLayer("Hidden",64,28*28,"sigmoid",0.1,'adaptive')
Output = DLNeuronsLayer("Output",10,64,"softmax",0.1,'adaptive')
digit_network = DLNetwork("digit_net")
digit_network.add_layer(Hidden)
digit_network.add_layer(Output)
digit_model = DLModel("model",digit_network,'categorical_cross_entropy')
costs = digit_model.train(X_train, Y_train, 200)
u10.print_costs(costs,200)
When I plot the costs graph and print the confusion matrix, everything looks normal.
[Costs graph]
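(u10.print_costs is just a small plotting helper of mine; a roughly equivalent sketch with matplotlib, assuming costs is a plain list of cost values returned by digit_model.train, would be:)

import matplotlib.pyplot as plt

# Plot the recorded cost values over the training run
plt.plot(costs)
plt.xlabel("print interval")
plt.ylabel("cost")
plt.show()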
The problem arises when I try to switch from MNIST to a Kaggle dataset. I first uploaded the dataset to Google Colab and loaded it like this:
import os
import random
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
def array_to_image(array):
    # Check if the array size is correct for a 28x28 image
    if array.size != 28 * 28:
        raise ValueError("The input array must have 784 elements (28x28).")
    # Reshape the array back into a 28x28 image
    image = array.reshape(28, 28)
    # Display the image
    plt.imshow(image, cmap='gray')
    plt.axis('off')  # Hide the axis
    plt.show()
    return image
def load_handwritten_digits(data_dir):
    data = []
    labels = []
    all_files = []
    for digit in range(10):
        digit_dir = os.path.join(os.path.join(data_dir, str(digit)), str(digit))
        digit_files = [os.path.join(digit_dir, f) for f in os.listdir(digit_dir) if f.endswith('.png')]
        all_files.extend([(f, digit) for f in digit_files])
    random.shuffle(all_files)
    for file_path, digit in all_files:
        try:
            # Open the image
            img = Image.open(file_path)
            # print(f"Loaded image: {file_path}, mode: {img.mode}, size: {img.size}")
            # Convert image to RGBA if it's not already
            if img.mode != 'RGBA':
                img = img.convert('RGBA')
            # Split the image into its separate channels
            r, g, b, a = img.split()
            # Invert the RGB channels
            r = r.point(lambda p: 255 - p)
            g = g.point(lambda p: 255 - p)
            b = b.point(lambda p: 255 - p)
            # Merge the channels back, keeping the alpha channel unchanged
            inverted_img = Image.merge("RGBA", (r, g, b, a))
            # Create a black background image
            black_bg = Image.new("RGBA", img.size, (0, 0, 0, 255))
            # Paste the inverted image onto the black background using the alpha channel as mask
            black_bg.paste(inverted_img, (0, 0), inverted_img)
            # Convert to grayscale
            img = black_bg.convert('L')
            img_data = np.array(img).astype(np.float32) / 255.0
            # Debugging step: print min and max values to check normalization
            # print(f"Converted Image: {file_path}, min: {img_data.min()}, max: {img_data.max()}")
            # Display the image to debug if necessary
            # array_to_image(img_data * 255)  # Convert back to 0-255 for display
            data.append(img_data.flatten())
            labels.append(str(digit))
        except Exception as e:
            print(f"Error loading file: {file_path}")
            print(e)
    data = np.array(data)
    labels = np.array(labels)
    return data, labels
# Load images and labels
base_folder = 'dataset'
X, Y = load_handwritten_digits(base_folder)
(The display function is just there to check that everything has been loaded correctly; everything else in the code is exactly the same as in the MNIST version.)
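For example, a spot check like this (a minimal sketch using the array_to_image helper above; the three-sample count is arbitrary) shows a few random samples with their labels and pixel ranges:

# Sanity check: display a few random samples with their labels
idx = np.random.choice(X.shape[0], size=3, replace=False)
for i in idx:
    print("label:", Y[i], "min:", X[i].min(), "max:", X[i].max())
    array_to_image(X[i] * 255)  # scale back to 0-255 for display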
Now, for some reason, when I load the Kaggle dataset the costs never go below 2 (when they should go way below 0.001), and the costs graph looks like this:
[Costs graph for the Kaggle dataset]
Am I doing something wrong while loading the dataset? I've tried everything to make the Kaggle dataset look exactly like the MNIST dataset.
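As a sanity check, something like the following could compare the two inputs right before training (a sketch only; X_train_mnist and X_train_kaggle are hypothetical names for the two preprocessed feature-by-sample matrices that get passed to digit_model.train):

# Hypothetical names: X_train_mnist / X_train_kaggle are the two
# preprocessed (features x samples) matrices fed to digit_model.train
for name, data in [("MNIST", X_train_mnist), ("Kaggle", X_train_kaggle)]:
    print(name, data.shape, data.dtype,
          "min:", data.min(), "max:", data.max(), "mean:", data.mean())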