I have a deep neural network that I am trying to use to classify sign-language digits (0-9) in 64×64 pictures. The cost always converges to around 2.3, which corresponds to about 10% accuracy, i.e. random guessing.
I tested the model on a generated dataset with 20 features and 4 classes, and it works fine there: the decision boundaries look right and the cost drops to a low value. However, when I run it on my dataset, the cost always converges to 2.3. I have adjusted the learning rate, changed the layer dimensions, and rechecked my network code, but I can't find where it's going wrong. I also made sure the X and Y .npy files I am loading are correct and have the right shapes.
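For what it's worth, 2.3 is almost exactly the cross-entropy of a uniform prediction over 10 classes, so the network appears to be learning nothing at all:

import numpy as np
print(-np.log(1 / 10))  # ≈ 2.3026, the cost of predicting 0.1 for every class

Here is my network code (src/deep_nn.py):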
import numpy as np
def initialize_layer_parameters(layer_dims):
    parameters = {}
    L = len(layer_dims)  # number of layers in the network
    # Initialize parameters for each layer
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.1
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))
    return parameters
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache
def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache
def softmax(Z):
    Z_shifted = Z - np.max(Z, axis=0, keepdims=True)  # To prevent overflow
    exp_Z = np.exp(Z_shifted)
    A = exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
    cache = Z
    return A, cache
def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)
    return dZ
def sigmoid_backward(dA, cache):
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    assert (dZ.shape == Z.shape)
    return dZ
def softmax_backward(dAL, cache):
    Z = cache
    m = Z.shape[1]
    dZ = dAL / m
    return dZ
def one_hot_encode(Y, num_classes):
    Y = np.array(Y).astype(int)  # Ensure Y is an array of integers
    Y = Y.reshape(-1)  # Flatten Y to be a 1D array if needed
    if np.any(Y >= num_classes) or np.any(Y < 0):
        raise ValueError("Labels must be in the range [0, num_classes-1]")
    Y_encoded = np.eye(num_classes)[Y]
    return Y_encoded.T
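# Note: one_hot_encode returns shape (num_classes, m), one column per example,
# which matches the (num_classes, m) softmax output AL from forward_propagation.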
def forward_linear(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache
def forward_activation(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = forward_linear(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = forward_linear(A_prev, W, b)
        A, activation_cache = relu(Z)
    elif activation == "softmax":
        Z, linear_cache = forward_linear(A_prev, W, b)
        A, activation_cache = softmax(Z)
    else:
        raise ValueError("Invalid activation function!")
    cache = (linear_cache, activation_cache)
    return A, cache
def forward_propagation(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the network
    activation = "sigmoid" if classification_type == "binary" else "softmax"
    # Forward propagation for each hidden layer (ReLU)
    for l in range(1, L):
        A_prev = A
        A, cache = forward_activation(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)
    # AL is the final output layer
    AL, cache = forward_activation(A, parameters['W' + str(L)], parameters['b' + str(L)], activation)
    caches.append(cache)
    return AL, caches
def cost_function(AL, Y):
    if classification_type == "binary":
        cost = -np.mean(np.multiply(Y, np.log(AL)) + np.multiply((1 - Y), np.log(1 - AL)))
    elif classification_type == "multivariable":
        epsilon = 1e-8  # Small constant to prevent log(0)
        AL = np.clip(AL, epsilon, 1 - epsilon)  # Clip AL to avoid log(0) and division by zero
        cost = -np.sum(Y * np.log(AL)) / Y.shape[1]
    else:
        raise ValueError("Invalid classification type!")
    return cost
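# For the multi-class ("multivariable") case this is the categorical cross-entropy
# J = -(1/m) * sum_i sum_k Y[k, i] * log(AL[k, i]),
# where m = Y.shape[1] is the number of examples (columns).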
def backward_linear(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1/m * np.dot(dZ, A_prev.T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
def backward_activation(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = backward_linear(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = backward_linear(dZ, linear_cache)
    elif activation == "softmax":
        dZ = softmax_backward(dA, activation_cache)
        dA_prev, dW, db = backward_linear(dZ, linear_cache)
    else:
        raise ValueError("Invalid activation function!")
    return dW, dA_prev, db
def backward_propagation(AL, Y, caches):
    grads = {}
    L = len(caches)
    activation = "sigmoid" if classification_type == "binary" else "softmax"
    if activation == "sigmoid":
        dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    if activation == "softmax":
        dAL = AL - Y
    # Gradients of the output layer
    current_cache = caches[L - 1]
    dW_temp, dA_prev_temp, db_temp = backward_activation(dAL, current_cache, activation)
    grads["dW" + str(L)] = dW_temp
    grads["dA" + str(L-1)] = dA_prev_temp
    grads["db" + str(L)] = db_temp
    # Gradients for the remaining layers
    for l in reversed(range(L-1)):
        dW_temp, dA_prev_temp, db_temp = backward_activation(grads["dA" + str(l+1)], caches[l], "relu")
        grads["dW" + str(l + 1)] = dW_temp
        grads["dA" + str(l)] = dA_prev_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2  # number of layers in the neural network
    # Update each parameter by layer
    for l in range(L):
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]
    return parameters
def deep_nn_model(X, Y, num_iterations, layer_dims, learning_rate, classification_method):
    costs = []
    parameters = initialize_layer_parameters(layer_dims)
    global classification_type
    classification_type = classification_method
    if classification_type == "multivariable":
        Y = one_hot_encode(Y, layer_dims[-1])
    for i in range(0, num_iterations):
        AL, caches = forward_propagation(X, parameters)
        cost = cost_function(AL, Y)
        grads = backward_propagation(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0 or i == num_iterations - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iterations - 1:
            costs.append(cost)
    return parameters, costs
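The code above is what I import as src.deep_nn. For context, this is roughly how I sanity-tested it on a generated dataset (simplified here; the layer sizes and the make_classification arguments are only an approximation of what I actually ran):

from sklearn.datasets import make_classification
# 500 examples, 20 features, 4 classes, to mirror the toy test described above
X_toy, Y_toy = make_classification(n_samples=500, n_features=20, n_informative=10,
                                   n_classes=4, random_state=0)
# deep_nn_model expects X with shape (features, m) and integer labels 0..3
toy_params, toy_costs = deep_nn_model(X_toy.T, Y_toy, num_iterations=1000,
                                      layer_dims=[20, 16, 8, 4], learning_rate=0.1,
                                      classification_method="multivariable")

On data like this the cost drops and the plotted decision boundaries look right, as mentioned above.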
# Separate code running the model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sys
import os
import pickle
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from src.deep_nn import forward_propagation, deep_nn_model
X = np.load("datasets/signdigits/X.npy")
Y = np.load("datasets/signdigits/Y.npy")
# Shuffle the data and hold out 15% of it as a test set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)
m_train = X_train.shape[0]
X_train_flat = X_train.reshape(m_train, X_train.shape[1] * X_train.shape[2]).T
Y_train = Y_train[np.newaxis, :]
m_test = X_test.shape[0]
X_test_flat = X_test.reshape(m_test, X_test.shape[1] * X_test.shape[2]).T
Y_test = Y_test[np.newaxis, :]
print(X_train_flat.shape)
print(Y_train.shape)
print(X_test_flat.shape)
print(Y_test.shape)
layer_dims = [4096, 1024, 512, 256, 128, 10]
parameters, _ = deep_nn_model(X_train_flat, Y_train, num_iterations=1000, layer_dims=layer_dims, learning_rate=1, classification_method="multivariable")
def predict(X, parameters):
    AL, _ = forward_propagation(X, parameters)
    predictions = np.argmax(AL, axis=0)
    return predictions
def calculate_accuracy(predictions, labels):
    return np.mean(predictions == labels)
with open('model_parameters.pkl', 'wb') as f:
    pickle.dump(parameters, f)
train_predictions = predict(X_train_flat, parameters)
train_accuracy = calculate_accuracy(train_predictions, Y_train)
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
test_predictions = predict(X_test_flat, parameters)
test_accuracy = calculate_accuracy(test_predictions, Y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")