I built a neural network from scratch with NumPy, and the loss barely changes during training. This is my training code:
def backward_propagation(self, X, y, activations):
    dz = []
    m = X.shape[1]
    dW = []
    dB = []
    for i in reversed(range(1, len(self.layers))):
        if i == len(self.layers) - 1:
            dz = activations[i] - y
        else:
            dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
        dw = np.dot(dz, activations[i-1].T) / m
        db = np.sum(dz, axis=1, keepdims=True) / m
        dW.append(dw)
        dB.append(db)
    return dW[::-1], dB[::-1]

def update_parameters(self, dW, dB, learning_rate):
    for i in range(len(self.weights)):
        self.weights[i] -= learning_rate * dW[i]
        self.biases[i] -= learning_rate * dB[i]

def train(self, X, y, learning_rate=0.01, epochs=1000):
    m = X.shape[1]
    for epoch in range(epochs):
        total_loss = 0
        for i in range(m):
            x_sample = X[:, i:i+1]
            y_sample = y[:, i:i+1]
            activations = self.feed_forward(x_sample)
            dW, dB = self.backward_propagation(x_sample, y_sample, activations)
            self.update_parameters(dW, dB, learning_rate)
            loss = self.compute_loss(activations[-1], y_sample)
            total_loss += loss
        avg_loss = total_loss / m
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Average Loss: {avg_loss}')
And this is the full class:
import numpy as np

class NeuralNetwork:
    def __init__(self, *, input_size):
        self.input_size = input_size
        self.layers = [input_size]   # layer sizes, starting with the input dimension
        self.weights = []
        self.biases = []
        self.activations = []        # activation name for each added layer

    def add_layer(self, layer_size, activation='relu'):
        self.layers.append(layer_size)
        self.activations.append(activation)

    def initialize_weights(self):
        # Glorot/Xavier-style scaling: std = sqrt(2 / (fan_in + fan_out))
        self.weights = []
        self.biases = []
        for i in range(1, len(self.layers)):
            in_dim = self.layers[i-1]
            out_dim = self.layers[i]
            stddev = np.sqrt(2 / (in_dim + out_dim))
            weight_matrix = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, in_dim))
            bias_vector = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, 1))
            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)

    def activate(self, Z, activation):
        if activation == 'relu':
            return np.maximum(0, Z)
        elif activation == 'tanh':
            return np.tanh(Z)
        elif activation == 'softmax':
            exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))
            return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
        elif activation == 'linear':
            return Z
        elif activation == 'sigmoid':
            return 1 / (1 + np.exp(-Z))
        elif activation == 'binary':
            return (Z > 0.5).astype(int)  # binary (step) activation for the output layer
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

    def activation_derivative(self, A, activation):
        # Derivatives written in terms of the activation output A
        if activation == 'relu':
            return (A > 0).astype(float)
        elif activation == 'tanh':
            return 1 - np.power(A, 2)
        elif activation == 'sigmoid':
            return A * (1 - A)
        elif activation == 'linear':
            return np.ones_like(A)
        elif activation == 'softmax':
            return A * (1 - A)
        elif activation == 'binary':
            return 1
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

    def feed_forward(self, X):
        A = X
        activations = [A]  # keep every layer's output for backprop
        for weights, bias, activation in zip(self.weights, self.biases, self.activations):
            Z = np.dot(weights, A) + bias
            A = self.activate(Z, activation)
            activations.append(A)
        return activations

    def backward_propagation(self, X, y, activations):
        dz = []
        m = X.shape[1]
        dW = []
        dB = []
        for i in reversed(range(1, len(self.layers))):
            if i == len(self.layers) - 1:
                # output layer: dL/dZ = A - y (assumes sigmoid/softmax output with cross-entropy loss)
                dz = activations[i] - y
            else:
                dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
            dw = np.dot(dz, activations[i-1].T) / m
            db = np.sum(dz, axis=1, keepdims=True) / m
            dW.append(dw)
            dB.append(db)
        return dW[::-1], dB[::-1]  # reverse so gradients line up with self.weights/self.biases

    def update_parameters(self, dW, dB, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dW[i]
            self.biases[i] -= learning_rate * dB[i]

    def train(self, X, y, learning_rate=0.01, epochs=1000):
        m = X.shape[1]
        for epoch in range(epochs):
            total_loss = 0
            for i in range(m):  # one sample at a time (stochastic updates)
                x_sample = X[:, i:i+1]
                y_sample = y[:, i:i+1]
                activations = self.feed_forward(x_sample)
                dW, dB = self.backward_propagation(x_sample, y_sample, activations)
                self.update_parameters(dW, dB, learning_rate)
                loss = self.compute_loss(activations[-1], y_sample)
                total_loss += loss
            avg_loss = total_loss / m
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Average Loss: {avg_loss}')

    def compute_loss(self, A, y):
        # Binary cross-entropy (small epsilon added for numerical stability)
        m = y.shape[1]
        loss = -np.sum(y * np.log(A + 1e-8) + (1 - y) * np.log(1 - A + 1e-8)) / m
        return loss
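In case it matters, this is roughly how I construct and train the network. The toy data, layer sizes, and hyperparameters below are placeholders just to show the call pattern, not my actual setup:

# Placeholder toy data (not my real dataset): 2 features x 4 samples, columns are samples.
X = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]], dtype=float)
y = np.array([[0, 1, 1, 0]], dtype=float)

nn = NeuralNetwork(input_size=2)
nn.add_layer(8, activation='tanh')     # hidden layer
nn.add_layer(1, activation='sigmoid')  # output layer, paired with the binary cross-entropy loss
nn.initialize_weights()
nn.train(X, y, learning_rate=0.1, epochs=1000)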
I have already tried switching the weight initialization to a Xavier (Glorot) implementation and experimenting with the learning rate, but the loss still barely changes.
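(To be concrete about what I mean by Xavier: it is the sqrt(2 / (fan_in + fan_out)) standard deviation already shown in initialize_weights above. The uniform variant of the same idea is sketched below only for reference; xavier_uniform_weights is just an illustrative name, not something in my class.)

import numpy as np

def xavier_uniform_weights(out_dim, in_dim):
    # Glorot/Xavier uniform: same scaling idea as the normal-distribution version in initialize_weights
    limit = np.sqrt(6.0 / (in_dim + out_dim))
    return np.random.uniform(-limit, limit, size=(out_dim, in_dim))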
Epoch 0, Average Loss: 0.8672735163691898
Epoch 100, Average Loss: 0.6935956011113185
Epoch 200, Average Loss: 0.690694091666978
Epoch 300, Average Loss: 0.6922357305611471
Epoch 400, Average Loss: 0.6918833076884003
Epoch 500, Average Loss: 0.6909379643394351
Epoch 600, Average Loss: 0.6902891583150265
Epoch 700, Average Loss: 0.6875228090388348
Epoch 800, Average Loss: 0.6879678899764555
Epoch 900, Average Loss: 0.6670931736764081
These are my average losses; after the first epoch they hover around 0.69 and barely improve.
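That plateau is suspiciously close to ln(2) ≈ 0.693, which is the binary cross-entropy you get when the output is a constant 0.5 regardless of the label, so it looks like the network is basically predicting 0.5 for everything. Quick sanity check:

import numpy as np

# Binary cross-entropy of a constant 0.5 prediction, for either label value:
# -(y * log(0.5) + (1 - y) * log(0.5)) = -log(0.5) = ln(2)
print(-np.log(0.5))  # 0.6931471805599453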