I built a neural network from scratch with NumPy, and the loss barely changes during training. This is my training code:
def backward_propagation(self, X, y, activations):
    dz = []
    m = X.shape[1]
    dW = []
    dB = []
    for i in reversed(range(1, len(self.layers))):
        if i == len(self.layers) - 1:
            dz = activations[i] - y
        else:
            dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
        dw = np.dot(dz, activations[i-1].T) / m
        db = np.sum(dz, axis=1, keepdims=True) / m
        dW.append(dw)
        dB.append(db)
    return dW[::-1], dB[::-1]

def update_parameters(self, dW, dB, learning_rate):
    for i in range(len(self.weights)):
        self.weights[i] -= learning_rate * dW[i]
        self.biases[i] -= learning_rate * dB[i]

def train(self, X, y, learning_rate=0.01, epochs=1000):
    m = X.shape[1]
    for epoch in range(epochs):
        total_loss = 0
        for i in range(m):
            x_sample = X[:, i:i+1]
            y_sample = y[:, i:i+1]
            activations = self.feed_forward(x_sample)
            dW, dB = self.backward_propagation(x_sample, y_sample, activations)
            self.update_parameters(dW, dB, learning_rate)
            loss = self.compute_loss(activations[-1], y_sample)
            total_loss += loss
        avg_loss = total_loss / m
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Average Loss: {avg_loss}')
And this is the full class:
import numpy as np

class NeuralNetwork:
    def __init__(self, *, input_size):
        self.input_size = input_size
        self.layers = [input_size]   # layer sizes, starting with the input dimension
        self.weights = []
        self.biases = []
        self.activations = []        # activation name for each added layer

    def add_layer(self, layer_size, activation='relu'):
        self.layers.append(layer_size)
        self.activations.append(activation)

    def initialize_weights(self):
        # Glorot/Xavier-style scaling: std = sqrt(2 / (fan_in + fan_out))
        self.weights = []
        self.biases = []
        for i in range(1, len(self.layers)):
            in_dim = self.layers[i-1]
            out_dim = self.layers[i]
            stddev = np.sqrt(2 / (in_dim + out_dim))
            weight_matrix = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, in_dim))
            bias_vector = np.random.normal(loc=0.0, scale=stddev, size=(out_dim, 1))
            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)

    def activate(self, Z, activation):
        if activation == 'relu':
            return np.maximum(0, Z)
        elif activation == 'tanh':
            return np.tanh(Z)
        elif activation == 'softmax':
            exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))
            return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
        elif activation == 'linear':
            return Z
        elif activation == 'sigmoid':
            return 1 / (1 + np.exp(-Z))
        elif activation == 'binary':
            return (Z > 0.5).astype(int)  # binary (step) activation for the output layer
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

    def activation_derivative(self, A, activation):
        # Derivatives written in terms of the activation output A
        if activation == 'relu':
            return (A > 0).astype(float)
        elif activation == 'tanh':
            return 1 - np.power(A, 2)
        elif activation == 'sigmoid':
            return A * (1 - A)
        elif activation == 'linear':
            return np.ones_like(A)
        elif activation == 'softmax':
            return A * (1 - A)
        elif activation == 'binary':
            return 1
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

    def feed_forward(self, X):
        A = X
        activations = [A]  # keep every layer's output for backprop
        for weights, bias, activation in zip(self.weights, self.biases, self.activations):
            Z = np.dot(weights, A) + bias
            A = self.activate(Z, activation)
            activations.append(A)
        return activations

    def backward_propagation(self, X, y, activations):
        dz = []
        m = X.shape[1]
        dW = []
        dB = []
        for i in reversed(range(1, len(self.layers))):
            if i == len(self.layers) - 1:
                # output layer: dL/dZ = A - y (assumes sigmoid/softmax output with cross-entropy loss)
                dz = activations[i] - y
            else:
                dz = np.dot(self.weights[i].T, dz) * self.activation_derivative(activations[i], self.activations[i])
            dw = np.dot(dz, activations[i-1].T) / m
            db = np.sum(dz, axis=1, keepdims=True) / m
            dW.append(dw)
            dB.append(db)
        return dW[::-1], dB[::-1]  # reverse so gradients line up with self.weights/self.biases

    def update_parameters(self, dW, dB, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dW[i]
            self.biases[i] -= learning_rate * dB[i]

    def train(self, X, y, learning_rate=0.01, epochs=1000):
        m = X.shape[1]
        for epoch in range(epochs):
            total_loss = 0
            for i in range(m):  # one sample at a time (stochastic updates)
                x_sample = X[:, i:i+1]
                y_sample = y[:, i:i+1]
                activations = self.feed_forward(x_sample)
                dW, dB = self.backward_propagation(x_sample, y_sample, activations)
                self.update_parameters(dW, dB, learning_rate)
                loss = self.compute_loss(activations[-1], y_sample)
                total_loss += loss
            avg_loss = total_loss / m
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Average Loss: {avg_loss}')

    def compute_loss(self, A, y):
        # Binary cross-entropy (small epsilon added for numerical stability)
        m = y.shape[1]
        loss = -np.sum(y * np.log(A + 1e-8) + (1 - y) * np.log(1 - A + 1e-8)) / m
        return loss
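In case it matters, this is roughly how I construct and train the network. The toy data, layer sizes, and hyperparameters below are placeholders just to show the call pattern, not my actual setup:

# Placeholder toy data (not my real dataset): 2 features x 4 samples, columns are samples.
X = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]], dtype=float)
y = np.array([[0, 1, 1, 0]], dtype=float)

nn = NeuralNetwork(input_size=2)
nn.add_layer(8, activation='tanh')     # hidden layer
nn.add_layer(1, activation='sigmoid')  # output layer, paired with the binary cross-entropy loss
nn.initialize_weights()
nn.train(X, y, learning_rate=0.1, epochs=1000)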
I have already tried switching the weight initialization to a Xavier (Glorot) implementation and experimenting with the learning rate, but the loss still barely changes.
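(To be concrete about what I mean by Xavier: it is the sqrt(2 / (fan_in + fan_out)) standard deviation already shown in initialize_weights above. The uniform variant of the same idea is sketched below only for reference; xavier_uniform_weights is just an illustrative name, not something in my class.)

import numpy as np

def xavier_uniform_weights(out_dim, in_dim):
    # Glorot/Xavier uniform: same scaling idea as the normal-distribution version in initialize_weights
    limit = np.sqrt(6.0 / (in_dim + out_dim))
    return np.random.uniform(-limit, limit, size=(out_dim, in_dim))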
Epoch 0, Average Loss: 0.8672735163691898
Epoch 100, Average Loss: 0.6935956011113185
Epoch 200, Average Loss: 0.690694091666978
Epoch 300, Average Loss: 0.6922357305611471
Epoch 400, Average Loss: 0.6918833076884003
Epoch 500, Average Loss: 0.6909379643394351
Epoch 600, Average Loss: 0.6902891583150265
Epoch 700, Average Loss: 0.6875228090388348
Epoch 800, Average Loss: 0.6879678899764555
Epoch 900, Average Loss: 0.6670931736764081
These are my average losses; after the first epoch they hover around 0.69 and barely improve.
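That plateau is suspiciously close to ln(2) ≈ 0.693, which is the binary cross-entropy you get when the output is a constant 0.5 regardless of the label, so it looks like the network is basically predicting 0.5 for everything. Quick sanity check:

import numpy as np

# Binary cross-entropy of a constant 0.5 prediction, for either label value:
# -(y * log(0.5) + (1 - y) * log(0.5)) = -log(0.5) = ln(2)
print(-np.log(0.5))  # 0.6931471805599453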