I’m facing an issue with the train function in my neural network implementation. Below are the relevant code snippets:
<code>
import numpy as np

def __init__(self):
    # DO NOT CHANGE PARAMETERS
    self.input_to_hidden_weights = np.matrix('1 1; 1 1; 1 1')  # 3 by 2
    self.hidden_to_output_weights = np.matrix('1 1 1')          # 1 by 3
    self.biases = np.matrix('0; 0; 0')                          # 3 by 1
    self.learning_rate = .001
    self.epochs_to_train = 10
    self.training_points = [((2, 1), 10), ((3, 3), 21), ((4, 5), 32), ((6, 6), 42)]
    self.testing_points = [(1, 1), (2, 2), (3, 3), (5, 5), (10, 10)]

def train(self, x1, x2, y):

    ### Forward propagation ###
    input_values = np.matrix([[x1], [x2]])  # 2 by 1

    # Calculate the input and activation of the hidden layer
    hidden_layer_weighted_input = np.dot(self.input_to_hidden_weights, input_values) + self.biases  # 3 by 1
    hidden_layer_activation = np.vectorize(rectified_linear_unit)(hidden_layer_weighted_input)      # 3 by 1

    # Calculate the output
    output = np.dot(hidden_layer_activation.T, self.hidden_to_output_weights.T)  # 1 by 1
    activated_output = output_layer_activation(output)                           # 1 by 1

    ### Backpropagation ###

    # Compute gradients
    output_layer_error = y - activated_output
    hidden_layer_error = np.dot(self.hidden_to_output_weights, output_layer_error) * np.vectorize(rectified_linear_unit_derivative)(hidden_layer_weighted_input)
    bias_gradients = hidden_layer_error
    hidden_to_output_weight_gradients = output_layer_error * hidden_layer_activation.T
    input_to_hidden_weight_gradients = hidden_layer_error * input_values.T

    # Use gradients to adjust weights and biases using gradient descent
    self.biases += self.learning_rate * bias_gradients
    self.input_to_hidden_weights += self.learning_rate * input_to_hidden_weight_gradients
    self.hidden_to_output_weights += self.learning_rate * hidden_to_output_weight_gradients
</code>
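For clarity, these are the shapes I expect at each step of the forward pass, based on the parameter matrices defined in __init__ (this is only my own annotation, not part of the code above):

<code>
# input_to_hidden_weights       : 3 by 2
# input_values                  : 2 by 1
# hidden_layer_weighted_input   : 3 by 1   (W1 . x + b)
# hidden_layer_activation       : 3 by 1   (elementwise ReLU)
# hidden_to_output_weights      : 1 by 3
# output                        : 1 by 1   (a1.T . W2.T)
# activated_output              : 1 by 1   (identity activation)
</code>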
These are the activation helper functions referenced in train():

<code>
def rectified_linear_unit(x):
    """ Returns the ReLU of x, or the maximum between 0 and x."""
    return max(0, x)

def rectified_linear_unit_derivative(x):
    """ Returns the derivative of ReLU."""
    return 1 if x > 0 else 0

def output_layer_activation(x):
    """ Linear function, returns input as is. """
    return x

def output_layer_activation_derivative(x):
    """ Returns the derivative of a linear function: 1. """
    return 1
</code>
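For reference, this is how those helpers get applied elementwise inside train(), via np.vectorize. A standalone check like the one below runs fine for me (the 3 by 1 test matrix is made up purely for illustration):

<code>
import numpy as np

# Quick standalone check of the elementwise ReLU application used in train().
z = np.matrix('-1; 0; 2')                               # example pre-activations, 3 by 1
a = np.vectorize(rectified_linear_unit)(z)              # elementwise ReLU       -> [[0], [0], [2]]
d = np.vectorize(rectified_linear_unit_derivative)(z)   # elementwise derivative -> [[0], [0], [1]]
print(a)
print(d)
</code>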
When I run this code, I encounter the following error:
[image of the error message]
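For context, the __init__ and train methods above live inside a class whose surrounding definition I have left out; in the snippet below I call it NeuralNetwork, but that name is only a placeholder for my actual class. The error shows up once train() is called on the training points, roughly like this:

<code>
# Placeholder driver loop; NeuralNetwork stands in for my actual class.
nn = NeuralNetwork()
for epoch in range(nn.epochs_to_train):
    for (x1, x2), y in nn.training_points:
        nn.train(x1, x2, y)   # the error is raised inside train()
</code>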
Could you please help me identify the cause of this error and suggest a solution? Thank you!