I am writing my first neural network. It is a multilayer perceptron that can have (potentially) multiple hidden layers; currently it is configured with 3 layers, and that configuration lives in main.cpp. I am training it on a set of examples with backpropagation, hoping that the output will approximate the XOR of the two input neurons. Here is the full code:
main.cpp
#include "Perceptron.cpp"
#include "TrainingData.cpp"
int main()
{
    int numLayers = 3;
    int neuronsPerLayer[3] = { 2, 2, 1 };
    Perceptron::Perceptron perceptron(numLayers, neuronsPerLayer, 0.01);
    perceptron.initializeWeightsAndBiases();
    perceptron.print();

    std::cout << "Training begin." << std::endl;
    for (int reps = 0; reps < 10000; reps++)
    {
        for (int i = 0; i < NUM_TRAINING_DATA; i++)
        {
            perceptron.backPropogation(trainingInput[i], trainingGoal[i]);
        }
    }
    std::cout << "Training complete." << std::endl;

    float input[2] = { 0, 0 };
    float * output = perceptron.forwardPropogation(input);
    std::cout << "Input = { " << input[0] << ", " << input[1] << " } Output = " << output[0] << std::endl;

    input[0] = 0;
    input[1] = 1;
    output = perceptron.forwardPropogation(input);
    std::cout << "Input = { " << input[0] << ", " << input[1] << " } Output = " << output[0] << std::endl;

    input[0] = 1;
    input[1] = 0;
    output = perceptron.forwardPropogation(input);
    std::cout << "Input = { " << input[0] << ", " << input[1] << " } Output = " << output[0] << std::endl;

    input[0] = 1;
    input[1] = 1;
    output = perceptron.forwardPropogation(input);
    std::cout << "Input = { " << input[0] << ", " << input[1] << " } Output = " << output[0] << std::endl;

    return 0;
}
Helpers.cpp
namespace Perceptron
{
    float dotProduct(float * weight, float * activation, int size)
    {
        float sum = 0;
        for (int idx = 0; idx < size; idx++)
        {
            sum += weight[idx] * activation[idx];
        }
        return sum;
    }

    float sum(float * input, int size)
    {
        float sum = 0;
        for (int idx = 0; idx < size; idx++)
        {
            sum += input[idx];
        }
        return sum;
    }
}
Perceptron.cpp
#include <cmath>
#include <iostream>
#include "Helpers.cpp"

namespace Perceptron
{
    class Perceptron
    {
    private:
        int numLayers;
        float learningRate;
        int * layerSizes;      // layerSizes[layer]
        float * * * weights;   // weights[source layer][destination neuron][source neuron]
        float * * biases;
        float * * activations; // activations[layer][neuron]

    public:
        Perceptron(int numLayers, int * layerSizes, float learningRate)
        {
            this->numLayers = numLayers;
            this->layerSizes = layerSizes;
            this->learningRate = learningRate;
            weights = new float * * [numLayers - 1]; // No weights from last layer.
            for (int layer = 0; layer < numLayers - 1; layer++)
            {
                weights[layer] = new float * [layerSizes[layer + 1]]; // Destination neuron array.
                for (int source = 0; source < layerSizes[layer]; source++)
                {
                    weights[layer][source] = new float[layerSizes[layer]]; // Source neuron array.
                }
            }
            activations = new float * [numLayers];
            biases = new float * [numLayers];
            for (int layer = 0; layer < numLayers; layer++)
            {
                activations[layer] = new float[layerSizes[layer]];
                biases[layer] = new float[layerSizes[layer]];
            }
        }

        void print()
        {
            std::cout << "Weights:" << std::endl;
            for (int layer = 0; layer < numLayers - 1; layer++)
            {
                std::cout << "Layer " << layer << " weights:";
                for (int destNeuron = 0; destNeuron < layerSizes[layer + 1]; destNeuron++)
                {
                    for (int sourceNeuron = 0; sourceNeuron < layerSizes[layer]; sourceNeuron++)
                    {
                        std::cout << sourceNeuron << " -> " << destNeuron << ": " << weights[layer][destNeuron][sourceNeuron] << std::endl;
                    }
                }
                std::cout << std::endl << std::endl;
            }
            std::cout << "Biases" << std::endl;
            for (int layer = 0; layer < numLayers; layer++)
            {
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    std::cout << biases[layer][neuron] << " ";
                }
                std::cout << std::endl;
            }
            std::cout << "Activations" << std::endl;
            for (int layer = 0; layer < numLayers; layer++)
            {
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    std::cout << activations[layer][neuron] << " ";
                }
                std::cout << std::endl;
            }
        }

        void initializeWeightsAndBiases()
        {
            srand(time(NULL));
            for (int layer = 0; layer < numLayers - 1; layer++)
            {
                for (int destNeuron = 0; destNeuron < layerSizes[layer + 1]; destNeuron++)
                {
                    for (int sourceNeuron = 0; sourceNeuron < layerSizes[layer]; sourceNeuron++)
                    {
                        weights[layer][destNeuron][sourceNeuron] = 2 * ((float) rand()) / (float) RAND_MAX;
                        weights[layer][destNeuron][sourceNeuron] -= 1;
                    }
                }
            }
            for (int layer = 0; layer < numLayers; layer++)
            {
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    biases[layer][neuron] = 2 * ((float) rand()) / (float) RAND_MAX;
                    biases[layer][neuron] -= 1;
                }
            }
        }

        float * forwardPropogation(float input[])
        {
            int outputSize = layerSizes[numLayers - 1];
            float * output = new float[outputSize];
            // For each layer
            for (int layer = 0; layer < numLayers; layer++)
            {
                // For each neuron in that layer.
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    if (layer == 0)
                    {
                        // Activation equals input.
                        activations[layer][neuron] = input[neuron];
                    }
                    else
                    {
                        activations[layer][neuron] = sigmoid(
                            dotProduct(
                                weights[layer - 1][neuron],
                                activations[layer - 1],
                                layerSizes[layer - 1]
                            ) + biases[layer][neuron],
                            false);
                    }
                }
            }
            for (int neuron = 0; neuron < outputSize; neuron++)
            {
                output[neuron] = activations[numLayers - 1][neuron];
            }
            // TODO - Free all memory.
            return output;
        }

        void backPropogation(float * input, float * goal)
        {
            // Forward prop
            float * forwardResult = forwardPropogation(input);
            float * * errors = new float * [numLayers];
            for (int layer = 0; layer < numLayers; layer++)
            {
                errors[layer] = new float[layerSizes[layer]];
            }
            // Output layer errors.
            for (int neuron = 0; neuron < layerSizes[numLayers - 1]; neuron++)
            {
                errors[numLayers - 1][neuron] = cost(goal[neuron], forwardResult[neuron]);
            }
            // Hidden layers errors.
            for (int layer = numLayers - 2; layer >= 0; layer--)
            {
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    float errorSum = 0.0;
                    float zSum = 0.0;
                    for (int next = 0; next < layerSizes[layer + 1]; next++)
                    {
                        errorSum += (errors[layer + 1][next] * weights[layer][next][neuron]);
                    }
                    if (layer == 0)
                    {
                        //zSum = input[neuron] + biases[0][neuron];
                        for (int prev = 0; prev < layerSizes[0]; prev++) // inputSize should be defined globally or passed as a parameter
                        {
                            zSum += weights[layer][neuron][prev] * input[prev];
                        }
                        zSum += biases[layer][neuron];
                    }
                    else
                    {
                        for (int prev = 0; prev < layerSizes[layer - 1]; prev++)
                        {
                            zSum += (weights[layer - 1][neuron][prev] * activations[layer - 1][prev]);
                        }
                        zSum += biases[layer][neuron];
                    }
                    errors[layer][neuron] = errorSum * sigmoid(zSum, true);
                }
            }
            // Adjust Weights.
            for (int layer = 0; layer < numLayers - 1; layer++)
            {
                for (int dest = 0; dest < layerSizes[layer + 1]; dest++)
                {
                    for (int source = 0; source < layerSizes[layer]; source++)
                    {
                        weights[layer][dest][source] -= learningRate * (errors[layer + 1][dest] * activations[layer][source]);
                    }
                }
            }
            // Adjust Biases.
            for (int layer = 0; layer < numLayers; layer++)
            {
                for (int neuron = 0; neuron < layerSizes[layer]; neuron++)
                {
                    biases[layer][neuron] -= learningRate * (errors[layer][neuron]);
                }
            }
            // TODO - Free all memory.
        }

        float sigmoid(float x, bool derivative)
        {
            if (derivative)
            {
                return x * (1 - x);
            }
            else
            {
                return 1 / (1 + exp(0 - x));
            }
        }

        float cost(float expected, float calculated)
        {
            return (calculated - expected);
        }
    };
}
TrainingData.cpp
#define NUM_TRAINING_DATA 4
float trainingInput[NUM_TRAINING_DATA][2] = {
    { 0, 0 },
    { 0, 1 },
    { 1, 0 },
    { 1, 1 }
};

float trainingGoal[NUM_TRAINING_DATA][1] = {
    { 0 },
    { 1 },
    { 1 },
    { 0 }
};
As shown in main.cpp, I am training this simple neural network with backpropagation for 10,000 epochs at a learning rate of 0.01. When I then run forward propagation on the training examples, every input produces the same output: -nan. I have been struggling to understand the backpropagation algorithm, and I am wondering where I have gone wrong.
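For reference, this is the per-neuron update rule I believe backpropagation should compute at the output layer, written as a tiny standalone sketch of my understanding (the variable names and example numbers below are only for illustration and do not appear in the code above):

    #include <cmath>
    #include <iostream>

    int main()
    {
        // One output neuron: pre-activation z, activation a = sigmoid(z).
        float z = 0.5f;                         // example pre-activation
        float a = 1.0f / (1.0f + std::exp(-z)); // sigmoid(z)
        float target = 1.0f;                    // example training goal

        // Derivative of the sigmoid expressed via the activation: a * (1 - a).
        float sigmoidPrime = a * (1.0f - a);

        // Output-layer error term for a squared-error style cost, then the
        // update for one incoming weight whose source activation is prevA.
        float delta = (a - target) * sigmoidPrime;
        float prevA = 0.7f;                     // example source activation
        float learningRate = 0.01f;
        float weightUpdate = -learningRate * delta * prevA;

        std::cout << "a = " << a << ", delta = " << delta
                  << ", weight update = " << weightUpdate << std::endl;
        return 0;
    }

My backPropogation method above is my attempt to apply this rule layer by layer, so any pointers on where my implementation departs from it would help.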