I am trying to create a neural network from scratch in C++. I am training it on the MNIST dataset and using a linear algebra library called Armadillo. I have figured out feed-forward, calculating loss, and reading my dataset. However, I cannot get the network to optimize correctly: the outputs always collapse to zero, and the average loss just goes to 1.
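For context, the loss I am computing is the mean squared error, L = mean((p - y)^2), so my understanding is that the gradient backpropagation should start from at the output layer is dL/dp = 2*(p - y)/n. A minimal helper expressing that (my own sketch, not something that exists in the code below) would look like:

#include <armadillo>
using arma::mat;

//sketch: gradient of the mean squared error with respect to the predictions,
//i.e. d/dp of mean((p - y)^2), which works out to 2*(p - y)/n
mat lossGradient(const mat& predicted, const mat& actual) {
    return 2.0 * (predicted - actual) / (double)predicted.n_elem;
}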
Here is my code:
#include "NeuralNet.h"
NeuralNetwork::NeuralNetwork(int outputNodes, int inputNodes, int activationFunction, vector<int> hiddenLayers) {
    this->activationFunction = activationFunction;
    this->outputNodes = outputNodes;
    this->inputNodes = inputNodes;
    initiateParameters(hiddenLayers);
};
void NeuralNetwork::initiateParameters(vector<int> hiddenLayers) {
    //set weights and biases to be the correct size
    int prevNodeCount = inputNodes;
    for (int nodeCount : hiddenLayers) {
        //biases start as draws from a standard normal distribution
        biases.push_back(mat(nodeCount, 1, arma::fill::randn));
        //weights also start as draws from a standard normal distribution
        weights.push_back(mat(nodeCount, prevNodeCount, arma::fill::randn));
        prevNodeCount = nodeCount;
    }
    //set output layer biases and weights
    biases.push_back(mat(outputNodes, 1, arma::fill::randn));
    weights.push_back(mat(outputNodes, prevNodeCount, arma::fill::randn));
};
//takes in the selected activation function and runs the data through it
mat NeuralNetwork::runActivation(mat data) {
    switch (activationFunction) {
        case ReLu:
            data.for_each([](mat::elem_type& val) {
                val = val > 0 ? val : 0;
            });
            break;
        case Sigmoid:
            data.for_each([](mat::elem_type& val) {
                val = 1/(1+exp(-val));
            });
            break;
        case Softplus:
            data.for_each([](mat::elem_type& val) {
                val = log(1+exp(val));//softplus is log(1+e^x)
            });
            break;
    }
    return data;
}
//used in backprop to apply the derivative of the activation function
mat NeuralNetwork::derivativeActivation(mat data) {
    switch (activationFunction) {
        case ReLu:
            data.for_each([](mat::elem_type& val) {
                val = val > 0 ? 1 : 0;
            });
            break;
        case Sigmoid:
            data.for_each([](mat::elem_type& val) {
                double sig = 1/(1+exp(-val));
                val = sig*(1-sig);
            });
            break;
        case Softplus:
            data.for_each([](mat::elem_type& val) {
                val = 1/(1+exp(-val));//the derivative of softplus is the sigmoid
            });
            break;
    }
    return data;
}
//forward propagation; input must be flattened BEFORE entry
mat NeuralNetwork::predict(mat data, vector<mat>* hiddenLayers) {
    int x = 0;
    for(mat weight : weights) {
        if(hiddenLayers != nullptr) {//collects each layer's input; runs first because I don't care about storing the output
            (*hiddenLayers).push_back(data);
        }
        mat w_data = weight * data;
        w_data.each_col() += biases.at(x);
        data = runActivation(w_data);
        data = normalise(data);
        x++;
    }
    return data;
};
void NeuralNetwork::printWeights() {
    for(mat weight : weights)
        cout << weight << endl;
    for(mat bias : biases)
        cout << bias << endl;
}
void NeuralNetwork::optomize(mat batch, mat loss, double learning_rate) {
    for(arma::uword col = 0; col < batch.n_cols; ++col) {
        vector<mat> hiddenLayersActivations;
        mat out = predict(batch.col(col), &hiddenLayersActivations);
        backpropogation(out, loss, hiddenLayersActivations, learning_rate);
    }
}
//takes in data in the form of rows of pixels, where each column is a different datapoint
//uses the backpropagation algorithm to perform gradient descent on the network
void NeuralNetwork::backpropogation(mat out, mat gradientOutput, vector<mat> hiddenLayers, double learning_rate) {
    //loops through each layer from the end
    for(int i = hiddenLayers.size()-1; i >= 0; i--) {
        //gets the input to the current layer
        mat cur_hiddenLayer = hiddenLayers[i];
        if(i != (int)hiddenLayers.size() - 1)//backpropagates the error through the next layer's weights to get the correct size
            gradientOutput = weights[i+1].t() * gradientOutput;
        //applies the activation derivative at the current layer's pre-activation
        gradientOutput = gradientOutput % derivativeActivation(weights[i]*cur_hiddenLayer+biases[i]);
        //calculate weight gradient for the current layer
        mat weightDerivativeCost = gradientOutput * cur_hiddenLayer.t();
        //calculate bias gradient for the current layer
        vec biasesDerivativeCost = gradientOutput;
        //update weights and biases
        weights[i] -= learning_rate * weightDerivativeCost;
        biases[i] -= learning_rate * biasesDerivativeCost;
    }
}
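Before the main file, here is the kind of tiny hand-checkable setup I have been using to sanity-check the forward pass in isolation (throwaway code with made-up layer sizes, not part of the project):

//throwaway check: a 2-input, one-hidden-layer, 2-output network whose
//forward pass is small enough to verify against pencil-and-paper math
vector<int> tinyHidden = {3};
NeuralNetwork tiny(2, 2, ReLu, tinyHidden);
mat x = {{0.5}, {-0.25}};//one 2x1 input column
tiny.predict(x).print("tiny network output:");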
My main is:
#include "main.h"
double calculateCostFunction(mat actualLabel, mat predictedLabel) {
predictedLabel -= actualLabel;
predictedLabel.transform([](double val){return val*val;});
return accu(predictedLabel)/predictedLabel.n_cols;
};
void progressTracker(int point, int length) {
double precentDone = std::round(100*point/length);
cout <<"["<< (precentDone) << "% complete]r";
cout.flush();
};
mat returnLoss(mat predicted, mat actual) {
mat diff = arma::pow((predicted-actual), 2);
return arma::mean(diff, 1);
}
int main(int argc, char *argv[])
{
    if (argc != 2) {
        std::cerr << "ensure that format is correctly followed with ./runNeuralNet [# of datapoints]\n";
        return 1;
    }
    /*read in the file data; what I am actually going to do is
    1. make a class that takes in the file name from the dataset
    2. reads the labels (stores that into a vector)
    3. includes a get function that returns the image with the relevant label*/
    string trainFileName = "mnist_train.csv";
    dataset trainData(trainFileName);
    //initiate the model
    //NN is just a bunch of nodes with weights and biases on each of them
    int inputNodes = trainData.returnItemSize();
    int outputNodes = trainData.returnUniqueLabels().size();
    //vector<int> hiddenLayers = {1000, 500, 100, 50};
    vector<int> hiddenLayers = {32, 16};
    //vector<int> hiddenLayers = {};
    NeuralNetwork NN(outputNodes, inputNodes, ReLu, hiddenLayers);
    //train the model
    //hyperparameters
    int n_epoch = 1;
    double learning_rate = .05;
    int batchsize = 10;
    trainData.randomize();
    //NN.printWeights();
    int currentPoint = 0;
    //cout << trainData.len() << endl;
    int dataused = std::stoi(argv[1]);//trainData.len();
    int trainingLen = .8*dataused;
    //cout << "data points: " << dataused << endl;
    auto start = std::chrono::high_resolution_clock::now();
    while(currentPoint < trainingLen) {
        //predict and get layers
        mat actualLabels(trainData.returnUniqueLabels().size(), batchsize);//defines the matrix to use for actual labels
        mat batch = trainData.returnBatch(&currentPoint, batchsize, actualLabels.memptr());//gets actual labels and batch info
        mat predictedLabels = NN.predict(batch);//predicts labels
        mat loss = returnLoss(predictedLabels, actualLabels);
        cout << arma::sum(abs(loss)) << endl;
        //back prop and error calculation
        NN.optomize(batch, loss, learning_rate);
        //progressTracker(currentPoint, trainingLen);
    }
    // Calculate the duration
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::minutes>(end - start).count();
    // Print the duration in minutes
    std::cout << "Time taken: " << duration << " minutes" << std::endl;
    cout << "evaluation" << endl;
    double correct_pred = 0, incorrect_pred = 0;//must start at zero or the accuracy is garbage
    while(currentPoint < dataused) {
        mat actualLabels(trainData.returnUniqueLabels().size(), batchsize);//defines the matrix to use for actual labels
        mat batch = trainData.returnBatch(&currentPoint, batchsize, actualLabels.memptr());//gets actual labels and batch info
        mat predictedLabels = NN.predict(batch);//predicts labels
        vector<string> actual = trainData.decodeOneHot(actualLabels);
        vector<string> pred = trainData.decodeOneHot(predictedLabels);
        vector<string>::iterator it_pred = pred.begin();
        for(vector<string>::iterator it_act = actual.begin(); it_act != actual.end(); it_act++) {
            if(*it_act == *it_pred)
                correct_pred++;
            else
                incorrect_pred++;
            it_pred++;
        }
        progressTracker(currentPoint-trainingLen, .2*dataused);
    }
    cout << "\naccuracy: " << correct_pred/(incorrect_pred+correct_pred)*100 << endl;
    //used to demonstrate the ability to read data from the dataset and predict the value
    while(true) {
        string selection;
        cout << "enter number between 1-60000: ";
        std::cin >> selection;
        string label;
        mat img = trainData.getItem(std::stoi(selection), &label);
        img.reshape(img.n_cols*img.n_rows, 1);
        mat predicted = NN.predict(img);
        cout << "predicted: " << trainData.decodeOneHot(predicted).at(0) << " actual: " << label << endl;
        cout << predicted.t();
    }
    return 0;
}
I have tried working through the math by hand and training smaller models so the results are easier to check. I have reviewed my equations against the Neural Networks and Deep Learning book, and I am at a loss. Any suggestions for how to move forward and figure this out?
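One idea I have not actually implemented yet is a finite-difference gradient check: perturb one weight at a time and compare the backprop gradient against (loss(w+eps) - loss(w-eps))/(2*eps). A rough sketch of what I mean, assuming a callable that recomputes the scalar loss for the current parameters (the names here are hypothetical):

#include <armadillo>
#include <functional>
#include <algorithm>
#include <cmath>
using arma::mat;

//rough sketch: compares an analytic (backprop) gradient against a numerical
//one for a single weight matrix; 'loss' must recompute the scalar loss using
//the network's current parameters each time it is called
double maxGradientError(mat& w, const std::function<double()>& loss,
                        const mat& analytic, double eps = 1e-5) {
    double worst = 0.0;
    for (arma::uword i = 0; i < w.n_elem; ++i) {
        double saved = w(i);
        w(i) = saved + eps; double up = loss();
        w(i) = saved - eps; double down = loss();
        w(i) = saved;//restore the weight before moving on
        double numeric = (up - down) / (2*eps);
        worst = std::max(worst, std::abs(numeric - analytic(i)));
    }
    return worst;
}

If the analytic and numerical gradients disagree by much more than eps, the layer where they diverge is probably where I should look first. Does this seem like a reasonable next step, or is there something more obvious I am missing?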