I am trying to create a neural network from scratch in C++. I am training it on the MNIST dataset and using a linear algebra library called Armadillo. I have figured out feed-forward, calculating loss, and reading my dataset. However, I cannot get the network to optimize correctly: the outputs always collapse to zero, and the average loss just goes to 1.
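For context, the loss I am computing is the mean squared error, L = mean((p - y)^2), so my understanding is that the gradient backpropagation should start from at the output layer is dL/dp = 2*(p - y)/n. A minimal helper expressing that (my own sketch, not something that exists in the code below) would look like:

#include <armadillo>
using arma::mat;

//sketch: gradient of the mean squared error with respect to the predictions,
//i.e. d/dp of mean((p - y)^2), which works out to 2*(p - y)/n
mat lossGradient(const mat& predicted, const mat& actual) {
    return 2.0 * (predicted - actual) / (double)predicted.n_elem;
}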
Here is my code:
#include "NeuralNet.h"
NeuralNetwork::NeuralNetwork(int outputNodes, int inputNodes, int activationFunction, vector<int> hiddenLayers) {
    this->activationFunction = activationFunction;
    this->outputNodes = outputNodes;
    this->inputNodes = inputNodes;
    initiateParameters(hiddenLayers);
};
void NeuralNetwork::initiateParameters(vector<int> hiddenLayers) {
    //set weights and biases to be the correct size
    int prevNodeCount = inputNodes;
    for (int nodeCount : hiddenLayers) {
        //biases start as draws from a standard normal distribution
        biases.push_back(mat(nodeCount, 1, arma::fill::randn));
        //weights also start as draws from a standard normal distribution
        weights.push_back(mat(nodeCount, prevNodeCount, arma::fill::randn));
        prevNodeCount = nodeCount;
    }
    //set output layer biases and weights
    biases.push_back(mat(outputNodes, 1, arma::fill::randn));
    weights.push_back(mat(outputNodes, prevNodeCount, arma::fill::randn));
};
//takes in the selected activation function and runs the data through it
mat NeuralNetwork::runActivation(mat data) {
    switch (activationFunction) {
        case ReLu:
            data.for_each([](mat::elem_type& val) {
                val = val > 0 ? val : 0;
            });
            break;
        case Sigmoid:
            data.for_each([](mat::elem_type& val) {
                val = 1/(1+exp(-val));
            });
            break;
        case Softplus:
            data.for_each([](mat::elem_type& val) {
                val = log(1+exp(val));//softplus is log(1+e^x)
            });
            break;
    }
    return data;
}
//used in backprop to apply the derivative of the activation function
mat NeuralNetwork::derivativeActivation(mat data) {
    switch (activationFunction) {
        case ReLu:
            data.for_each([](mat::elem_type& val) {
                val = val > 0 ? 1 : 0;
            });
            break;
        case Sigmoid:
            data.for_each([](mat::elem_type& val) {
                double sig = 1/(1+exp(-val));
                val = sig*(1-sig);
            });
            break;
        case Softplus:
            data.for_each([](mat::elem_type& val) {
                val = 1/(1+exp(-val));//the derivative of softplus is the sigmoid
            });
            break;
    }
    return data;
}
//forward propagation; input must be flattened BEFORE entry
mat NeuralNetwork::predict(mat data, vector<mat>* hiddenLayers) {
    int x = 0;
    for(mat weight : weights) {
        if(hiddenLayers != nullptr) {//collects each layer's input; runs first because I don't care about storing the output
            (*hiddenLayers).push_back(data);
        }
        mat w_data = weight * data;
        w_data.each_col() += biases.at(x);
        data = runActivation(w_data);
        data = normalise(data);
        x++;
    }
    return data;
};
void NeuralNetwork::printWeights() {
    for(mat weight : weights)
        cout << weight << endl;
    for(mat bias : biases)
        cout << bias << endl;
}
void NeuralNetwork::optomize(mat batch, mat loss, double learning_rate) {
    for(arma::uword col = 0; col < batch.n_cols; ++col) {
        vector<mat> hiddenLayersActivations;
        mat out = predict(batch.col(col), &hiddenLayersActivations);
        backpropogation(out, loss, hiddenLayersActivations, learning_rate);
    }
}
//takes in data in the form of rows of pixels, where each column is a different datapoint
//uses the backpropagation algorithm to perform gradient descent on the network
void NeuralNetwork::backpropogation(mat out, mat gradientOutput, vector<mat> hiddenLayers, double learning_rate) {
    //loops through each layer from the end
    for(int i = hiddenLayers.size()-1; i >= 0; i--) {
        //gets the input to the current layer
        mat cur_hiddenLayer = hiddenLayers[i];
        if(i != (int)hiddenLayers.size() - 1)//backpropagates the error through the next layer's weights to get the correct size
            gradientOutput = weights[i+1].t() * gradientOutput;
        //applies the activation derivative at the current layer's pre-activation
        gradientOutput = gradientOutput % derivativeActivation(weights[i]*cur_hiddenLayer+biases[i]);
        //calculate weight gradient for the current layer
        mat weightDerivativeCost = gradientOutput * cur_hiddenLayer.t();
        //calculate bias gradient for the current layer
        vec biasesDerivativeCost = gradientOutput;
        //update weights and biases
        weights[i] -= learning_rate * weightDerivativeCost;
        biases[i] -= learning_rate * biasesDerivativeCost;
    }
}
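Before the main file, here is the kind of tiny hand-checkable setup I have been using to sanity-check the forward pass in isolation (throwaway code with made-up layer sizes, not part of the project):

//throwaway check: a 2-input, one-hidden-layer, 2-output network whose
//forward pass is small enough to verify against pencil-and-paper math
vector<int> tinyHidden = {3};
NeuralNetwork tiny(2, 2, ReLu, tinyHidden);
mat x = {{0.5}, {-0.25}};//one 2x1 input column
tiny.predict(x).print("tiny network output:");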
My main is:
#include "main.h"
double calculateCostFunction(mat actualLabel, mat predictedLabel) {
predictedLabel -= actualLabel;
predictedLabel.transform([](double val){return val*val;});
return accu(predictedLabel)/predictedLabel.n_cols;
};
void progressTracker(int point, int length) {
double precentDone = std::round(100*point/length);
cout <<"["<< (precentDone) << "% complete]r";
cout.flush();
};
mat returnLoss(mat predicted, mat actual) {
mat diff = arma::pow((predicted-actual), 2);
return arma::mean(diff, 1);
}
int main(int argc, char *argv[])
{
    if (argc != 2) {
        std::cerr << "ensure that format is correctly followed with ./runNeuralNet [# of datapoints]\n";
        return 1;
    }
    /*read in the file data; what I am actually going to do is
    1. make a class that takes in the file name from the dataset
    2. reads the labels (stores that into a vector)
    3. includes a get function that returns the image with the relevant label*/
    string trainFileName = "mnist_train.csv";
    dataset trainData(trainFileName);
    //initiate the model
    //NN is just a bunch of nodes with weights and biases on each of them
    int inputNodes = trainData.returnItemSize();
    int outputNodes = trainData.returnUniqueLabels().size();
    //vector<int> hiddenLayers = {1000, 500, 100, 50};
    vector<int> hiddenLayers = {32, 16};
    //vector<int> hiddenLayers = {};
    NeuralNetwork NN(outputNodes, inputNodes, ReLu, hiddenLayers);
    //train the model
    //hyperparameters
    int n_epoch = 1;
    double learning_rate = .05;
    int batchsize = 10;
    trainData.randomize();
    //NN.printWeights();
    int currentPoint = 0;
    //cout << trainData.len() << endl;
    int dataused = std::stoi(argv[1]);//trainData.len();
    int trainingLen = .8*dataused;
    //cout << "data points: " << dataused << endl;
    auto start = std::chrono::high_resolution_clock::now();
    while(currentPoint < trainingLen) {
        //predict and get layers
        mat actualLabels(trainData.returnUniqueLabels().size(), batchsize);//defines the matrix to use for actual labels
        mat batch = trainData.returnBatch(&currentPoint, batchsize, actualLabels.memptr());//gets actual labels and batch info
        mat predictedLabels = NN.predict(batch);//predicts labels
        mat loss = returnLoss(predictedLabels, actualLabels);
        cout << arma::sum(abs(loss)) << endl;
        //back prop and error calculation
        NN.optomize(batch, loss, learning_rate);
        //progressTracker(currentPoint, trainingLen);
    }
    // Calculate the duration
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::minutes>(end - start).count();
    // Print the duration in minutes
    std::cout << "Time taken: " << duration << " minutes" << std::endl;
    cout << "evaluation" << endl;
    double correct_pred = 0, incorrect_pred = 0;//must start at zero or the accuracy is garbage
    while(currentPoint < dataused) {
        mat actualLabels(trainData.returnUniqueLabels().size(), batchsize);//defines the matrix to use for actual labels
        mat batch = trainData.returnBatch(&currentPoint, batchsize, actualLabels.memptr());//gets actual labels and batch info
        mat predictedLabels = NN.predict(batch);//predicts labels
        vector<string> actual = trainData.decodeOneHot(actualLabels);
        vector<string> pred = trainData.decodeOneHot(predictedLabels);
        vector<string>::iterator it_pred = pred.begin();
        for(vector<string>::iterator it_act = actual.begin(); it_act != actual.end(); it_act++) {
            if(*it_act == *it_pred)
                correct_pred++;
            else
                incorrect_pred++;
            it_pred++;
        }
        progressTracker(currentPoint-trainingLen, .2*dataused);
    }
    cout << "\naccuracy: " << correct_pred/(incorrect_pred+correct_pred)*100 << endl;
    //used to demonstrate the ability to read data from the dataset and predict the value
    while(true) {
        string selection;
        cout << "enter number between 1-60000: ";
        std::cin >> selection;
        string label;
        mat img = trainData.getItem(std::stoi(selection), &label);
        img.reshape(img.n_cols*img.n_rows, 1);
        mat predicted = NN.predict(img);
        cout << "predicted: " << trainData.decodeOneHot(predicted).at(0) << " actual: " << label << endl;
        cout << predicted.t();
    }
    return 0;
}
I have tried working through the math by hand and training smaller models so the results are easier to check. I have reviewed my equations against the Neural Networks and Deep Learning book, and I am at a loss. Any suggestions for how to move forward and figure this out?
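One idea I have not actually implemented yet is a finite-difference gradient check: perturb one weight at a time and compare the backprop gradient against (loss(w+eps) - loss(w-eps))/(2*eps). A rough sketch of what I mean, assuming a callable that recomputes the scalar loss for the current parameters (the names here are hypothetical):

#include <armadillo>
#include <functional>
#include <algorithm>
#include <cmath>
using arma::mat;

//rough sketch: compares an analytic (backprop) gradient against a numerical
//one for a single weight matrix; 'loss' must recompute the scalar loss using
//the network's current parameters each time it is called
double maxGradientError(mat& w, const std::function<double()>& loss,
                        const mat& analytic, double eps = 1e-5) {
    double worst = 0.0;
    for (arma::uword i = 0; i < w.n_elem; ++i) {
        double saved = w(i);
        w(i) = saved + eps; double up = loss();
        w(i) = saved - eps; double down = loss();
        w(i) = saved;//restore the weight before moving on
        double numeric = (up - down) / (2*eps);
        worst = std::max(worst, std::abs(numeric - analytic(i)));
    }
    return worst;
}

If the analytic and numerical gradients disagree by much more than eps, the layer where they diverge is probably where I should look first. Does this seem like a reasonable next step, or is there something more obvious I am missing?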