//BackPropagation.h
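//
// A minimal fully connected feed-forward network trained with the
// backpropagation (generalized delta) rule. Each neuron computes
// o = sigmoid(sum_k w_k*x_k + bias), and the updates implemented below are:
//
//   output layer:  delta_j = o_j * (1 - o_j) * (t_j - o_j)
//   hidden layers: delta_j = o_j * (1 - o_j) * sum_k delta_k * w_kj
//   weights:       w_ij += beta * delta_j * x_i + alpha * (previous change to w_ij)
//
// where o_j * (1 - o_j) is the sigmoid derivative, beta is the learning
// rate, and alpha is the momentum coefficient. The bias is stored as an
// extra weight per neuron whose input is fixed at 1.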
#ifndef BackPropagation_H
#define BackPropagation_H

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <assert.h>

class backPro
{
public:
    ~backPro();
    backPro(int n1, int *sz, double b, double a); // initializes and allocates memory
    void bpgt(double *in, double *tgt);           // backpropagate error for one set of inputs
    void ffwd(double *in);                        // feed forward activations for one set of inputs
    double mse(double *tgt) const;                // return the squared error of the net
    double output(int i) const;                   // return the i'th output of the net

private:
    double **out;      // output of each neuron
    double **delta;    // error term (delta) of each neuron
    double ***weight;  // weights, indexed as [layer][neuron][input]; the last slot per neuron is the bias
    int num1;          // number of layers
    int *lsize;        // number of neurons in each layer
    double beta;       // learning rate
    double alpha;      // momentum coefficient
    double ***prevDwt; // previous weight changes, used by the momentum term
    double sigmoid(double in);
};

// initializes and allocates memory on the heap
backPro::backPro(int n1, int *sz, double b, double a) : beta(b), alpha(a)
{
    num1 = n1;
    lsize = new int[num1];
    for(int i = 0; i < num1; i++)
        lsize[i] = sz[i];

    // allocate memory for the output of each neuron
    out = new double*[num1];
    for(int i = 0; i < num1; i++)
        out[i] = new double[lsize[i]];

    // allocate memory for deltas (the input layer has none)
    delta = new double*[num1];
    delta[0] = NULL;
    for(int i = 1; i < num1; i++)
        delta[i] = new double[lsize[i]];

    // allocate memory for weights; the extra slot holds the bias weight
    weight = new double**[num1];
    weight[0] = NULL;
    for(int i = 1; i < num1; i++)
        weight[i] = new double*[lsize[i]];
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            weight[i][j] = new double[lsize[i-1]+1];

    // allocate memory for previous weight changes
    prevDwt = new double**[num1];
    prevDwt[0] = NULL;
    for(int i = 1; i < num1; i++)
        prevDwt[i] = new double*[lsize[i]];
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            prevDwt[i][j] = new double[lsize[i-1]+1];

    // seed and assign random weights in [-1, 1]
    srand((unsigned)(time(NULL)));
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            for(int k = 0; k < lsize[i-1]+1; k++)
                weight[i][j][k] = (double)(rand())/(RAND_MAX/2) - 1;

    // initialize previous weight changes to 0 for the first iteration
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            for(int k = 0; k < lsize[i-1]+1; k++)
                prevDwt[i][j][k] = 0.0;
}

backPro::~backPro()
{
    // free out
    for(int i = 0; i < num1; i++)
        delete[] out[i];
    delete[] out;

    // free delta
    for(int i = 1; i < num1; i++)
        delete[] delta[i];
    delete[] delta;

    // free weight
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            delete[] weight[i][j];
    for(int i = 1; i < num1; i++)
        delete[] weight[i];
    delete[] weight;

    // free prevDwt
    for(int i = 1; i < num1; i++)
        for(int j = 0; j < lsize[i]; j++)
            delete[] prevDwt[i][j];
    for(int i = 1; i < num1; i++)
        delete[] prevDwt[i];
    delete[] prevDwt;

    // free layer info
    delete[] lsize;
}

double backPro::sigmoid(double in)
{
    return 1.0/(1.0 + exp(-in));
}

// returns half the sum of squared errors over the output layer
double backPro::mse(double *tgt) const
{
    double mse = 0;
    for(int i = 0; i < lsize[num1-1]; i++)
        mse += (tgt[i] - out[num1-1][i]) * (tgt[i] - out[num1-1][i]);
    return mse/2;
}

// return the i'th output of the net
double backPro::output(int i) const
{
    return out[num1-1][i];
}

// feed forward one set of inputs
void backPro::ffwd(double *in)
{
    double sum;

    // assign contents to the input layer
    for(int i = 0; i < lsize[0]; i++)
        out[0][i] = in[i];

    // out[i][j] is the output of the j'th neuron in the i'th layer;
    // assign an activation value to each neuron using the sigmoid function
    for(int i = 1; i < num1; i++)                     // for each layer
    {
        for(int j = 0; j < lsize[i]; j++)             // for each neuron in the current layer
        {
            sum = 0.0;
            for(int k = 0; k < lsize[i-1]; k++)       // for input from each neuron in the preceding layer
                sum += out[i-1][k] * weight[i][j][k]; // apply the weight to each input and accumulate
            sum += weight[i][j][lsize[i-1]];          // apply the bias
            out[i][j] = sigmoid(sum);                 // apply the sigmoid function
        }
    }
}

// backpropagate errors from the output layer up to the first hidden layer
void backPro::bpgt(double *in, double *tgt)
{
    double sum;

    // update the output values of each neuron
    ffwd(in);

    // find delta for the output layer
    for(int i = 0; i < lsize[num1-1]; i++)
        delta[num1-1][i] = out[num1-1][i]*(1-out[num1-1][i])*(tgt[i]-out[num1-1][i]);

    // find delta for the hidden layers
    for(int i = num1-2; i > 0; i--)
    {
        for(int j = 0; j < lsize[i]; j++)
        {
            sum = 0.0;
            for(int k = 0; k < lsize[i+1]; k++)
                sum += delta[i+1][k]*weight[i+1][k][j];
            delta[i][j] = out[i][j]*(1-out[i][j])*sum;
        }
    }

    // apply momentum (does nothing if alpha = 0)
    for(int i = 1; i < num1; i++)
    {
        for(int j = 0; j < lsize[i]; j++)
        {
            for(int k = 0; k < lsize[i-1]; k++)
                weight[i][j][k] += alpha*prevDwt[i][j][k];
            weight[i][j][lsize[i-1]] += alpha*prevDwt[i][j][lsize[i-1]]; // bias weight
        }
    }

    // adjust weights using steepest descent
    for(int i = 1; i < num1; i++)
    {
        for(int j = 0; j < lsize[i]; j++)
        {
            for(int k = 0; k < lsize[i-1]; k++)
            {
                prevDwt[i][j][k] = beta*delta[i][j]*out[i-1][k];
                weight[i][j][k] += prevDwt[i][j][k];
            }
            prevDwt[i][j][lsize[i-1]] = beta*delta[i][j]; // the bias input is an implicit 1
            weight[i][j][lsize[i-1]] += prevDwt[i][j][lsize[i-1]];
        }
    }
}

#endif
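// Building the demo (a sketch; the file name and compiler invocation below
// are assumptions, not part of the original project: the driver that follows
// is assumed to be saved as main.cpp next to BackPropagation.h):
//
//   g++ main.cpp -o xor
//   ./xor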
// XOR demo driver: trains the network on XOR and runs it on the test inputs
#include <iostream>
#include "BackPropagation.h"
using namespace std;

int main(int argc, char** argv)
{
    // prepare the XOR training data: {input1, input2, target}
    double data[][3] = {
        {0, 0, 0},
        {0, 1, 1},
        {1, 0, 1},
        {1, 1, 0}};

    // prepare the test data
    double testData[][2] = {
        {0, 0},
        {0, 1},
        {1, 0},
        {1, 1}};

    // 2 inputs, one hidden layer of 2 neurons, 1 output
    int numLayers = 3, lsz[3] = {2, 2, 1};
    double beta = 0.3, alpha = 0.1, Thresh = 0.00001;
    long num_iter = 2000000;

    backPro *bp = new backPro(numLayers, lsz, beta, alpha);

    cout << endl << "Now training the network..." << endl;
    long i;
    for(i = 0; i < num_iter; i++)
    {
        // train on one pattern, cycling through the four patterns in turn
        bp->bpgt(data[i%4], &data[i%4][2]);

        // stop once the error on the current pattern falls below the threshold
        if(bp->mse(&data[i%4][2]) < Thresh)
        {
            cout << endl << "Network trained. Threshold value achieved in " << i << " iterations" << endl;
            cout << "MSE: " << bp->mse(&data[i%4][2]) << endl << endl;
            break;
        }
        if(i % (num_iter/10) == 0)
            cout << endl << "MSE: " << bp->mse(&data[i%4][2]) << " ...Training..." << endl;
    }

    if(i == num_iter)
        cout << endl << i << " iterations completed... " << "MSE: " << bp->mse(&data[(i-1)%4][2]) << endl;

    cout << "Now using the trained network to make predictions on the test data..." << endl << endl;
    for(i = 0; i < 4; i++)
    {
        bp->ffwd(testData[i]);
        cout << testData[i][0] << "  " << testData[i][1] << "  " << bp->output(0) << endl;
    }

    delete bp; // free the network
    return 0;
}