A C++ Implementation of a Multi-Layer Neural Network

A few words up front:

Last semester I spent most of my time reading deep learning theory, with a particular focus on convolutional neural networks. Now that summer vacation gives me more free time, I'm writing some code to consolidate what I read.

As an appetizer, over the past couple of days I wrote an extensible multi-layer neural network. There is nothing new in it: no RBM, no convolution, just an ordinary feed-forward network. Once this is done I plan to write a convolutional neural network.

The program is based on the back-propagation algorithm. If you are not familiar with it, have a look at the backpropagation section of the UFLDL tutorial; the program follows that presentation. The code is still fairly crude and only sketches the overall BP framework, which should make it easier for beginners to follow.
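
For reference, these are roughly the update rules the code below follows, written in the code's own conventions: w[i][j] connects input unit j to output unit i of a layer, delta stores the negative gradient of the cost with respect to a unit's pre-activation, lr is the learning rate, and each per-sample update is scaled by 1/N where N is the number of samples.

$$\delta^{\text{out}}_i = y_i - o_i, \qquad \delta^{l}_i = f'(z^{l}_i)\sum_k w^{l+1}_{ki}\,\delta^{l+1}_k, \qquad w^{l}_{ij} \leftarrow w^{l}_{ij} + \frac{lr}{N}\,\delta^{l}_i\,x^{l}_j, \qquad b^{l}_i \leftarrow b^{l}_i + \frac{lr}{N}\,\delta^{l}_i$$

Here $f$ is the sigmoid, so $f'(z) = f(z)\,(1-f(z))$, $x^{l}$ is the input to layer $l$, $o$ is the softmax output and $y$ the one-hot label.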

 

The program consists of three parts: NeuralNetwork defines the overall network structure and handles training and prediction; HiddenLayer is, as the name suggests, a hidden layer, with each object representing one layer, so they can be stacked to build a deep network; the last part is LogisticRegressionLayer, the final layer of the network, where I use the common softmax output for computing the error.

NeuralNetwork.h

#include "HiddenLayer.h"
#include "LogisticRegressionLayer.h"



class NeuralNetwork
{
public:
	NeuralNetwork(int n, int n_i, int n_o, int nhl, int*hls);
	~NeuralNetwork();

	void train(double** in_data, double** in_label, double lr, int epochs);
	void predict(double** in_data, int n);

private:
	int N; //number of training samples
	int n_in; //input dimension
	int n_out; //output dimension
	int n_hidden_layer; //number of hidden layers
	int* hidden_layer_size; //sizes of the hidden layers, e.g. {3,4} means two hidden layers, the first with 3 units and the second with 4

	HiddenLayer **sigmoid_layers;
	LogisticRegressionLayer *log_layer;
};

NeuralNetwork.cpp

#include <iostream>
#include "NeuralNetwork.h"

using namespace std;

NeuralNetwork::NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls)
{
	N = n;
	n_in = n_i;
	n_out = n_o;

	n_hidden_layer = nhl;
	hidden_layer_size = new int [n_hidden_layer];
	for(int i = 0; i < n_hidden_layer; ++i)
		hidden_layer_size[i] = hls[i];//copy the layer sizes instead of aliasing the caller's array

	//build the network structure
	sigmoid_layers = new HiddenLayer* [n_hidden_layer];
	for(int i = 0; i < n_hidden_layer; ++i)
	{
		if(i == 0)
		{
			sigmoid_layers[i] = new HiddenLayer(n_in, hidden_layer_size[i]);//first hidden layer takes the raw input
		}
		else
		{
			sigmoid_layers[i] = new HiddenLayer(hidden_layer_size[i-1], hidden_layer_size[i]);//the remaining hidden layers
		}
	}

	log_layer = new LogisticRegressionLayer(hidden_layer_size[n_hidden_layer-1], n_out);//final softmax layer
}

NeuralNetwork::~NeuralNetwork()
{
	for(int i = 0; i < n_hidden_layer; ++i)delete sigmoid_layers[i];
	delete[] sigmoid_layers;
	delete log_layer;
	delete[] hidden_layer_size;
}

void NeuralNetwork::train(double** in_data, double** in_label, double lr, int epochs)
{
	//allocate a 2D buffer and copy the samples into it
	double** data = new double* [N];
	for(int i = 0; i < N; ++i)
		data[i] = new double [n_in];

	//in_data is read as one contiguous block (e.g. a static 2D array passed with a cast)
	for(int i = 0; i < N; ++i)
		for(int j = 0; j < n_in; ++j)
			data[i][j] = *((double*)in_data+i*n_in+j);

	//same for the labels
	double** label = new double* [N];
	for(int i = 0; i < N; ++i)
		label[i] = new double [n_out];

	for(int i = 0; i < N; ++i)
		for(int j = 0; j < n_out; ++j)
			label[i][j] = *((double*)in_label+i*n_out+j);

	//iterate over the whole training set for the requested number of epochs
	for(int epoch = 0; epoch < epochs; ++epoch)
	{
		double e = 0.0;
		for(int i = 0; i < N; ++i)
		{
			//forward propagation
			for(int n = 0; n < n_hidden_layer; ++ n)
			{
				if(n == 0) //the first hidden layer takes the sample directly
				{
					sigmoid_layers[n]->forward_propagation(data[i]);
				}
				else //the other hidden layers take the previous layer's output as input
				{
					sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
				}
			}
			//the softmax layer takes the last hidden layer's output as input
			log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);

			e += log_layer->cal_error(label[i]);

			//back-propagation
			log_layer->back_propagation(sigmoid_layers[n_hidden_layer-1]->output_data, label[i], lr, N);
			for(int n = n_hidden_layer-1; n >= 0; --n)
			{
				//the first hidden layer is fed the sample itself, the others the previous layer's output
				double* prev_output = (n == 0) ? data[i] : sigmoid_layers[n-1]->output_data;
				if(n == n_hidden_layer-1)
				{
					sigmoid_layers[n]->back_propagation(prev_output,
						log_layer->delta, log_layer->w, log_layer->n_out, lr, N);
				}
				else
				{
					sigmoid_layers[n]->back_propagation(prev_output,
						sigmoid_layers[n+1]->delta, sigmoid_layers[n+1]->w, sigmoid_layers[n+1]->n_out, lr, N);
				}
			}
		}
		cout << "epochs number is " << epoch << "	cost function is " << e / (double)N << endl;
	}


	for(int i = 0; i < N; ++i)delete[] data[i];
	delete[] data;
	for(int i = 0; i < N; ++i)delete[] label[i];
	delete[] label;
}

void NeuralNetwork::predict(double** data, int n)
{
	double** d = new double* [n];
	for(int i = 0; i < n; ++i)
		d[i] = new double [n_in];

	for(int i = 0; i < n; ++i)
		for(int j = 0; j < n_in; ++j)
			d[i][j] = *((double*)data+i*n_in+j);

	for(int i = 0; i < n; ++i)
	{
		for(int n = 0; n < n_hidden_layer; ++ n)
		{
			if(n == 0) //the first hidden layer takes the sample directly
			{
				sigmoid_layers[n]->forward_propagation(d[i]);
			}
			else //the other hidden layers take the previous layer's output as input
			{
				sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
			}
		}
		//the softmax layer takes the last hidden layer's output as input
		log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);


		for(int j = 0; j < n_out; ++j)
		{
			cout << log_layer->output_data[j] << " ";
		}
		cout << endl;
	}

	for(int i = 0; i < n; ++i)delete[] d[i];
	delete[] d;
	
}

HiddenLayer.h

#ifndef HIDDENLAYER
#define HIDDENLAYER

class HiddenLayer{
public:
	HiddenLayer(int n_i, int n_o);
	~HiddenLayer();

	void forward_propagation(double* input_data);
	void back_propagation(double *input_data, double *next_layer_delta, double** next_layer_w, int next_layer_n_out, double lr, int N);

	//output of this layer's forward pass; it becomes the next layer's input
	double* output_data;
	//delta values needed during back-propagation
	double* delta;

public:
	int n_in;
	int n_out;
	double** w;
	double*b;
};

#endif

HiddenLayer.cpp

#include <cmath>
#include <cstdlib>
#include <ctime>
#include "HiddenLayer.h"

using namespace std;

double sigmoid(double x)
{
	return 1.0/(1.0+exp(-x));
}

double uniform(double _min, double _max)
{
	return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min;
}

HiddenLayer::HiddenLayer(int n_i, int n_o)
{
	n_in  = n_i;
	n_out = n_o;

	w = new double* [n_out];
	for(int i = 0; i < n_out; ++i)
	{
		w[i] = new double [n_in];
	}
	b = new double [n_out];

	double a = 1.0 / n_in;

	srand((unsigned)time(NULL));
	for(int i = 0; i < n_out; ++i)
	{
		for(int j = 0; j < n_in; ++j)
			w[i][j] = uniform(-a, a);
		b[i] = uniform(-a, a);
	}

	output_data = new double [n_out];
	delta = new double [n_out];//allocated here so back_propagation can fill it and the destructor can free it
}

HiddenLayer::~HiddenLayer()
{
	for(int i = 0; i < n_out; ++i) delete[] w[i];
	delete[] w;
	delete[] b;
	delete[] output_data;
	delete[] delta;
}
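
The forward_propagation and back_propagation bodies of HiddenLayer are not shown above; a minimal sketch of how they can be written, consistent with the declarations in HiddenLayer.h and with the conventions of the LogisticRegressionLayer code below (sigmoid activation, delta holding the negative gradient with respect to a unit's pre-activation, and the same lr/N update scaling), looks like this:

void HiddenLayer::forward_propagation(double* input_data)
{
	for(int i = 0; i < n_out; ++i)
	{
		output_data[i] = 0.0;
		for(int j = 0; j < n_in; ++j)
			output_data[i] += w[i][j] * input_data[j];
		output_data[i] = sigmoid(output_data[i] + b[i]);//weighted sum plus bias, squashed by the sigmoid
	}
}

void HiddenLayer::back_propagation(double* input_data, double* next_layer_delta,
	double** next_layer_w, int next_layer_n_out, double lr, int N)
{
	for(int i = 0; i < n_out; ++i)
	{
		//propagate the next layer's delta back through its weights,
		//then multiply by the sigmoid derivative f'(z) = o*(1-o)
		double sum = 0.0;
		for(int k = 0; k < next_layer_n_out; ++k)
			sum += next_layer_delta[k] * next_layer_w[k][i];
		delta[i] = sum * output_data[i] * (1.0 - output_data[i]);

		//same update rule as in the softmax layer
		for(int j = 0; j < n_in; ++j)
			w[i][j] += lr * delta[i] * input_data[j] / N;
		b[i] += lr * delta[i] / N;
	}
}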

LogisticRegressionLayer.h

#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER

class LogisticRegressionLayer{
public:
 LogisticRegressionLayer(int n_i, int n_o);
 ~LogisticRegressionLayer();

 void forward_propagation(double* input_data);
 void back_propagation(double* input_data, double* label, double lr, int N);
 void softmax(double* x);

 double cal_error(double* label);

 //output of this layer's forward pass; this is also the final prediction
 double* output_data;
 //delta values needed during back-propagation
 double* delta;

public:
 int n_in;
 int n_out;
 double** w;
 double* b;

};

#endif

LogisticRegressionLayer.cpp

#include <cmath>
#include <cstdlib>
#include <ctime>
#include "LogisticRegressionLayer.h"

using namespace std;

double uniform_2(double _min, double _max)
{
 return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min;
}

LogisticRegressionLayer::LogisticRegressionLayer(int n_i, int n_o)
{
 n_in = n_i;
 n_out = n_o;

 w = new double* [n_out];
 for(int i = 0; i < n_out; ++i)
 {
  w[i] = new double [n_in];
 }
 b = new double [n_out];

 double a = 1.0 / n_in;

 srand((unsigned)time(NULL));
 for(int i = 0; i < n_out; ++i)
 {
  for(int j = 0; j < n_in; ++j)
   w[i][j] = uniform_2(-a, a);
  b[i] = uniform_2(-a, a);
 }

 output_data = new double [n_out];
 delta = new double [n_out];//allocated once here rather than on every back_propagation call
}

LogisticRegressionLayer::~LogisticRegressionLayer()
{
 for(int i=0; i < n_out; i++) delete[] w[i];
 delete[] w; 
 delete[] b;
 delete[] output_data;
 delete[] delta;
}

void LogisticRegressionLayer::softmax(double* x)
{
 //subtract the maximum before exponentiating for numerical stability
 double _max = x[0];
 double _sum = 0.0;

 for(int i = 1; i < n_out; ++i)
 {
  if(_max < x[i])
   _max = x[i];
 }
 for(int i = 0; i < n_out; ++i)
 {
  x[i] = exp(x[i]-_max);
  _sum += x[i];
 }

 for(int i = 0; i < n_out; ++i)
 {
  x[i] /= _sum;
 }
}

void LogisticRegressionLayer::forward_propagation(double* input_data)
{
 for(int i = 0; i < n_out; ++i)
 {
  output_data[i] = 0.0;
  for(int j = 0; j < n_in; ++j)
  {
   output_data[i] += w[i][j]*input_data[j];
  }
  output_data[i] += b[i];
 }
 softmax(output_data);
}

void LogisticRegressionLayer::back_propagation(double* input_data, double* label, double lr, int N)
{
 for(int i = 0; i < n_out; ++i)
 {
  delta[i] = (label[i] - output_data[i]) * 1.0;//I had a small doubt here: delta should in principle be (y-o)*f'(x), but there is no f'(x) term
  for(int j = 0; j < n_in; ++j)
  {
   w[i][j] += lr * delta[i] * input_data[j] / N;
  }
  b[i] += lr * delta[i] / N;
 }
}
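
//Note on the doubt in the comment above: if the softmax output is trained with the
//cross-entropy loss E = -sum_i y_i*log(o_i), the derivative of E with respect to the
//pre-activation z_i works out to o_i - y_i, i.e. the f'(x) factor cancels, so
//delta[i] = label[i] - output_data[i] is already the complete (negative-gradient) delta.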

double LogisticRegressionLayer::cal_error(double* label)
{
 double error = 0.0;

 for(int i = 0; i < n_out; ++i)
 {
  error += fabs(label[i] - output_data[i])*fabs(label[i] - output_data[i]) / 2.0;//squared error, used only as a progress indicator
 }

 return error;
}
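
To make the post self-contained, here is a small, hypothetical usage sketch. The toy XOR-style data, the layer sizes and the learning rate below are made up purely for illustration; the one thing taken from the code above is the calling convention of train() and predict(), which read their double** arguments as contiguous blocks (see the casts inside train()), so static 2D arrays are passed with a cast.

main.cpp

#include "NeuralNetwork.h"

int main()
{
	//hypothetical toy problem: 4 samples, 2 inputs, 2 one-hot output classes (XOR)
	const int N = 4, n_in = 2, n_out = 2;
	double data[N][n_in]   = { {0,0}, {0,1}, {1,0}, {1,1} };
	double label[N][n_out] = { {1,0}, {0,1}, {0,1}, {1,0} };

	int hidden_layer_size[] = {4, 3};//two hidden layers with 4 and 3 units
	NeuralNetwork nn(N, n_in, n_out, 2, hidden_layer_size);

	//train() and predict() treat the arrays as contiguous memory, hence the casts
	nn.train((double**)data, (double**)label, 0.1, 1000);
	nn.predict((double**)data, N);

	return 0;
}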



 
