Last semester I spent most of my time reading up on deep learning theory, with a particular focus on convolutional neural networks. Now that summer vacation is here and time is less tight, I'm writing some code to consolidate what I studied.
As an appetizer, over the last couple of days I wrote an extensible multi-layer neural network. There is nothing new in it: no RBM, no convolution, just an ordinary feed-forward network. Once this is done, I plan to move on to a convolutional neural network.
The program is based on the back-propagation algorithm. If you are not familiar with it, have a look at the back-propagation section of the UFLDL tutorial; the code below follows that write-up. The implementation is fairly bare-bones and only puts up the basic BP framework, which should make it easier for beginners to follow.
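For reference, the update rules implemented below are the standard ones from that tutorial. Writing a^{(l)} for the activations of layer l and using the (y - o) sign convention that appears in the code, the deltas and the per-sample updates look roughly like this (the output-layer delta already assumes a softmax output paired with a cross-entropy style cost, so no extra derivative factor appears; see the UFLDL notes for the full derivation):

\[
\delta^{(L)} = y - o, \qquad
\delta^{(l)} = \big( (W^{(l+1)})^{T} \delta^{(l+1)} \big) \odot a^{(l)} \odot (1 - a^{(l)}),
\]
\[
W^{(l)} \leftarrow W^{(l)} + \frac{\eta}{N}\, \delta^{(l)} \big(a^{(l-1)}\big)^{T}, \qquad
b^{(l)} \leftarrow b^{(l)} + \frac{\eta}{N}\, \delta^{(l)}.
\]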
The program consists of three parts. NeuralNetwork determines the structure of the whole network and handles training and testing. HiddenLayer is, as the name suggests, a hidden layer: each object is one layer, so several of them can be stacked to build a deep network. The last part, LogisticRegressionLayer, is the final layer of the network; it uses the common softmax formulation to compute the error.
NeuralNetwork.h
#include "HiddenLayer.h"
#include "LogisticRegressionLayer.h"
class NeuralNetwork
{
public:
NeuralNetwork(int n, int n_i, int n_o, int nhl, int*hls);
~NeuralNetwork();
void train(double** in_data, double** in_label, double lr, int epochs);
void predict(double** in_data, int n);
private:
int N; //number of training samples
int n_in; //input dimension
int n_out; //output dimension
int n_hidden_layer; //number of hidden layers
int* hidden_layer_size; //sizes of the hidden layers, e.g. {3,4} means two hidden layers with 3 and 4 nodes
HiddenLayer **sigmoid_layers;
LogisticRegressionLayer *log_layer;
};
NeuralNetwork.cpp
#include <iostream>
#include "NeuralNetwork.h"
using namespace std;
NeuralNetwork::NeuralNetwork(int n, int n_i, int n_o, int nhl, int *hls)
{
N = n;
n_in = n_i;
n_out = n_o;
n_hidden_layer = nhl;
hidden_layer_size = new int [n_hidden_layer];
for(int i = 0; i < n_hidden_layer; ++i)
hidden_layer_size[i] = hls[i]; //copy the layer sizes so the network owns its own array
//build the network structure
sigmoid_layers = new HiddenLayer* [n_hidden_layer];
for(int i = 0; i < n_hidden_layer; ++i)
{
if(i == 0)
{
sigmoid_layers[i] = new HiddenLayer(n_in, hidden_layer_size[i]);//the first hidden layer reads the raw input
}
else
{
sigmoid_layers[i] = new HiddenLayer(hidden_layer_size[i-1], hidden_layer_size[i]);//later hidden layers read the previous layer's output
}
}
log_layer = new LogisticRegressionLayer(hidden_layer_size[n_hidden_layer-1], n_out);//the final softmax layer
}
NeuralNetwork::~NeuralNetwork()
{
for(int i = 0; i < n_hidden_layer; ++i)delete sigmoid_layers[i];
delete[] sigmoid_layers;
delete log_layer; //single object, so plain delete rather than delete[]
delete[] hidden_layer_size;
}
void NeuralNetwork::train(double** in_data, double** in_label, double lr, int epochs)
{
//copy the input into a locally owned 2D array (in_data is assumed to point to a contiguous block, e.g. a static 2D array cast to double**)
double** data = new double* [N];
for(int i = 0; i < N; ++i)
data[i] = new double [n_in];
for(int i = 0; i < N; ++i)
for(int j = 0; j < n_in; ++j)
data[i][j] = *((double*)in_data+i*n_in+j);
//same copy for the labels
double** label = new double* [N];
for(int i = 0; i < N; ++i)
label[i] = new double [n_out];
for(int i = 0; i < N; ++i)
for(int j = 0; j < n_out; ++j)
label[i][j] = *((double*)in_label+i*n_out+j);
//iterate over the whole training set for epochs passes
for(int epoch = 0; epoch < epochs; ++epoch)
{
double e = 0.0;
for(int i = 0; i < N; ++i)
{
//forward pass
for(int n = 0; n < n_hidden_layer; ++ n)
{
if(n == 0) //the first hidden layer reads the raw sample
{
sigmoid_layers[n]->forward_propagation(data[i]);
}
else //later hidden layers read the previous layer's output
{
sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
}
}
//the softmax layer reads the last hidden layer's output
log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);
e += log_layer->cal_error(label[i]);
//backward pass
log_layer->back_propagation(sigmoid_layers[n_hidden_layer-1]->output_data, label[i], lr, N);
for(int n = n_hidden_layer-1; n >= 0; --n)
{
//input to layer n: the raw sample for the first hidden layer, otherwise the previous layer's output
double* layer_input = (n == 0) ? data[i] : sigmoid_layers[n-1]->output_data;
if(n == n_hidden_layer-1) //the layer just below the softmax layer uses the softmax layer's delta and weights
{
sigmoid_layers[n]->back_propagation(layer_input, log_layer->delta, log_layer->w, log_layer->n_out, lr, N);
}
else //other hidden layers use the next hidden layer's delta and weights
{
sigmoid_layers[n]->back_propagation(layer_input, sigmoid_layers[n+1]->delta, sigmoid_layers[n+1]->w, sigmoid_layers[n+1]->n_out, lr, N);
}
}
}
cout << "epochs number is " << epoch << " cost function is " << e / (double)N << endl;
}
for(int i = 0; i < N; ++i) delete[] data[i];
delete[] data;
for(int i = 0; i < N; ++i) delete[] label[i];
delete[] label;
}
void NeuralNetwork::predict(double** data, int n)
{
double** d = new double* [n];
for(int i = 0; i < n; ++i)
d[i] = new double [n_in];
for(int i = 0; i < n; ++i)
for(int j = 0; j < n_in; ++j)
d[i][j] = *((double*)data+i*n_in+j);
for(int i = 0; i < n; ++i)
{
for(int n = 0; n < n_hidden_layer; ++ n)
{
if(n == 0) //the first hidden layer reads the raw sample
{
sigmoid_layers[n]->forward_propagation(d[i]);
}
else //later hidden layers read the previous layer's output
{
sigmoid_layers[n]->forward_propagation(sigmoid_layers[n-1]->output_data);
}
}
//the softmax layer reads the last hidden layer's output
log_layer->forward_propagation(sigmoid_layers[n_hidden_layer-1]->output_data);
for(int j = 0; j < n_out; ++j)
{
cout << log_layer->output_data[j] << " ";
}
cout << endl;
}
for(int i = 0; i < n; ++i) delete[] d[i];
delete[] d;
}
HiddenLayer.h
#ifndef HIDDENLAYER
#define HIDDENLAYER
class HiddenLayer{
public:
HiddenLayer(int n_i, int n_o);
~HiddenLayer();
void forward_propagation(double* input_data);
void back_propagation(double *input_data, double *next_layer_delta, double** next_layer_w, int next_layer_n_out, double lr, int N);
//output of this layer's forward pass, used as the next layer's input
double* output_data;
//delta values needed during back-propagation
double* delta;
public:
int n_in;
int n_out;
double** w;
double* b;
};
#endif
HiddenLayer.cpp
#include <cmath>
#include <cstdlib>
#include <ctime>
#include "HiddenLayer.h"
using namespace std;
double sigmoid(double x)
{
return 1.0/(1.0+exp(-x));
}
double uniform(double _min, double _max)
{
return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min;
}
HiddenLayer::HiddenLayer(int n_i, int n_o)
{
n_in = n_i;
n_out = n_o;
w = new double* [n_out];
for(int i = 0; i < n_out; ++i)
{
w[i] = new double [n_in];
}
b = new double [n_out];
double a = 1.0 / n_in;
srand((unsigned)time(NULL));
for(int i = 0; i < n_out; ++i)
{
for(int j = 0; j < n_in; ++j)
w[i][j] = uniform(-a, a);
b[i] = uniform(-a, a);
}
output_data = new double [n_out];
delta = new double [n_out]; //allocated once here; back_propagation fills it in on every pass
}
HiddenLayer::~HiddenLayer()
{
for(int i = 0; i < n_out; ++i) delete[] w[i];
delete[] w;
delete[] b;
delete[] output_data;
delete[] delta;
}
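The forward_propagation and back_propagation bodies of HiddenLayer are not shown above, so here is a minimal sketch of what they could look like, written to match how NeuralNetwork.cpp calls them: the hidden-layer delta uses the sigmoid derivative a*(1-a), and the update follows the same w += lr * delta * input / N convention as the softmax layer below. Treat it as one possible implementation, not the original one.

void HiddenLayer::forward_propagation(double* input_data)
{
    for(int i = 0; i < n_out; ++i)
    {
        double sum = 0.0;
        for(int j = 0; j < n_in; ++j)
            sum += w[i][j] * input_data[j];
        output_data[i] = sigmoid(sum + b[i]); //sigmoid activation of the weighted sum plus bias
    }
}

void HiddenLayer::back_propagation(double* input_data, double* next_layer_delta,
                                   double** next_layer_w, int next_layer_n_out, double lr, int N)
{
    for(int i = 0; i < n_out; ++i)
    {
        //delta_i = (sum_k w[k][i] * delta_k of the next layer) * a_i * (1 - a_i)
        double sum = 0.0;
        for(int k = 0; k < next_layer_n_out; ++k)
            sum += next_layer_w[k][i] * next_layer_delta[k];
        delta[i] = sum * output_data[i] * (1.0 - output_data[i]);
        //same update convention as the softmax layer
        for(int j = 0; j < n_in; ++j)
            w[i][j] += lr * delta[i] * input_data[j] / N;
        b[i] += lr * delta[i] / N;
    }
}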
LogisticRegressionLayer.h
#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER
class LogisticRegressionLayer{
public:
LogisticRegressionLayer(int n_i, int n_o);
~LogisticRegressionLayer();
void forward_propagation(double* input_data);
void back_propagation(double* input_data, double* label, double lr, int N);
void softmax(double* x);
double cal_error(double* label);
//output of this layer's forward pass, which is also the final prediction
double* output_data;
//delta values needed during back-propagation
double* delta;
public:
int n_in;
int n_out;
double** w;
double* b;
};
#endif
LogisticRegressionLayer.cpp
#include <cmath>
#include <cstdlib>
#include <ctime>
#include "LogisticRegressionLayer.h"
using namespace std;
double uniform_2(double _min, double _max)
{
return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min;
}
LogisticRegressionLayer::LogisticRegressionLayer(int n_i, int n_o)
{
n_in = n_i;
n_out = n_o;
w = new double* [n_out];
for(int i = 0; i < n_out; ++i)
{
w[i] = new double [n_in];
}
b = new double [n_out];
double a = 1.0 / n_in;
srand((unsigned)time(NULL));
for(int i = 0; i < n_out; ++i)
{
for(int j = 0; j < n_in; ++j)
w[i][j] = uniform_2(-a, a);
b[i] = uniform_2(-a, a);
}
output_data = new double [n_out];
delta = new double [n_out]; //allocated once here so back_propagation does not have to reallocate it
}
LogisticRegressionLayer::~LogisticRegressionLayer()
{
for(int i = 0; i < n_out; i++) delete[] w[i];
delete[] w;
delete[] b;
delete[] output_data;
delete[] delta;
}
void LogisticRegressionLayer::softmax(double* x)
{
double _max = x[0]; //subtract the max before exponentiating, for numerical stability
double _sum = 0.0;
for(int i = 1; i < n_out; ++i)
{
if(_max < x[i])
_max = x[i];
}
for(int i = 0; i < n_out; ++i)
{
x[i] = exp(x[i]-_max);
_sum += x[i];
}
for(int i = 0; i < n_out; ++i)
{
x[i] /= _sum;
}
}
void LogisticRegressionLayer::forward_propagation(double* input_data)
{
for(int i = 0; i < n_out; ++i)
{
output_data[i] = 0.0;
for(int j = 0; j < n_in; ++j)
{
output_data[i] += w[i][j]*input_data[j];
}
output_data[i] += b[i];
}
softmax(output_data);
}
void LogisticRegressionLayer::back_propagation(double* input_data, double* label, double lr, int N)
{
for(int i = 0; i < n_out; ++i)
{
delta[i] = (label[i] - output_data[i]) * 1.0; //one might expect (y - o) * f'(x) here, but for a softmax output with a cross-entropy cost the f'(x) factor cancels, leaving exactly (y - o)
for(int j = 0; j < n_in; ++j)
{
w[i][j] += lr * delta[i] * input_data[j] / N;
}
b[i] += lr * delta[i] / N;
}
}
double LogisticRegressionLayer::cal_error(double* label)
{
double error = 0.0;
for(int i = 0; i < n_out; ++i)
{
error += (label[i] - output_data[i]) * (label[i] - output_data[i]) / 2.0; //squared-error cost, used only to monitor training progress
}
return error;
}
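To close, here is a quick sketch of how the three pieces fit together. This main.cpp is not part of the program above: the toy data, the one-hot labels, and the hyper-parameters (two hidden layers of sizes 3 and 4, learning rate 0.1, 1000 epochs) are made-up values for illustration, and the (double**) casts rely on the same contiguous-2D-array convention that train() and predict() assume. Compile it together with NeuralNetwork.cpp, HiddenLayer.cpp and LogisticRegressionLayer.cpp.

//main.cpp (illustrative only)
#include "NeuralNetwork.h"

int main()
{
    const int N = 4, n_in = 2, n_out = 2;
    //a tiny XOR-like toy set: each row is one sample, each label row is a one-hot vector
    double data[N][n_in]   = { {0,0}, {0,1}, {1,0}, {1,1} };
    double label[N][n_out] = { {1,0}, {0,1}, {0,1}, {1,0} };
    int hls[] = {3, 4}; //two hidden layers with 3 and 4 nodes
    NeuralNetwork nn(N, n_in, n_out, 2, hls);
    nn.train((double**)data, (double**)label, 0.1, 1000);
    nn.predict((double**)data, N); //prints the softmax output for each sample
    return 0;
}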