反向传播算法(Backpropagation)是一种适合于多层神经元网络的学习算法,通常用于训练大规模的深度学习网络。反向传播算法主要基于梯度下降法,其过程由前向传播、反向传播、权重更新这三步构成。
下面将结合代码,详细阐述反向传播算法在MLP中的应用过程。
该步骤代码与前面代码一致,不再重复给出。
该步骤代码与前面代码一致,不再重复给出。
首先我们需要确定网络的层数与节点数,以本文为例,MLP的层数为两层,隐藏层(第一层)节点数为8,输出层(第二层)节点数为3:
#define node1 8 //第一层节点数
#define node2 3 //第二层节点数
之后定义sigmoid函数及其导数:
//激活函数
double sigmoid(double x)
{
return 1.0 / (1.0 + exp(-x));
}
// Derivative of the logistic function, expressed in terms of its own
// output: if y = sigmoid(x), then sigmoid'(x) = y * (1 - y).
double dsigmoid(double y)
{
    double complement = 1.0 - y;
    return y * complement;
}
之后,根据层数和节点数初始化权重矩阵:
double W_1[node1][col]; //第一层权重
double W_2[node2][node1+1]; //第二层权重
// 初始化权重
for(j=0;j
将权重打印出来如下:
W_1[0][0] = 0.100000 W_1[0][1] = 0.100000 W_1[0][2] = 0.100000 W_1[0][3] = 0.100000 W_1[0][4] = 0.100000 W_1[0][5] = 0.100000 W_1[0][6] = 0.100000 W_1[0][7] = 0.100000
W_1[1][0] = 0.100000 W_1[1][1] = 0.100000 W_1[1][2] = 0.100000 W_1[1][3] = 0.100000 W_1[1][4] = 0.100000 W_1[1][5] = 0.100000 W_1[1][6] = 0.100000 W_1[1][7] = 0.100000
W_1[2][0] = 0.100000 W_1[2][1] = 0.100000 W_1[2][2] = 0.100000 W_1[2][3] = 0.100000 W_1[2][4] = 0.100000 W_1[2][5] = 0.100000 W_1[2][6] = 0.100000 W_1[2][7] = 0.100000
W_1[3][0] = 0.100000 W_1[3][1] = 0.100000 W_1[3][2] = 0.100000 W_1[3][3] = 0.100000 W_1[3][4] = 0.100000 W_1[3][5] = 0.100000 W_1[3][6] = 0.100000 W_1[3][7] = 0.100000
W_1[4][0] = 0.100000 W_1[4][1] = 0.100000 W_1[4][2] = 0.100000 W_1[4][3] = 0.100000 W_1[4][4] = 0.100000 W_1[4][5] = 0.100000 W_1[4][6] = 0.100000 W_1[4][7] = 0.100000
W_1[5][0] = 0.100000 W_1[5][1] = 0.100000 W_1[5][2] = 0.100000 W_1[5][3] = 0.100000 W_1[5][4] = 0.100000 W_1[5][5] = 0.100000 W_1[5][6] = 0.100000 W_1[5][7] = 0.100000
W_1[6][0] = 0.100000 W_1[6][1] = 0.100000 W_1[6][2] = 0.100000 W_1[6][3] = 0.100000 W_1[6][4] = 0.100000 W_1[6][5] = 0.100000 W_1[6][6] = 0.100000 W_1[6][7] = 0.100000
W_1[7][0] = 0.100000 W_1[7][1] = 0.100000 W_1[7][2] = 0.100000 W_1[7][3] = 0.100000 W_1[7][4] = 0.100000 W_1[7][5] = 0.100000 W_1[7][6] = 0.100000 W_1[7][7] = 0.100000
W_2[0][0] = 0.100000 W_2[0][1] = 0.100000 W_2[0][2] = 0.100000 W_2[0][3] = 0.100000 W_2[0][4] = 0.100000 W_2[0][5] = 0.100000 W_2[0][6] = 0.100000 W_2[0][7] = 0.100000 W_2[0][8] = 0.100000
W_2[1][0] = 0.100000 W_2[1][1] = 0.100000 W_2[1][2] = 0.100000 W_2[1][3] = 0.100000 W_2[1][4] = 0.100000 W_2[1][5] = 0.100000 W_2[1][6] = 0.100000 W_2[1][7] = 0.100000 W_2[1][8] = 0.100000
W_2[2][0] = 0.100000 W_2[2][1] = 0.100000 W_2[2][2] = 0.100000 W_2[2][3] = 0.100000 W_2[2][4] = 0.100000 W_2[2][5] = 0.100000 W_2[2][6] = 0.100000 W_2[2][7] = 0.100000 W_2[2][8] = 0.100000
权重初始化完成后,就可以开始进行训练。首先第一步是前向传播。
代码片段如下:
double layer1_out[node1]; //第一层节点输出值
double layer2_out[node2]; //第二层节点输出值
double train[7] = {
15.26,14.84,0.871,5.763,3.312,2.221,5.22};
for(j=0;j
经计算后,我们可以得到如下结果:
layer1[0] = 0.992222
layer1[1] = 0.992222
layer1[2] = 0.992222
layer1[3] = 0.992222
layer1[4] = 0.992222
layer1[5] = 0.992222
layer1[6] = 0.992222
layer1[7] = 0.992222
layer2[0] = 0.709669
layer2[1] = 0.709669
layer2[2] = 0.709669
前向传播完成后,就可以计算预测值和真实值的误差,然后进行反向传播,为之后的权值更新做准备。设
本文选取的例子为多分类问题,需要把真实值(1,2,3)转换成one-hot形式([1,0,0],[0,1,0],[0,0,1])。定义转换函数如下:
#define class_num 3 //种类数量
// Convert an integer class label y (1..class_num) into a one-hot vector of
// length class_num; the caller owns the returned malloc'd array.
// NOTE(review): this snippet is truncated in the extracted text — the loop
// body that fills one_hot[] and the return statement were lost when the
// '<' in the for-condition was stripped by the page extraction.
double *transfer_to_one_hot(int y){
double *one_hot = (double *)malloc(class_num*sizeof(double));
int i;
for(i=0;i
于是利用转换函数把原数据转换为one-hot形式,再计算误差进行反向传播,代码片段如下:
// 误差反向传播
int y = 1;
double *target = transfer_to_one_hot(y);
double layer2_delta[node2];
double layer1_delta[node1];
for(j=0;j
经过计算后,我们可以得到各个层的delta值,结果如下:
layer2_delta[0] = 0.059819
layer2_delta[1] = -0.146219
layer2_delta[2] = -0.146219
layer1_delta[0] = -0.000180
layer1_delta[1] = -0.000180
layer1_delta[2] = -0.000180
layer1_delta[3] = -0.000180
layer1_delta[4] = -0.000180
layer1_delta[5] = -0.000180
layer1_delta[6] = -0.000180
layer1_delta[7] = -0.000180
在得到各个神经元的差值(delta)后,就可以进行权重更新。
写成代码片段如下:
// 更新权重
double l_rate = 0.01;
for(j=0;j
把更新后的权重结果打印如下:
W_1[0][0] = 0.099973,W_1[0][1] = 0.099973,W_1[0][2] = 0.099998,W_1[0][3] = 0.099990,W_1[0][4] = 0.099994,W_1[0][5] = 0.099996,W_1[0][6] = 0.099991,W_1[0][8] = 0.099998
W_1[1][0] = 0.099971,W_1[1][1] = 0.099973,W_1[1][2] = 0.099998,W_1[1][3] = 0.099990,W_1[1][4] = 0.099994,W_1[1][5] = 0.099996,W_1[1][6] = 0.099991,W_1[1][8] = 0.099998
W_1[2][0] = 0.099971,W_1[2][1] = 0.099973,W_1[2][2] = 0.099998,W_1[2][3] = 0.099990,W_1[2][4] = 0.099994,W_1[2][5] = 0.099996,W_1[2][6] = 0.099991,W_1[2][8] = 0.099998
W_1[3][0] = 0.099971,W_1[3][1] = 0.099973,W_1[3][2] = 0.099998,W_1[3][3] = 0.099990,W_1[3][4] = 0.099994,W_1[3][5] = 0.099996,W_1[3][6] = 0.099991,W_1[3][8] = 0.099998
W_1[4][0] = 0.099971,W_1[4][1] = 0.099973,W_1[4][2] = 0.099998,W_1[4][3] = 0.099990,W_1[4][4] = 0.099994,W_1[4][5] = 0.099996,W_1[4][6] = 0.099991,W_1[4][8] = 0.099998
W_1[5][0] = 0.099971,W_1[5][1] = 0.099973,W_1[5][2] = 0.099998,W_1[5][3] = 0.099990,W_1[5][4] = 0.099994,W_1[5][5] = 0.099996,W_1[5][6] = 0.099991,W_1[5][8] = 0.099998
W_1[6][0] = 0.099971,W_1[6][1] = 0.099973,W_1[6][2] = 0.099998,W_1[6][3] = 0.099990,W_1[6][4] = 0.099994,W_1[6][5] = 0.099996,W_1[6][6] = 0.099991,W_1[6][8] = 0.099998
W_1[7][0] = 0.099971,W_1[7][1] = 0.099973,W_1[7][2] = 0.099998,W_1[7][3] = 0.099990,W_1[7][4] = 0.099994,W_1[7][5] = 0.099996,W_1[7][6] = 0.099991,W_1[7][8] = -0.000002
W_2[0][0] = 0.100594,W_2[0][1] = 0.100594,W_2[0][2] = 0.100594,W_2[0][3] = 0.100594,W_2[0][4] = 0.100594,W_2[0][5] = 0.100594,W_2[0][6] = 0.100594,W_2[0][7] = 0.100594,W_2[0][8] = 0.100060,W_2[0][8] = 0.100658
W_2[1][0] = 0.098549,W_2[1][1] = 0.098549,W_2[1][2] = 0.098549,W_2[1][3] = 0.098549,W_2[1][4] = 0.098549,W_2[1][5] = 0.098549,W_2[1][6] = 0.098549,W_2[1][7] = 0.098549,W_2[1][8] = 0.099853,W_2[1][8] = 0.098391
W_2[2][0] = 0.098549,W_2[2][1] = 0.098549,W_2[2][2] = 0.098549,W_2[2][3] = 0.098549,W_2[2][4] = 0.098549,W_2[2][5] = 0.098549,W_2[2][6] = 0.098549,W_2[2][7] = 0.098549,W_2[2][8] = 0.099853,W_2[2][8] = 0.098391
训练完成后,就可以利用训练好的权重矩阵进行预测。其过程和前向传播大致相同。代码如下:
// 预测
double *predictions = (double *)malloc(test_size*sizeof(double));
for(i=0;i0){
if(out2[j]>max){
predictions[i] = j+1;
max = out2[j];
}
}else{
predictions[i] = 1;
max = out2[j];
}
}
}
该步骤代码与前面代码一致,不再重复给出。
#include
#include
extern double ***cross_validation_split(double **dataset, int row, int n_folds, int fold_size,int col);
extern double* get_test_prediction(double **train, double **test, double l_rate, int n_epoch, int train_size,int test_size,int col);
extern double accuracy_metric(double *actual, double *predicted, int fold_size);
// Run n-fold cross-validation over `dataset` (row x col): for each fold i,
// train on the remaining n_folds-1 folds, predict on fold i, and collect a
// per-fold accuracy in `score`.
// NOTE(review): this block is truncated in the extracted text — the final
// loop at the bottom ("for(l=0;l") is cut off, so the code that fills
// predicted/actual, computes score[i], and frees the copies is missing.
double* evaluate_algorithm(double **dataset, int n_folds, int fold_size, double l_rate, int n_epoch,int col,int row)
{
double*** split = cross_validation_split(dataset, row, n_folds, fold_size,col);
int i, j, k, l;
int test_size = fold_size;
int train_size = fold_size * (n_folds - 1);//train_size one-dimensional row arrays
double* score = (double*)malloc(n_folds * sizeof(double));
for (i = 0; i < n_folds; i++)
{
//deep-copy split, because fold i will be removed from the copy below
double*** split_copy = (double***)malloc(n_folds * sizeof(double**));
for (j = 0; j < n_folds; j++) {
split_copy[j] = (double**)malloc(fold_size * sizeof(double*));
for (k = 0; k < fold_size; k++) {
split_copy[j][k] = (double*)malloc(col * sizeof(double));
}
}
for (j = 0; j < n_folds; j++)
{
for (k = 0; k < fold_size; k++)
{
for (l = 0; l < col; l++)
{
split_copy[j][k][l] = split[j][k][l];
}
}
}
// fold i becomes the test set (row data copied out of split_copy)
double** test_set = (double**)malloc(test_size * sizeof(double*));
for (j = 0; j < test_size; j++) {
//for each row in the test fold
test_set[j] = (double*)malloc(col * sizeof(double));
for (k = 0; k < col; k++) {
test_set[j][k] = split_copy[i][j][k];
}
}
// shift the remaining folds down over the test fold
for (j = i; j < n_folds - 1; j++) {
split_copy[j] = split_copy[j + 1];
}
double** train_set = (double**)malloc(train_size * sizeof(double*));
for (k = 0; k < n_folds - 1; k++) {
for (l = 0; l < fold_size; l++) {
// NOTE(review): the malloc on the next line is immediately overwritten
// by the aliasing assignment that follows — the allocation is leaked.
train_set[k*fold_size + l] = (double*)malloc(col * sizeof(double));
train_set[k*fold_size + l] = split_copy[k][l];
}
}
double *predicted_2;
predicted_2 = get_test_prediction(train_set, test_set, l_rate, n_epoch, train_size,test_size,col);
double predicted[test_size];
double* actual = (double*)malloc(test_size * sizeof(double));
// NOTE(review): truncated here by the extraction — the scoring loop that
// compares predicted_2 against the label column is missing.
for(l=0;l
本节以小麦种子数据集为例,使用反向传播算法,预测小麦种子类别。下面给出主函数以及训练部分的代码:
main.c:
#include
#include
#include
#include
extern int get_row(char *filename);
extern int get_col(char *filename);
extern void get_two_dimension(char *line, double **dataset, char *filename);
// Entry point: load the wheat-seeds CSV into a dynamically allocated
// row x col matrix of doubles, then evaluate the MLP with 4-fold
// cross-validation.
// Fixes vs. original: hosted main must return int (C11 5.1.2.2.1); the
// dataset row-pointer array was allocated with sizeof(int *) instead of
// sizeof(double *); the printf format strings had lost their '\n'; the
// dataset is now freed before exit.
int main(void){
    char filename[] = "seeds_data.csv";
    char line[1024];
    int row = get_row(filename);
    int col = get_col(filename);
    printf("row = %d\n", row);
    printf("col = %d\n", col);
    // dynamically allocate the row x col two-dimensional dataset
    double **dataset = (double **)malloc(row * sizeof(double *));
    int i;
    for (i = 0; i < row; ++i){
        dataset[i] = (double *)malloc(col * sizeof(double));
    }
    get_two_dimension(line, dataset, filename);
    double l_rate = 0.1;   // learning rate
    int n_epoch = 100;     // training epochs
    int n_folds = 4;       // cross-validation folds
    int fold_size = row / n_folds;  // rows per fold (integer division)
    evaluate_algorithm(dataset, n_folds, fold_size, l_rate, n_epoch, col, row);
    // release the dataset
    for (i = 0; i < row; ++i){
        free(dataset[i]);
    }
    free(dataset);
    return 0;
}
test_prediction.c:
#define randval(high) ( (double)rand() / RAND_MAX * high )
#define uniform_plus_minus_one ( (double)( 2.0 * rand() ) / ((double)RAND_MAX + 1.0) - 1.0 ) //均匀随机分布
#define node1 12 //第一层节点数
#define node2 3 //第二层节点数
#define class_num 3 //种类数量
#include "math.h"
#include "stdlib.h"
#include "time.h"
#include "assert.h"
#include "string.h"
#include "stdio.h"
//激活函数
double sigmoid(double x)
{
return 1.0 / (1.0 + exp(-x));
}
/* Sigmoid derivative given the activation value y (not the raw input):
 * d/dx sigmoid(x) = y * (1 - y) where y = sigmoid(x). */
double dsigmoid(double y)
{
    double one_minus_y = 1.0 - y;
    return y * one_minus_y;
}
// One-hot encoder for class labels 1..class_num; returns a malloc'd array.
// NOTE(review): the extracted text is badly garbled from here on — the
// body of transfer_to_one_hot and the head of get_test_prediction's
// argmax loop (including the declarations of out2, max, predictions, i, j
// and test_size) were lost when '<' characters were stripped. What
// follows the broken for-line is the surviving tail of the prediction
// loop: it picks the output-layer index with the largest activation and
// stores class index j+1 as the prediction.
double *transfer_to_one_hot(int y){
double *one_hot = (double *)malloc(class_num*sizeof(double));
int i;
for(i=0;i0){
if(out2[j]>max){
predictions[i] = j+1;
max = out2[j];
}
}else{
predictions[i] = 1;
max = out2[j];
}
}
}
return predictions;
}
运算后得到的结果如下:
row = 199
col = 8
score[0]=91.836735
score[1]=87.755102
score[2]=93.877551
score[3]=89.795918
mean_accuracy=90.816327
B站视频链接:
C语言实现机器学习系列教程 - 哔哩哔哩(www.bilibili.com)
github链接:
Gao-Jianxiong-SDUWH/C-machine-learning(github.com)