简单的一元线性回归算法已经在这里“C语言简单的一元线性回归算法”,并且也简单阐述了梯度求解推导过程。
今天我们再呈上多元线性回归算法梯度下降的C语言实现,代码中已经加入了相应的注释。如下:
MultipleLinearRegression.h
#ifndef MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H
#define MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H

/* Number of training samples. */
#define SAMPLE_COUNT 10
/* Number of model parameters (bias term included). */
#define PARAMETER_COUNT 6

/* Store the hyper-parameters and allocate the global prediction buffer.
 * learning_rate: gradient-descent step size; X_Len: sample count;
 * X_arg_count: parameter count (bias included); channel: unused for now. */
void init(double learning_rate, long int X_Len, long int X_arg_count, int channel);

/* Run batch gradient descent, updating theta in place.
 * NOTE: train_y holds one target per sample, hence SAMPLE_COUNT
 * (the original prototype mis-declared it as PARAMETER_COUNT). */
void fit(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
         double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Hypothesis function: h(x) = sum_i theta[i] * x[i]. */
double _f(const double train_x[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Predict all samples; returns the global prediction buffer (owned by init). */
double* predict(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

/* Mean squared error over all samples; also stores the result in *loss_val.
 * (Third parameter renamed theta to match the definition.) */
double loss(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
            double theta[PARAMETER_COUNT], double *loss_val);

/* Compute the batch gradient of the loss and apply one descent step. */
void calc_gradient(double train_x[SAMPLE_COUNT][PARAMETER_COUNT], double train_y[SAMPLE_COUNT],
                   double *temp, double *theta);

/* Apply one update: theta[i] -= temp[i]. */
void train_step(double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]);

#endif /* MULTIPLELINEARREGRESSION_MULTIPLELINEARREGRESSION_H */
MultipleLinearRegression.c
#include "MultipleLinearRegression.h"
#include <stdio.h>
#include <stdlib.h>
/* ---- Training configuration (set by init()) --------------------------- */
int g_X_Len = 10;              /* number of training samples               */
double g_learning_rate = 0.01; /* gradient-descent step size               */
int g_X_arg_count = 5;         /* number of parameters (bias included)     */
int g_channel = 1;             /* channel count (not used yet)             */
/* Scratch value written by _f() with the latest prediction. */
double g_out_Y_pt = 0;
/* Prediction buffer: allocated by init(), filled by predict(). */
double *y_pred_pt = 0;
/* Most recent loss value. */
double loss_val[1] = {1.0};
/*
 * Store the hyper-parameters in the module globals and allocate the
 * prediction buffer.
 *
 * learning_rate  gradient-descent step size
 * X_Len          number of training samples
 * X_arg_count    number of parameters (including the bias term)
 * channel        channel count (not considered yet)
 */
void init(double learning_rate, long int X_Len,long int X_arg_count,int channel){
    g_learning_rate = learning_rate;
    g_X_Len = X_Len;
    g_X_arg_count = X_arg_count;
    g_channel = channel;
    /* BUG FIX: the original allocated X_Len *bytes* (malloc((size_t)X_Len)),
     * but predict() writes X_Len doubles into this buffer — heap overflow.
     * Also free any previous buffer so re-init does not leak
     * (free(NULL) is a no-op). */
    free(y_pred_pt);
    y_pred_pt = malloc((size_t) X_Len * sizeof *y_pred_pt);
    if (y_pred_pt == NULL) {
        fprintf(stderr, "init: out of memory\n");
        exit(EXIT_FAILURE);
    }
}
/*
 * Run batch gradient descent for a fixed 10000 iterations, updating
 * theta in place and recomputing the loss after every step.
 * NOTE: train_y has one target per sample, so its extent is SAMPLE_COUNT
 * (the original signature mis-declared it as PARAMETER_COUNT; both adjust
 * to double*, so callers are unaffected).
 */
void fit(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[SAMPLE_COUNT],
         double temp[PARAMETER_COUNT],double theta[PARAMETER_COUNT]){
    for (int i = 0; i < 10000; ++i) {
        printf("step %d: \n", i);
        calc_gradient(train_x,train_y,temp,theta);
        loss_val[0] = loss(train_x,train_y,theta,loss_val);
    }
}
/* PARAMETER_COUNT normally comes from MultipleLinearRegression.h. */
#ifndef PARAMETER_COUNT
#define PARAMETER_COUNT 6
#endif
/*
 * Hypothesis function: h(x) = sum_i theta[i] * x[i]
 * (train_x[0] is the constant 1 multiplying the bias theta[0]).
 *
 * BUG FIX: the accumulator was initialised to -1 instead of 0, biasing
 * every prediction by -1. This is why the article's final loss settled
 * near 0.9 instead of ~0: each residual carried a constant -1 error.
 * A local accumulator replaces the global scratch g_out_Y_pt, which no
 * other function reads.
 */
double _f(const double train_x[PARAMETER_COUNT],double theta[PARAMETER_COUNT]){
    double y = 0.0;
    for (int i = 0; i < PARAMETER_COUNT; ++i) {
        y += theta[i]*train_x[i];
    }
    return y;
}
/* Evaluate the hypothesis on every sample, filling the global buffer
 * y_pred_pt (allocated by init()); returns that same buffer. */
double* predict(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double theta[PARAMETER_COUNT]){
    int sample = 0;
    while (sample < SAMPLE_COUNT) {
        y_pred_pt[sample] = _f(train_x[sample], theta);
        ++sample;
    }
    return y_pred_pt;
}
/*
 * Mean squared error over all samples:
 *   loss = (1/N) * sum_j (y_j - h(x_j))^2
 * Refreshes y_pred_pt via predict(), stores the result in *loss_val,
 * prints it, and returns it.
 * NOTE: train_y holds SAMPLE_COUNT targets (original mis-declared extent).
 */
double loss(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[SAMPLE_COUNT],
            double theta[PARAMETER_COUNT],double *loss_val){
    predict(train_x,theta);
    /* BUG FIX: the accumulator was initialised to -1, which subtracted a
     * constant 1 from the sum of squared residuals and skewed the
     * reported loss. A sum must start at 0. */
    loss_val[0] = 0.0;
    for (int i = 0; i < SAMPLE_COUNT; i++) {
        double residual = train_y[i] - y_pred_pt[i];
        loss_val[0] += residual * residual;
    }
    loss_val[0] = loss_val[0] / SAMPLE_COUNT;
    printf(" loss_val = %f\n", loss_val[0]);
    return loss_val[0];
}
//求梯度
void calc_gradient(double train_x[SAMPLE_COUNT][PARAMETER_COUNT],double train_y[PARAMETER_COUNT],
double temp[PARAMETER_COUNT], double theta[PARAMETER_COUNT]) {
for (int i = 0; i < g_X_arg_count -1; i++) {
double sum = 0;
for (int j = 0; j < g_X_Len; j++) {
double hx = 0;
for (int k = 0; k < g_X_arg_count; k++) {
hx += theta[k] * train_x[j][k];
}
sum += (hx - train_y[j]) * train_x[j][i];
}
temp[i] = sum / g_X_Len * 0.01;
}
printf("\n--------------------\n");
train_step(temp, theta);
}
//更新参数值
void train_step(double temp[PARAMETER_COUNT],double theta[PARAMETER_COUNT]) {
for (int i = 0; i < g_X_arg_count - 1; i++) {
theta[i] = theta[i] - temp[i];
printf(" theta[%d] = %f\n",i, theta[i]);
}
}
main.c
//#include "src/LinerRegression.h"
#include "utils/util.h"
#include "src/MultipleLinearRegression.h"
int main() {
    /* Training set: 10 samples x 6 columns. Column 0 is the constant 1
     * that multiplies the bias term b. The targets were generated
     * noise-free from: Y = 4*X1 + 9*X2 + 10*X3 + 2*X4 + 1*X5 + 6 */
    double train_x[10][6] = {{1, 7.41, 3.98, 8.34, 8.0, 0.95},
                             {1, 6.26, 5.12, 9.57, 0.3, 7.79},
                             {1, 1.52, 1.95, 4.01, 7.96, 2.19},
                             {1, 1.91, 8.58, 6.64, 2.99, 2.18},
                             {1, 2.2, 6.88, 0.88, 0.5, 9.74},
                             {1, 5.17, 0.14, 4.09, 9.0, 2.63},
                             {1, 9.13, 5.54, 6.36, 9.98, 5.27},
                             {1, 1.17, 4.67, 9.02, 5.14, 3.46},
                             {1, 3.97, 6.72, 6.12, 9.42, 1.43},
                             {1, 0.27, 3.16, 7.07, 0.28, 1.77}};
    double train_y[10] = {171.81, 181.21, 87.84, 165.42, 96.26, 89.47, 181.21, 156.65, 163.83, 108.55};
    /* gradient is scratch space for the per-step update;
     * theta holds the model parameters being trained. */
    double gradient[6] = {1.0,1.0,1.0,1.0,1.0,1.0};
    double theta[6] = {1.0,1.0,1.0,1.0,1.0,1.0};
    /* learning rate 0.01, 10 samples, 5 features + 1 bias column, 1 channel */
    init(0.01, 10, 5 + 1, 1);
    fit(train_x, train_y, gradient, theta);
    return 0;
}
可以看到在训练10000次的时候损失已经比较小了,各个参数也已经接近我们预设的参数了
step 9999:
--------------------
theta[0] = 5.994281
theta[1] = 3.999957
theta[2] = 9.000404
theta[3] = 10.000375
theta[4] = 2.000242
loss_val = 0.900745
Process finished with exit code 0
这只是一个C语言的简单实现,学习率也设定的是固定值,训练次数也设定为固定值。如果各位大侠有其他比较好的实现方式欢迎留言推荐。另外由于很久很久不用C语言开发了,肯定会有语法的不完美。如有更好的建议或者其他疑问欢迎交流,小弟恭候。