Multivariate Linear Regression via Gradient Descent in C++

  • Linear Regression
  • Loss Function
  • Gradient Descent
    • Batch Gradient Descent
    • Stochastic Gradient Descent (SGD)
  • Code Snippets

Linear Regression

Assume the linear regression hypothesis takes the form:

$h(x) = h_\theta(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_n x_n$

Loss Function

The loss function is defined as:
$J(\theta) = \frac{1}{2}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)^2$
where $m$ is the number of training samples and $x^{(i)}$, $y^{(i)}$ denote the $i$-th sample and its label. The goal is to find the $\theta$ that minimizes $J(\theta)$.

Gradient Descent

Batch Gradient Descent

The update rule for a single sample:
$\theta_j := \theta_j + \alpha\bigl(y^{(i)} - h_\theta(x^{(i)})\bigr)x_j^{(i)}$
Extended to all samples:
Repeat until convergence {
$\theta_j := \theta_j + \alpha\sum_{i=1}^{m}\bigl(y^{(i)} - h_\theta(x^{(i)})\bigr)x_j^{(i)}$
}
$\alpha$ is the learning rate (step size); it controls how far $\theta$ moves at each iteration in the direction that decreases $J(\theta)$.
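This update rule is just gradient descent on $J(\theta)$. Differentiating the loss with respect to $\theta_j$ gives

$\frac{\partial J(\theta)}{\partial \theta_j} = \sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)x_j^{(i)}$

so stepping against the gradient, $\theta_j := \theta_j - \alpha\,\frac{\partial J(\theta)}{\partial \theta_j}$, yields exactly the rule above.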

Because every step is computed over the entire training set, this variant is called batch gradient descent.
Note that gradient descent can in general converge to a local optimum, but it is a known result in optimization that linear regression has only one optimum: the loss function $J(\theta)$ is a convex quadratic, so no local optima arise (assuming the learning rate $\alpha$ is not too large).
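The convexity claim is easy to verify in matrix form. Writing the loss as $J(\theta) = \frac{1}{2}\lVert X\theta - y\rVert^2$, where $X$ is the $m \times n$ design matrix whose rows are the samples $x^{(i)}$, the Hessian is

$\nabla^2 J(\theta) = X^\top X, \qquad v^\top X^\top X v = \lVert Xv \rVert^2 \ge 0 \text{ for all } v,$

so $J$ is convex and any stationary point is a global minimum.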

Stochastic Gradient Descent (SGD)

Stochastic gradient descent computes each update from a single (randomly chosen) sample instead of scanning the entire training set, which makes each iteration much faster. SGD does not follow the direction of steepest descent of $J(\theta)$; instead it oscillates its way toward the minimum.

Loop {
for i = 1 to m {
$\theta_j := \theta_j + \alpha\bigl(y^{(i)} - h_\theta(x^{(i)})\bigr)x_j^{(i)}$
}
}
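The implementation below only covers batch gradient descent. A minimal SGD variant is sketched here for comparison; it assumes the same data layout and the predict helper from the code that follows (each row of training_set holds the feature values, with the label in the last column):

// Sketch of stochastic gradient descent (not part of the original code).
// training_set[i][0..feature_num-1] are features, training_set[i][feature_num] is the label.
void sgd(double** training_set, int feature_num, int training_num,
         double* w, double a, int iterator_time) {
	while (iterator_time--) {
		for (int i = 0; i < training_num; i++) {      // one weight update per sample
			double err = training_set[i][feature_num] - predict(w, training_set[i], feature_num);
			for (int j = 0; j < feature_num; j++)
				w[j] += a * err * training_set[i][j]; // theta_j += alpha * (y - h(x)) * x_j
		}
	}
}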

Code Snippets

linear_regression.h:

// Solve linear regression with batch gradient descent
#pragma once
#include <cstdio>
#include <cmath>

using namespace std;

// Hypothesis h_theta(x): dot product of weights and features (data[0] is expected to be 1, the bias term)
double predict(double* w, double* data, int feature_num);
// Loss function J(theta)
double Theta(double **training_set, int feature_num, int training_num, double* w);
// Batch gradient descent
void gradient_descent(double** training_set, int feature_num, int training_num, double* w, double a, int iterator_time);
// Feature normalization
void feature_normalize(double **feature_set, int feature_num, int training_num);
// Evaluate the model's prediction error
void forecast(double *forecast_set, double* w, int feature_num);
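A note on the data layout, which the original post leaves implicit: each row training_set[i] stores the feature_num feature values at indices 0 through feature_num-1, with training_set[i][0] fixed at 1 so that w[0] plays the role of the bias term $\theta_0$, and the label y stored at index feature_num.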

linear_regression.cpp:

#include "pch.h"
#include "gradient_descent.h"

// h_theta(x): weighted sum over all features; data[0] is assumed to be 1 (bias term)
double predict(double* w, double* data, int feature_num) {
	double sum = 0;
	for (int i = 0; i < feature_num; i++) {
		sum += w[i] * data[i];
	}
	return sum;
}

// Loss J(theta): sum of squared errors over the training set, divided by 2m
double Theta(double **training_set, int feature_num, int training_num, double* w) {
	double sum = 0;
	for (int i = 0; i < training_num; i++) {
		double err = training_set[i][feature_num] - predict(w, training_set[i], feature_num);
		sum += err * err;
	}
	return sum / (2 * training_num);
}

void gradient_descent(double** training_set, int feature_num, int training_num, double* w, double a, int iterator_time) {
	double* del_theta = new double[feature_num]; // gradient accumulator
	while (iterator_time--) {
		for (int i = 0; i < feature_num; i++) {
			del_theta[i] = 0;
			for (int j = 0; j < training_num; j++) {
				del_theta[i] += (predict(w, training_set[j], feature_num) - training_set[j][feature_num])*training_set[j][i];
			}
		}
		// w[i] must not be updated until every del_theta[i] has been computed;
		// otherwise already-updated weights would contaminate the remaining gradients.
		// In code terms: this loop must not be merged with the one above!
		for (int i = 0; i < feature_num; i++)
			w[i] -= a * del_theta[i] / (double)training_num;
		//printf("%.3lf\n", Theta(training_set, feature_num, training_num, w));
	}
	delete[] del_theta;
	printf("Result:\n");
	for (int i = 0; i < feature_num - 1; i++) {
		printf("%.3lf ", w[i]);
	}
	printf("%.3lf\n", w[feature_num - 1]);
	return;
}

void forecast(double *forecast_set, double* w, int feature_num) {
	double y = w[0]; // bias term (forecast_set[0] is assumed to be 1)
	for (int i = 1; i < feature_num; i++) {
		printf("%.3lf ", w[i]);
		printf("\t%.3lf\n", forecast_set[i]);
		y = y + w[i] * forecast_set[i];
	}
	printf("------------------------------------------\n");
	printf("Predicted: %.3lf\n", y);
	printf("Actual:    %.3lf\n", forecast_set[feature_num]);
	printf("Error:     %.3lf%%\n", fabs(y - forecast_set[feature_num]) * 100 / fabs(forecast_set[feature_num]));
}


void feature_normalize(double **feature_set, int feature_num, int training_num) {
	// Feature normalization (z-score standardization):
	// for each feature X, x(i) = (x(i) - average(X)) / standard_deviation(X)
	// 1. Compute the mean average(X) of feature X over the n samples
	// 2. Compute the standard deviation standard_deviation(X) of feature X over the n samples
	// 3. Normalize every value x(i) of feature X with the formula above
	// Column 0 is the constant bias term, so normalization starts at column 1.
	double *average = new double[feature_num];
	double *standard_deviation = new double[feature_num];
	for (int i = 1; i < feature_num; i++) {
		double sum = 0;
		for (int j = 0; j < training_num; j++) {
			sum += feature_set[j][i];
		}
		average[i] = sum / training_num;
	}
	for (int i = 1; i < feature_num; i++) {
		double sum = 0;
		for (int j = 0; j < training_num; j++) {
			sum += (feature_set[j][i] - average[i])*(feature_set[j][i] - average[i]);
		}
		standard_deviation[i] = sqrt(sum / (training_num - 1)); // sample standard deviation
	}
	for (int i = 1; i < feature_num; i++)
		for (int j = 0; j < training_num; j++) {
			feature_set[j][i] = (feature_set[j][i] - average[i]) / standard_deviation[i];
		}
	delete[] standard_deviation;
	delete[] average;
}
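For completeness, here is a sketch of how these functions might be wired together. The original post does not include a driver, so the dataset below (4 samples; columns: bias, one feature, label) and the hyperparameters are illustrative assumptions:

// main.cpp -- illustrative driver (not part of the original post)
#include "linear_regression.h"

int main() {
	const int feature_num = 2;   // column 0 = bias (always 1), column 1 = feature
	const int training_num = 4;  // column 2 = label y
	double raw[training_num][feature_num + 1] = {
		{1, 1.0, 2.9}, {1, 2.0, 5.1}, {1, 3.0, 7.0}, {1, 4.0, 9.1}
	};
	double* training_set[training_num];
	for (int i = 0; i < training_num; i++) training_set[i] = raw[i];

	double w[feature_num] = {0, 0};              // initial weights
	feature_normalize(training_set, feature_num, training_num);
	gradient_descent(training_set, feature_num, training_num, w, 0.01, 10000);
	forecast(training_set[0], w, feature_num);   // check against a training sample
	return 0;
}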
