逻辑回归的推导及C代码实现

逻辑回归是一种常用的分类算法(注意逻辑回归不是回归哟)

学习的时候借鉴了这个资料:https://zhuanlan.zhihu.com/p/74874291

很好的资料,感谢作者的分享。

 

 

下面的训练代码比较好理解,输入正样本数据及个数,负样本数据及个数,返回对应的参数theta。

//梯度下降法进行逻辑回归,分别输入正负样本
// Train a 2-D logistic regression model by batch gradient descent.
//
// Parameters:
//   pos   / pos_n : positive samples and their count (label presumably 1 — confirm in LOGISTICDATA)
//   neg   / neg_n : negative samples and their count (label presumably 0)
//   theta[5]      : output — filled as
//                   theta[0] = k  (slope of decision boundary y = k*x + b)
//                   theta[1] = b  (intercept of decision boundary)
//                   theta[2] = theta0, theta[3] = theta1, theta[4] = theta2
//                   (raw model parameters of theta0 + theta1*x + theta2*y)
// Returns nothing; on invalid sizes it logs an error and leaves theta untouched.
void logisticPosNegTrain(const LOGISTICDATA *pos, const int pos_n, const LOGISTICDATA *neg, const int neg_n, double theta[5]){
	if (pos_n < 1 || neg_n < 1){ TFLYERROR("size error\n"); return; }
	const LOGISTICDATA *head_pos = pos, *head_neg = neg;
	// Hand-picked starting point, learning rate, and convergence threshold.
	double theta0 = -1.9, theta1 = 3.60, theta2 = -3.70, alpha = 0.01, error = 1e-6;
	double tmp_theta0 = theta0, tmp_theta1 = theta1, tmp_theta2 = theta2;
	double exp_result = 0.0;
	while (1){
		// BUG FIX: the gradient accumulators must be reset at the start of
		// every epoch; previously they were declared outside the loop and
		// kept accumulating across epochs, corrupting every update after
		// the first one.
		double sum_theta0 = 0.0, sum_theta1 = 0.0, sum_theta2 = 0.0;
		head_pos = pos;
		for (int i = 0; i < pos_n; i++) { // positive samples
			// Sigmoid of the linear score for this sample.
			exp_result = 1.0 / (1.0 + exp(-(theta0 + theta1 * head_pos->data.x + theta2 * head_pos->data.y)));
			// Accumulate (y - h(x)) * x_j, the per-sample likelihood gradient.
			sum_theta0 += 1.0 * head_pos->label - exp_result;
			sum_theta1 += (1.0 * head_pos->label - exp_result) * head_pos->data.x;
			sum_theta2 += (1.0 * head_pos->label - exp_result) * head_pos->data.y;
			head_pos++;
		}
		head_neg = neg;
		// BUG FIX: this loop previously ran pos_n times; with unequal class
		// sizes it either read past the end of neg or skipped samples.
		for (int i = 0; i < neg_n; i++) { // negative samples
			exp_result = 1.0 / (1.0 + exp(-(theta0 + theta1 * head_neg->data.x + theta2 * head_neg->data.y)));
			sum_theta0 += 1.0 * head_neg->label - exp_result;
			sum_theta1 += (1.0 * head_neg->label - exp_result) * head_neg->data.x;
			sum_theta2 += (1.0 * head_neg->label - exp_result) * head_neg->data.y;
			head_neg++;
		}
		// Average and negate: gradient of the mean negative log-likelihood.
		sum_theta0 = -1.0 / (pos_n + neg_n) * sum_theta0;
		sum_theta1 = -1.0 / (pos_n + neg_n) * sum_theta1;
		sum_theta2 = -1.0 / (pos_n + neg_n) * sum_theta2;
		// Standard gradient-descent step.
		theta0 = theta0 - alpha * sum_theta0;
		theta1 = theta1 - alpha * sum_theta1;
		theta2 = theta2 - alpha * sum_theta2;
		printf("theta0=%lf, theta1=%lf,theta2=%lf\n", theta0, theta1, theta2);
		// Stop once every parameter moved by less than `error` this epoch.
		if (fabs(theta0 - tmp_theta0) < error && fabs(theta1 - tmp_theta1) < error && fabs(theta2 - tmp_theta2) < error){
			break;
		}
		tmp_theta0 = theta0;
		tmp_theta1 = theta1;
		tmp_theta2 = theta2;
	}
	// Decision boundary theta0 + theta1*x + theta2*y = 0  =>  y = k*x + b.
	double k = -theta1 / theta2;
	double b = -theta0 / theta2;

	theta[0] = k;
	theta[1] = b;
	theta[2] = theta0;
	theta[3] = theta1;
	theta[4] = theta2;
}

 

下面是预测代码。

//模型预测结果
// Run the trained logistic regression model on a batch of samples.
//
// Parameters:
//   pred_data  : array of pred_num input samples
//   pred_num   : number of samples (must be >= 1)
//   pred_value : 5-element array produced by logisticPosNegTrain; the raw
//                model parameters theta0..theta2 live at indices 2..4
//   prob       : output array of pred_num results; for each sample this
//                fills .z (raw score), .p (sigmoid probability) and
//                .cls (0/1 class label)
void logisticPredicted(const LOGISTICDATA *pred_data, const int pred_num, const double *pred_value, LOGISTICRESULT *prob){
	if (pred_num < 1){ TFLYERROR("size error\n"); return; }
	double theta0 = pred_value[2], theta1 = pred_value[3], theta2 = pred_value[4];
	const LOGISTICDATA *head_data = pred_data;
	for (int i = 0; i < pred_num; i++) {
		// Raw linear score and its sigmoid (class-1 probability).
		double z = theta0 + theta1 * head_data->data.x + theta2 * head_data->data.y;
		double p = 1.0 / (1.0 + exp(-z));
		prob->z = z;
		prob->p = p;
		// BUG FIX: classify by thresholding the probability (p > 0.5,
		// equivalent to z > 0). The original compared the raw score
		// against 0.5 (z > 0.5), mislabeling samples with 0 < z <= 0.5.
		prob->cls = p > 0.5 ? 1 : 0;
		head_data++;
		prob++;
	}
}

 

你可能感兴趣的:(机器学习与算法分析,机器学习与算法)