利用 Python 实现多元线性回归 #-------机器学习--------
1、简单一元线性回归
import numpy as np
import matplotlib.pyplot as plt
# Sample data: five (x, y) observations.
x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

# Show the raw observations.
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])
plt.show()

# Least-squares fit of the line y = a * x + b:
#   a = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean) ** 2)
#   b = y_mean - a * x_mean
x_mean = np.mean(x)
y_mean = np.mean(y)
x_dev = x - x_mean  # deviations of x from its mean, shared by both sums
num = np.sum(x_dev * (y - y_mean))  # numerator of the slope
d = np.sum(x_dev ** 2)  # denominator of the slope
a = num / d
b = y_mean - a * x_mean

# Fitted values at the observed x positions.
y_hat = a * x + b

plt.scatter(x, y)  # scatter plot of the observations
plt.plot(x, y_hat, color='r')  # fitted line in red
plt.axis([0, 6, 0, 6])
plt.show()

# Predict the response for a new x value.
x_predict = 6
y_predict = a * x_predict + b
print(y_predict)
2、多元线性回归
import numpy as np
from .metrics import r2_score
class LinearRegression:
    """Multiple linear regression fitted with the normal equation."""

    def __init__(self):
        """Create an unfitted model; every parameter is None until fitting."""
        self.coef_ = None  # feature coefficients, i.e. theta[1:]
        self.interception_ = None  # intercept term, i.e. theta[0]
        self._theta = None  # full parameter vector: intercept, then coefficients

    def fit_normal(self, X_train, y_train):
        """Fit the model to X_train / y_train by solving the normal equation.

        A column of ones is prepended to X_train so that the first solved
        parameter is the intercept.

        Args:
            X_train: 2-D array, one row per sample and one column per feature.
            y_train: array of targets with as many entries as X_train has rows.

        Returns:
            The fitted model itself.

        Raises:
            AssertionError: if X_train and y_train disagree on sample count.
            numpy.linalg.LinAlgError: if X_b^T X_b is singular.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # Solve (X_b^T X_b) theta = X_b^T y directly rather than forming the
        # explicit inverse: same solution, cheaper and numerically more stable.
        self._theta = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y_train))

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return the predicted targets for every row of X_predict.

        Args:
            X_predict: 2-D array with the same number of columns as the
                training data.

        Returns:
            The product of the intercept-augmented X_predict and the fitted
            parameter vector.

        Raises:
            AssertionError: if the model has not been fitted, or the number of
                columns differs from the number of fitted coefficients.
        """
        assert self.interception_ is not None and self.coef_ is not None, \
            "must fit before predict"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        y_predict = X_b.dot(self._theta)
        return y_predict

    def score(self, X_test, y_test):
        """Score the model on X_test / y_test using r2_score.

        Args:
            X_test: 2-D array of test samples.
            y_test: true targets for X_test.

        Returns:
            Whatever r2_score(y_test, predictions) returns.
        """
        # self is bound implicitly by the method call; passing it again would
        # shift X_test into a non-existent extra positional argument.
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        """Return the constructor-style representation of the model."""
        return "LinearRegression()"
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this call presumably needs an older release — confirm
# against the installed scikit-learn version.
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Keep only the samples whose target value is below 50.0.
keep = y < 50.0
X, y = X[keep], y[keep]

# Bare expressions: displayed by an interactive session, no effect in a script.
X.shape
y.shape

from myAlgorithm.model_selection import train_test_split
from myAlgorithm.LinearRegression import LinearRegression

# Split into train / test parts with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

# Fit on the training part, then inspect the coefficients and the test score.
reg = LinearRegression()
reg.fit_normal(X_train, y_train)
reg.coef_
reg.score(X_test, y_test)
总结
线性回归模型有着比较清晰的数学推导过程，也是其他复杂模型的基础。线性回归算法是典型的参数学习算法。虽然线性回归只能解决回归问题，但它却是很多分类方法（如逻辑回归）的基础。此外，线性回归算法假设数据之间存在一定的线性关系，且线性关系越强，拟合效果越好。