Simple Linear Regression
# _*_ coding: utf-8 _*_
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


class LinearRegression(object):
    def __init__(self, learning_rate=0.01, max_iter=100, seed=None):
        np.random.seed(seed)
        self.lr = learning_rate
        self.max_iter = max_iter
        # Initialize the weight and bias with small random values.
        self.w = np.random.normal(1, 0.1)
        self.b = np.random.normal(1, 0.1)
        self.loss_arr = []

    def fit(self, x, y):
        self.x = x
        self.y = y
        for i in range(self.max_iter):
            self._train_step()
            self.loss_arr.append(self.loss())

    def model(self, x, w, b):
        return x * w + b

    def predict(self, x=None):
        if x is None:
            x = self.x
        y_pred = self.model(x, self.w, self.b)
        return y_pred

    def loss(self, y_true=None, y_pred=None):
        if y_true is None or y_pred is None:
            y_true = self.y
            y_pred = self.predict(self.x)
        return np.mean((y_true - y_pred) ** 2)

    def _calc_gradient(self):
        # Gradient of the mean squared error (the constant factor 2 is absorbed
        # into the learning rate).
        d_w = np.mean((self.x * self.w + self.b - self.y) * self.x)
        d_b = np.mean(self.x * self.w + self.b - self.y)
        return d_w, d_b

    def _train_step(self):
        d_w, d_b = self._calc_gradient()
        self.w = self.w - self.lr * d_w
        self.b = self.b - self.lr * d_b
        return self.w, self.b


def generate_data():
    np.random.seed(272)
    data_size = 100
    X = np.random.uniform(low=1.0, high=10.0, size=data_size)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=data_size)
    return pd.DataFrame({"X": X, "y": y})


def show_data(x, y, w=None, b=None):
    plt.scatter(x, y, marker='.')
    if w is not None and b is not None:
        plt.plot(x, w * x + b, c='red')
    plt.show()


if __name__ == '__main__':
    data = np.array(generate_data())
    x = data[:, 0]
    y = data[:, 1]
    regr = LinearRegression(learning_rate=0.01, max_iter=10, seed=111)
    regr.fit(x, y)
    show_data(x, y, regr.w, regr.b)
    # Training curve: loss per iteration.
    plt.scatter(np.arange(len(regr.loss_arr)), regr.loss_arr, marker='o', c='green')
    plt.show()
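A quick way to sanity-check the result (a minimal sketch, assuming the LinearRegression class and generate_data() above are available in the same module, and using more iterations than the demo above so the intercept has time to converge): the gradient-descent fit should end up close to NumPy's closed-form least-squares line.

import numpy as np

data = np.array(generate_data())
x, y = data[:, 0], data[:, 1]

regr = LinearRegression(learning_rate=0.01, max_iter=10000, seed=111)
regr.fit(x, y)

# Closed-form least-squares fit of a degree-1 polynomial for comparison.
w_ls, b_ls = np.polyfit(x, y, deg=1)
print("gradient descent: w=%.3f  b=%.3f" % (regr.w, regr.b))
print("np.polyfit:       w=%.3f  b=%.3f" % (w_ls, b_ls))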
Simple Linear Regression
Each version further down is written better.
# _*_ coding: utf-8 _*_
'''
Python implementation of simple linear regression.
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


class LinearRegression(object):
    def __init__(self, learning_rate=0.01, max_iter=100, seed=None):
        np.random.seed(seed)
        self.lr = learning_rate
        self.max_iter = max_iter
        self.w = np.random.normal(1, 0.1)
        self.b = np.random.normal(1, 0.1)
        self.loss_arr = []

    def fit(self, X, y):
        self.X = X
        self.y = y
        for i in range(self.max_iter):
            self._train_step()
            self.loss_arr.append(self.loss())

    def model(self, X, w, b):
        return X * w + b

    def predict(self, X=None):
        if X is None:
            X = self.X
        y_pred = self.model(X, self.w, self.b)
        return y_pred

    def loss(self, y_true=None, y_pred=None):
        if y_true is None or y_pred is None:
            y_true = self.y
            y_pred = self.predict(self.X)
        return np.mean((y_true - y_pred) ** 2)

    def _calc_gradient(self):
        d_w = np.mean((self.X * self.w + self.b - self.y) * self.X)
        d_b = np.mean(self.X * self.w + self.b - self.y)
        return d_w, d_b

    def _train_step(self):
        d_w, d_b = self._calc_gradient()
        self.w = self.w - self.lr * d_w
        self.b = self.b - self.lr * d_b
        return self.w, self.b


def show_data(X, y, w=None, b=None):
    plt.scatter(X, y, marker='.')
    if w is not None and b is not None:
        plt.plot(X, X * w + b, c='r')
    plt.show()


if __name__ == '__main__':
    X = np.random.uniform(low=1.0, high=10.0, size=300)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=300)
    regr = LinearRegression(learning_rate=0.01, max_iter=100, seed=100)
    regr.fit(X, y)
    show_data(X, y, regr.w, regr.b)
    plt.plot(np.arange(len(regr.loss_arr)), regr.loss_arr, marker='o', c='green')
    plt.show()
Simple Linear Regression
Each version further down is more standardized.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


class LinearRegression(object):
    def __init__(self, learning_rate=0.01, max_iter=100, seed=101):
        np.random.seed(seed)
        self.w = np.random.randn(1)[0]
        self.b = np.random.randn(1)[0]
        self.lr = learning_rate
        self.max_iter = max_iter
        self.loss_arr = []

    def fit(self, X, y):
        self.X = X
        self.y = y
        for i in range(self.max_iter):
            self.gradient_descent()
            self.loss_arr.append(self.loss())

    def model(self, X):
        return self.w * X + self.b

    def loss(self, y_true=None, y_pred=None):
        if y_true is None or y_pred is None:
            y_true = self.y
            y_pred = self.model(self.X)
        # Sum of squared errors; the gradient below is based on the mean, which
        # only changes the loss curve's scale, not the descent direction.
        return np.sum((y_true - y_pred) ** 2)

    def cal_gradient(self):
        # Averaging gives the gradient of the mean squared error, so the step
        # size does not grow with the number of samples (see the note after this class).
        d_w = np.mean(2 * (self.model(self.X) - self.y) * self.X)
        d_b = np.mean(2 * (self.model(self.X) - self.y))
        return d_w, d_b

    def gradient_descent(self):
        d_w, d_b = self.cal_gradient()
        self.w -= self.lr * d_w
        self.b -= self.lr * d_b
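Why cal_gradient takes the mean: the quantity being minimized is the mean squared error over the N training samples, and differentiating it keeps the 1/N factor, so each sample contributes an averaged amount to the update:

\[
L(w, b) = \frac{1}{N}\sum_{i=1}^{N}\left(w x_i + b - y_i\right)^2, \qquad
\frac{\partial L}{\partial w} = \frac{2}{N}\sum_{i=1}^{N}\left(w x_i + b - y_i\right)x_i, \qquad
\frac{\partial L}{\partial b} = \frac{2}{N}\sum_{i=1}^{N}\left(w x_i + b - y_i\right).
\]

Using the sum instead of the mean would scale both gradients by N, which has the same effect as multiplying the learning rate by N and easily makes the updates diverge.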
def show_data(X, y, w=None, b=None):
    plt.scatter(X, y, marker='.')
    if w is not None and b is not None:
        plt.plot(X, X * w + b, c='r')
    plt.show()


if __name__ == '__main__':
    X = np.random.uniform(low=1.0, high=10.0, size=300)
    y = X * 20 + 10 + np.random.normal(loc=0.0, scale=10.0, size=300)
    regr = LinearRegression(learning_rate=0.01, max_iter=10, seed=101)
    regr.fit(X, y)
    show_data(X, y, regr.w, regr.b)
    plt.plot(np.arange(len(regr.loss_arr)), regr.loss_arr, marker='o', c='green')
    plt.show()
Multiple Linear Regression [not necessarily correct]
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # 3D plotting


class Multiple_LinearRegression(object):
    def __init__(self, learning_rate, max_iter):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.loss_arr = []

    # Entry point of the model.
    def fit(self, X, y):
        self.X = X
        self.y = y
        self.N = X.shape[0]
        self.col = X.shape[1]
        self.theta = np.random.randn(self.col)
        self.lr = np.ones(self.col) * self.learning_rate
        for i in range(self.max_iter):
            self.gradient_descent()
            self.loss_arr.append(self.loss())

    def model(self, X):
        return X.dot(self.theta)  # returns one prediction per sample (N values)

    def loss(self, y_true=None, y_pred=None):
        if y_true is None or y_pred is None:
            y_true = self.y
            y_pred = self.model(self.X)
        return np.mean((y_true - y_pred) ** 2)

    def cal_gradient(self):
        # (model(X) - y) has shape (N,) and X.T has shape (col, N), so broadcasting
        # gives one error-weighted row per feature; averaging over axis 1 yields the
        # per-feature gradient of the mean squared error.
        temp = (self.model(self.X) - self.y) * self.X.T
        return np.mean(temp, axis=1)

    def gradient_descent(self):
        temp_theta = self.cal_gradient()
        self.theta -= self.lr * temp_theta
if __name__ == '__main__':
    # load_boston() was removed in newer scikit-learn releases, so this script
    # needs an older scikit-learn version.
    boston = datasets.load_boston()
    y = boston.target
    n = len(y)
    # Design matrix: column 0 = RM, column 1 = LSTAT, column 2 = constant 1
    # acting as the intercept term.
    X = np.ones((n, 3), dtype=float)
    X[:, 0] = boston.data[:, 5]
    X[:, 1] = boston.data[:, 12]
    regr = Multiple_LinearRegression(learning_rate=0.008, max_iter=100)
    regr.fit(X, y)
    theta = regr.theta
    print("theta:", theta)

    # plot scatter
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(X[:, 0], X[:, 1], y)

    # plot the fitted line through two (x, y) points; theta[0] and theta[1] are the
    # feature coefficients and theta[2] is the intercept (the all-ones column).
    x = [8, 9]
    y = [40, 5]
    z = [theta[0] * x[0] + theta[1] * y[0] + theta[2],
         theta[0] * x[1] + theta[1] * y[1] + theta[2]]
    print(z)
    figure = ax.plot(x, y, z, c='r')
    # figure = ax.plot(X[:, 0], X[:, 1], regr.model(X), c='r')
    ax.set_zlabel('Z', fontdict={'size': 15, 'color': 'red'})
    ax.set_ylabel('Y', fontdict={'size': 15, 'color': 'red'})
    ax.set_xlabel('X', fontdict={'size': 15, 'color': 'red'})
    plt.show()

    plt.plot(np.arange(len(regr.loss_arr)), regr.loss_arr, marker='o', c='green')
    plt.show()
    print("final loss:", regr.loss())