This is a linear regression example: we first read the data, then fold the bias term b into the weight vector W.
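Concretely, appending a constant-1 column to X lets the bias ride along inside W:

\hat{y} = Xw + b\mathbf{1} = \begin{bmatrix} X & \mathbf{1} \end{bmatrix} \begin{bmatrix} w \\ b \end{bmatrix}

so a single matrix-vector product covers both the weights and the intercept.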
import numpy as np
import csv

# Read the raw CSV (HW1 PM2.5 data); each row holds one feature for one day
with open(r'C:\Users\Lenovo\Desktop\HW1\train.csv') as f:
    data = list(csv.reader(f))
# Keep only the PM2.5 rows (the feature name is in the third column)
index = [row[2] == 'PM2.5' for row in data]
Data = []
for i in range(len(index)):
    if index[i]:
        Data.append(data[i][3:])
Data = np.array(Data)
# Each day has 24 hours; every 10 consecutive hours form one sample,
# with the 10th hour's PM2.5 value used as the target y.
# So each day yields 24 - 9 = 15 samples, and 15 * 240 days = 3600 samples in total.
X = []
y = []
for i in range(15):
    tempx = Data[:, i:i+9]   # 9 hours of PM2.5 readings as features
    tempy = Data[:, i+9]     # the 10th hour as the label
    X.append(tempx)
    y.append(tempy)
X = np.array(X, dtype=float).reshape(-1, 9)
y = np.array(y, dtype=float).reshape(-1)
X = np.hstack([X, np.ones((len(X), 1))])   # constant-1 column folds the bias b into W
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
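As a quick sanity check (assuming the standard 240-day train.csv from the homework), the shapes after these steps should be:

print(X.shape)        # expected: (3600, 10) -- 9 PM2.5 readings plus the constant-1 bias column
print(y.shape)        # expected: (3600,)
print(X_train.shape)  # expected: (2880, 10) with an 80/20 split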
Next, we fit the model with several ways of computing and applying the gradient.
1. GD (full-batch gradient descent)
def GD(X, y, W, lr, iters, lambdaL2):
    loss_his = []
    for i in range(iters):
        s = X.dot(W)                       # predictions
        loss1 = y - s                      # residuals
        loss = np.sum(loss1**2) / len(X)   # mean squared error
        loss_his.append(loss)
        d = np.dot(-X.T, loss1) / len(X) + lambdaL2 * W   # gradient (constant factor 2 absorbed into lr)
        W = W - lr * d
    return W, loss_his

w = np.zeros(len(X_train[0]))
w, loss = GD(X_train, y_train, w, 0.0001, 20000, 0)
y_gd = np.dot(X_test, w)
np.mean(np.abs(y_gd - y_test))   # mean absolute error on the test split
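For reference, GD above minimizes the ridge-style objective

L(W) = \frac{1}{n}\lVert y - XW \rVert^2 + \lambda \lVert W \rVert^2, \qquad
\nabla L(W) = -\frac{2}{n} X^{\top}(y - XW) + 2\lambda W

The code drops the constant factor of 2 from the gradient, which is equivalent to halving the learning rate and does not change the minimizer.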
2. Mini-batch gradient descent
import random

def batch_data(X, y, batch):
    # Randomly sample `batch` rows (without replacement) from X and y
    all_len = len(y)
    l = list(range(all_len))
    random.shuffle(l)
    ll = l[:batch]
    return X[ll], y[ll]
def SGD(X, y, W, lr, iters, lambdaL2):
    loss_his = []
    for i in range(iters):
        batch_x, batch_y = batch_data(X, y, 200)   # one mini-batch per update
        s = batch_x.dot(W)
        loss1 = batch_y - s
        loss = np.sum(loss1**2) / len(batch_x)     # mini-batch MSE
        loss_his.append(loss)
        d = np.dot(-batch_x.T, loss1) / len(batch_x) + lambdaL2 * W
        W = W - lr * d
    return W, loss_his

w = np.zeros(len(X_train[0]))
w, loss = SGD(X_train, y_train, w, 0.0001, 20000, 0)
y_sgd = np.dot(X_test, w)
np.mean(np.abs(y_sgd - y_test))   # mean absolute error on the test split
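Note that drawing a fresh random batch every iteration (as batch_data does) can revisit some rows before others are seen at all. A common alternative, sketched below under the assumption that X_train/y_train are the NumPy arrays built above, shuffles once per epoch and then walks through consecutive slices:

def epoch_batches(X, y, batch):
    # Shuffle the whole training set once, then yield consecutive mini-batches
    idx = np.random.permutation(len(y))
    for start in range(0, len(y), batch):
        sel = idx[start:start + batch]
        yield X[sel], y[sel]

# Usage sketch: one full pass (epoch) over the training data
# for batch_x, batch_y in epoch_batches(X_train, y_train, 200):
#     ...compute the gradient on (batch_x, batch_y) and update W...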
3. Adagrad
def Adagrad(X, y, W, lr, iters, lambdaL2):
    loss_his = []
    grad = np.zeros(W.shape)             # accumulated squared gradients
    for i in range(iters):
        s = X.dot(W)
        loss1 = y - s
        loss = np.sum(loss1**2) / len(X)   # + lambdaL2*np.sum(W*W) for the regularized loss
        loss_his.append(loss)
        d = np.dot(-X.T, loss1) / len(X) + lambdaL2 * W
        grad = grad + d**2
        ada = np.sqrt(grad) + 1e-8       # small epsilon avoids division by zero
        W = W - lr * d / ada
    return W, loss_his

w = np.zeros(len(X_train[0]))
w, loss = Adagrad(X_train, y_train, w, 0.0001, 20000, 0)
y_ada = np.dot(X_test, w)
np.mean(np.abs(y_ada - y_test))   # mean absolute error on the test split
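In formula form, Adagrad keeps a running sum of squared gradients and divides each coordinate's step by its square root:

G_t = \sum_{\tau=1}^{t} g_\tau \odot g_\tau, \qquad
W_{t+1} = W_t - \frac{\eta}{\sqrt{G_t} + \epsilon} \odot g_t

so coordinates that keep receiving large gradients have their effective learning rate shrunk the fastest; the small \epsilon (1e-8 in the code) only guards against division by zero.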
4. Adam = SGD with momentum + RMSProp
def Adam(X, y, W, lr, iters, lambdaL2, beta1, beta2):
    loss_his = []
    first_moment = 0    # exponential moving average of the gradient (momentum term)
    second_moment = 0   # exponential moving average of the squared gradient (RMSProp term)
    for i in range(iters):
        batch_x, batch_y = batch_data(X, y, 200)   # one mini-batch per update
        s = batch_x.dot(W)
        loss1 = batch_y - s
        loss = np.sum(loss1**2) / len(batch_x)
        loss_his.append(loss)
        d = np.dot(-batch_x.T, loss1) / len(batch_x) + lambdaL2 * W
        first_moment = beta1 * first_moment + (1 - beta1) * d
        second_moment = beta2 * second_moment + (1 - beta2) * d * d
        W = W - lr * first_moment / (np.sqrt(second_moment) + 1e-7)
    return W, loss_his

w = np.zeros(len(X_train[0]))
w, loss = Adam(X_train, y_train, w, 0.0001, 20000, 0, 0.9, 0.999)
y_adam = np.dot(X_test, w)
np.mean(np.abs(y_adam - y_test))   # mean absolute error on the test split
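The Adam implementation above skips the usual bias correction: because both moments start at 0, the first updates are biased toward zero. The standard fix rescales the moments before the update,

\hat{m}_t = \frac{m_t}{1-\beta_1^{t}}, \qquad
\hat{v}_t = \frac{v_t}{1-\beta_2^{t}}, \qquad
W_{t+1} = W_t - \frac{\eta\, \hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}

which, as a hypothetical modification of the loop above, would look like:

# inside the for-loop, with the step count t = i + 1
# m_hat = first_moment / (1 - beta1 ** (i + 1))
# v_hat = second_moment / (1 - beta2 ** (i + 1))
# W = W - lr * m_hat / (np.sqrt(v_hat) + 1e-7)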
Further reading:
https://blog.csdn.net/willduan1/article/details/78070086
https://www.cnblogs.com/HL-space/p/10676637.html
https://blog.csdn.net/oklahomawestbrook/article/details/90488837
https://blog.csdn.net/qq_35860352/article/details/80772142
https://blog.csdn.net/yzy_1996/article/details/84618536#MomentumSGD_175