李宏毅 - Machine Learning HW1

This is a linear regression example. We first read the data and fold the bias b into the weight vector W by appending a constant-1 feature to every sample.
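
Concretely, folding the bias into the weights is just the identity below, which is why a column of ones is stacked onto X later in the code:

\hat{y} = Xw + b\,\mathbf{1} = \begin{bmatrix} X & \mathbf{1} \end{bmatrix}\begin{bmatrix} w \\ b \end{bmatrix}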

import numpy as np
import csv

# Read train.csv and keep only the PM2.5 rows (column 2 holds the measurement name).
f=open(r'C:\Users\Lenovo\Desktop\HW1\train.csv')
data=list(csv.reader(f))
f.close()
index=[row[2]=='PM2.5' for row in data]
Data=[]
for i in range(len(index)):
    if index[i]:
        Data.append(data[i][3:])   # drop the leading date/station/item columns, keep the 24 hourly values
Data=np.array(Data)
# Each day has 24 hours; every 10 consecutive hours form one sample,
# with the 10th hour's PM2.5 value as the prediction target y.
# So each day yields 24-9=15 samples, and 15*240 days = 3600 samples in total.
X=[]
y=[]
for i in range(15):
    tempx=Data[:,i:i+9]
    tempy=Data[:,i+9]
    X.append(tempx)
    y.append(tempy)
X=np.array(X,dtype=float).reshape(-1,9)
y=np.array(y,dtype=float).reshape(-1)
X=np.hstack([X,np.ones((len(X),1))])   # append a constant-1 column so the bias is learned as part of W
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=0.8)
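
Because the model is plain linear regression, a closed-form least-squares fit is a useful reference point before trying the iterative optimizers. A minimal sketch (not part of the original homework code), using NumPy's lstsq on the split above:

# Closed-form least-squares baseline: solves min_W ||X_train W - y_train||^2 directly.
W_ls, _, _, _ = np.linalg.lstsq(X_train, y_train, rcond=None)
y_ls = X_test.dot(W_ls)
print(np.mean(np.abs(y_ls - y_test)))   # mean absolute error on the held-out 20%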

Next, the same model is trained with several gradient-based optimizers.
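
All four optimizers below use the same regularized least-squares gradient. Written out to match the scaling in the code (the code logs the plain MSE but steps with the gradient of the half-MSE objective; the missing factor of 2 only rescales the learning rate):

L = \frac{1}{2n}\,\lVert y - XW\rVert^2 + \frac{\lambda_{L2}}{2}\,\lVert W\rVert^2,
\qquad
\nabla_W L = -\frac{1}{n}\, X^\top (y - XW) + \lambda_{L2}\, W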

1. GD

def GD(X, y, W, lr, iters, lambdaL2):
    # full-batch gradient descent with optional L2 regularization
    loss_his=[]
    for i in range(iters):
        s=X.dot(W)
        loss1=y-s                      # residuals
        loss=np.sum(loss1**2)/len(X)   # training MSE
        loss_his.append(loss)
        d=np.dot(-1*X.T,loss1)/len(X) + lambdaL2*W   # gradient of the half-MSE loss plus L2 term
        W=W-d*lr
    return W,loss_his
w = np.zeros(len(X_train[0]))
w,loss=GD(X_train,y_train,w,0.0001,20000,0)
y_gd = np.dot(X_test, w)
np.mean(np.abs(y_gd-y_test))   # test-set mean absolute error
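
With a fixed learning rate it is worth checking that the loss is still decreasing at the end of training. A minimal sketch of plotting the recorded history, assuming matplotlib is installed:

import matplotlib.pyplot as plt

# Plot the training-loss history returned by GD.
plt.plot(loss)
plt.xlabel('iteration')
plt.ylabel('training MSE')
plt.show()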

2. Mini-batch gradient descent

import random
def batch_data(X,y,batch):
    # randomly sample `batch` rows (without replacement) to form one mini-batch
    all_len=len(y)
    l=list(range(all_len))
    random.shuffle(l)
    ll=l[:batch]
    return X[ll],y[ll]
    
def SGD(X, y, W, lr, iters, lambdaL2):
    # mini-batch gradient descent: every step uses a fresh random batch of 200 samples
    loss_his=[]
    for i in range(iters):
        batch_x,batch_y=batch_data(X,y,200)
        s=batch_x.dot(W)
        loss1=batch_y-s
        loss=np.sum(loss1**2)/len(batch_x)
        loss_his.append(loss)
        d=np.dot(-1*batch_x.T,loss1)/len(batch_x) + lambdaL2*W
        W=W-d*lr
    return W,loss_his

w = np.zeros(len(X_train[0]))
w,loss=SGD(X_train,y_train,w,0.0001,20000,0)
y_sgd = np.dot(X_test, w)
np.mean(np.abs(y_sgd-y_test))   # test-set mean absolute error

3. Adagrad

def Adagrad(X, y, W, lr, iters, lambdaL2):
    # Adagrad: scale each coordinate's step by the root of its accumulated squared gradients
    loss_his=[]
    grad=np.zeros(W.shape)
    for i in range(iters):
        s=X.dot(W)
        loss1=y-s
        loss=np.sum(loss1**2)/len(X)   # optionally add lambdaL2*np.sum(W*W) to log the penalty too
        loss_his.append(loss)

        d=np.dot(-1*X.T,loss1)/len(X) + lambdaL2*W
        grad=grad+d**2                 # accumulate squared gradients
        ada=np.sqrt(grad)+1e-8         # small epsilon guards against division by zero
        W=W-lr*d/ada
    return W,loss_his
        
w = np.zeros(len(X_train[0]))
w,loss=Adagrad(X_train,y_train,w,0.0001,20000,0)
y_ada = np.dot(X_test, w)
np.mean(np.abs(y_ada-y_test))   # test-set mean absolute error

4. Adam = SGD with momentum + RMSProp
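
In update form (standard Adam with bias correction, written to match the code below, where the small constant sits inside the square root):

m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t
v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2
\hat{m}_t = \frac{m_t}{1-\beta_1^{\,t}}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^{\,t}}
W_t = W_{t-1} - \eta\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t + \epsilon}}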

def Adam(X, y, W, lr, iters, lambdaL2,beta1,beta2):
    # Adam: exponential moving averages of the gradient (momentum) and of its square (RMSProp),
    # with bias correction, computed on random mini-batches of 200 samples
    loss_his=[]
    first_moment=0
    second_moment=0
    for i in range(iters):
        batch_x,batch_y=batch_data(X,y,200)
        s=batch_x.dot(W)
        loss1=batch_y-s
        loss=np.sum(loss1**2)/len(batch_x)
        loss_his.append(loss)
        d=np.dot(-1*batch_x.T,loss1)/len(batch_x) + lambdaL2*W
        first_moment=first_moment*beta1+(1-beta1)*d      # note (1-beta1) on the gradient term
        second_moment=second_moment*beta2+(1-beta2)*d*d
        # bias-correct the moment estimates (the step counter is 1-based)
        first_unbias=first_moment/(1-beta1**(i+1))
        second_unbias=second_moment/(1-beta2**(i+1))
        W=W-lr*first_unbias/np.sqrt(second_unbias+1e-7)

    return W,loss_his
w = np.zeros(len(X_train[0]))
w,loss=Adam(X_train,y_train,w,0.0001,20000,0,0.9,0.999)
y_adam = np.dot(X_test, w)
np.mean(np.abs(y_adam-y_test))   # test-set mean absolute error
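
Since each section stored its test predictions under its own name, the four optimizers can be compared side by side. A small sketch:

# Compare test-set mean absolute error across the four optimizers.
for name, pred in [('GD', y_gd), ('mini-batch SGD', y_sgd), ('Adagrad', y_ada), ('Adam', y_adam)]:
    print(name, np.mean(np.abs(pred - y_test)))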

Further reading:

https://blog.csdn.net/willduan1/article/details/78070086

https://www.cnblogs.com/HL-space/p/10676637.html

https://blog.csdn.net/oklahomawestbrook/article/details/90488837

https://blog.csdn.net/qq_35860352/article/details/80772142

https://blog.csdn.net/yzy_1996/article/details/84618536#MomentumSGD_175
