机器学习基石笔记:Homework #3 LinReg&LogReg相关习题

问题描述

图1 13

图2 14-15

图3 18

图4 19-20

程序实现

13-15

# coding: utf-8

import numpy as np
import numpy.random as random
import matplotlib.pyplot as plt

def sign(x):
    """Return +1 when ``x`` is non-negative (zero included), otherwise -1."""
    return 1 if x >= 0 else -1

def gen_data(num_data=1000, noise_rate=0.1):
    """Generate a noisy 2-D data set for the circular target function.

    Points (x1, x2) are drawn uniformly from [-1, 1] x [-1, 1] and labelled
    with sign(x1^2 + x2^2 - 0.6), where sign(0) is taken as +1.  A randomly
    chosen subset of the labels is then flipped to simulate noise.

    Args:
        num_data: number of samples to generate (default 1000).
        noise_rate: fraction of labels to flip (default 0.1).  The number of
            flipped labels is the count of indices i in range(num_data) that
            satisfy ``i < num_data * noise_rate``.

    Returns:
        (dataX, dataY): dataX has shape (num_data, 3) with a leading column
        of ones followed by x1 and x2; dataY has shape (num_data, 1) and
        holds float labels in {-1.0, +1.0}.
    """
    x1 = random.uniform(-1, 1, num_data)
    x2 = random.uniform(-1, 1, num_data)

    # Noise-free labels of the target function.
    labels = np.where(x1 ** 2 + x2 ** 2 - 0.6 >= 0, 1.0, -1.0)

    # The leading entries of a random permutation select the samples whose
    # labels get flipped, so every flipped index is distinct.
    shuffled_ids = random.permutation(num_data)
    noisy_ids = shuffled_ids[np.arange(num_data) < num_data * noise_rate]
    labels[noisy_ids] = -labels[noisy_ids]

    dataX = np.column_stack((np.ones(num_data), x1, x2))
    dataY = labels.reshape((num_data, 1))
    return dataX, dataY

def w_lin(dataX,dataY):
    """Return the least-squares linear regression weights for (dataX, dataY).

    The solution is computed as pinv(dataX) . dataY.  Using the Moore-Penrose
    pseudo-inverse instead of explicitly inverting X^T X keeps the function
    usable when X^T X is singular (e.g. duplicated or linearly dependent
    feature columns); in that case the minimum-norm solution is returned.

    Args:
        dataX: feature matrix, one sample per row.
        dataY: targets with one row per sample.

    Returns:
        Weight array equal to ``np.dot(np.linalg.pinv(dataX), dataY)``.
    """
    return np.dot(np.linalg.pinv(dataX),dataY)

def pred(dataX,wLIN):
    """Classify every row of ``dataX`` with the linear model ``wLIN``.

    Args:
        dataX: feature matrix, one sample per row.
        wLIN: weight array; ``np.dot(dataX, wLIN)`` must be defined.

    Returns:
        Array shaped like ``np.dot(dataX, wLIN)`` and of the same dtype,
        holding +1 where the score is >= 0 and -1 elsewhere.
    """
    scores=np.dot(dataX,wLIN)
    # Vectorised sign with sign(0) == +1; the cast keeps the dtype of the
    # scores so the result type matches what the dot product produced.
    return np.where(scores>=0,1,-1).astype(scores.dtype)

def zero_one_cost(pred,dataY):
    """Return the 0/1 error: mismatching entries divided by the row count of ``dataY``."""
    mismatches=np.not_equal(pred,dataY)
    return mismatches.sum()/len(dataY)

def feat_transform(dataX):
    """Append the second-order terms x1*x2, x1^2 and x2^2 to ``dataX``.

    Columns 1 and 2 of ``dataX`` are read as x1 and x2.  The existing columns
    are kept unchanged and the three new columns are appended on the right,
    in the order (x1*x2, x1^2, x2^2).
    """
    first,second=dataX[:,1],dataX[:,2]
    quadratic=np.stack((first*second,first**2,second**2),axis=1)
    return np.hstack((dataX,quadratic))


if __name__=="__main__":

    # Question 13: in-sample 0/1 error of linear regression on the raw
    # features, repeated over 1000 freshly generated data sets.
    ein_history=[]
    for _ in range(1000):
        trainX,trainY=gen_data()
        weights=w_lin(trainX,trainY)
        ein_history.append(zero_one_cost(pred(trainX,weights),trainY))
    mean_ein=sum(ein_history)/len(ein_history)
    print("the average Ein over 1000 experiments: ",mean_ein)
    plt.figure()
    plt.hist(ein_history)
    plt.xlabel("zero_one Ein")
    plt.ylabel("frequency")
    plt.title("13")
    plt.savefig("13.png")

    # Questions 14-15: same experiment with the quadratic feature transform;
    # each run trains on one data set and is evaluated on a fresh one.
    weight_rows=[]
    eout_history=[]
    for _ in range(1000):
        trainX,trainY=gen_data()
        trainX=feat_transform(trainX)
        weights=w_lin(trainX,trainY)
        weight_rows.append(weights[:,0].tolist())
        evalX,evalY=gen_data()
        evalX=feat_transform(evalX)
        eout_history.append(zero_one_cost(pred(evalX,weights),evalY))
    # Weights of the first run that reached the smallest test error.
    best_run=eout_history.index(min(eout_history))
    print(weight_rows[best_run])
    weight_matrix=np.array(weight_rows)
    # Question 14: distribution of the weight at index 3.
    mean_w3=weight_matrix.mean(axis=0)[3]
    print("the average w3 over 1000 experiments: ",mean_w3)
    plt.figure()
    plt.hist(weight_matrix[:,3].tolist())
    plt.xlabel("w3")
    plt.ylabel("frequency")
    plt.title("14")
    plt.savefig("14.png")
    # Question 15: distribution of the out-of-sample error.
    mean_eout=sum(eout_history)/len(eout_history)
    print("the average Eout over 1000 experiments: ",mean_eout)
    plt.figure()
    plt.hist(eout_history)
    plt.xlabel("Eout")
    plt.ylabel("frequency")
    plt.title("15")
    plt.savefig("15.png")

18-20

# coding: utf-8

import numpy as np

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    The value is computed from exp(-|x|), which always lies in (0, 1], so the
    exponential cannot overflow for inputs of large magnitude.

    Args:
        x: a real scalar or an array-like of reals.

    Returns:
        A NumPy float scalar for scalar input, otherwise a float array with
        the same shape as ``x``.
    """
    z=np.asarray(x,dtype=float)
    decay=np.exp(-np.abs(z))
    # x >= 0: 1/(1+e^-x);  x < 0: e^x/(1+e^x).  Both are the same function,
    # written so that only the non-overflowing exponential is needed.
    value=np.where(z>=0,1.0/(1.0+decay),decay/(1.0+decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional results as arrays.
    return value[()]

def read_data(dataFile):
    """Load a whitespace-separated numeric data file.

    Every non-blank line holds the feature values followed by the label as
    the last field.  A constant 1.0 is prepended to each row as the bias
    feature.  Blank or whitespace-only lines are skipped.

    Args:
        dataFile: path of the file to read.

    Returns:
        (dataX, dataY): dataX has shape (num_data, num_features + 1) with the
        leading column of ones; dataY has shape (num_data, 1).

    Raises:
        ValueError: if the file has no data rows or a field is not numeric.
    """
    data_list=[]
    with open(dataFile,'r') as f:
        for line in f:
            fields=line.split()
            if not fields:
                # Blank lines would otherwise create a ragged row of just
                # the bias term.
                continue
            data_list.append([1.0]+[float(field) for field in fields])
    if not data_list:
        raise ValueError("no data rows found in %s" % dataFile)
    dataArray=np.array(data_list)
    # Copy the feature slice so dataX is a contiguous array of its own rather
    # than a view into dataArray.
    dataX=np.ascontiguousarray(dataArray[:,:-1])
    dataY=dataArray[:,-1].reshape((-1,1))
    return dataX,dataY

def gradient_descent(w,dataX,dataY,eta):
    """Perform one batch gradient-descent step for logistic regression.

    The gradient used is the average over all samples of
    sigmoid(-y_n * w.x_n) * (-y_n * x_n).

    Args:
        w: current weights, shape (num_dim, 1).
        dataX: feature matrix, shape (num_data, num_dim).
        dataY: labels, shape (num_data, 1).
        eta: step size.

    Returns:
        The updated weights, shape (num_dim, 1).
    """
    assert w.shape[0]==dataX.shape[1],"wrong shape!"
    assert w.shape[1]==1,"wrong shape of w!"
    # sigmoid is applied to the whole column at once and its result is kept
    # in a new float array; writing the values back into an integer-typed
    # array would truncate every one of them to 0.
    theta=sigmoid(-dataY*np.dot(dataX,w))
    gradient=np.average((-dataY*dataX)*theta,axis=0)
    return w-eta*gradient.reshape((-1,1))

def s_gradient_descent(w,dataX,dataY,eta):
    """Perform one stochastic gradient-descent step on a single sample.

    The gradient used is sigmoid(-y * w.x) * (-y * x) for the given sample.

    Args:
        w: current weights, shape (num_dim, 1).
        dataX: the sample's features, shape (1, num_dim).
        dataY: the sample's label, shape (1, 1).
        eta: step size.

    Returns:
        The updated weights, shape (num_dim, 1).
    """
    assert w.shape[0]==dataX.shape[1],"wrong shape!"
    assert w.shape[1]==1,"wrong shape of w!"
    assert dataX.shape[0]==1,"wrong shape of x!"
    assert dataY.shape[0]==1,"wrong shape of y!"
    margin=-dataY*np.dot(dataX,w)
    # Keep the sigmoid value as a scalar instead of storing it back into
    # ``margin``: an integer-typed array would truncate it to 0.
    theta=sigmoid(margin[0][0])
    gradient=theta*(-dataY*dataX)
    return w-eta*gradient.reshape((-1,1))

def pred(wLOG,dataX):
    """Predict +1/-1 labels with the logistic regression weights ``wLOG``.

    A sample is labelled +1 when its logistic output is at least 0.5.  Since
    sigmoid(s) >= 0.5 exactly when s >= 0, the raw score is thresholded at
    zero directly; this needs no exponential (so it cannot overflow) and does
    not depend on the dtype of the inputs.

    Args:
        wLOG: weights; ``np.dot(dataX, wLOG)`` must be defined.
        dataX: feature matrix, one sample per row.

    Returns:
        Float array shaped like ``np.dot(dataX, wLOG)`` with entries in
        {-1.0, +1.0}.
    """
    scores=np.dot(dataX,wLOG)
    return np.where(scores>=0,1.0,-1.0)

def zero_one_cost(pred,dataY):
    """Return the fraction of rows of ``dataY`` whose prediction differs from the label."""
    num_rows=dataY.shape[0]
    num_wrong=np.sum(np.not_equal(pred,dataY))
    return num_wrong/num_rows


if __name__=="__main__":
    trainX,trainY=read_data("hw3_train.dat")
    n_train,n_features=trainX.shape

    # Question 18: batch gradient descent, eta = 0.001, 2000 updates.
    weights=np.zeros((n_features,1))
    print("\n18")
    for _ in range(2000):
        weights=gradient_descent(weights,trainX,trainY,eta=0.001)
    print("the weight vector within g: ",weights[:,0])
    evalX,evalY=read_data("hw3_test.dat")
    test_error=zero_one_cost(pred(weights,evalX),evalY)
    print("the Eout(g) on the test set: ",test_error)

    # Extra run: same step size as question 18 but ten times more updates.
    print("\n18.1")
    weights=np.zeros((n_features,1))
    for _ in range(20000):
        weights=gradient_descent(weights,trainX,trainY,eta=0.001)
    print("the weight vector within g: ",weights[:,0])
    test_error=zero_one_cost(pred(weights,evalX),evalY)
    print("the Eout(g) on the test set: ",test_error)

    # Question 19: batch gradient descent, eta = 0.01, 2000 updates.
    print("\n19")
    weights=np.zeros((n_features,1))
    for _ in range(2000):
        weights=gradient_descent(weights,trainX,trainY,eta=0.01)
    print("the weight vector within g: ",weights[:,0])
    test_error=zero_one_cost(pred(weights,evalX),evalY)
    print("the Eout(g) on the test set: ",test_error)

    # Question 20: stochastic gradient descent, eta = 0.001, 2000 updates,
    # visiting the training samples cyclically in file order.
    print("\n20")
    weights=np.zeros((n_features,1))
    for step in range(2000):
        row=step%n_train
        # One-row slices keep the (1, n_features) / (1, 1) shapes that
        # s_gradient_descent checks for.
        sampleX=trainX[row:row+1,:]
        sampleY=trainY[row:row+1,:]
        weights=s_gradient_descent(weights,sampleX,sampleY,eta=0.001)
    print("the weight vector within g: ",weights[:,0])
    test_error=zero_one_cost(pred(weights,evalX),evalY)
    print("the Eout(g) on the test set: ",test_error)

运行结果及分析

13-15

图5 13-15结果1

图6 13-15结果2

图7 13-15结果3

图8 13-15结果4

18-20

图9 18-20结果

对比18和18.1(步长均为0.001,迭代次数分别为2000和20000),可知迭代步长较小时,需要较多的迭代次数才能达到较优效果。

你可能感兴趣的:(机器学习基石笔记:Homework #3 LinReg&LogReg相关习题)