# Machine Learning Foundations: code for the third lesson

import urllib2
import numpy as np
from math import exp

# url = 'https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw3%2Fhw3_test.dat'
# f = urllib2.urlopen(url)
# with open("hw3_train.dat", "wb") as code:
# code.write(f.read())

def train_PLA(wn, xn, yn, index, learn_rate=0.001):
    """Run one full-batch gradient-descent step of logistic regression.

    Despite the name, this is not a perceptron update: it moves ``wn`` along
    the average of ``theta(-yn[k] * wn.xn[k]) * yn[k] * xn[k]`` over all
    samples, where ``theta(s) = 1 / (1 + exp(-s))``.

    Args:
        wn: Current weight vector, one entry per feature column of ``xn``.
        xn: Sample matrix, one row per sample.
        yn: Labels, one per row of ``xn``; they are used as multiplicative
            signs (the caller in this file passes -1/+1 integers).
        index: Unused. Kept so existing callers keep working; it appears to
            have selected the single sample for a stochastic variant that
            is no longer active.
        learn_rate: Step size applied to the averaged update direction.

    Returns:
        A new weight vector; ``wn`` itself is not modified. When ``xn`` has
        no rows the weights are returned unchanged (as a copy).
    """
    xn = np.asarray(xn, dtype=float)
    yn = np.asarray(yn, dtype=float)
    wn = np.asarray(wn, dtype=float)

    n_samples = xn.shape[0]
    if n_samples == 0:
        # No samples means no gradient information; avoid dividing by zero.
        return wn.copy()

    # margins[k] = yn[k] * (wn . xn[k])
    margins = yn * np.dot(xn, wn)

    # theta(-margin) = 1 / (1 + exp(margin)).  Evaluate it as
    # exp(-log(1 + exp(margin))) so that a large margin underflows to 0
    # instead of overflowing the way a direct exp(margin) would.
    theta = np.exp(-np.logaddexp(0.0, margins))

    # Average update direction over the whole batch.
    v = np.dot(theta * yn, xn) / n_samples

    return wn + learn_rate * v

def test_PLA(wn, xn, yn):
    """Return the fraction of samples that ``wn`` misclassifies.

    A sample counts as an error when ``(wn . xn[i]) * yn[i]`` is strictly
    negative; a product of exactly zero is NOT counted as an error.

    Args:
        wn: Weight vector, one entry per feature column of ``xn``.
        xn: Sample matrix, one row per sample.
        yn: Labels, one per row of ``xn``, used as multiplicative signs.

    Returns:
        The error rate as a float in [0, 1]. An empty sample set yields 0.0
        rather than a division by zero.
    """
    xn = np.asarray(xn, dtype=float)
    n_samples = xn.shape[0]
    if n_samples == 0:
        return 0.0

    # Signed margin of every sample in one matrix-vector product.
    margins = np.dot(xn, wn) * np.asarray(yn)
    return float(np.count_nonzero(margins < 0)) / n_samples


# The "test_" prefix would make pytest collect this helper as a test case
# (and fail on its arguments) whenever it is imported into a test module.
test_PLA.__test__ = False


#load data
def load_data(filename):
    """Load a space/tab separated data file into features and labels.

    Each non-blank line is split on single spaces (tabs are treated as
    spaces). Tokens 1..20 are parsed as float features and token 21 as the
    integer label; token 0 is ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(xn, yn)``: ``xn`` is a float array of shape
        ``(n_rows, 21)`` whose column 0 is the constant 1 and whose columns
        1..20 hold the parsed features; ``yn`` is an int array of shape
        ``(n_rows,)`` holding the labels.

    Raises:
        IndexError: If a non-blank line has fewer than 22 tokens.
        ValueError: If a token cannot be parsed as a number.
    """
    with open(filename, "r") as handle:
        # Skip blank lines so a stray empty line does not raise IndexError.
        rows = [raw for raw in handle if raw.strip()]

    # Builtin float/int are used as dtypes because the np.float / np.int
    # aliases no longer exist in current NumPy releases.
    xn = np.zeros((len(rows), 21), dtype=float)
    yn = np.zeros((len(rows),), dtype=int)

    for i, raw in enumerate(rows):
        fields = raw.rstrip('\r\n').replace('\t', ' ').split(' ')
        xn[i, 0] = 1  # constant bias feature
        # fields[0] is deliberately skipped -- presumably the empty token
        # produced by a leading space on each line; verify against the data.
        for j in range(1, 21):
            xn[i, j] = float(fields[j])
        yn[i] = int(fields[21])
    return xn, yn


# Driver: train on hw3_train.dat with repeated full-batch updates and report
# the test-set error on hw3_test.dat after every update.
train_xn, train_yn = load_data('hw3_train.dat')
test_xn, test_yn = load_data('hw3_test.dat')

n_iterations = 2000
error_rate = 0
wn = np.zeros((21, ), dtype=float)
for i in range(n_iterations):
    wn = train_PLA(wn, train_xn, train_yn, i % train_xn.shape[0])
    error = test_PLA(wn, test_xn, test_yn)
    error_rate += error
    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('random : %s  error_rate:  %s' % (i, error))
# NOTE(review): the value labelled "Eout" is the mean test error over all
# iterations, not the error of the final weights -- confirm this is intended.
print('Eout:  %s' % (error_rate / float(n_iterations),))

# You may also be interested in: (machine learning, numpy)