Logistic regression

  1 from numpy import *

  2 import random

  3 import time

  4 st = time.time()

  5 

  6 def loaddata(filename):

  7     fr = open(''.join([filename, '.txt'])).readlines()

  8     trainx = [[1] + map(float, line.split()[:-1]) for line in fr] # trainx = [[1,12.2,22.4],[1,22.3,31.2],...]

  9     trainy = [[float(line.split()[-1])] for line in fr] # trainy = [0,1,1,0,...]

 10     return trainx, trainy

 11 

 12 def sigmod(z):

 13     return 1.0 / (1 + exp(-z))

 14 

 15 def optimizaion(trainx, trainy):

 16     trainxmat = mat(trainx)

 17     m = len(trainx)

 18     # beta = [0,0,0]

 19     beta = ones((len(trainx[0]),1)) # array

 20     # maxiter

 21     M = 500

 22     """

 23     # error permid

 24     e = 

 25     """

 26     """

 27     for i in xrange(M):

 28         #if error2sum > e:

 29         # z = betat.T * x = trainx (matricdoc)* beta = [beta.Tx1,beta.Tx2,...,beta.Txn]

 30         sigmodz = sigmod(trainxmat * beta)

 31         # [error_i = yi - sigmod(zi)]

 32         error = trainy - sigmodz

 33         # update beta

 34         beta += alpha * trainxmat.T * error

 35         print beta

 36         """

 37     # random gradascent

 38     for j in xrange(M):

 39         for i in xrange(m):

 40             # per span

 41             alpha = 0.01 + 4 / (1.0 + i +j)

 42             randid = random.randint(0, m - 1)

 43             sigmodz = sigmod(trainxmat[randid] * beta)

 44             error = trainy[randid] - sigmodz

 45             beta += alpha * trainxmat[randid].T * error

 46             #print beta

 47 

 48     return beta

 49 

 50 

 51 def logregress(testx, beta):

 52     if mat(testx) * beta > 0: return [1.0]

 53     else: return [0.0]

 54 

 55 def main():

 56     # step 1: loading data...

 57     print "step 1: loading data..."

 58     trainx, trainy = loaddata('horseColicTraining')

 59     testx, testy = loaddata('horseColicTest')

 60     """

 61     print 'trainx', trainx

 62     print 'trainy', trainy

 63     print 'testx', testx

 64     print 'testy', testy

 65     print 'testy[2]',testy[2]

 66     """

 67 

 68     # step 2: training...

 69     print "step 2: training..."

 70     beta = optimizaion(trainx, trainy)

 71     #print "beta = ",beta

 72 

 73     # step 3: testing...

 74     print "step 3: testing..."

 75     numTests = 10; errorSum = 0.0; l = len(testx)

 76     for j in xrange(numTests):

 77         errorcount = 0.0

 78         #print 'the total number is: ',l

 79         for i in xrange(l):

 80             if logregress(testx[i], beta) != testy[i]: 

 81                 errorcount += 1

 82         #print "the number of error is: ", errorcount

 83         print "the error rate is: ", (errorcount / l)

 84         errorSum += (errorcount / l)

 85     print "after %d iterations the average error rate is: %f" %(numTests, errorSum/numTests)

 86 

 87 

 88 

 89 """

 90 trainx, trainy = loaddata('testSet')

 91 print trainy

 92 optimizaion(trainx, trainy)

 93 """

 94 

 95 main()

 96 

 97 print "cost time: ", (time.time() - st)

 98 

 99 """ lineregres

100         # ssi = sigmod(zi) - sigmod(zi) ** 2

101         ss = [sigmodzi - sigmodzi ** 2 for sigmodzi in sigmodz]

102         # errssi = errori * ssi

103         errss = map(lambda x, y: x * y, error, ss)

104         # treri = errssi * trainxi(vector)

105         trer = [errss[i] * array(trainx[i]) for i in xrange(m)]

106         """

 

你可能感兴趣的:(log)