三、logistic实现
数据集格式
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
# -*- coding: utf-8 -*-
"""Logistic regression trained by batch gradient ascent, with a 2-D plot helper.

The data file is expected to hold one sample per line as three
whitespace-separated fields: ``x1 x2 label``.
"""
import numpy as np


def loadDataSet(filename='testSet.txt'):
    """Read a whitespace-separated ``x1 x2 label`` file.

    Args:
        filename: Path of the text file to read. Defaults to ``testSet.txt``
            relative to the current working directory.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (the constant 1.0 multiplies the first weight,
        i.e. the intercept) and ``labelMat`` is a list of ``int`` labels.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError, IndexError: If a non-blank row has fewer than three
            fields or a field cannot be parsed as a number.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as f:
        for line in f:
            lineList = line.strip().split()
            if not lineList:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(lineList[0]), float(lineList[1])])
            labelMat.append(int(lineList[2]))
    return dataMat, labelMat


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise."""
    return 1.0 / (1 + np.exp(-x))


def gradAscent(dataMat, labelMat, alpha=0.001, maxIteration=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMat: Sequence of feature rows (one row per sample, one column
            per feature, including the constant intercept column).
        labelMat: Sequence of labels, one per row of ``dataMat``.
        alpha: Step size applied to every update.
        maxIteration: Number of full-batch updates to perform.

    Returns:
        A ``numpy.matrix`` of shape ``(n_features, 1)`` holding the weights.
        All weights start at 1.0.
    """
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    data = np.asmatrix(dataMat)
    labels = np.asmatrix(labelMat).T  # column vector, one label per sample
    _, c = np.shape(data)
    w = np.asmatrix(np.ones((c, 1)))
    for _ in range(maxIteration):
        h = sigmoid(data * w)  # predicted probability for every sample
        error = labels - h
        # Core update step: move the weights along the gradient of the
        # log-likelihood (this is gradient *ascent*, hence the plus sign).
        w = w + alpha * data.T * error
    return w


def plotBestFit(weights):
    """Scatter the samples from ``loadDataSet()`` and draw the decision line.

    Args:
        weights: The three weights ``(w0, w1, w2)``. Any array-like is
            accepted, including the ``(3, 1)`` matrix returned by
            ``gradAscent``; it is flattened before use.

    The plotted line is ``w0 + w1 * x1 + w2 * x2 = 0`` for ``x1`` sampled
    over ``[-3.0, 3.0)`` in steps of 0.1. The figure is shown with
    ``plt.show()``.
    """
    # Imported here so the training helpers can be used without matplotlib.
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    # reshape keeps column indexing valid even when the data set is empty.
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    labels = np.array(labelMat, dtype=int)
    positive = labels == 1

    # Flatten so a (3, 1) matrix/array and a plain length-3 sequence all
    # produce a 1-D ``y`` that matches ``x`` (a matrix would give 1 x N).
    w = np.asarray(weights, dtype=float).ravel()

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[positive, 1], dataArr[positive, 2],
               s=30, c='red', marker='s')
    ax.scatter(dataArr[~positive, 1], dataArr[~positive, 2],
               s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
<机器学习实战-logistic回归>
Andrew Ng机器学习视频公开课笔记 http://blog.csdn.net/linuxcumt/article/details/8572746