Python 分类算法(1)——逻辑回归logistic regression之代码实现(1)

    本节根据逻辑回归的原理,利用python编写逻辑回归代码,实现简单的线性分类。

    本例中,需要将下图中的两类数据点进行分类。


Python 分类算法(1)——逻辑回归logistic regression之代码实现(1)_第1张图片

    逻辑回归的原理,可以详细阅读《统计学习方法》中逻辑回归部分的内容,这里只贴上代码,并加以说明。

import numpy as np
import pandas as pd
# Load the headerless CSV of sample points into a DataFrame.
scatterdata = pd.read_csv(
    "C:/Users/Ray/Desktop/logistic regression/data3.csv",
    header=None,
)
# Preview of the first rows (only displayed when run interactively).
scatterdata.head()
# Convert the DataFrame to an ndarray for the numeric operations that follow.
data = np.array(scatterdata)
利用scatter绘制散点图,观察数据分布情况。
from numpy import where
import matplotlib.pyplot as plt
# Feature matrix: the first two columns of the data plus a trailing column of
# ones, so the last coefficient of the model plays the role of the intercept.
x1 = np.ones((data.shape[0], 1))
x = np.concatenate((data[:, 0:2], x1), axis=1)
# Labels (third column) as a column vector.
y = data[:, 2].reshape(-1, 1)
print(x.shape)

# Row indices of each class, used to draw the classes with different markers.
pos = where(y == 1)
neg = where(y == 0)
plt.scatter(x[pos[0], 0], x[pos[0], 1], marker='o', c='b')
plt.scatter(x[neg[0], 0], x[neg[0], 1], marker='x', c='r')

定义sigmoid函数

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        numpy array with the same shape as ``x`` and values in [0, 1].
    """
    x = np.asarray(x, dtype=float)
    # Only exponentiate non-positive numbers so exp() can never overflow;
    # the naive exp(x) / (1 + exp(x)) yields inf / inf = nan for large x.
    e = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

定义损失函数(注意:这里实际计算的是对数似然函数,训练时通过梯度上升使其最大化,其值越大说明拟合越好)

def cost(theta,x,y):
    """Print and return the log-likelihood of the logistic model.

    Despite the name, the value computed is the objective that is maximised
    (larger is better):

        sum_i y_i * z_i - sum_i log(1 + exp(z_i)),   with z = x . theta^T

    Args:
        theta: coefficient row vector, shape (1, n_features).
        x: design matrix, shape (n_samples, n_features).
        y: 0/1 labels as a column vector, shape (n_samples, 1).

    Returns:
        The log-likelihood as a (1, 1) numpy array (also printed).
    """
    z = x.dot(theta.T)
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow for large z.
    j = (y.T).dot(z) - np.sum(np.logaddexp(0, z))
    print(j)
    return j

定义绘图函数

def plotfig(theta,x,y):
    """Scatter the two classes and overlay the fitted decision boundary.

    Points with label 1 are drawn as blue circles and points with label 0 as
    red crosses. The boundary is the line
    theta[0,0]*x1 + theta[0,1]*x2 + theta[0,2] = 0, drawn for x1 in [-4, 4).
    """
    import matplotlib.pyplot as plt

    pos_rows = where(y == 1)[0]
    neg_rows = where(y == 0)[0]
    for rows, marker, color in ((pos_rows, 'o', 'b'), (neg_rows, 'x', 'r')):
        plt.scatter(x[rows, 0], x[rows, 1], marker=marker, c=color)

    # Solve the boundary equation for the second feature.
    line_x = np.arange(-4, 4, 0.1)
    w1, w2, bias = theta[0, 0], theta[0, 1], theta[0, 2]
    line_y = (-bias - w1 * line_x) / w2
    plt.plot(line_x, line_y)

定义logistic regression函数

def logistic_regression(x,y,*,iteration=5000,step=0.01):
    """Fit a logistic regression model by batch gradient ascent.

    Starting from an all-ones coefficient vector, repeatedly moves theta along
    the gradient of the log-likelihood, x^T (y - sigmoid(x theta^T)). After the
    last iteration the objective is reported via ``cost`` and the decision
    boundary is drawn via ``plotfig``.

    Args:
        x: design matrix, shape (n_samples, n_features).
        y: 0/1 labels as a column vector, shape (n_samples, 1).
        iteration: number of gradient-ascent iterations.
        step: learning rate applied to the gradient.

    Returns:
        The fitted coefficients as a (1, n_features) numpy array.
    """
    # x.shape replaces the bare name `shape`, which is not defined anywhere.
    theta = np.ones((1, x.shape[1]))
    for _ in range(iteration):
        h = sigmoid(x.dot(theta.T))
        grad = (x.T).dot(y - h)
        theta = theta + (step * grad).T
    cost(theta, x, y)
    plotfig(theta, x, y)
    return theta

根据导入的数据进行逻辑回归计算

# Fit the model on the loaded data; the call also reports the final objective
# value through cost() and draws the decision boundary through plotfig().
logistic_regression(x,y)
Python 分类算法(1)——逻辑回归logistic regression之代码实现(1)_第2张图片

所用数据(即 data3.csv 的内容,每行依次为:特征 x1、特征 x2、类别标签 0/1):

-0.017612,14.053064,0
-1.395634,4.662541,1
-0.752157,6.53862,0
-1.322371,7.152853,0
0.423363,11.054677,0
0.406704,7.067335,1
0.667394,12.741452,0
-2.46015,6.866805,1
0.569411,9.548755,0
-0.026632,10.427743,0
0.850433,6.920334,1
1.347183,13.1755,0
1.176813,3.16702,1
-1.781871,9.097953,0
-0.566606,5.749003,1
0.931635,1.589505,1
-0.024205,6.151823,1
-0.036453,2.690988,1
-0.196949,0.444165,1
1.014459,5.754399,1
1.985298,3.230619,1
-1.693453,-0.55754,1
-0.576525,11.778922,0
-0.346811,-1.67873,1
-2.124484,2.672471,1
1.217916,9.597015,0
-0.733928,9.098687,0
-3.642001,-1.618087,1
0.315985,3.523953,1
1.416614,9.619232,0
-0.386323,3.989286,1
0.556921,8.294984,1
1.224863,11.58736,0
-1.347803,-2.406051,1
1.196604,4.951851,1
0.275221,9.543647,0
0.470575,9.332488,0
-1.889567,9.542662,0
-1.527893,12.150579,0
-1.185247,11.309318,0
-0.445678,3.297303,1
1.042222,6.105155,1
-0.618787,10.320986,0
1.152083,0.548467,1
0.828534,2.676045,1
-1.237728,10.549033,0
-0.683565,-2.166125,1
0.229456,5.921938,1
-0.959885,11.555336,0
0.492911,10.993324,0
0.184992,8.721488,0
-0.355715,10.325976,0
-0.397822,8.058397,0
0.824839,13.730343,0
1.507278,5.027866,1
0.099671,6.835839,1
-0.344008,10.717485,0
1.785928,7.718645,1
-0.918801,11.560217,0
-0.364009,4.7473,1
-0.841722,4.119083,1
0.490426,1.960539,1
-0.007194,9.075792,0
0.356107,12.447863,0
0.342578,12.281162,0
-0.810823,-1.466018,1
2.530777,6.476801,1
1.296683,11.607559,0
0.475487,12.040035,0
-0.783277,11.009725,0
0.074798,11.02365,0
-1.337472,0.468339,1
-0.102781,13.763651,0
-0.147324,2.874846,1
0.518389,9.887035,0
1.015399,7.571882,0
-1.658086,-0.027255,1
1.319944,2.171228,1
2.056216,5.019981,1
-0.851633,4.375691,1
-1.510047,6.061992,0
-1.076637,-3.181888,1
1.821096,10.28399,0
3.01015,8.401766,1
-1.099458,1.688274,1
-0.834872,-1.733869,1
-0.846637,3.849075,1
1.400102,12.628781,0
1.752842,5.468166,1
0.078557,0.059736,1
0.089392,-0.7153,1
1.825662,12.693808,0
0.197445,9.744638,0
0.126117,0.922311,1
-0.679797,1.22053,1
0.677983,2.556666,1
0.761349,10.693862,0
-2.168791,0.143632,1
1.38861,9.341997,0
0.317029,14.739025,0

你可能感兴趣的:(逻辑回归,logistic,regression,算法)