Logistic回归----机器学习读书笔记

优点:计算代价不高,易于理解和实现。
缺点:容易欠拟合,分类精度可能不高。
适用数据类型:数值型和标称型数据。

Logistic回归先对特征做线性组合,再通过Sigmoid函数把结果映射为属于正类的概率;权重采用梯度上升法(最大化对数似然)求解,结构上类似于单层BP神经网络(BP采用梯度下降法最小化误差)。

实现代码如下:

import numpy as np
import matplotlib.pyplot as plt

# Build the synthetic data set
def loadDataSet(numSamples=100):
    """Generate a random, linearly separable 2-D data set.

    Points are scattered around the reference line ``y = 0.5x + 0.3``. A point
    strictly above the line is labelled 1, any other point is labelled 0.

    Args:
        numSamples: Number of points to generate (default 100).

    Returns:
        A tuple ``(dataSet, labels)``. ``dataSet`` is a float32 array of shape
        ``(numSamples, 3)`` whose rows are ``[1.0, x, y]`` (the leading 1.0 is
        the bias input). ``labels`` is an integer array of shape
        ``(numSamples,)`` holding 0 or 1.
    """
    x = np.random.rand(numSamples)
    # Signed vertical offset from the reference line, uniform in [-1, 1).
    delta = np.random.rand(numSamples)*2 - 1
    y = x*0.5 + delta + 0.3
    # The sign of the offset decides the class; the builtin ``int`` replaces
    # the ``np.int`` alias that newer NumPy releases no longer provide.
    labels = (delta > 0).astype(int)
    # Stack the columns directly so row i always matches labels[i]; going
    # through a dict keyed by x would drop samples that share an x value.
    dataSet = np.column_stack((np.ones(numSamples), x, y)).astype(np.float32)
    return dataSet, labels

# Sigmoid function (a smooth approximation of the step function)
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^(-inX)).

    Works on scalars and, element-wise, on NumPy arrays.
    """
    decay = np.exp(-inX)
    return 1.0/(1 + decay)
# Gradient-ascent optimisation
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: Array of shape ``(m, n)``, one sample per row.
        classLabels: Array-like holding ``m`` class labels (0 or 1) in any
            shape; it is flattened into a column internally.
        alpha: Step size applied to each update (default 0.001).
        maxCycles: Number of full-batch iterations (default 500).

    Returns:
        Weight array of shape ``(n, 1)``.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``dataMatIn``.
    """
    # Derive the column length from the labels themselves instead of
    # assuming a fixed count of 100 samples.
    labelCol = np.asarray(classLabels).reshape(-1, 1)
    if labelCol.shape[0] != dataMatIn.shape[0]:
        raise ValueError(
            "classLabels has %d entries but dataMatIn has %d rows"
            % (labelCol.shape[0], dataMatIn.shape[0])
        )
    weights = np.ones((dataMatIn.shape[1], 1))
    for _ in range(maxCycles):
        # Predicted probability of class 1 for every sample.
        h = sigmoid(dataMatIn.dot(weights))
        error = labelCol - h
        # Step along the gradient of the log-likelihood.
        weights = weights + alpha*dataMatIn.T.dot(error)
    return weights

# Build the synthetic data set and fit the logistic-regression weights.
dataSet, labels = loadDataSet()
weights = gradAscent(dataSet, labels)

# Draw every sample: red for class 1, blue for class 0.
for sample, label in zip(dataSet, labels):
    pointColor = 'r' if label == 1 else 'b'
    plt.scatter(sample[1], sample[2], color=pointColor)

# The decision boundary is where w0 + w1*x + w2*y = 0,
# i.e. y = (-w0 - w1*x) / w2.
bias, xCoef, yCoef = weights
x = np.linspace(0, 1, 20)
y = (-bias - xCoef*x)/yCoef
plt.plot(x, y, color='k')
plt.show()

你可能感兴趣的:(机器学习算法,logistic回归,机器学习)