【机器学习笔记2.4】用逻辑回归做二分类(TensorFlow实现)

概述

相关理论介绍可参阅【机器学习笔记2.1】线性模型之逻辑回归

代码示例

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


def plotDataMat(dataMat, labelMat, weights, bias=1.0):
    """Scatter-plot both classes and overlay a linear decision boundary.

    The line drawn is ``weights[0] * x1 + weights[1] * x2 + bias = 0``,
    solved for ``x2`` over ``x1`` in ``[-3.0, 3.0)``.

    Args:
        dataMat: 2-D array-like (list of rows, ndarray or np.matrix);
            column 0 is plotted on the X1 axis, column 1 on the X2 axis.
        labelMat: one label per row of ``dataMat`` (flat or column-shaped).
            Rows whose label truncates to the integer 1 are drawn as red
            squares, every other row as a green dot.
        weights: at least two coefficients; a flat sequence or a (2, 1)
            column are both accepted.
        bias: intercept of the boundary. Defaults to 1.0, which reproduces
            the intercept that used to be hard-coded here, so existing
            three-argument calls draw exactly the same line.
    """
    points = np.asarray(dataMat)
    row_count = points.shape[0]
    # Flatten so that both a flat label list and an (n, 1) column work.
    labels = np.asarray(labelMat).reshape(-1)[:row_count].astype(int)
    is_positive = labels == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(points[is_positive, 0], points[is_positive, 1],
               s=30, c='red', marker='s')
    ax.scatter(points[~is_positive, 0], points[~is_positive, 1],
               s=30, c='green')

    # Reduce the coefficients to plain floats so the result is a 1-D curve
    # regardless of whether weights/bias arrive as scalars, vectors or 2-D.
    coeffs = np.asarray(weights, dtype=float).reshape(-1)
    intercept = float(np.asarray(bias, dtype=float).reshape(-1)[0])

    x = np.arange(-3.0, 3.0, 0.1)
    y = (-intercept - coeffs[0] * x) / coeffs[1]
    ax.plot(x, y)

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


def loadDataSet(file_path):
    """Read a whitespace-separated data file of ``x1 x2 label`` rows.

    Args:
        file_path: path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of
        ``[float, float]`` feature pairs and ``labelMat`` the matching list
        of ``int`` labels, both in file order.

    Raises:
        IndexError: a non-blank line has fewer than three fields.
        ValueError: a field cannot be parsed as a number.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if parsing raises.
    with open(file_path) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Blank (or whitespace-only) line, e.g. a trailing newline.
                continue
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


# Data set used in the logistic-regression chapter of "Machine Learning in Action".
dataMat, labelMat = loadDataSet('testSet.txt')
dataMat = np.mat(dataMat).astype(np.float32)
labelMat = np.mat(labelMat).transpose().astype(np.float32)
sample_num = dataMat.shape[0]

# Training stops early once the full-dataset loss drops to this value.
threshold = 1.0e-2

weight = tf.Variable(tf.zeros([2, 1]))
bias = tf.Variable(tf.zeros([1, 1]))

x_ = tf.placeholder(tf.float32, [None, 2])
y_ = tf.placeholder(tf.float32, [None, 1])

g = tf.matmul(x_, weight) + bias
hyp = tf.sigmoid(g)  # hypothesis: predicted probability of class 1
# NOTE: tf.nn.softmax(g) cannot replace the sigmoid here -- g has a single
# column, and a softmax over one logit is always 1.0.
# Cross-entropy of each fed sample, scaled by 1 / sample_num.  [1]
cost = (y_ * tf.log(hyp) + (1 - y_) * tf.log(1 - hyp)) / -sample_num
loss = tf.reduce_sum(cost)

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

step = 0
w = None
b = None
flag = 0  # set to 1 when the loss threshold is reached
loss_buf = []
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for _ in range(100):
        # One epoch of per-sample gradient descent.
        for data, label in zip(dataMat, labelMat):
            sess.run(train, feed_dict={x_: data, y_: label})
            step += 1
            if step % 10 == 0:
                w_now, b_now = sess.run([weight, bias])
                print(step, w_now.flatten(), b_now.flatten())

        # Evaluate on the whole data set: cost is normalised by sample_num,
        # so feeding every sample makes this the mean cross-entropy.
        loss_val = sess.run(loss, feed_dict={x_: dataMat, y_: labelMat})
        print('loss_val = ', loss_val)
        loss_buf.append(loss_val)
        print('weight = ', weight.eval(sess))
        if loss_val <= threshold:
            flag = 1
            break
    w, b = sess.run([weight, bias])

# Plot the loss curve (one point per epoch).
loss_ndarray = np.array(loss_buf)
loss_size = np.arange(len(loss_ndarray))
plt.plot(loss_size, loss_ndarray, 'b+', label='loss')
plt.legend()

# Called with three arguments, plotDataMat draws the line
# weights[0]*x1 + weights[1]*x2 + 1 = 0.  The learned boundary is
# w[0]*x1 + w[1]*x2 + b = 0, so dividing the weights by b expresses the same
# line in the form plotDataMat expects.  If b is exactly 0 the line cannot be
# written that way and the raw weights are passed unchanged.
b_val = float(b.flatten()[0])
boundary_w = w / b_val if b_val != 0 else w
plotDataMat(dataMat, labelMat, boundary_w)
print('end')

拟合出的决策边界(分类直线):

loss曲线:

参考文献

[1] 从零开始使用TensorFlow建立简单的逻辑回归模型

你可能感兴趣的:(机器学习笔记)