Andrew Ng's Machine Learning Course: Programming Exercises | (2) ex2 - Logistic Regression

1. Logistic Regression

"""
逻辑回归
案例:根据学生的两门学生成绩,预测该学生是否会被大学录取
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def get_Xy(data):
    data.insert(0, 'ones', 1)  # prepend a column of ones as the intercept term
    X = np.array(data.iloc[:, 0:-1])  # feature matrix, shape (m, 3)
    y = np.array(data.iloc[:, -1])  # label vector, shape (m,)
    return X, y


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost_function(theta, X, y):  # theta has shape (3,)
    first = y * np.log(sigmoid(X @ theta))  # shape (100,); * is element-wise for ndarrays (it is matrix multiplication only for np.matrix)
    second = (1 - y) * np.log(1 - sigmoid(X @ theta))
    return -np.sum(first + second) / len(X)
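
# cost_function implements the cross-entropy cost
#   J(theta) = -(1/m) * sum( y * log(h) + (1 - y) * log(1 - h) ),  h = sigmoid(X @ theta)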


def gradient_descent(X, y, theta, epoch, alpha):  # batch gradient descent; theta has shape (3,)
    m = len(X)
    costs = []

    for i in range(epoch):
        A = sigmoid(X @ theta)
        theta = theta - (alpha / m) * X.T @ (A - y)
        cost = cost_function(theta, X, y)
        costs.append(cost)
        if i % 1000 == 0:
            print(cost)
    return costs, theta
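
# The loop above applies the vectorized batch update
#   theta := theta - (alpha / m) * X.T @ (sigmoid(X @ theta) - y)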


def gradient(theta, X, y):  # gradient for a single evaluation (used by the optimizer below); theta has shape (3,)
    parameters = int(theta.ravel().shape[0])  # ravel flattens the array
    grad = np.zeros(parameters)  # one entry per element of theta
    error = sigmoid(X @ theta) - y

    for i in range(parameters):
        term = np.multiply(error, X[:, i])  # X[:, i] selects the i-th feature column
        grad[i] = np.sum(term) / len(X)

    return grad
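
# A vectorized equivalent (a sketch; it computes the same gradient without the loop):
def gradient_vec(theta, X, y):
    return X.T @ (sigmoid(X @ theta) - y) / len(X)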


def predict(theta, X):  # theta has shape (3,); classify as 1 when the predicted probability >= 0.5
    probability = sigmoid(X @ theta)
    return [1 if x >= 0.5 else 0 for x in probability]


if __name__ == "__main__":
    data = pd.read_csv("ex2data1.txt", names=['Exam 1', 'Exam 2', 'Admitted'])
    positive = data[data['Admitted'].isin([1])]
    negative = data[data['Admitted'].isin([0])]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o',
               label='Admitted')  # s: marker size in points squared (scalar or array of shape (n,)); c: color
    ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
    # Equivalent alternative:
    # ax.scatter(data[data['Admitted']==0]['Exam 1'], data[data['Admitted']==0]['Exam 2'], c='r', marker='x', label='y=0')
    # ax.scatter(data[data['Admitted']==1]['Exam 1'], data[data['Admitted']==1]['Exam 2'], c='b', marker='o', label='y=1')
    ax.legend()
    ax.set_xlabel('Exam 1 Score')
    ax.set_ylabel('Exam 2 Score')
    X, y = get_Xy(data)
    print(X.shape,y.shape)
    theta = np.zeros(3)  # shape (3,)
    # *****Choosing the learning rate alpha*****
    cost_init = cost_function(theta, X, y)
    print("Initial cost: {}".format(cost_init))
    epoch = 200000
    alpha = 0.004
    costs, final_theta = gradient_descent(X, y, theta, epoch, alpha)
    print(final_theta)
    # Accuracy check
    y_ = np.array(predict(final_theta, X))
    print(y_.shape, y.shape)
    acc = np.mean(y_ == y)
    print('accuracy = {0}'.format(acc))
    print("-" * 30, "separator", "-" * 30)
    # *****Advanced optimizer: it selects the learning rate automatically*****
    import scipy.optimize as opt

    result = opt.fmin_tnc(func=cost_function, x0=theta, fprime=gradient, args=(X, y))
    print(result)
    print("Final cost: {}".format(cost_function(result[0], X, y)))
    # Accuracy check
    y_1 = np.array(predict(result[0], X))
    print(y_1.shape, y.shape)
    acc1 = np.mean(y_1 == y)
    print('accuracy_1 = {0}'.format(acc1))

    plt.show()
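
As a quick visual check, the learned decision boundary can be drawn over the scatter plot. This is a sketch that assumes final_theta, positive and negative from the script above are still in scope; the boundary is the line where X @ theta = 0, i.e. theta0 + theta1*x1 + theta2*x2 = 0, solved here for x2:

x1_plot = np.linspace(30, 100, 100)  # the exam scores in this dataset roughly span 30-100
x2_plot = -(final_theta[0] + final_theta[1] * x1_plot) / final_theta[2]  # solve the boundary equation for x2

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
ax.plot(x1_plot, x2_plot, 'g', label='Decision boundary')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')
plt.show()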

2. Logistic Regression with Regularization

"""
逻辑回归-正则化
案例:案例:设想你是工厂的生产主管,你要决定是否芯片要被接受或抛弃
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def feature_mapping(x1, x2, power):
    # map the two features to every polynomial term x1^(i-j) * x2^j up to the given power
    data = {}

    for i in np.arange(power + 1):
        for j in np.arange(i + 1):
            data['F{}{}'.format(i - j, j)] = np.power(x1, i - j) * np.power(x2, j)

    return pd.DataFrame(data)
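
# For power=6 this produces (6+1)*(6+2)/2 = 28 columns F00..F06, matching the
# theta of shape (28, 1) below; e.g. power=2 would give the 6 columns
# 1, x1, x2, x1^2, x1*x2, x2^2.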


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost_function(theta, X, y, λ):
    first = y.T @ np.log(sigmoid(X @ theta))
    second = (1 - y.T) @ np.log(1 - sigmoid(X @ theta))
    reg = (λ / (2 * len(X))) * np.sum(np.power(theta[1:], 2))  # theta[1:] excludes theta0 from the penalty
    return -np.sum(first + second) / len(X) + reg
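
# Regularized cost implemented above:
#   J(theta) = -(1/m) * sum( y * log(h) + (1 - y) * log(1 - h) ) + (λ / (2m)) * sum_{j>=1} theta_j^2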


def gradient_descent(theta, X, y, α, epoch, λ):
    costs = []

    for i in range(epoch):

        reg = theta[1:] * (λ / len(X))
        reg = np.insert(reg, 0, values=0, axis=0)  # theta0 is not regularized

        # the penalty term is part of the gradient, so it is scaled by α as well
        theta = theta - α * ((X.T @ (sigmoid(X @ theta) - y)) / len(X) + reg)
        cost = cost_function(theta, X, y, λ)
        costs.append(cost)

        if i % 1000 == 0:
            print(cost)

    return theta, costs


def predict(theta, X):  # theta has shape (28, 1) here; classify as 1 when the predicted probability >= 0.5
    probability = sigmoid(X @ theta)
    return [1 if x >= 0.5 else 0 for x in probability]


if __name__ == "__main__":
    data = pd.read_csv("ex2data2.txt", names=['Test 1', 'Test 2', 'Accepted'])
    fig, ax = plt.subplots()
    ax.scatter(data[data['Accepted'] == 0]['Test 1'], data[data['Accepted'] == 0]['Test 2'], c='r', marker='x',
               label='y=0')
    ax.scatter(data[data['Accepted'] == 1]['Test 1'], data[data['Accepted'] == 1]['Test 2'], c='b', marker='o',
               label='y=1')
    ax.legend()
    ax.set(xlabel='Test1', ylabel='Test2')
    x1 = data['Test 1']
    x2 = data['Test 2']
    data2 = feature_mapping(x1, x2, 6)
    X = np.array(data2.values)
    y = np.array(data.iloc[:, -1].values).reshape(len(X), 1)
    print(X.shape, y.shape)

    theta = np.zeros((28, 1))
    cost_init = cost_function(theta, X, y, λ=1)
    print("初始最小代价函数值:{}".format(cost_init))
    α = 0.001
    epoch = 200000
    final_theta, costs = gradient_descent(theta, X, y, α, epoch, λ=0.1)
    print("final_theta:{}".format(final_theta))
    # Accuracy check
    y_ = np.array(predict(final_theta, X)).reshape(len(X), 1)
    print(y_.shape, y.shape)  # note: the two arrays must have exactly the same shape -- (118,) and (118, 1) are different
    acc = np.mean(y_ == y)
    print('accuracy = {0}'.format(acc))

    plt.show()
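
To see how the regularization strength affects the fit, the functions above can be rerun with a few values of λ. This is a quick sketch; the λ values and the shorter epoch count are only illustrative, and gradient_descent will still print the cost every 1000 iterations:

for lam in (0, 1, 100):
    t, _ = gradient_descent(np.zeros((28, 1)), X, y, α, 10000, λ=lam)
    pred = np.array(predict(t, X)).reshape(len(X), 1)
    print('λ = {}: training accuracy = {:.3f}'.format(lam, np.mean(pred == y)))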

3. Logistic Regression with an Advanced Optimizer

"""
基于高级优化函数的逻辑回归-正则化
案例:案例:设想你是工厂的生产主管,你要决定是否芯片要被接受或抛弃
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def feature_mapping(x1, x2, power):
    # map the two features to every polynomial term x1^(i-j) * x2^j up to the given power
    data = {}

    for i in np.arange(power + 1):
        for j in np.arange(i + 1):
            data['F{}{}'.format(i - j, j)] = np.power(x1, i - j) * np.power(x2, j)

    return pd.DataFrame(data)


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def cost_function(theta, X, y, λ):
    first = y.T @ np.log(sigmoid(X @ theta))
    second = (1 - y.T) @ np.log(1 - sigmoid(X @ theta))
    reg = (λ / (2 * len(X))) * np.sum(np.power(theta[1:], 2))  # theta[1:] excludes theta0 from the penalty
    return -np.sum(first + second) / len(X) + reg


def gradient(theta, X, y, λ):   # regularized gradient for a single evaluation (used by the optimizer)
    theta = np.mat(theta)  # fmin_tnc passes theta as a flat array; np.mat turns it into shape (1, 28)
    X = np.mat(X)
    y = np.mat(y)

    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)

    error = sigmoid(X * theta.T) - y

    for i in range(parameters):
        term = np.multiply(error, X[:, i])  # X[:, i] selects the i-th column of X

        if i == 0:
            grad[i] = np.sum(term) / len(X)  # theta0 is not regularized
        else:
            grad[i] = (np.sum(term) / len(X)) + ((λ / len(X)) * theta[0, i])  # theta[0, i] is the scalar coefficient

    return grad
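
# A vectorized equivalent (a sketch; it computes the same regularized gradient
# without np.mat or the loop):
def gradient_vec(theta, X, y, λ):
    theta = np.asarray(theta).ravel()
    grad = X.T @ (sigmoid(X @ theta) - np.asarray(y).ravel()) / len(X)
    reg = (λ / len(X)) * theta
    reg[0] = 0  # the intercept term is not regularized
    return grad + reg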


def predict(theta, X):  # classify as 1 when the predicted probability >= 0.5
    probability = sigmoid(X @ theta)
    return [1 if x >= 0.5 else 0 for x in probability]


if __name__ == "__main__":
    data = pd.read_csv("ex2data2.txt", names=['Test 1', 'Test 2', 'Accepted'])
    fig, ax = plt.subplots()
    ax.scatter(data[data['Accepted'] == 0]['Test 1'], data[data['Accepted'] == 0]['Test 2'], c='r', marker='x',
               label='y=0')
    ax.scatter(data[data['Accepted'] == 1]['Test 1'], data[data['Accepted'] == 1]['Test 2'], c='b', marker='o',
               label='y=1')
    ax.legend()
    ax.set(xlabel='Test1', ylabel='Test2')
    x1 = data['Test 1']
    x2 = data['Test 2']
    data2 = feature_mapping(x1, x2, 6)
    X = np.array(data2.values)
    y = np.array(data.iloc[:, -1].values.reshape(len(X),1))
    #print(X.shape, y.shape)
    theta = np.zeros((28,1))

    import scipy.optimize as opt

    λ = 1
    result = opt.fmin_tnc(func=cost_function, x0=theta, fprime=gradient, args=(X, y, λ))
    print(result[0])
    # Accuracy check
    y_ = np.array(predict(result[0], X)).reshape(len(X), 1)
    print(y_.shape, y.shape)  # note: the two arrays must have exactly the same shape -- (118,) and (118, 1) are different
    acc = np.mean(y_ == y)
    print('accuracy = {0}'.format(acc))

    # sklearn implementation
    from sklearn import linear_model  # sklearn's linear model module

    model = linear_model.LogisticRegression(penalty='l2', C=1.0)  # C is the inverse of the regularization strength
    model.fit(X, y.ravel())
    print("sklearn_accuracy = {}".format(model.score(X, y)))
    # Plot the decision boundary
    x = np.linspace(-1.2, 1.2, 200)
    xx, yy = np.meshgrid(x, x)  # coordinate matrices from coordinate vectors: e.g. x-values (1, 2, 3) and y-values (7, 8) give the grid points (1,7) (2,7) (3,7) (1,8) (2,8) (3,8)
    z = feature_mapping(xx.ravel(), yy.ravel(), 6).values

    zz = z @ result[0]
    zz = zz.reshape(xx.shape)

    fig, ax = plt.subplots()
    ax.scatter(data[data['Accepted'] == 0]['Test 1'], data[data['Accepted'] == 0]['Test 2'], c='r', marker='x',
               label='y=0')
    ax.scatter(data[data['Accepted'] == 1]['Test 1'], data[data['Accepted'] == 1]['Test 2'], c='b', marker='o',
               label='y=1')
    ax.legend()
    ax.set(xlabel='Test1',
           ylabel='Test2')

    plt.contour(xx, yy, zz, [0])  # draw the zz = 0 level curve, i.e. the decision boundary where the predicted probability is 0.5
    plt.show()
