机器学习西瓜书第三章习题-编程题代码

3.3 编程实现对率回归,并给出西瓜数据集3.0α上的结果

import numpy as np
import math

# Samples of the watermelon dataset 3.0α: two numeric features followed by a
# constant 1, so the bias is learned as the last entry of the parameter vector.
x = [[0.697, 0.46, 1],
     [0.774, 0.376, 1],
     [0.634, 0.264, 1],
     [0.608, 0.318, 1],
     [0.556, 0.215, 1],
     [0.403, 0.237, 1],
     [0.481, 0.149, 1],
     [0.437, 0.211, 1],
     [0.666, 0.091, 1],
     [0.243, 0.267, 1],
     [0.245, 0.057, 1],
     [0.343, 0.099, 1],
     [0.639, 0.161, 1],
     [0.657, 0.198, 1],
     [0.36, 0.37, 1],
     [0.593, 0.042, 1],
     [0.719, 0.103, 1]]

# Binary class label of each sample, in the same order as the rows of x.
y = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
x = np.array(x)
y = np.array(y)


def sigmoid(z):
    """Return 1 / (1 + exp(-z)) element-wise without overflowing.

    np.logaddexp(0, -z) equals log(1 + exp(-z)), so exponentiating its
    negation gives the logistic function while never evaluating exp() of a
    large positive number (the naive exp(z) / (1 + exp(z)) raises
    OverflowError with math.exp once z exceeds roughly 709).
    """
    return np.exp(-np.logaddexp(0, -z))


def fit_logistic(features, labels, n_iter=1000):
    """Fit logistic-regression parameters by curvature-scaled gradient steps.

    Each step is beta <- beta - grad / c, where grad is the gradient of the
    negative log-likelihood and c = sum_i ||x_i||^2 * p_i * (1 - p_i) is the
    trace of its Hessian.  This is a scalar stand-in for the full Newton
    step, not Newton's method itself.

    Args:
        features: array of shape (n_samples, n_features); include a constant
            column if a bias term is wanted.
        labels: array of shape (n_samples,) holding 0/1 targets.
        n_iter: maximum number of update steps.

    Returns:
        Parameter vector of shape (n_features,), starting from all zeros.
    """
    beta = np.zeros(features.shape[1])
    # ||x_i||^2 does not depend on beta, so compute it once.
    sq_norms = np.sum(features * features, axis=1)
    for _ in range(n_iter):
        prob = sigmoid(np.dot(features, beta))
        grad = -np.dot(features.T, labels - prob)
        curvature = np.sum(sq_norms * prob * (1 - prob))
        # Stop instead of dividing by zero (or NaN) once every prediction
        # has saturated and the curvature term has underflowed.
        if not curvature > 0:
            break
        beta = beta - grad / curvature
    return beta


# Learned parameters: one weight per feature column, bias last.
B = fit_logistic(x, y, n_iter=1000)

# Predicted probability of class 1 for every sample, using the final B.
pred = sigmoid(np.dot(x, B))
print(pred)

# Threshold at 0.5 and measure the accuracy on the training samples.
pre = (pred > 0.5).astype(float)
right_count = int(np.sum(pre == y))
accuracy = right_count / len(y)
print(accuracy)

3.4 选择鸢尾花数据集,比较10折交叉验证法和留一法所估计出的对率回归的错误率。

import numpy as np
import math

# Class names in label order: a sample's integer label is its index here.
CLASS_NAMES = ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')


def sigmoid(z):
    """Return 1 / (1 + exp(-z)) element-wise without overflowing.

    np.logaddexp(0, -z) equals log(1 + exp(-z)), so no exp() of a large
    positive number is ever evaluated.
    """
    return np.exp(-np.logaddexp(0, -z))


def parse_iris(text):
    """Parse comma-separated iris records into features and integer labels.

    Every non-blank line must hold four numeric fields followed by one of
    CLASS_NAMES.

    Args:
        text: full contents of the data file.

    Returns:
        (features, labels): a float array of shape (n, 4) and an int array of
        shape (n,) whose entries index CLASS_NAMES.

    Raises:
        ValueError: if a non-blank line is not a recognised record.
    """
    features = []
    labels = []
    for line_no, line in enumerate(text.split('\n'), start=1):
        line = line.strip()
        if not line:
            continue
        fields = line.split(',')
        if len(fields) != 5 or fields[4] not in CLASS_NAMES:
            raise ValueError(
                'line {}: unrecognised iris record {!r}'.format(line_no, line))
        features.append([float(value) for value in fields[:4]])
        labels.append(CLASS_NAMES.index(fields[4]))
    return (np.array(features, dtype=float).reshape(-1, 4),
            np.array(labels, dtype=int))


def load_iris(path='iris.data'):
    """Read the text file at *path* and parse it with parse_iris."""
    with open(path, 'r') as handle:
        return parse_iris(handle.read())


def add_bias(features):
    """Append a column of ones so the bias is learned as the last weight."""
    return np.concatenate((features, np.ones((len(features), 1))), axis=1)


def fit_logistic(features, targets, n_iter=500):
    """Fit one binary logistic model by curvature-scaled gradient steps.

    Each step is beta <- beta - grad / c, where grad is the gradient of the
    negative log-likelihood and c = sum_i ||x_i||^2 * p_i * (1 - p_i) is the
    trace of its Hessian (a scalar stand-in for the full Newton step).

    Args:
        features: array of shape (n_samples, n_features), bias column included.
        targets: array of shape (n_samples,) holding 0/1 targets.
        n_iter: maximum number of update steps.

    Returns:
        Parameter vector of shape (n_features,), starting from all zeros.
    """
    beta = np.zeros(features.shape[1])
    sq_norms = np.sum(features * features, axis=1)
    for _ in range(n_iter):
        prob = sigmoid(np.dot(features, beta))
        grad = -np.dot(features.T, targets - prob)
        curvature = np.sum(sq_norms * prob * (1 - prob))
        # A perfectly separated class drives every p_i to 0 or 1; stop
        # rather than take a 0/0 step that would turn the weights into NaN.
        if not curvature > 0:
            break
        beta = beta - grad / curvature
    return beta


def fit_one_vs_rest(features, labels, n_classes=len(CLASS_NAMES), n_iter=500):
    """Train one "class k versus the rest" logistic model per class.

    Returns:
        Array of shape (n_classes, n_features + 1); row k holds the weights
        (bias last) of the model for class k.
    """
    design = add_bias(features)
    return np.array([
        fit_logistic(design, (labels == k).astype(float), n_iter)
        for k in range(n_classes)
    ])


def predict(weights, features):
    """Return, for each sample, the class whose model gives the top score.

    The sigmoid is monotonic, so comparing the linear scores picks the same
    class as comparing probabilities, without ties when several
    probabilities saturate at 1.0.
    """
    return np.argmax(np.dot(add_bias(features), weights.T), axis=1)


def cross_val_predict(features, labels, n_folds, n_iter=500):
    """Predict every sample with a model that was not trained on it.

    The samples are cut, in their given order, into n_folds contiguous
    folds; each fold is predicted by one-vs-rest models trained on the
    remaining folds.  n_folds == len(labels) is leave-one-out.

    Returns:
        Int array of shape (n_samples,) with the held-out prediction of
        every sample.

    Raises:
        ValueError: if n_folds is not between 2 and the number of samples.
    """
    n_samples = len(labels)
    if not 2 <= n_folds <= n_samples:
        raise ValueError('n_folds must be between 2 and the number of samples')
    held_out = np.empty(n_samples, dtype=int)
    for test_idx in np.array_split(np.arange(n_samples), n_folds):
        train_mask = np.ones(n_samples, dtype=bool)
        train_mask[test_idx] = False
        weights = fit_one_vs_rest(
            features[train_mask], labels[train_mask], n_iter=n_iter)
        held_out[test_idx] = predict(weights, features[test_idx])
    return held_out


def main():
    """Print the 10-fold and leave-one-out accuracy estimates.

    The error rate of each estimate is one minus the printed accuracy.
    """
    features, labels = load_iris('iris.data')
    # Shuffle once so that the contiguous folds mix the three classes.
    order = np.random.permutation(len(labels))
    features, labels = features[order], labels[order]

    # 10-fold cross-validation: per-fold accuracy, then the overall
    # accuracy on held-out samples only.
    hits = cross_val_predict(features, labels, 10) == labels
    for fold_hits in np.array_split(hits, 10):
        print(float(np.mean(fold_hits)))
    print(float(np.mean(hits)))

    # Leave-one-out: each sample is predicted by a model trained on all
    # the other samples.
    loo_hits = cross_val_predict(features, labels, len(labels)) == labels
    print(float(np.mean(loo_hits)))


if __name__ == '__main__':
    main()

3.5 编程实现线性判别分析,并给出西瓜数据集3.0α上的结果

import numpy as np

# Samples of the watermelon dataset 3.0α: one row of two numeric features
# per sample.
x = [[0.697, 0.46],
     [0.774, 0.376],
     [0.634, 0.264],
     [0.608, 0.318],
     [0.556, 0.215],
     [0.403, 0.237],
     [0.481, 0.149],
     [0.437, 0.211],
     [0.666, 0.091],
     [0.243, 0.267],
     [0.245, 0.057],
     [0.343, 0.099],
     [0.639, 0.161],
     [0.657, 0.198],
     [0.36, 0.37],
     [0.593, 0.042],
     [0.719, 0.103]]
# Binary class label of each sample, in the same order as the rows of x.
y = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
x = np.array(x)
y = np.array(y)

# Compute the projection direction.
# Class means are selected by label rather than by hard-coded row ranges, so
# every sample of a class contributes to its mean.
u_1 = x[y == 1].mean(axis=0)
u_0 = x[y == 0].mean(axis=0)

# Within-class scatter matrix: the sum over both classes of the outer
# products (x_i - mean)(x_i - mean)^T, a (n_features, n_features) matrix.
Sw = np.zeros((x.shape[1], x.shape[1]))
for label, mean in ((0, u_0), (1, u_1)):
    centred = x[y == label] - mean
    Sw += np.dot(centred.T, centred)

# Fisher direction w = Sw^-1 (u_0 - u_1), obtained by solving the linear
# system instead of forming the inverse explicitly.
w = np.linalg.solve(Sw, u_0 - u_1)
print(w)

# Classify: assign each sample to the class whose projected mean is closer
# to the sample's own projection onto w.
projected = np.dot(x, w)
dist_to_0 = np.abs(projected - np.dot(u_0, w))
dist_to_1 = np.abs(projected - np.dot(u_1, w))
pre = np.where(dist_to_0 < dist_to_1, 0.0, 1.0)
print(pre)

# Accuracy on the training samples.
right_count = int(np.sum(pre == y))
accuracy = right_count / len(y)
print(accuracy)

你可能感兴趣的:(机器学习,机器学习,大数据,python)