# 3.3 Implement logistic regression and report its results on watermelon dataset 3.0α. (编程实现对率回归,并给出西瓜数据集3.0α上的结果)
import numpy as np
import math
# Watermelon dataset 3.0α (named in the exercise heading).  Every row holds the
# two numeric attributes of one sample followed by a constant 1; the constant
# multiplies the bias, so one vector B = (w1, w2, b) carries weights and bias.
x = np.array([
    [0.697, 0.46, 1],
    [0.774, 0.376, 1],
    [0.634, 0.264, 1],
    [0.608, 0.318, 1],
    [0.556, 0.215, 1],
    [0.403, 0.237, 1],
    [0.481, 0.149, 1],
    [0.437, 0.211, 1],
    [0.666, 0.091, 1],
    [0.243, 0.267, 1],
    [0.245, 0.057, 1],
    [0.343, 0.099, 1],
    [0.639, 0.161, 1],
    [0.657, 0.198, 1],
    [0.36, 0.37, 1],
    [0.593, 0.042, 1],
    [0.719, 0.103, 1],
])
# Class labels of the rows above: the first 8 samples are 1, the last 9 are 0.
y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])


def sigmoid(z):
    """Return the logistic function ``exp(z) / (1 + exp(z))`` element-wise.

    Only ``exp(-|z|)`` is evaluated, so the result saturates to 0 or 1 for
    large ``|z|`` instead of overflowing.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def fit_logistic_newton(features, labels, max_iter=1000, tol=1e-10):
    """Fit logistic-regression coefficients with Newton's method.

    Args:
        features: ``(n_samples, n_features)`` array; include a column of ones
            if a bias term is wanted.
        labels: ``(n_samples,)`` array of 0/1 targets.
        max_iter: upper bound on the number of Newton updates.
        tol: stop once the largest coefficient change falls below this value.

    Returns:
        ``(n_features,)`` coefficient vector, started from all zeros.

    Raises:
        numpy.linalg.LinAlgError: if the Hessian is singular (for example when
            the two classes are linearly separable and the fit saturates).
    """
    beta = np.zeros(features.shape[1])
    for _ in range(max_iter):
        prob = sigmoid(features @ beta)
        # Gradient of the negative log-likelihood: sum_i x_i (p_i - y_i).
        gradient = features.T @ (prob - labels)
        # Hessian: sum_i x_i x_i^T p_i (1 - p_i).  This is a matrix built from
        # outer products; collapsing it to the scalar sum of x_i . x_i turns
        # the update into a plain scaled gradient step.
        hessian = (features * (prob * (1.0 - prob))[:, None]).T @ features
        step = np.linalg.solve(hessian, gradient)
        beta = beta - step
        if np.max(np.abs(step)) < tol:
            break
    return beta


# Train on the whole data set and report the fitted probabilities.
B = fit_logistic_newton(x, y)
pred = sigmoid(x @ B)
print(pred)

# Threshold the probabilities at 0.5 and measure the training accuracy.
pre = (pred > 0.5).astype(float)
right_count = int(np.sum(pre == y))
accuracy = right_count / len(y)
print(accuracy)
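# 3.4 Using the iris data set, compare the logistic-regression error rates estimated by 10-fold cross-validation and by leave-one-out. (选择鸢尾花数据集,比较10折交叉验证法和留一法所估计出的对率回归的错误率。)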
import numpy as np
import math
# Class names recognised in the last CSV field and the class index they map to.
CLASS_INDEX = {'Iris-setosa': 0, 'Iris-versicolor': 1}
# Any other label is treated as the third class.
OTHER_CLASS = 2


def load_iris(path='iris.data'):
    """Parse a comma-separated iris file into an ``(n, 8)`` float array.

    Columns 0-3 hold the four numeric features, columns 4-6 a one-hot class
    indicator and column 7 the class index (0, 1 or 2).  Lines with fewer than
    five comma-separated fields (such as trailing blank lines) are skipped.

    Raises:
        ValueError: if the file contains no usable record.
    """
    rows = []
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.strip().split(',')
            if len(fields) < 5:
                continue
            row = np.zeros(8)
            row[0:4] = [float(value) for value in fields[0:4]]
            klass = CLASS_INDEX.get(fields[4], OTHER_CLASS)
            row[4 + klass] = 1
            row[7] = klass
            rows.append(row)
    if not rows:
        raise ValueError('no iris records found in %r' % (path,))
    return np.array(rows)


def sigmoid(z):
    """Return the logistic function ``exp(z) / (1 + exp(z))`` element-wise.

    Only ``exp(-|z|)`` is evaluated, so the result saturates to 0 or 1 for
    large ``|z|`` instead of overflowing.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def fit_binary(features, targets, n_iter=500):
    """Fit one binary logistic classifier, starting from zero coefficients.

    Each iteration moves against the gradient ``sum_i x_i (p_i - t_i)`` scaled
    by the scalar curvature ``sum_i (x_i . x_i) p_i (1 - p_i)``.
    NOTE(review): this is a scaled gradient step, not a full Newton step with
    the Hessian matrix; it is kept because a Hessian solve presumably becomes
    singular on linearly separable one-vs-rest problems - confirm before
    changing.

    Args:
        features: ``(n_samples, n_features)`` array including the bias column.
        targets: ``(n_samples,)`` array of 0/1 targets.
        n_iter: number of update steps.

    Returns:
        ``(n_features,)`` coefficient vector.
    """
    beta = np.zeros(features.shape[1])
    squared_norms = np.sum(features * features, axis=1)
    for _ in range(n_iter):
        prob = sigmoid(features @ beta)
        gradient = features.T @ (prob - targets)
        curvature = np.dot(squared_norms, prob * (1.0 - prob))
        if curvature <= 0.0:
            # Every probability has saturated; a further step would be 0 / 0.
            break
        beta = beta - gradient / curvature
    return beta


def fit_one_vs_rest(features, one_hot, n_iter=500):
    """Fit one binary classifier per column of ``one_hot``.

    Returns:
        ``(n_classes, n_features)`` array; row ``k`` scores class ``k``.
    """
    return np.array([fit_binary(features, one_hot[:, k], n_iter)
                     for k in range(one_hot.shape[1])])


def predict(features, weights):
    """Return, for every row, the index of the class with the highest score."""
    return np.argmax(sigmoid(features @ weights.T), axis=1)


def cross_validate(features, one_hot, labels, n_folds, n_iter=500):
    """Run k-fold cross-validation over contiguous blocks of rows.

    With ``n_folds == len(labels)`` this is leave-one-out.

    Args:
        features: ``(n_samples, n_features)`` array including the bias column.
        one_hot: ``(n_samples, n_classes)`` 0/1 indicator matrix.
        labels: ``(n_samples,)`` integer class indices.
        n_folds: number of folds, between 2 and ``n_samples``.
        n_iter: update steps used for every binary classifier.

    Returns:
        ``(fold_accuracy, held_out_pred, fold_weights)``: a list with the
        accuracy on each held-out fold, the ``(n_samples,)`` prediction made
        for every sample while it was held out, and the
        ``(n_folds, n_classes, n_features)`` coefficients of every fold.

    Raises:
        ValueError: if ``n_folds`` is outside ``[2, n_samples]``.
    """
    n_samples = len(labels)
    if not 2 <= n_folds <= n_samples:
        raise ValueError('n_folds must be between 2 and the number of samples')
    fold_accuracy = []
    fold_weights = []
    held_out_pred = np.empty(n_samples, dtype=int)
    for held_out in np.array_split(np.arange(n_samples), n_folds):
        in_training = np.ones(n_samples, dtype=bool)
        in_training[held_out] = False
        weights = fit_one_vs_rest(features[in_training], one_hot[in_training], n_iter)
        held_out_pred[held_out] = predict(features[held_out], weights)
        hits = int(np.sum(held_out_pred[held_out] == labels[held_out]))
        fold_accuracy.append(hits / len(held_out))
        fold_weights.append(weights)
    return fold_accuracy, held_out_pred, np.array(fold_weights)


def main():
    """Compare 10-fold and leave-one-out estimates on ``iris.data``."""
    # Read the data and shuffle the rows so each fold mixes the classes.
    A = load_iris('iris.data')
    np.random.shuffle(A)
    X = np.concatenate((A[:, 0:4], np.ones([len(A), 1])), axis=1)
    one_hot = A[:, 4:7]
    y = A[:, 7].astype(int)

    # 10-fold cross-validation: print the accuracy on every held-out fold.
    fold_accuracy, cv_pred, cv_weights = cross_validate(X, one_hot, y, 10)
    for value in fold_accuracy:
        print(value)
    # Accuracy of the fold-averaged coefficients on the whole data set.
    accuracy = int(np.sum(predict(X, cv_weights.mean(axis=0)) == y)) / len(y)
    print(accuracy)
    # The cross-validation estimate itself: share of held-out samples missed.
    print('10-fold error rate:', int(np.sum(cv_pred != y)) / len(y))

    # Leave-one-out: every sample is held out once.
    _, loo_pred, loo_weights = cross_validate(X, one_hot, y, len(y))
    # Accuracy of the fold-averaged coefficients on the whole data set.
    accuracy = int(np.sum(predict(X, loo_weights.mean(axis=0)) == y)) / len(y)
    print(accuracy)
    # Each model is scored on the one sample it did not see during training.
    print('leave-one-out error rate:', int(np.sum(loo_pred != y)) / len(y))


# Guarded so the helpers can be imported without reading iris.data; running
# the file as a script behaves as before.
if __name__ == '__main__':
    main()
# 3.5 Implement linear discriminant analysis and report its results on watermelon dataset 3.0α. (编程实现线性判别分析,并给出西瓜数据集3.0α上的结果)
import numpy as np
# Watermelon dataset 3.0α (named in the exercise heading): one row per sample
# holding its two numeric attributes.
x = np.array([
    [0.697, 0.46],
    [0.774, 0.376],
    [0.634, 0.264],
    [0.608, 0.318],
    [0.556, 0.215],
    [0.403, 0.237],
    [0.481, 0.149],
    [0.437, 0.211],
    [0.666, 0.091],
    [0.243, 0.267],
    [0.245, 0.057],
    [0.343, 0.099],
    [0.639, 0.161],
    [0.657, 0.198],
    [0.36, 0.37],
    [0.593, 0.042],
    [0.719, 0.103],
])
# Class labels of the rows above: the first 8 samples are 1, the last 9 are 0.
y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])


def fit_lda(features, labels):
    """Fit two-class linear discriminant analysis.

    Samples labelled 0 form class 0; every other label counts as class 1.

    Args:
        features: ``(n_samples, n_features)`` array.
        labels: ``(n_samples,)`` array of class labels.

    Returns:
        ``(w, mean_0, mean_1, scatter)``: the projection direction solving
        ``scatter @ w = mean_0 - mean_1``, the two class means and the
        ``(n_features, n_features)`` within-class scatter matrix.

    Raises:
        ValueError: if either class has no sample.
        numpy.linalg.LinAlgError: if the scatter matrix is singular.
    """
    is_zero = labels == 0
    if is_zero.all() or not is_zero.any():
        raise ValueError('both classes need at least one sample')
    # Select by label, not by row position, so every sample of a class counts.
    mean_0 = features[is_zero].mean(axis=0)
    mean_1 = features[~is_zero].mean(axis=0)
    # Centre every sample on the mean of its own class.
    centred = np.where(is_zero[:, None], features - mean_0, features - mean_1)
    # Within-class scatter: sum of outer products (x - mu)(x - mu)^T.
    scatter = centred.T @ centred
    direction = np.linalg.solve(scatter, mean_0 - mean_1)
    return direction, mean_0, mean_1, scatter


def lda_predict(features, direction, mean_0, mean_1):
    """Assign each sample to the class whose projected mean is nearer.

    Returns an array of 0.0 / 1.0; a tie goes to class 1.
    """
    projected = features @ direction
    gap_0 = np.abs(projected - mean_0 @ direction)
    gap_1 = np.abs(projected - mean_1 @ direction)
    return np.where(gap_0 < gap_1, 0.0, 1.0)


# Find the projection line.
w, u_0, u_1, Sw = fit_lda(x, y)
print(w)

# Classify every training sample.
pre = lda_predict(x, w, u_0, u_1)
print(pre)

right_count = int(np.sum(pre == y))
accuracy = right_count / len(y)
print(accuracy)