实验内容及要求
1.实验数据:iris数据,分为三种类型:分别为w1,w2和w3类,每种类型中包括50个四维的向量,各类别出现的概率相等。
2.实验要求
1)从iris数据的每个样本中取出三个特征作为分类特征,并且将样本点画出;
2)从每个类别的数据中抽取45个样本作为训练样本,5个样本作为测试样本;
3)用感知器批处理的方法实现w1类和w2类之间,w2类和w3类,w1类和w3类之间分类器的设计,写出判别函数,画出分类面,并记录收敛的次数。
4)用感知器单步处理的方法实现w1类和w2类之间,w2类和w3类,w1类和w3类之间分类器的设计,写出判别函数,画出分类面,并记录收敛的次数。
5)对于前面用感知器算法无法迭代出权向量的类别,使用最小平方误差判别的方法求出权向量,写出判别函数,画出分类面,并记录迭代次数。
6)用多类分类器的逐步修正的方法对三个类别进行分类,写出每个类别的判别函数,画出分类面。
7)将测试样本分别应用在分类器上,对测试样本进行判别,将判别结果进行显示。
8)使用python语言来完成实验
import numpy as np
import matplotlib.colors
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
# Read columns 1, 2 and 3 (zero-based) of the two tab-separated data files;
# these three columns are the features used by every classifier below.
f_train1 = np.loadtxt("iris训练数据.txt", dtype=np.double, delimiter='\t', usecols=[1, 2, 3])
f_train2 = np.loadtxt("iris测试数据.txt", dtype=np.double, delimiter='\t', usecols=[1, 2, 3])

# Per class: 25 rows from the first file plus 20 rows from the second file are
# used for training, and the next 5 rows of the second file for testing.
# NOTE(review): assumes both files hold the classes as consecutive 25-row
# blocks -- confirm against the data files.
train_1 = (np.r_[f_train1[0:25], f_train2[0:20]]).T  # features x samples
train_2 = (np.r_[f_train1[25:50], f_train2[25:45]]).T
train_3 = (np.r_[f_train1[50:75], f_train2[50:70]]).T
test = (np.r_[f_train2[20:25], f_train2[45:50], f_train2[70:75]]).T

# Scatter plot of the training samples, one colour per class.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in recent Matplotlib
# releases, so the 3-D axes are created with add_subplot instead.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(train_1[0], train_1[1], train_1[2], c='r', label="points 1")
ax.scatter(train_2[0], train_2[1], train_2[2], c='g', label="points 2")
ax.scatter(train_3[0], train_3[1], train_3[2], c='b', label="points 3")
ax.legend()

# Augment every sample with a leading constant 1 (bias term); the row of
# ones is sized from the data instead of a hard-coded sample count.
ww1 = np.r_[np.ones((1, train_1.shape[1])), train_1]
ww2 = np.r_[np.ones((1, train_2.shape[1])), train_2]
ww3 = np.r_[np.ones((1, train_3.shape[1])), train_3]
test = np.r_[np.ones((1, test.shape[1])), test]

# Sign-normalise each class pair: samples of the second class are negated so
# that a separating weight vector satisfies a^T y > 0 for every column.
w12 = np.c_[ww1, -ww2]
w23 = np.c_[ww2, -ww3]
w13 = np.c_[ww1, -ww3]
def FLQ_DCS(a, x):
    """Draw the decision plane of a two-class linear discriminant.

    The discriminant is g(x, y, z) = a0 + a1*x + a2*y + a3*z, so the plane
    g = 0 solved for z is ``z = -(a1*x + a2*y + a0) / a3``.  The plane, the
    origin and a short segment along the direction (a1, a2, a3) are drawn on
    the module-level 3-D axes ``ax``.

    :param a: augmented weight vector indexed as ``a[k, 0]`` (4 x 1).
    :param x: training samples; accepted for call compatibility only.  The
        previous version projected them onto ``a`` with a hard-coded count of
        90 columns, but the result was never used, so that step was removed.
    :return: None
    """
    bias, coef_x, coef_y, coef_z = (a[k, 0] for k in range(4))

    grid = np.linspace(0, 7, 100)
    X, Y = np.meshgrid(grid, grid)
    Z = -(coef_x * X + coef_y * Y + bias) / coef_z

    ax.plot_surface(X, Y, Z)
    ax.scatter3D(0, 0, 0, label="origin")
    # Segment from the origin along the plane normal, scaled down by 100.
    ax.plot3D([0, coef_x / 100],
              [0, coef_y / 100],
              [0, coef_z / 100])
    ax.legend(loc='best')
# 单样本
def SGD(w, p, max):
    """Train a perceptron with the single-sample (fixed-increment) rule.

    Every column ``y`` of ``w`` is a sign-normalised augmented sample.  A
    sample with ``W^T y <= 0`` counts as misclassified and triggers the
    correction ``W <- W + p * y``.  Passes over the data are repeated until a
    complete pass needs no correction or the correction budget is used up.

    :param w: samples, shape (dimension, number of samples).
    :param p: learning rate.
    :param max: maximum number of corrections (the name shadows the builtin
        but is kept so existing callers keep working).
    :return: ``(W, cnt)`` -- the weight vector as a (dimension x 1)
        ``np.matrix`` initialised to all ones, and the number of corrections.
    """
    samples = np.asarray(w, dtype=float)
    dim, n_samples = samples.shape
    # The weight vector follows the sample dimension instead of a fixed 4.
    W = np.matrix(np.ones((dim, 1)))
    cnt = 0
    while True:
        corrected = False
        for i in range(n_samples):
            column = samples[:, i].reshape(dim, 1)
            if np.dot(W.T, column)[0, 0] > 0:
                continue  # correctly classified, nothing to do
            W += p * column
            cnt += 1
            if cnt >= max:
                # Budget exhausted: report the current (unconverged) weights.
                return W, cnt
            corrected = True
        if not corrected:
            return W, cnt
# Driver: single-sample perceptron for each pair of classes.
p = 0.3  # learning rate
# Correction budget; deliberately not called ``max`` so the builtin stays usable.
max_corrections = 500
W12, cnt12 = SGD(w12, p, max_corrections)
W23, cnt23 = SGD(w23, p, max_corrections)  # author's note: this pair does not converge
W13, cnt13 = SGD(w13, p, max_corrections)
print("单样本修正 类别12 收敛次数为:", cnt12, " W最终为:", W12.T)
print("单样本修正 类别23 收敛次数为:", cnt23, " W最终为:", W23.T)
print("单样本修正 类别13 收敛次数为:", cnt13, " W最终为:", W13.T)
def FLQ_Test(test, a12, a23, a13, labels=None):
    """Classify augmented samples with three pairwise linear discriminants.

    With ``g_ij = a_ij^T x`` a sample is assigned to

    * class 1 when ``g12 > 0`` and ``g13 > 0``,
    * class 2 when ``g12 < 0`` and ``g23 > 0``,
    * class 3 when ``g23 < 0`` and ``g13 < 0``,

    and is left as 0 (undecided) otherwise.

    :param test: samples, shape (dimension, number of samples).
    :param a12: weight column vector separating class 1 from class 2.
    :param a23: weight column vector separating class 2 from class 3.
    :param a13: weight column vector separating class 1 from class 3.
    :param labels: optional true class label per sample.  When omitted the
        samples are assumed to come in three equal consecutive blocks
        (class 1, class 2, class 3); for 15 samples this is the 5/5/5 layout
        the function previously hard-coded.
    :return: ``(err, ans)`` -- number of samples whose prediction differs from
        its label, and the float array of predicted classes.
    """
    n_samples = test.shape[1]
    g12 = np.asarray(np.matmul(a12.T, test)).ravel()
    g23 = np.asarray(np.matmul(a23.T, test)).ravel()
    g13 = np.asarray(np.matmul(a13.T, test)).ravel()

    # Sized from the input; a fixed length of 15 overflowed on larger test sets.
    ans = np.zeros(n_samples)
    # The three rules are mutually exclusive, so assignment order is irrelevant.
    ans[(g12 > 0) & (g13 > 0)] = 1
    ans[(g12 < 0) & (g23 > 0)] = 2
    ans[(g23 < 0) & (g13 < 0)] = 3

    if labels is None:
        block = n_samples // 3
        expected = np.full(n_samples, 3.0)
        expected[:2 * block] = 2
        expected[:block] = 1
    else:
        expected = np.asarray(labels, dtype=float).ravel()

    err = int(np.count_nonzero(ans != expected))
    return err, ans
# Score the held-out samples with the three single-sample perceptron
# discriminants and report the error rate and the predicted classes.
err,ans = FLQ_Test(test,W12,W23,W13)
print('单样本修正错误率:', err / test.shape[1], ' 分类结果为:', ans)
# Decision planes of the single-sample perceptron (uncomment to draw them).
#FLQ_DCS(W12,w12)
# FLQ_DCS(W23,w23)
# FLQ_DCS(W13,w13)
# 批量梯度下降法(Batch Gradient Descent)
def BGD(x, p, m, maxIterations):
    """Train a perceptron with the batch update rule.

    Each iteration scores every sign-normalised sample, collects the ones with
    a negative score and moves the weights along their sum:

        w <- w + p * (sum of misclassified samples) / m

    The previous version added the *scalar* sum of the negative scores to
    every weight component, which is not a gradient step and drives
    misclassified samples further into the negative side.

    :param x: sign-normalised augmented samples, shape (4, number of samples).
    :param p: learning rate.
    :param m: number of samples, used to average the correction.
    :param maxIterations: maximum number of weight updates.
    :return: ``(w, cnt)`` -- the 4 x 1 weight ``np.matrix`` (started from
        ``[-1, 2, -2, -1]``) and the number of updates performed.
    """
    samples = np.asarray(x, dtype=float)
    w = np.matrix([-1.0, 2.0, -2.0, -1.0]).reshape(4, 1)
    cnt = 0
    while cnt < maxIterations:
        scores = np.asarray(np.dot(w.T, samples)).ravel()
        misclassified = scores < 0
        if not misclassified.any():
            break  # every sample has a non-negative score: converged
        correction = samples[:, misclassified].sum(axis=1).reshape(-1, 1)
        w = w + p * correction / m
        cnt += 1
    return w, cnt
# Driver: batch perceptron for each pair of classes.
p = 0.014  # learning rate
maxIteration = 20
# The sample count is read from the data instead of the literal 90.
a12, cnt = BGD(w12, p, w12.shape[1], maxIteration)
print("批处理 类别12 收敛次数为:", cnt, " W最终为:", a12.T)
a23, cnt = BGD(w23, p, w23.shape[1], maxIteration)
print("批处理 类别23 收敛次数为:", cnt, " W最终为:", a23.T)
a13, cnt = BGD(w13, p, w13.shape[1], maxIteration)
print("批处理 类别13 收敛次数为:", cnt, " W最终为:", a13.T)
# plt.show()
err, ans = FLQ_Test(test, a12, a23, a13)
print('批处理修正错误率:', err / test.shape[1], ' 分类结果为:', ans)
# Decision planes of the batch perceptron (uncomment to draw them).
# FLQ_DCS(a12,w12)  # marked as wrong by the author
# FLQ_DCS(a23,w23)
# FLQ_DCS(a13,w13)
# 最小平方误差 MSE
# 伪逆矩阵法 和 梯度下降法
# 采用梯度下降法
def MSE(y, p, expect_e, B, maxtrycount):
    """Minimise the squared error ||y^T a - B||^2 by gradient descent.

    Each iteration evaluates ``y (y^T a - B)`` once, reports twice that value
    as ``err`` and steps ``a`` by ``p`` times that value divided by the number
    of samples.  The weight dimension and the sample count are read from ``y``
    (they used to be fixed at 4 and 90).

    :param y: sign-normalised augmented samples, shape (dimension, samples).
    :param p: learning rate.
    :param expect_e: threshold on the gradient used by the stop test.
    :param B: target margins, shape (samples, 1).
    :param maxtrycount: maximum number of iterations.
    :return: ``(a, err, cnt)`` -- weights of shape (dimension, 1) started from
        all ones, the gradient evaluated before the last step, and the number
        of iterations performed.
    """
    samples = np.asarray(y, dtype=float)
    dim, n_samples = samples.shape
    a = np.ones((dim, 1))
    cnt = 0
    while True:
        cnt += 1
        half_gradient = np.matmul(samples, np.matmul(samples.T, a) - B)
        err = 2 * half_gradient
        a -= p * half_gradient / n_samples
        # NOTE(review): the stop test looks at the *smallest* gradient
        # component, so one small component ends the descent; kept as in the
        # original -- confirm whether the largest component or the norm was
        # intended.
        if np.min(np.abs(err)) < expect_e or cnt >= maxtrycount:
            break
    return a, err, cnt
# Driver: gradient-descent MSE for each pair of classes.
expect_e = 0.5    # expected error used by the stop test
maxtrycount = 20  # maximum number of iterations
p = 0.01          # learning rate
B = np.ones((90, 1))
a12, err, cnt = MSE(y=w12, p=p, expect_e=expect_e, B=B, maxtrycount=maxtrycount)
print("梯度下降 类别12 收敛次数为:", cnt, " W最终为:", a12.T)
a23, err, cnt = MSE(y=w23, p=p, expect_e=expect_e, B=B, maxtrycount=maxtrycount)
print("梯度下降 类别23 收敛次数为:", cnt, " W最终为:", a23.T)
a13, err, cnt = MSE(y=w13, p=p, expect_e=expect_e, B=B, maxtrycount=maxtrycount)
print("梯度下降 类别13 收敛次数为:", cnt, " W最终为:", a13.T)
err, ans = FLQ_Test(test, a12, a23, a13)
print('梯度下降修正错误率:', err / test.shape[1], ' 分类结果为:', ans)
# Decision planes of the gradient-descent solution (author's note: fairly good).
# FLQ_DCS(a12,w12)
# FLQ_DCS(a23,w23)
# FLQ_DCS(a13,w13)
# 伪逆矩阵法
def simm(e):
    """Report whether every component of the error vector is negative.

    In the iterative MSE procedure an error vector whose components are all
    negative means the margins can no longer be adjusted, i.e. the two classes
    are treated as not linearly separable.

    :param e: iterable of error components.
    :return: 1 when every component is below zero (also for an empty input),
        otherwise 0.
    """
    negatives = sum(1 for component in e if component < 0)
    return int(negatives == len(e))
def MSE_DIV(w, c, B, tol=0.1, max_iter=50):
    """Solve X a = b by the pseudo-inverse, then refine the margins iteratively.

    With ``X = w^T`` and ``X#`` its pseudo-inverse, the start is ``a = X# B``
    and ``e = X a - B``.  While ``||e|| > tol`` the Ho-Kashyap-style update

        B <- B + c * (e + |e|)
        a <- a + c * X# (e + |e|)
        e <- X a - B

    is repeated, at most ``max_iter`` times.  ``np.linalg.pinv`` replaces the
    explicit ``inv(X^T X) X^T``, which fails when ``X^T X`` is singular and is
    numerically fragile when it is ill-conditioned.

    :param w: sign-normalised augmented samples, shape (dimension, samples).
    :param c: step size of the margin update.
    :param B: initial margins, shape (samples, 1); the caller's array is not
        modified.
    :param tol: error norm below which the iteration stops (default 0.1).
    :param max_iter: maximum number of iterations (default 50).
    :return: ``(a, e, i)`` -- weights, final error vector, iterations used.
    """
    X = np.asarray(w, dtype=float).T  # one sample per row
    X_pinv = np.linalg.pinv(X)

    a = np.matmul(X_pinv, B)
    e = np.matmul(X, a) - B
    i = 0
    if np.all(e < 0):
        # Every error component is negative: the margins cannot grow further.
        print('线性不可分')
    elif np.linalg.norm(e) > 0:
        while np.linalg.norm(e) > tol and i < max_iter:
            # e + |e| keeps only the positive error components (doubled).
            dkh = e + np.abs(e)
            B = B + c * dkh
            a = a + c * np.matmul(X_pinv, dkh)
            e = np.matmul(X, a) - B
            i += 1
    if np.linalg.norm(e) == 0:
        print('线性可分,解出w,算法结束')
    return a, e, i
# Driver: pseudo-inverse solution for each pair of classes, reusing the
# learning rate ``p`` and the margin vector ``B`` defined earlier in the file.
a12, err, cnt = MSE_DIV(w=w12, c=p, B=B)
print("伪逆矩阵 类别12 收敛次数为:", cnt, " W最终为:", a12.T)
a23, err, cnt = MSE_DIV(w=w23, c=p, B=B)
print("伪逆矩阵 类别23 收敛次数为:", cnt, " W最终为:", a23.T)
a13, err, cnt = MSE_DIV(w=w13, c=p, B=B)
print("伪逆矩阵 类别13 收敛次数为:", cnt, " W最终为:", a13.T)
err, ans = FLQ_Test(test, a12, a23, a13)
print('伪逆矩阵错误率:', err / test.shape[1], ' 分类结果为:', ans)
# Decision planes of the pseudo-inverse solution (author's note: classifies correctly).
# FLQ_DCS(a12,w12)
# FLQ_DCS(a23,w23)
# FLQ_DCS(a13,w13)
# 多类线性判别 # ww1 ww2 ww3 为4*45 增广后样本
def MulStep(w1, w2, a1, a2, max, p):
    """Stepwise correction of two class discriminants against each other.

    For every index ``i`` the ``i``-th sample of class 1 is checked first: if
    ``a1`` scores it lower than ``a2``, ``a1`` is moved towards the sample and
    ``a2`` away from it.  The ``i``-th sample of class 2 is then checked the
    other way round.  ``a1`` and ``a2`` are updated in place.

    Training stops after the first full pass that makes no correction: later
    passes could not change the weights, so the returned weights equal those
    of running all ``max`` passes, and ``cnt`` now reflects convergence.

    :param w1: augmented class-1 samples, shape (dimension, samples).
    :param w2: augmented class-2 samples with the same number of columns.
    :param a1: float weight column vector of class 1 (modified in place).
    :param a2: float weight column vector of class 2 (modified in place).
    :param max: maximum number of passes (name kept for existing callers).
    :param p: correction step size.
    :return: ``(a1, a2, cnt)`` where ``cnt`` is the number of passes run,
        including the final pass that found nothing to correct.
    """
    class1 = np.asarray(w1, dtype=float)
    class2 = np.asarray(w2, dtype=float)
    cnt = 0
    while cnt < max:
        corrected = False
        for i in range(class1.shape[1]):
            first = class1[:, i]
            if np.dot(np.asarray(a1).ravel(), first) < np.dot(np.asarray(a2).ravel(), first):
                step = p * first.reshape(-1, 1)
                a1 += step
                a2 -= step
                corrected = True
            second = class2[:, i]
            if np.dot(np.asarray(a1).ravel(), second) > np.dot(np.asarray(a2).ravel(), second):
                step = p * second.reshape(-1, 1)
                a2 += step
                a1 -= step
                corrected = True
        cnt += 1
        if not corrected:
            break
    return a1, a2, cnt
# Initial augmented weight vectors (4 x 1 column matrices), one per class.
a1 = np.matrix([[1.0], [2], [3], [4]])
a2 = np.matrix([[3.0], [2], [2], [1]])
a3 = np.matrix([[2.0], [1], [1], [4]])
# Correct the discriminants pair by pair; each call runs at most 500 passes
# and uses its own step size.
a1, a2, cnt1 = MulStep(ww1, ww2, a1, a2, 500, 5)
a2, a3, cnt2 = MulStep(ww2, ww3, a2, a3, 500, 0.01)
a1, a3, cnt3 = MulStep(ww1, ww3, a1, a3, 500, 0.5)
# print('a1:',a1,'\na2:', a2,'\na3:', a3,'\n', cnt1,cnt2,cnt3)
def MulStep_Test(test, a1, a2, a3, labels=None):
    """Classify augmented samples with three per-class discriminants.

    With ``g_k = a_k^T x`` a sample goes to class 1 when ``g1 > g2`` and
    ``g1 > g3``, to class 2 when ``g1 <= g2`` and ``g2 > g3``, and to class 3
    otherwise (so ties fall to class 3).  The error rate and the predictions
    are printed.

    :param test: samples, shape (dimension, number of samples).
    :param a1: weight column vector of class 1.
    :param a2: weight column vector of class 2.
    :param a3: weight column vector of class 3.
    :param labels: optional true class label per sample.  When omitted the
        samples are assumed to come in three equal consecutive blocks
        (class 1, class 2, class 3); for 15 samples this is the 5/5/5 layout
        the function previously hard-coded.
    :return: ``(err, ans)`` -- number of misclassified samples and the float
        array of predicted classes (earlier versions returned nothing).
    """
    n_samples = test.shape[1]
    g1 = np.asarray(np.matmul(a1.T, test)).ravel()
    g2 = np.asarray(np.matmul(a2.T, test)).ravel()
    g3 = np.asarray(np.matmul(a3.T, test)).ravel()

    # Sized from the input; a fixed length of 15 overflowed on larger test sets.
    ans = np.where(g1 > g2,
                   np.where(g1 > g3, 1.0, 3.0),
                   np.where(g2 > g3, 2.0, 3.0))

    if labels is None:
        block = n_samples // 3
        expected = np.full(n_samples, 3.0)
        expected[:2 * block] = 2
        expected[:block] = 1
    else:
        expected = np.asarray(labels, dtype=float).ravel()

    err = int(np.count_nonzero(ans != expected))
    print('多分类逐步修正错误率:', err / test.shape[1], ' 分类结果为:', ans)
    return err, ans
# Classify the held-out samples with the three per-class discriminants; the
# call prints the error rate and the predicted classes.
MulStep_Test(test, a1, a2, a3)
# print(a1) # 4 x 1
def MulStep_DCS(a1, a2, i):
    """Draw the plane on which two class discriminants are equal.

    With augmented weights ``a = [k, c1, c2, c3]`` the discriminant is
    ``g(x, y, z) = k + c1*x + c2*y + c3*z``.  Setting ``g_1 = g_2`` and
    solving for z gives

        z = ((c1_1 - c1_2)*x + (c2_1 - c2_2)*y + (k_1 - k_2)) / (c3_2 - c3_1)

    The previous code used weight index 3 as the y coefficient and index 2 in
    the denominator, so the drawn surface did not match this formula or the
    x/y/z axes used for the scatter plot and by FLQ_DCS.

    :param a1: weight vector of the first class, indexed as ``a1[k, 0]``.
    :param a2: weight vector of the second class, indexed as ``a2[k, 0]``.
    :param i: colour index: 0 red, 1 green, 2 blue.
    :return: None; the surface is added to the module-level axes ``ax``.
    """
    grid = np.linspace(-1, 7, 20)
    # X and Y hold the grid coordinates; z is the plane height above them.
    X, Y = np.meshgrid(grid, grid)
    colours = ['r', 'g', 'b']

    d_bias = a1[0, 0] - a2[0, 0]
    d_x = a1[1, 0] - a2[1, 0]
    d_y = a1[2, 0] - a2[2, 0]
    z = (d_x * X + d_y * Y + d_bias) / (a2[3, 0] - a1[3, 0])

    ax.plot_surface(X, Y, z, color=colours[i])
    return
# Multi-class decision planes (uncomment to draw); author's notes per pair:
# MulStep_DCS(a1,a2,0)# ok
# MulStep_DCS(a2,a3,1) # classes are not separated
# MulStep_DCS(a1,a3,2)# planes for classes 1 and 3 coincide
# Display the figure built above.
plt.show()