Perceptron Algorithm Implementation

Experiment Content and Requirements

1. Experimental data: the iris dataset, divided into three classes w1, w2, and w3. Each class contains 50 four-dimensional feature vectors, and the three classes occur with equal prior probability.

2. Experimental requirements

1) Take three of the four features of each iris sample as the classification features, and plot the sample points;

2) From each class, take 45 samples as training samples and 5 samples as test samples;

3) Use the batch perceptron method to design classifiers between classes w1 and w2, w2 and w3, and w1 and w3; write out the discriminant functions, plot the decision surfaces, and record the number of iterations until convergence.

4) Use the single-sample perceptron method to design classifiers between classes w1 and w2, w2 and w3, and w1 and w3; write out the discriminant functions, plot the decision surfaces, and record the number of iterations until convergence.

5) For the class pair whose weight vector the perceptron algorithm cannot iterate to, use the minimum squared error (MSE) criterion to find the weight vector; write out the discriminant function, plot the decision surface, and record the number of iterations.

6) Use the stepwise-correction method for multi-class classifiers to classify the three classes; write out the discriminant function of each class and plot the decision surfaces.

7) Apply the classifiers to the test samples, classify them, and display the classification results.

8) Complete the experiment in Python.

import numpy as np
from mpl_toolkits.mplot3d import Axes3D  # registers the 3D projection on older matplotlib
import matplotlib.pyplot as plt

f_train1 = np.loadtxt("iris训练数据.txt", dtype=np.double, delimiter='\t', usecols=[1, 2, 3])  # columns 1-3: the three chosen features
f_train2 = np.loadtxt("iris测试数据.txt", dtype=np.double, delimiter='\t', usecols=[1, 2, 3])
train_1 = (np.r_[f_train1[0:25], f_train2[0:20]]).T  # 3*45: 45 training samples of class 1
train_2 = (np.r_[f_train1[25:50], f_train2[25:45]]).T
train_3 = (np.r_[f_train1[50:75], f_train2[50:70]]).T
test = (np.r_[f_train2[20:25], f_train2[45:50], f_train2[70:75]]).T  # 3*15: 5 test samples per class
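
# Sanity check on the split (this assumes each source file stores the classes
# in consecutive 25-sample blocks, as the slices above expect):
assert train_1.shape == train_2.shape == train_3.shape == (3, 45)
assert test.shape == (3, 15)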
# Plot the sample points

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in Matplotlib 3.6
ax.scatter(train_1[0, :], train_1[1, :], train_1[2, :], c='r', label="points 1")
ax.scatter(train_2[0, :], train_2[1, :], train_2[2, :], c='g', label="points 2")
ax.scatter(train_3[0, :], train_3[1, :], train_3[2, :], c='b', label="points 3")
ax.legend()  # add a legend naming each point set


# Augmentation: prepend a constant 1 to every sample (the bias coordinate)
ww1 = np.r_[np.ones((1, 45)), train_1]  # 4*45, first row is the augmentation
ww2 = np.r_[np.ones((1, 45)), train_2]
ww3 = np.r_[np.ones((1, 45)), train_3]
test = np.r_[np.ones((1, 15)), test]
# Normalization: negate the samples of the second class of each pair
w12 = np.c_[ww1, -ww2]  # 4*90
w23 = np.c_[ww2, -ww3]  # 4*90
w13 = np.c_[ww1, -ww3]  # 4*90
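
# With this normalization the two-class condition (a'x > 0 for the first class,
# a'x < 0 for the second) collapses into the single condition a'y > 0 for every
# column y. A quick illustrative check with a hypothetical weight vector a_demo
# (not a trained weight, only a demonstration of the criterion):
a_demo = np.ones((4, 1))
print("a'y > 0 holds for", int((np.dot(a_demo.T, w12) > 0).sum()), "of", w12.shape[1], "normalized samples")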

def FLQ_DCS(a, x):  # draw the decision surface of a two-class pair
    # Project every sample onto the weight vector (for optional visualization)
    w_d = np.matrix(np.zeros((4, 90)))
    for i in range(90):
        w_d[:, i] = a * (np.dot(a.T, x[:, i]) / np.dot(a.T, a))

    '''
    Decision surface:
        The surface lies where the discriminant function g(x) = 0:
        g(x) = x*a1 + y*a2 + z*a3 + w0 = 0
        =>
        z = -(x*a1 + y*a2 + w0) / a3
    '''
    # Draw the decision surface (point-normal form of the plane)
    x = np.linspace(0, 7, 100)
    y = np.linspace(0, 7, 100)

    X, Y = np.meshgrid(x, y)
    Z = -(a[1, 0] * X + a[2, 0] * Y + a[0, 0]) / a[3, 0]

    # Optional: plot the projection points of the two classes
    # ax.scatter3D(w_d[1, 0:45], w_d[2, 0:45], w_d[3, 0:45], label='POC1')
    # ax.scatter3D(-w_d[1, 45:90], -w_d[2, 45:90], -w_d[3, 45:90], label='POC2')

    ax.plot_surface(X, Y, Z)

    ax.scatter3D(0, 0, 0, label="origin")

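    # Direction of the weight vector from the origin (scaled down for display)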
    ax.plot3D([0, a[1, 0] / 100],
              [0, a[2, 0] / 100],
              [0, a[3, 0] / 100])

    ax.legend(loc='best')
    # plt.show()


# Single-sample correction
def SGD(w, p, max):  # fixed-increment single-sample perceptron
    # Initialize the weight vector
    W = np.matrix(np.ones((4, 1)))  # 4*1
    flag = True  # cleared whenever a correction is made during a pass
    cnt = 0
    while True:
        for i in range(w.shape[1]):  # 90 normalized samples
            if np.dot(W.T, w[:, i])[0, 0] <= 0:  # misclassified: a'y <= 0
                W += p * np.reshape(w[:, i], (4, 1))
                cnt += 1
                if cnt >= max:  # give up: the pair may not be linearly separable
                    return W, cnt
                flag = False
        if flag:  # a full pass without corrections: converged
            break
        flag = True
    return W, cnt

# main
p = 0.3
max = 500
W12, cnt12 = SGD(w12, p, max)
W23, cnt23 = SGD(w23, p, max)  # does not converge: classes 2 and 3 are not linearly separable
W13, cnt13 = SGD(w13, p, max)
print("Single-sample, classes 1/2, corrections until convergence:", cnt12, "   final W:", W12.T)
print("Single-sample, classes 2/3, corrections until convergence:", cnt23, "   final W:", W23.T)
print("Single-sample, classes 1/3, corrections until convergence:", cnt13, "   final W:", W13.T)

def FLQ_Test(test, a12, a23, a13):  # test is 4*15; used for both single-sample and batch classifiers
    t1 = np.matmul(a12.T, test)  # 1*15, sign separates class 1 from class 2
    t2 = np.matmul(a23.T, test)  # class 2 vs class 3
    t3 = np.matmul(a13.T, test)  # class 1 vs class 3
    err = 0
    ans = np.zeros(15)  # predicted label per sample; 0 means ambiguous/rejected

    for i in range(test.shape[1]):
        if t1[:, i] > 0 and t3[:, i] > 0:
            ans[i] = 1
        elif t1[:, i] < 0 and t2[:, i] > 0:
            ans[i] = 2
        elif t2[:, i] < 0 and t3[:, i] < 0:
            ans[i] = 3

    # True labels: samples 0-4 are class 1, 5-9 class 2, 10-14 class 3
    for i in range(test.shape[1]):
        true_label = 1 if i < 5 else (2 if i < 10 else 3)
        if ans[i] != true_label:
            err += 1
    return err, ans

err, ans = FLQ_Test(test, W12, W23, W13)
print('Single-sample error rate:', err / test.shape[1], '   predicted labels:', ans)

# Draw decision surfaces (single-sample results)
# FLQ_DCS(W12, w12)
# FLQ_DCS(W23, w23)
# FLQ_DCS(W13, w13)

# Batch perceptron (batch gradient descent)
def BGD(x, p, m, maxIterations):
    """Batch perceptron: each update uses all currently misclassified samples.
        :param x: normalized augmented training samples, 4*m
        :param p: learning rate
        :param m: number of samples
        :param maxIterations: maximum number of iterations
        """
    w = np.matrix([-1, 2, -2, -1]).reshape(4, 1)  # initial weight vector
    cnt = 0
    while cnt < maxIterations:
        hypothesis = np.dot(w.T, x)  # 1*m, a'y for every normalized sample
        # Indices of the misclassified samples (a'y <= 0)
        mis = [i for i in range(x.shape[1]) if hypothesis[0, i] <= 0]
        if not mis:  # every sample satisfies a'y > 0: converged
            break
        # Batch update: step along the mean of the misclassified sample vectors
        gradient = np.sum(x[:, mis], axis=1).reshape(4, 1) / m
        w = w + p * gradient
        cnt += 1
    return w, cnt

# main
p = 0.014
maxIteration = 20
a12, cnt = BGD(w12, p, 90, maxIteration)
print("Batch, classes 1/2, iterations:", cnt, "   final W:", a12.T)
a23, cnt = BGD(w23, p, 90, maxIteration)
print("Batch, classes 2/3, iterations:", cnt, "   final W:", a23.T)
a13, cnt = BGD(w13, p, 90, maxIteration)
print("Batch, classes 1/3, iterations:", cnt, "   final W:", a13.T)
# plt.show()
err, ans = FLQ_Test(test, a12, a23, a13)
print('Batch error rate:', err / test.shape[1], '   predicted labels:', ans)

# Draw decision surfaces (batch results)
# FLQ_DCS(a12, w12)
# FLQ_DCS(a23, w23)
# FLQ_DCS(a13, w13)


# Minimum squared error (MSE)
# Two approaches: gradient descent and the pseudoinverse
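
# For reference, the MSE weights also have a closed-form solution through the
# pseudoinverse: a = pinv(Y') b, with Y the 4*90 normalized sample matrix and
# b a 90*1 margin vector of ones. A minimal sketch (the name a12_pinv is
# illustrative only, not part of the assignment):
a12_pinv = np.matmul(np.linalg.pinv(w12.T), np.ones((90, 1)))
print("Closed-form MSE weights, classes 1/2:", a12_pinv.T)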

# Gradient descent approach
def MSE(y, p, expect_e, B, maxtrycount):
    """
    y: normalized augmented samples, 4*90
    p: learning rate
    expect_e: target error threshold
    B: margin vector, 90*1
    maxtrycount: maximum number of iterations
    returns the weights a (4*1), the final gradient err, and the iteration count
    """
    a = np.ones(4).reshape(4, 1)
    cnt = 0

    while True:
        cnt += 1
        # Gradient of ||y'a - B||^2 with respect to a
        err = 2 * np.matmul(y, (np.matmul(y.T, a) - B))
        a = a - p * err / (2 * 90)  # averaged gradient step

        # Stop when every gradient component is small enough or the cap is hit
        if np.abs(err).min() < expect_e or cnt >= maxtrycount:
            break

    return a, err, cnt

# main
expect_e = 0.5  # target error threshold
maxtrycount = 20  # maximum number of iterations
p = 0.01  # learning rate
B = np.ones(90).reshape(90, 1)  # margin vector
a12, err, cnt = MSE(w12, p, expect_e, B, maxtrycount)
print("MSE gradient descent, classes 1/2, iterations:", cnt, "   final W:", a12.T)
a23, err, cnt = MSE(w23, p, expect_e, B, maxtrycount)
print("MSE gradient descent, classes 2/3, iterations:", cnt, "   final W:", a23.T)
a13, err, cnt = MSE(w13, p, expect_e, B, maxtrycount)
print("MSE gradient descent, classes 1/3, iterations:", cnt, "   final W:", a13.T)

err, ans = FLQ_Test(test, a12, a23, a13)
print('MSE gradient descent error rate:', err / test.shape[1], '   predicted labels:', ans)

# Draw decision surfaces (MSE gradient descent: reasonably good)
# FLQ_DCS(a12, w12)
# FLQ_DCS(a23, w23)
# FLQ_DCS(a13, w13)

# Pseudoinverse method (Ho-Kashyap style iteration)
def simm(e):
    '''
    Separability test on the error vector e = Xa - b:
    (i)   If e(k) = 0, then Xa(k) = b(k) > 0 and a solution has been found.
    (ii)  If e(k) has positive components, continuing the iteration can still
          drive e(k) toward 0.
    (iii) If no component of e(k) is positive but e(k) != 0, then b(k) can no
          longer change, so no solution exists: the classes are not linearly
          separable.
    :param e: error vector
    :return: 1 if the inseparability condition (iii) holds, else 0
    '''
    e = np.asarray(e).ravel()
    if np.all(e <= 0) and np.any(e != 0):
        return 1
    return 0
def MSE_DIV(w, c, B):
    '''
    :param w: normalized augmented samples, 4*90
    :param c: learning rate
    :param B: margin vector b, 90*1
    :return: weights a, error e, iteration count i
    '''
    # Step 1: the samples in w are already normalized and augmented
    # Step 2: compute the pseudoinverse X# = (X'X)^-1 X'
    w = w.T  # 90*4
    wn = np.matmul(np.linalg.inv(np.matmul(w.transpose(), w)), w.transpose())  # pseudoinverse, 4*90
    # Step 3: initial solution a = X# b
    a = np.matmul(wn, B)  # 4*1
    e = np.matmul(w, a) - B  # error vector, 90*1
    # Steps 4 and 5: iterate and test separability
    i = 0
    if simm(e) == 1:
        print('Not linearly separable')
    elif np.linalg.norm(e) > 0:
        while np.linalg.norm(e) > 0.1 and i < 50:
            # Update rule (only the positive components of e contribute,
            # since e + |e| = 0 wherever e <= 0):
            #   b(k+1) = b(k) + c[e(k) + |e(k)|]
            #   a(k+1) = a(k) + c X#[e(k) + |e(k)|]
            dkh = e + abs(e)
            B = B + c * dkh
            a = a + c * np.matmul(wn, dkh)
            e = np.matmul(w, a) - B
            i += 1
            if np.linalg.norm(e) == 0:
                print('Linearly separable: exact solution found, stopping')
    return a, e, i
# main
a12, err, cnt = MSE_DIV(w12, p, B)
print("Pseudoinverse, classes 1/2, iterations:", cnt, "   final W:", a12.T)
a23, err, cnt = MSE_DIV(w23, p, B)
print("Pseudoinverse, classes 2/3, iterations:", cnt, "   final W:", a23.T)
a13, err, cnt = MSE_DIV(w13, p, B)
print("Pseudoinverse, classes 1/3, iterations:", cnt, "   final W:", a13.T)

err, ans = FLQ_Test(test, a12, a23, a13)
print('Pseudoinverse error rate:', err / test.shape[1], '   predicted labels:', ans)

# Draw decision surfaces (pseudoinverse: classifies correctly)
# FLQ_DCS(a12, w12)
# FLQ_DCS(a23, w23)
# FLQ_DCS(a13, w13)

# Multi-class linear discriminant (ww1, ww2, ww3 are the 4*45 augmented samples)
def MulStep(w1, w2, a1, a2, max, p):  # multi-class stepwise correction
    # w1, w2: 4*45 samples of the two classes; a1, a2: their 4*1 weight vectors
    cnt = 0
    while cnt < max:
        for i in range(w1.shape[1]):
            # A class-1 sample must score higher under a1 than under a2
            if np.matmul(a1.T, w1[:, i]) < np.matmul(a2.T, w1[:, i]):
                a1 += p * w1[:, i].reshape(4, 1)
                a2 -= p * w1[:, i].reshape(4, 1)
            # A class-2 sample must score higher under a2 than under a1
            if np.matmul(a1.T, w2[:, i]) > np.matmul(a2.T, w2[:, i]):
                a2 += p * w2[:, i].reshape(4, 1)
                a1 -= p * w2[:, i].reshape(4, 1)
        cnt += 1
    return a1, a2, cnt
# Initial augmented weight vectors
a1 = np.matrix([1.0, 2, 3, 4]).reshape(4, 1)
a2 = np.matrix([3.0, 2, 2, 1]).reshape(4, 1)
a3 = np.matrix([2.0, 1, 1, 4]).reshape(4, 1)
a1, a2, cnt1 = MulStep(ww1, ww2, a1, a2, 500, 5)
a2, a3, cnt2 = MulStep(ww2, ww3, a2, a3, 500, 0.01)
a1, a3, cnt3 = MulStep(ww1, ww3, a1, a3, 500, 0.5)
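# Note: MulStep has no convergence test, so each call runs the full 500 passes
# and cnt1, cnt2, cnt3 simply record that cap; the learning rate differs per
# class pair (5, 0.01, 0.5 above).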
def MulStep_Test(test, a1, a2, a3):  # test is 4*15
    t1 = np.matmul(a1.T, test)  # 1*15, score of every test sample under class 1
    t2 = np.matmul(a2.T, test)
    t3 = np.matmul(a3.T, test)
    err = 0
    ans = np.zeros(15)
    # Assign each sample to the class with the highest score
    for i in range(test.shape[1]):
        if t1[:, i] > t2[:, i]:
            ans[i] = 1 if t1[:, i] > t3[:, i] else 3
        else:
            ans[i] = 2 if t2[:, i] > t3[:, i] else 3
    # True labels: samples 0-4 are class 1, 5-9 class 2, 10-14 class 3
    for i in range(test.shape[1]):
        true_label = 1 if i < 5 else (2 if i < 10 else 3)
        if ans[i] != true_label:
            err += 1
    print('Multi-class stepwise correction error rate:', err / test.shape[1], '   predicted labels:', ans)

MulStep_Test(test, a1, a2, a3)



def MulStep_DCS(a1, a2, i):  # draw the decision surface between two classes
    '''
    Multi-class decision surface:
        The surface between classes i and j lies where g_i = g_j:
        x*a_i1 + y*a_i2 + z*a_i3 + a_i0 = x*a_j1 + y*a_j2 + z*a_j3 + a_j0
        Solved for z as a function of x and y:
        z = ((a_i1 - a_j1)*x + (a_i2 - a_j2)*y + (a_i0 - a_j0)) / (a_j3 - a_i3)
    '''
    # X and Y span the plotting grid; z gives the surface height at each point
    x = np.linspace(-1, 7, 20)
    y = np.linspace(-1, 7, 20)
    X, Y = np.meshgrid(x, y)
    c = ['r', 'g', 'b']
    z = ((a1[1, 0] - a2[1, 0]) * X + (a1[2, 0] - a2[2, 0]) * Y + (a1[0, 0] - a2[0, 0])) / (a2[3, 0] - a1[3, 0])
    ax.plot_surface(X, Y, z, color=c[i])
    return


# MulStep_DCS(a1,a2,0)#ok
# MulStep_DCS(a2,a3,1) # 分不开
# MulStep_DCS(a1,a3,2)# 13重合
plt.show()
