BP神经网络应该是所有神经网络里面的比较简单易懂的一种。
当然,python是有BP神经网络的库,但是我这次要给的代码,是我自己根据网上大佬的案例,自己手动编写的一个隐含层的一个神经网络模型。
代码的流程如下:
1.读取train训练集数据,读取test测试集数据。
2.对数据进行归一化处理。
3.初始化其中的w(权值),b(偏置)。
4.随机选取一个训练集数据,进行正向传播,h = wi*xi + …. + b1
5.得到答案与正确期望值进行对比。得到误差值。
6.通过误差值进行反向传播。修正各个权值数值以及偏置的数值。
7.进行一轮测试集的测试,测试该模型的识别率。
8.如果识别率低于80%,则重复步骤4
9.输出当前的准确率
有关于优化,或者改良:
关于学习率:我在隐含层到输出层的学习率统一设置的是0.5,而在输入层到隐含层的学习率统一设置的是0.2。因为这两层的权值对最终结果的影响并不一样大,所以两层的学习率设置得不一样。
关于识别率:我在判定标准上做出了一定的妥协。本来要求三个输出值中的最大值超过0.90才判定为识别正确,但在之后的调试中,我将该阈值改成了0.75。然而这并没有使正确率变得更高,所以我认为瓶颈应该是隐含层的规模不足导致的。
我的隐含层只有一层,且只有4个节点。我认为这是影响识别率的一个关键问题。
训练集和测试集数据链接:
链接:https://pan.baidu.com/s/1O1Zvn4eNii3DJDSn03v43w
提取码:25eh
import random
import math
# Network layout: 4 hidden-layer nodes, 3 output-layer nodes.
# Each hidden node takes the 4 input features; each output node takes the 4 hidden activations.
# All weights live in the single flat list w:
#   w[0]..w[15]  input->hidden weights (4 inputs x 4 hidden nodes)
#   w[16]..w[27] hidden->output weights (4 hidden x 3 output nodes)
# NOTE(review): an original comment claimed the biases start at 1, but the
# initialization below draws them from uniform(-0.5, 0.5) — comment was stale.
# Hidden activations are stored in list h; each layer has its own bias list.
x = []#training-set rows
w = []#all 28 weights, flat (see layout above)
h = []#hidden-layer activations
out = []#output-layer activations
b1 = []#hidden-layer biases (4)
b2 = []#output-layer biases (3)
y = []#test-set rows
def putin(w, a, j, b1):
    """Pre-activation of hidden node j.

    Computes dot(a[0:4], w[4j:4j+4]) + b1[j], i.e. the weighted sum of the
    4 input features feeding hidden node j plus that node's bias.

    w  -- flat weight list (input->hidden weights occupy w[0:16])
    a  -- one training row (only the first 4 entries are used)
    j  -- hidden-node index, 0..3
    b1 -- hidden-layer bias list
    """
    base = j * 4
    total = b1[j]
    for n in range(4):
        total = total + a[n] * w[base + n]
    return total
def putout(h, w, j, b2):
    """Pre-activation of output node j.

    Computes dot(h[0:4], w[16+4j : 16+4j+4]) + b2[j]: the weighted sum of
    all 4 hidden activations feeding output node j plus that node's bias.

    h  -- hidden-layer activations (4 values)
    w  -- flat weight list (hidden->output weights occupy w[16:28])
    j  -- output-node index, 0..2
    b2 -- output-layer bias list

    BUG FIX: the original loop ran range(0,3), summing only 3 of the 4
    hidden activations. The backprop step updates 4 weights per output
    node (w[16+4j]..w[16+4j+3]), so w[19], w[23] and w[27] were being
    trained but never used in the forward pass.
    """
    base = 16 + j * 4
    total = b2[j]
    for n in range(4):
        total = total + h[n] * w[base + n]
    return total
def back2w(O, out, h, i, j):
    """Gradient of the error wrt hidden->output weight (output i, hidden j).

    Chain rule for a sigmoid output with a (1/3)*sum((O-out)^2) loss:
    dE/dw = -2/3 * (O[i] - out[i]) * out[i] * (1 - out[i]) * h[j].

    O   -- target output vector
    out -- actual output activations
    h   -- hidden activations
    i   -- output-node index
    j   -- hidden-node index (the weight's input side)
    """
    err = - 2/3 * (O[i] - out[i])
    return err * (out[i] * (1 - out[i]) * h[j])
def back2b(O, out, h, i, j):
    """Gradient of the error wrt output bias of node i.

    Same as back2w but without the h[j] factor (bias input is 1):
    dE/db = -2/3 * (O[i] - out[i]) * out[i] * (1 - out[i]).

    h and j are accepted but unused; they are kept so the call sites that
    pass them stay valid.
    """
    grad = - 2/3 * (O[i] - out[i])
    return grad * out[i] * (1 - out[i])
def back1w(O, out, w, j):
    """Error signal propagated back to hidden node j.

    Sums, over the 3 output nodes, each output's delta multiplied by the
    hidden->output weight connecting hidden node j to it
    (w[16 + j], w[16 + j + 4], w[16 + j + 8]).

    O   -- target output vector
    out -- actual output activations
    w   -- flat weight list
    j   -- hidden-node index, 0..3
    """
    total = 0
    idx = j
    for i in range(3):
        term = - 2/3 * (O[i] - out[i])
        term = term * (out[i] * (1 - out[i]) * w[16 + idx])
        total = total + term
        idx = idx + 4
    return total
def sigmoid(h):
    """Apply the logistic function 1/(1+e^-x) to every entry of h, in place.

    The negated input is rounded to 6 decimals before exponentiation and the
    result to 4 decimals, matching the precision used elsewhere in this file.
    """
    for idx in range(len(h)):
        neg = round(-h[idx], 6)
        h[idx] = round(1 / (math.exp(neg) + 1), 4)
def acc(O, out):
    """Score one prediction: 1 if correct, 0 otherwise.

    The prediction is the index of the largest of the 3 outputs. It counts
    as correct only when that winning activation is at least 0.75 AND the
    target vector O has its 0.95 entry at the same index.
    """
    best = 0
    for i in range(1, 3):
        if out[i] > out[best]:
            best = i
    if out[best] >= 0.75 and O[best] == 0.95:
        return 1
    return 0
reload = 200  # maximum number of training rounds (NOTE(review): never used below — the while(1) loop only stops on accuracy; the old comment also said 50, which was stale)
# Read the training set: each line is 4 float features followed by an
# integer class label (1-3), whitespace separated.
with open('iris-data-training.txt') as f:
    for char in f:
        char1 = char.split()
        if not char1:  # tolerate blank/trailing lines
            continue
        x.append([float(char1[0]), float(char1[1]), float(char1[2]),
                  float(char1[3]), int(char1[4])])
x_max = []
x_min = []
# Min-max normalize each of the 4 feature columns of x, remembering the
# per-column extremes so the test set can be scaled the same way.
# BUG FIX: the original initialized both max and min to 0, so for the
# all-positive iris features the true column minimum was never found and
# "min-max" scaling silently degraded to x/max.
for j in range(0, 4):
    col = [row[j] for row in x]
    hi = max(col)
    lo = min(col)
    x_max.append(hi)
    x_min.append(lo)
    for i in range(0, len(x)):
        x[i][j] = round((x[i][j] - lo) / (hi - lo), 4)
# Append a one-hot target to every training row. Column 4 doubles as the
# first one-hot slot: for class 1 it is left at 1, otherwise it is zeroed,
# so each row ends as [f0..f3, c1, c2, c3].
for i in range(0, len(x)):
    label = x[i][4]
    if label == 1:
        x[i].extend([0, 0])
    if label == 2:
        x[i][4] = 0
        x[i].extend([1, 0])
    if label == 3:
        x[i][4] = 0
        x[i].extend([0, 1])
# Read the TEST set (the original comment wrongly said "training data"):
# same format as the training file — 4 float features + 1 integer label.
with open('iris-data-testing.txt') as f:
    for char in f:
        char1 = char.split()
        if not char1:  # tolerate blank/trailing lines
            continue
        y.append([float(char1[0]), float(char1[1]), float(char1[2]),
                  float(char1[3]), int(char1[4])])
# Scale the test features with the TRAINING-set extremes (never the test
# set's own), then attach the same one-hot target layout used for x.
for row in y:
    for j in range(4):
        row[j] = round((row[j] - x_min[j]) / (x_max[j] - x_min[j]), 4)
for row in y:
    label = row[4]
    if label == 1:
        row.extend([0, 0])
    if label == 2:
        row[4] = 0
        row.extend([1, 0])
    if label == 3:
        row[4] = 0
        row.extend([0, 1])
# Random initialization: the 28 weights in [-1, 1], the 4 hidden biases
# and 3 output biases in [-0.5, 0.5], all rounded to 2 decimals.
w.extend(round(random.uniform(-1, 1), 2) for _ in range(28))
b1.extend(round(random.uniform(-0.5, 0.5), 2) for _ in range(4))
b2.extend(round(random.uniform(-0.5, 0.5), 2) for _ in range(3))
# Main loop: pick one random sample, train on it 100 times, then measure
# test-set accuracy; stop once accuracy reaches 80%.
# NOTE(review): the indentation of this block was lost in the paste and has
# been reconstructed from the data flow — nesting should be re-verified
# against the author's original, especially the bias-update loops.
while(1):
    a = []
    O = []
    # random index into the training set (uniform float truncated to int)
    a_a = int(random.uniform(0,len(x)))
    for i in range(0,7):
        a.append(x[a_a][i])
        # columns 4..6 hold the one-hot label; map it to soft targets
        if x[a_a][i] == 1:
            if i == 4:
                O = [0.95,0.025,0.025]
            if i == 5:
                O = [0.025,0.95,0.025]
            if i == 6:
                O = [0.025,0.025,0.95]
    for i in range(100):
        # train 100 iterations on the one randomly selected sample
        # (the loop variable i is shadowed by the inner loops below)
        h = []
        out = []
        for i in range(0,4):  # forward pass: input -> hidden
            h.append(putin(w,a,i,b1))
        sigmoid(h)
        for i in range(0,3):  # forward pass: hidden -> output
            out.append(putout(h,w,i,b2))
        sigmoid(out)
        e = []
        e_total = 0
        for i in range(0,3):  # per-output squared error, weighted 1/3
            K = 1/3*((O[i] - out[i])**2)
            e.append(K)
            e_total = e_total + K
        for i in range(3):  # hidden->output weight update, learning rate 0.5
            for j in range(4):
                w[16+(4*i)+j] = w[16+(4*i)+j] - 0.5 * back2w(O,out,h,i,j)
        for j in range(3):  # output bias update, learning rate 0.5
            # NOTE(review): i here is the leftover value (2) from the loop
            # above, so every bias uses output node 2's error term — this
            # looks unintended; confirm against the intended math.
            b2[j] = b2[j] - 0.5 * back2b(O,out,h,i,j)
        for i in range(4):  # input->hidden updates, learning rate 0.2
            for j in range(4):
                w[(4*i)+j] = w[(4*i)+j] - 0.2 *(back1w(O, out, w, j) * h[i] * a[i])
            for j in range(4):
                b1[j] = b1[j] - 0.2 *(back1w(O, out, w, j) * h[i])
    sum = 0  # correct-prediction counter over the whole test set
    for j in range(len(y)):
        b = []
        O = []
        bb = j
        for i in range(0, 7):
            b.append(y[bb][i])
            if y[bb][i] == 1:
                if i == 4:
                    O = [0.95, 0.025, 0.025]
                if i == 5:
                    O = [0.025, 0.95, 0.025]
                if i == 6:
                    O = [0.025, 0.025, 0.95]
        h = []
        out = []
        for i in range(0,4):  # forward pass on the test row
            h.append(putin(w,b,i,b1))
        sigmoid(h)
        for i in range(0,3):
            out.append(putout(h,w,i,b2))
        sigmoid(out)
        sum = sum + acc(O,out)
    print(sum)
    # stop once at least 80% of the test rows are classified correctly
    if sum/len(y) >= 0.80:
        break