所使用的数据链接:https://www.ituring.com.cn/book/download/d16bf588-375b-4176-b319-ec3c3a2d99a1
原书网址
计算过程如下:
import pandas, numpy
global seed
def get_npdata(file=r'D:\ChromeCoreDownloads\【Excel示例文件】深度学习的数学\附录B.xlsx'):
    """Load the sample workbook and split it into image and label arrays.

    Args:
        file: Path of the Excel workbook to read.  Defaults to the location
            the script was originally written against.

    Returns:
        A ``(np_data, value)`` tuple.  ``np_data`` stacks the first six rows
        of every six-column group (one image per group); ``value`` stacks the
        remaining rows of the first column of each group (the expected
        answer).  With the slice used here that is 96 groups, i.e. shapes
        ``(96, 6, 6)`` and ``(96, 3)`` when the sheet is laid out as the
        slice assumes.
    """
    sheet = pandas.read_excel(file, sheet_name='Data', engine='openpyxl')
    # Slice away the rows and columns that carry no sample data.
    df = sheet.iloc[1:10, 11:587]

    images = []
    labels = []
    # Every sample occupies six adjacent columns.
    for start in range(0, 571, 6):
        group = df.iloc[:, start:start + 6]
        # Rows 0-5 of the group form the 6x6 image to recognise.
        images.append(numpy.array(group.iloc[:6, :]))
        # The rows below the image, first column only, hold the answer.
        labels.append(numpy.array(group.iloc[6:, 0]))

    # Stack once at the end instead of re-concatenating on every iteration.
    np_data = numpy.array(images)
    value = numpy.array(labels)
    return np_data, value
def convo_cal(np_data, fil, bf):
    """Compute one sigmoid-activated feature map (helper for convo_layer_out).

    The filter is slid over the image one step at a time; at each position
    the overlapping values are multiplied element-wise, summed, and the bias
    is added.  The sigmoid of that weighted input is the output value.

    Args:
        np_data: 2-D image array.
        fil: 2-D filter array; its shape determines the window size.
        bf: Scalar bias added to every weighted sum.

    Returns:
        2-D float array of shape
        ``(rows - filter_rows + 1, cols - filter_cols + 1)``.

    Raises:
        ValueError: If the image is smaller than the filter in either
            dimension.
    """
    image = numpy.asarray(np_data)
    kernel = numpy.asarray(fil)
    k_rows, k_cols = kernel.shape
    out_rows = image.shape[0] - k_rows + 1
    out_cols = image.shape[1] - k_cols + 1
    if out_rows < 1 or out_cols < 1:
        raise ValueError(
            'image of shape %s is smaller than filter of shape %s'
            % (image.shape[:2], kernel.shape)
        )

    # Weighted input z for every window position, filled in place so the
    # result is not rebuilt on each step.
    z = numpy.empty((out_rows, out_cols))
    for i in range(out_rows):
        for j in range(out_cols):
            window = image[i:i + k_rows, j:j + k_cols]
            z[i, j] = numpy.sum(window * kernel) + bf

    # Sigmoid activation applied to the whole map at once.
    return 1 / (1 + numpy.exp(-z))
def convo_layer_out(np_data):
    """Compute the convolution-layer output for every image.

    Three random 3x3 filters and three biases are drawn from the standard
    normal distribution after seeding numpy's global generator with the
    module-level ``seed``; each image is then passed through ``convo_cal``
    once per filter.

    Args:
        np_data: Iterable of 2-D image arrays.

    Returns:
        Array holding, for each image, the three feature maps in filter
        order (``(96, 3, 4, 4)`` for 96 images of 6x6).  An empty input
        yields an empty array.
    """
    numpy.random.seed(seed)
    # The draw order (three filters, then the three biases) is kept so that
    # a given seed keeps producing the same parameters.
    filters = [numpy.random.randn(3, 3) for _ in range(3)]
    biases = numpy.random.randn(3)
    # To validate against the parameters provided by the book, replace the
    # values above with those returned by set_para_standard().

    # Every image is processed by each of the three filters.
    return numpy.array([
        [convo_cal(pic, f, b) for f, b in zip(filters, biases)]
        for pic in np_data
    ])
def pooling_out(convo_out):
    """Compute the max-pooling layer output.

    Every feature map is cut into a left and a right half, each half into a
    top and a bottom part, and the maximum of each of the four parts is
    kept.  The four maxima are ordered top-left, bottom-left, top-right,
    bottom-right.

    Args:
        convo_out: 4-D array ``(images, filters, height, width)`` with even
            height and width, e.g. ``(96, 3, 4, 4)``.

    Returns:
        Array of shape ``(images, filters, 4)``, e.g. ``(96, 3, 4)``.

    Raises:
        ValueError: If the input is not 4-D or a feature map cannot be
            split into equal halves.
    """
    maps = numpy.asarray(convo_out)
    if maps.ndim != 4:
        raise ValueError(
            'expected a 4-D array (images, filters, height, width), got %d-D'
            % maps.ndim
        )
    n_pics, n_filters, height, width = maps.shape
    if height % 2 or width % 2:
        raise ValueError('feature map height and width must both be even')

    # Axes become (image, filter, row_half, row, col_half, col).
    tiles = maps.reshape(n_pics, n_filters, 2, height // 2, 2, width // 2)
    # Maximum inside every quarter -> (image, filter, row_half, col_half).
    pooled = tiles.max(axis=(3, 5))
    # Put the column half first so flattening walks the left half top to
    # bottom, then the right half; one row of four values per filter.
    return pooled.transpose(0, 1, 3, 2).reshape(n_pics, n_filters, 4)
def last_layer_out(poolingout):
    """Compute the output-layer activations for every image.

    Three random 3x4 weight matrices and three biases are drawn from the
    standard normal distribution after seeding numpy's global generator
    with the module-level ``seed``.  Each output unit sums the row-wise dot
    products of its weights with the pooled values, adds its bias and
    applies the sigmoid.

    Args:
        poolingout: Iterable of pooled arrays, one per image (3x4 each in
            the pipeline of this file).

    Returns:
        Array with one row of three activations per image.
    """
    # NOTE(review): convo_layer_out seeds the generator with this same
    # value, so these draws start from the same stream position as the
    # filters -- confirm that is intended.
    numpy.random.seed(seed)
    weights = [numpy.random.randn(3, 4) for _ in range(3)]
    bo = numpy.random.randn(3)
    # To validate against the parameters provided by the book, replace the
    # values above with those returned by set_outlayer_para().

    allout = []
    for pic in poolingout:
        activations = []
        for w, b in zip(weights, bo):
            # Weighted input: sum of the row-by-row dot products, plus bias.
            z = sum(numpy.dot(row, w_row) for row, w_row in zip(pic, w)) + b
            activations.append(1 / (1 + numpy.exp(-1 * z)))
        allout.append(numpy.array(activations))
    return numpy.array(allout)
def cost_function(out, value):
    """Return the total squared-error cost.

    Args:
        out: Iterable of network outputs, one array per sample.
        value: Iterable of expected outputs, paired with ``out`` in order.

    Returns:
        The sum over all samples of ``1/2 * (output - expected) ** 2``.
    """
    c = 0
    for o, v in zip(out, value):
        c += numpy.sum(1 / 2 * (o - v) ** 2)
    # Hand the accumulated cost back to the caller.
    return c
def test():
    """Run the whole forward pass with seed 16 and print the cost."""
    global seed
    seed = 16
    samples, targets = get_npdata()
    # Convolution layer -> pooling layer -> output layer.
    feature_maps = convo_layer_out(samples)
    pooled = pooling_out(feature_maps)
    predictions = last_layer_out(pooled)
    # Cost of the predictions against the expected answers.
    print(cost_function(predictions, targets))
def check():
    """Run a modified copy of the first image through the network.

    Seed 16 is used.  Four cells of the first loaded image are set to 1
    before it is wrapped into a one-image batch.

    Returns:
        The output-layer activations for that single image.
    """
    global seed
    seed = 16
    np_data, _ = get_npdata()
    sample = np_data[0]
    # Set (0, 2), (0, 4), (5, 2) and (5, 4) to 1, in that order.
    for row in (0, 5):
        for col in (2, 4):
            sample[row][col] = 1
    batch = numpy.array([sample])
    # Convolution layer -> pooling layer -> output layer.
    return last_layer_out(pooling_out(convo_layer_out(batch)))
out = check()
print(out)
# The recognised digit is the 1-based position of the largest activation.
print('number is :', out[0].tolist().index(out[0].max()) + 1)
我们还是通过更改seed(i)的i值来估计最佳的参数,可以用下面的函数实现:
def guess_para():
    """Search seeds 0..9999 for the parameters with the lowest cost.

    For every seed the full forward pass is run and the cost printed.
    Seeds whose cost is below 50 are kept as candidates, and the candidate
    with the lowest cost is reported (the earliest one on ties).  If no
    seed gets below 50, a message is printed instead.
    """
    global seed
    # The workbook does not depend on the seed, so read it only once.
    np_data, value = get_npdata()

    candidates = []
    for i in range(10000):
        seed = i
        # Convolution layer -> pooling layer -> output layer.
        convo_out = convo_layer_out(np_data)
        poolingout = pooling_out(convo_out)
        out = last_layer_out(poolingout)
        # Cost for this seed's parameters.
        c = cost_function(out, value)
        print(c, seed)
        if c < 50:
            candidates.append((seed, c))

    if not candidates:
        # Nothing qualified; report that rather than calling min() on an
        # empty list.
        print('no seed produced a cost below 50')
        return
    best = min(candidates, key=lambda item: item[1])
    print('best seed is :', best[0])
100以内最佳的seed值是16, 代价函数的值为34.
验证时只需将验证数据设置为(1x6x6)矩阵,并赋值给np_data。
输出层输出的最大值在数组中的位置加一,即为判定结果。
以下使用书本提供的参数验证计算过程是正确的。
# 使用书本提供的数据验证
def set_para_standard():
    """Return the book's convolution-layer parameters for validation.

    Returns:
        ``(f1, f2, f3, bf)``: three 3x3 filter arrays and a list of the
        three filter biases.
    """
    filter_rows = (
        [[1.161, -0.848, 1.356], [5.825, -14.571, -6.945], [4.387, 8.317, 1.214]],
        [[1.950, 14.210, 5.029], [4.344, -1.471, -12.478], [-1.346, -5.859, -2.408]],
        [[-0.785, 4.159, -0.542], [0.468, -8.465, -4.508], [0.183, -2.567, 0.231]],
    )
    # Build each filter as its own array.
    f1, f2, f3 = (numpy.array(rows) for rows in filter_rows)
    bf = [-14.706, -13.572, -4.879]
    return f1, f2, f3, bf
def set_outlayer_para():
    """Return the book's output-layer parameters for validation.

    Returns:
        ``(w1, w2, w3, bo)``: three 3x4 weight arrays, one per output unit,
        and a list of the three output biases.
    """
    weight_rows = (
        [[-0.317, -3.223, 0.151, 1.323], [-25.877, 0.402, -0.494, -1.571], [-3.661, 0.009, 1.908, -0.159]],
        [[0.010, -11.069, -0.642, 9.001], [12.096, -0.352, 0.003, 10.159], [0.430, 2.060, 1.672, -0.093]],
        [[2.775, 43.616, -0.233, 0.045], [0.755, -0.138, 0.213, -8.731], [-1.086, -0.167, -0.851, -3.019]],
    )
    # Build each weight matrix as its own array.
    w1, w2, w3 = (numpy.array(rows) for rows in weight_rows)
    bo = [14.764, -21.116, -9.164]
    return w1, w2, w3, bo