下文将依次实现卷积层、池化层和全连接层。其中卷积层会给出多个版本的实现,各版本的主要区别在于代码的优化程度。
import numpy as np
# Naive single-channel 3x3 convolution whose output has the same size as the
# input: the input is zero-padded by one pixel on every side, then the kernel
# is slid over every position.
h = 32  # input height
w = 48  # input width
input_2Ddata = np.random.randn(h, w)
output_2dData = np.random.randn(h, w)  # same size as the input; every entry is overwritten below
kern = np.random.randn(3, 3)  # 3x3 kernel
# Alternative fixed kernel that can be used instead of the random one:
# kern = np.array([[-1,-2,-1],[0,0,0],[1,2,1]],dtype = np.float64)

# Zero padding: a (h+2, w+2) array of zeros with the input copied into the
# interior, leaving a one-pixel border of zeros.
padding = np.zeros(shape=(h + 2, w + 2))
padding[1:h + 1, 1:w + 1] = input_2Ddata

for i in range(h):
    band = padding[i:i + 3]  # the three padded rows covered by output row i
    for j in range(w):
        # In padded coordinates the window starts at (i, j), which makes it
        # centred on input element (i, j).
        window = band[:, j:j + 3]
        output_2dData[i, j] = np.sum(kern * window)
import numpy as np
def conv2D(input_2Ddata, kern):
    """Zero-padded sliding-window convolution of one 2D channel ("same" size).

    Each output element is the sum of the element-wise product between
    ``kern`` and the equally sized window of the zero-padded input (the
    kernel is applied as-is, it is not flipped).

    Args:
        input_2Ddata: 2D array of shape (h, w).
        kern: 2D array of shape (kern_h, kern_w).

    Returns:
        A new 2D float array of shape (h, w).
    """
    (h, w) = input_2Ddata.shape
    (kern_h, kern_w) = kern.shape
    pad_top = (kern_h - 1) // 2
    pad_left = (kern_w - 1) // 2
    # The total padding per axis is kern - 1, so every window is full-sized.
    # For odd kernels this equals the symmetric 2 * ((kern - 1) // 2); for even
    # kernels the extra row/column of zeros goes to the bottom/right.
    padded = np.zeros(shape=(h + kern_h - 1, w + kern_w - 1))
    # Explicit end indices are required: a stop of ``-pad`` turns into ``-0``
    # (an empty slice) when the padding is 0, i.e. for a 1x1 kernel.
    padded[pad_top:pad_top + h, pad_left:pad_left + w] = input_2Ddata
    output_2Ddata = np.zeros(shape=(h, w))
    # Convolution: one window per output element.
    for i in range(h):
        for j in range(w):
            window = padded[i:i + kern_h, j:j + kern_w]
            output_2Ddata[i, j] = np.sum(kern * window)
    return output_2Ddata
# Multi-channel convolution built on conv2D: every output channel is the sum
# of the 2D convolutions of all input channels with their own kernel slice,
# plus one bias value per output channel.
h, w = 32, 48  # input height and width
in_d = 12  # input depth
out_d = 24  # output depth
input_3Ddata = np.random.randn(h, w, in_d)
output_3Ddata = np.zeros(shape=(h, w, out_d))  # accumulators start at 0
(kern_h, kern_w) = (3, 3)  # kernel height and width
kerns = np.random.randn(out_d, kern_h, kern_w, in_d)  # 4D kernel bank
bias = np.random.randn(out_d)  # 1D bias, one entry per output channel

for m in range(out_d):
    # View into output channel m; the in-place additions below write
    # straight into output_3Ddata.
    channel_sum = output_3Ddata[:, :, m]
    for k in range(in_d):
        input_2Ddata = input_3Ddata[:, :, k]
        kern = kerns[m, :, :, k]
        channel_sum += conv2D(input_2Ddata, kern)  # accumulate over input channels
    channel_sum += bias[m]  # the bias is added once per output channel
实际运行后发现,上面这种实现的速度很慢,需要进一步优化。
import numpy as np
def conv2D(input_2Ddata, kern, in_size, out_size, kern_size=3, stride=1):
    """Strided sliding-window convolution of one 2D channel.

    No padding is applied here; windows are read directly from
    ``input_2Ddata``.

    Args:
        input_2Ddata: 2D array the windows are sliced from.
        kern: kernel multiplied element-wise with each window.
        in_size: (h1, w1); window start offsets are 0, stride, 2*stride, ...
            strictly below h1 (rows) and w1 (columns).
        out_size: (h2, w2), shape of the returned array.
        kern_size: side length of the square window (default 3).
        stride: step between consecutive windows (default 1).

    Returns:
        Array of shape ``out_size``. Positions for which no window start
        offset exists keep their initial value 0.
    """
    in_rows, in_cols = in_size
    out_rows, out_cols = out_size
    output_2Ddata = np.zeros(shape=out_size)
    # Number of windows actually evaluated per axis: limited both by the
    # output size and by how many start offsets fit below the input size.
    n_rows = min(out_rows, len(range(0, in_rows, stride)))
    n_cols = min(out_cols, len(range(0, in_cols, stride)))
    for r in range(n_rows):
        top = r * stride
        for c in range(n_cols):
            left = c * stride
            window = input_2Ddata[top:top + kern_size, left:left + kern_size]
            output_2Ddata[r, c] = np.sum(kern * window)
    return output_2Ddata
# Strided multi-channel convolution: the 3D input is zero-padded once up
# front, then every (output channel, input channel) pair is convolved with
# the strided conv2D and accumulated.
h1, w1, d1 = 32, 48, 12  # input height, width and depth
input_3Ddata = np.random.randn(h1, w1, d1)

# Hyper-parameters
S = 2  # stride
F = 3  # kernel side length
d2 = 24  # output depth
P = (F - 1) // 2  # zero padding on each side
h2 = (h1 - F + 2 * P) // S + 1  # output height
w2 = (w1 - F + 2 * P) // S + 1  # output width

# Zero padding: copy the input into the interior of a larger zero array.
padding = np.zeros(shape=(h1 + 2 * P, w1 + 2 * P, d1))
padding[P:P + h1, P:P + w1, :] = input_3Ddata

output_3Ddata = np.zeros(shape=(h2, w2, d2))
kerns = np.random.randn(d2, F, F, d1)  # 4D kernel bank
bias = np.random.randn(d2)  # 1D bias, one entry per output channel

for m in range(d2):
    # View into output channel m; in-place additions update output_3Ddata.
    channel_sum = output_3Ddata[:, :, m]
    for k in range(d1):
        input_2Ddata = padding[:, :, k]
        kern = kerns[m, :, :, k]
        # Accumulate the contribution of input channel k.
        channel_sum += conv2D(
            input_2Ddata,
            kern,
            in_size=(h1, w1),
            out_size=(h2, w2),
            kern_size=F,
            stride=S,
        )
    channel_sum += bias[m]  # the bias is added once per output channel
运行时可以明显看出,这个程序比上一个快了很多:步长为2使输出位置数减少到约四分之一,并且0填充只在循环外做一次,而不是在每次调用conv2D时重复进行。
import numpy as np
# Convolution layer implemented as one matrix product (im2col):
#   1. stretch every local window into a row vector,
#   2. store the filter bank as the columns of a weight matrix,
#   3. multiply, add the bias and apply ReLU,
#   4. reshape the result rows back into a 4D feature map.

### Step 1: stretch each local window into a row vector
filter_size = 3  # kernel side length
filter_size2 = filter_size * filter_size
stride = 1
padding = (filter_size - 1) // 2  # zero padding on each side
(batch, in_height, in_width, in_depth) = (8, 32, 48, 16)  # 8 images
in_data = np.random.randn(batch, in_height, in_width, in_depth)  # random 4D input features
out_height = (in_height - filter_size + 2 * padding) // stride + 1  # feature-map height
out_width = (in_width - filter_size + 2 * padding) // stride + 1  # feature-map width
out_size = out_height * out_width
matric_data = np.zeros((out_size * batch, filter_size2 * in_depth))  # one row per window
padding_data = np.zeros((batch, in_height + 2 * padding, in_width + 2 * padding, in_depth))
# Explicit end indices keep this valid even if the padding were 0.
padding_data[:, padding:padding + in_height, padding:padding + in_width, :] = in_data
# Exclusive upper bounds for the window start offsets on the padded input.
height_ef = padding_data.shape[1] - filter_size + 1
width_ef = padding_data.shape[2] - filter_size + 1
for i_batch in range(batch):  # every 3D feature map
    i_batch_size = i_batch * out_size  # row of the first window of image i_batch
    for i_h, i_height in zip(range(out_height), range(0, height_ef, stride)):  # every output row
        i_height_size = i_batch_size + i_h * out_width  # row of the first window of output row i_h
        for i_w, i_width in zip(range(out_width), range(0, width_ef, stride)):  # every output column
            # Take the window and flatten it with ravel() into a 1D row.
            matric_data[i_height_size + i_w, :] = padding_data[
                i_batch,
                i_height:i_height + filter_size,
                i_width:i_width + filter_size,
                :,
            ].ravel()

### Step 2: the filter bank as column vectors (one column per output channel)
out_depth = 32
weights = 0.01 * np.random.randn(filter_size2 * in_depth, out_depth)
bias = np.zeros((1, out_depth))

### Step 3: matrix product and ReLU activation
filter_data = np.dot(matric_data, weights) + bias  # the bias is broadcast over all rows
filter_data = np.maximum(0, filter_data)  # ReLU

### Step 4: turn every row of filter_data into the depth vector of one output pixel
# Row b*out_size + i*out_width + j of filter_data belongs to pixel (b, i, j),
# which is exactly the C-order layout of a (batch, out_height, out_width,
# out_depth) array, so a reshape places every row correctly. The output must
# be out_height rows tall (not out_width), and each pixel must read its own
# row rather than one shared row per output line.
out_data = filter_data.reshape(batch, out_height, out_width, out_depth)
池化的思想比较简单,这里直接用矩阵运算来实现。
import numpy as np
# Max pooling implemented with matrix operations: every pooling window of
# every channel becomes one row of matric_data, the row-wise maximum is the
# pooled value, and the maxima are laid back out as a 4D feature map.
filter_size = 2  # pooling window side length
filter_size2 = filter_size * filter_size  # elements per window
stride = 2
(batch, in_height, in_width, in_depth) = (8, 32, 48, 16)  # 8 images
in_data = np.random.randn(batch, in_height, in_width, in_depth)  # random 4D input features
out_height = (in_height - filter_size) // stride + 1  # feature-map height
out_width = (in_width - filter_size) // stride + 1  # feature-map width
out_size = out_height * out_width
out_depth = in_depth  # pooling keeps the depth
out_data = np.zeros((batch, out_height, out_width, out_depth))
# One row per (image, output row, output column, channel); one column per
# position inside the pooling window.
matric_data = np.zeros((out_size * batch * in_depth, filter_size2))
# Exclusive upper bounds for the window start offsets.
height_ef = in_height - filter_size + 1
width_ef = in_width - filter_size + 1

# Gather phase: copy every window into in_depth consecutive rows.
for i_batch in range(batch):
    i_batch_size = i_batch * out_size * in_depth  # first row of image i_batch
    for i_h in range(out_height):
        i_height = i_h * stride  # top edge of the window in the input
        i_height_size = i_batch_size + i_h * out_width * in_depth  # first row of output row i_h
        for col in range(out_width):
            i_w = col * in_depth  # row offset of this output column
            i_width = col * stride  # left edge of the window in the input
            row_start = i_height_size + i_w
            src = in_data[i_batch, i_height:i_height + filter_size, i_width:i_width + filter_size, :]
            # src has shape (filter_size, filter_size, in_depth). Flattening
            # the two window axes and transposing gives one row per channel
            # with column index i*filter_size + j for window position (i, j).
            matric_data[row_start:row_start + in_depth, :] = src.reshape(filter_size2, in_depth).T

matric_data_max_value = matric_data.max(axis=1, keepdims=True)  # keepdims keeps the result 2D
matric_data_max_pos = matric_data == matric_data_max_value  # boolean mask of the maximum positions

# Scatter phase: the rows are ordered (image, output row, output column,
# channel), i.e. the C-order layout of out_data, so the maxima can be
# written back with a single reshaped assignment.
out_data[...] = matric_data_max_value.reshape(batch, out_height, out_width, out_depth)
只要了解池化需要做的工作,并看懂前面用矩阵乘法实现卷积的过程,这部分代码就不难理解。
import numpy as np
# Fully connected layer: flatten every image into one row, multiply by the
# weight matrix, add the bias and (when `last` is falsy) apply ReLU.
last = 0  # ReLU is applied only when this is falsy; presumably marks the output layer - confirm
(batch, in_height, in_width, in_depth) = (8, 32, 48, 16)
in_data = np.random.randn(batch, in_height, in_width, in_depth)
size = in_height * in_width * in_depth  # number of features per image
matric_data = np.zeros((batch, size))
# Stretch every 3D feature map into a 1D row (copied into matric_data).
matric_data[:] = in_data.reshape(batch, size)
out_depth = 32
weights = 0.01 * np.random.randn(size, out_depth)
bias = np.zeros((1, out_depth))
filter_data = matric_data.dot(weights) + bias  # the bias is broadcast over the batch
if not last:
    out_data = np.maximum(0, filter_data)  # ReLU activation
至此,CNN的简单实现已经完成。