卷积核在图像上扫描,与局部窗口的像素求内积后累加,作为新的值,直到遍历图像中的每一个像素。
'''
在2维空间卷积核[k,k]与in_data[h1,w1]相乘,然后逐通道相加得到一个新的特征,卷积核在特征图上滑动得到一个[h2,w2]的特征,依此类推,得到c_out个特征,out_data[h2,w2,c_out]
in_data[h1,w1,c_in]
kernel[c_out,F,F,c_in]
out_data[h2,w2,c_out]
P = (k-1)//2 # P = padding
h2 = (h1+2P-k)//S +1 # S = stride
'''
# Inner product between one 2-D feature plane and a kernel at every window position.
def conv2D(input2Ddata, kern, in_size, out_size, kern_size=3, stride=1):
    """Slide ``kern`` over a 2-D array and return the per-window weighted sums.

    Args:
        input2Ddata: 2-D array to scan. The callers in this file pass a plane
            that has already been zero-padded.
        kern: 2-D kernel multiplied element-wise with each window.
        in_size: ``(h1, w1)``; window start rows/cols are taken from
            ``range(0, h1, stride)`` / ``range(0, w1, stride)``.
        out_size: ``(h2, w2)`` shape of the returned array; also caps the
            number of window positions visited along each axis.
        kern_size: side length of the square window cut from ``input2Ddata``.
        stride: step between consecutive window start positions.

    Returns:
        A ``(h2, w2)`` float array; positions that are never visited stay 0.
    """
    (h1, w1) = in_size
    (h2, w2) = out_size
    output2Ddata = np.zeros([h2, w2])
    # zip() pairs each input start index with its output index and stops at
    # whichever range is exhausted first.
    for i1, i2 in zip(range(0, h1, stride), range(h2)):
        for j1, j2 in zip(range(0, w1, stride), range(w2)):
            window = input2Ddata[i1:i1 + kern_size, j1:j1 + kern_size]  # local patch
            output2Ddata[i2, j2] = np.sum(window * kern)
    return output2Ddata
# Standard convolution demo: build a random input feature map, zero-pad it,
# and accumulate per-channel 2-D convolutions into each output channel.
# d1: input channels, S: stride, F: kernel size, d2: output channels
h1, w1, d1, S, F, d2 = 32, 48, 12, 2, 3, 24
input3Ddata = np.random.randn(h1, w1, d1)
P = (F - 1) // 2  # padding on each side
h2 = (h1 - F + 2 * P) // S + 1  # output feature height
w2 = (w1 - F + 2 * P) // S + 1  # output feature width
padding = np.zeros([h1 + P * 2, w1 + P * 2, d1])
# Explicit end indices: the slice P:-P would select nothing when P == 0.
padding[P:P + h1, P:P + w1, :] = input3Ddata
output3Ddata = np.zeros([h2, w2, d2])
kerns = np.random.randn(d2, F, F, d1)
bias = np.random.randn(d2)
for m in range(d2):  # one pass per output feature
    for k in range(d1):  # sum the 2-D results over the input channels
        input2Ddata = padding[:, :, k]
        kern = kerns[m, :, :, k]
        output3Ddata[:, :, m] += conv2D(
            input2Ddata, kern,
            in_size=(h1, w1), out_size=(h2, w2), kern_size=F, stride=S,
        )
    # Bias is added once per output channel, after all input channels are summed.
    output3Ddata[:, :, m] += bias[m]
把数据转化为矩阵,通过矩阵运算得到卷积
# 1 x[b,h1,w1,d1] --> matrix[b*h2*w2,k*k*d1]
# 2 w[d2,k,k,d1] --> w[k*k*d1,d2]
# 3 out = matrix @ w
# 4 out[b*h2*w2,d2] --> out[b,h2,w2,d2]
##############
# im2col convolution demo: unfold every sliding window into one matrix row,
# then obtain the convolution with a single matrix product.
# 1 x[b,h1,w1,d1] --> matrix[b*h2*w2, F*F*d1]
(b, h1, w1, d1) = (10, 32, 48, 10)
x = np.random.randn(b, h1, w1, d1)
F = 3
S = 1
P = (F - 1) // 2
pad_x = np.zeros([b, h1 + 2 * P, w1 + 2 * P, d1])
# Explicit end indices: the slice P:-P would select nothing when P == 0.
pad_x[:, P:P + h1, P:P + w1, :] = x
h2 = (h1 + 2 * P - F) // S + 1
w2 = (w1 + 2 * P - F) // S + 1
matric = np.zeros([b * h2 * w2, F * F * d1])
for ib in range(b):
    i_batch = ib * h2 * w2  # first row belonging to this sample
    for i_h1, i_h2 in zip(range(0, h1, S), range(h2)):
        i_h = i_batch + i_h2 * w2  # first row belonging to this output row
        for i_w1, i_w2 in zip(range(0, w1, S), range(w2)):
            # Flatten the F x F x d1 window into a single row.
            matric[i_h + i_w2, :] = pad_x[ib, i_h1:i_h1 + F, i_w1:i_w1 + F, :].ravel()
# 2 w[d2,F,F,d1] --> w[F*F*d1, d2] (generated directly in the flattened layout here)
d2 = 30
w = np.random.randn(F * F * d1, d2)
bias = np.zeros([1, d2])
# 3 out = matrix @ w
out = np.dot(matric, w) + bias
out = np.maximum(0, out)  # ReLU activation
# 4 out[b*h2*w2, d2] --> out[b,h2,w2,d2]
# Rows were written in (sample, output row, output col) order, so a C-order
# reshape restores the layout; copy() keeps out_ independent of out.
out_ = out.reshape(b, h2, w2, d2).copy()
in_data[h1,w1,c1]
kernel[c2,F,F,c1]
out_data[h2,w2,c2]
参数量:F*F*c1*c2 + c2
计算量:F*F*c1*c2*h2*w2
卷积有三种模式:full、valid、 same
空洞卷积就是隔像素取值或者在卷积核上补0。假设卷积核的大小为3×3,dilation rate为2,那么相当于5×5的卷积。k_new = k_old + (k_old-1)*(dilation-1);5 = 3+(3-1)*(2-1);7 = 3+(3-1)*(3-1)。由此可见,空洞卷积在不增加参数的条件下增大感受野,同时输出特征图的尺寸不变。尺寸不变,增大感受野,方便检测大物体。
空洞卷积是隔像素取值,会漏掉一部分像素;稀疏采样,导致远距离卷积得到的特征缺乏相关性,影响检测结果;
'''
在2维空间卷积核[k,k]与in_data[h1,w1]相乘,然后逐通道相加得到一个新的特征,卷积核在特征图上滑动得到一个[h2,w2]的特征,依此类推,得到c_out个特征,out_data[h2,w2,c_out]
in_data[h1,w1,c_in]
kernel[c_out,F,F,c_in]
out_data[h2,w2,c_out]
P = (k-1)//2 # P = padding
h2 = (h1+2P-k)//S +1 # S = stride
'''
# Inner product between one 2-D feature plane and a kernel, sampling each window with a dilation step.
def conv2D(input2Ddata, kern, in_size, out_size, kern_size=3, stride=1, dilate=2):
    """Slide a dilated window over a 2-D array and return the weighted sums.

    Args:
        input2Ddata: 2-D array to scan. The caller in this file passes a plane
            that has already been zero-padded.
        kern: 2-D kernel multiplied element-wise with each sampled window.
        in_size: ``(h1, w1)``; window start rows/cols are taken from
            ``range(0, h1, stride)`` / ``range(0, w1, stride)``.
        out_size: ``(h2, w2)`` shape of the returned array; also caps the
            number of window positions visited along each axis.
        kern_size: extent of the window in input pixels, i.e. the dilated
            footprint; every ``dilate``-th pixel inside it is sampled.
        stride: step between consecutive window start positions.
        dilate: sampling step inside the window.

    Returns:
        A ``(h2, w2)`` float array; positions that are never visited stay 0.
    """
    (h1, w1) = in_size
    (h2, w2) = out_size
    output2Ddata = np.zeros([h2, w2])
    # zip() pairs each input start index with its output index and stops at
    # whichever range is exhausted first.
    for i1, i2 in zip(range(0, h1, stride), range(h2)):
        for j1, j2 in zip(range(0, w1, stride), range(w2)):
            # Strided slice picks every ``dilate``-th pixel of the footprint.
            window = input2Ddata[i1:i1 + kern_size:dilate, j1:j1 + kern_size:dilate]
            output2Ddata[i2, j2] = np.sum(window * kern)
    return output2Ddata
# Dilated convolution demo: pad for the dilated footprint, then accumulate
# per-channel dilated 2-D convolutions into each output channel.
# d1: input channels, S: stride, F: kernel size, d2: output channels
h1, w1, d1, S, F, d2 = 32, 32, 12, 2, 3, 24
input3Ddata = np.random.randn(h1, w1, d1)
dilate = 2
F_ = F + (F - 1) * (dilate - 1)  # footprint of the dilated kernel in input pixels
P = (F_ - 1) // 2  # padding on each side, based on the dilated footprint
h2 = (h1 - F_ + 2 * P) // S + 1  # output feature height
w2 = (w1 - F_ + 2 * P) // S + 1  # output feature width
padding = np.zeros([h1 + P * 2, w1 + P * 2, d1])
# Explicit end indices: the slice P:-P would select nothing when P == 0.
padding[P:P + h1, P:P + w1, :] = input3Ddata
output3Ddata = np.zeros([h2, w2, d2])
kerns = np.random.randn(d2, F, F, d1)
bias = np.random.randn(d2)
for m in range(d2):  # one pass per output feature
    for k in range(d1):  # sum the 2-D results over the input channels
        input2Ddata = padding[:, :, k]
        kern = kerns[m, :, :, k]
        # Pass the dilate variable rather than a literal so that changing it
        # above keeps F_, P and the sampling step consistent.
        output3Ddata[:, :, m] += conv2D(
            input2Ddata, kern,
            in_size=(h1, w1), out_size=(h2, w2),
            kern_size=F_, stride=S, dilate=dilate,
        )
    # Bias is added once per output channel, after all input channels are summed.
    output3Ddata[:, :, m] += bias[m]
将卷积分为逐通道卷积和逐点卷积。
逐点卷积是卷积核为1x1的普通卷积;逐通道卷积相当于每个卷积核只在对应的一个输入特征(通道)上滑动做2维卷积,结果直接作为该通道的输出,各通道之间不相加。
input_data[h1,w1,c1]
kernel[1,k,k,c1]
out_data[h2,w2,c1]
input_data[h1,w1,c1]
kernel[c2,k,k,c1]
out_data[h2,w2,c2]
普通卷积所需的参数:kxkxc1xc2
深度可分离卷积所需的参数:kxkxc1 + c1x1x1xc2
(kxkxc1 + c1x1x1xc2)/(kxkxc1xc2) = 1/c2 + 1/(kxk)
# Inner product between one 2-D feature plane and a kernel at every window position.
def conv2D(input2Ddata, kern, in_size, out_size, kern_size=3, stride=1):
    """Slide ``kern`` over a 2-D array and return the per-window weighted sums.

    Args:
        input2Ddata: 2-D array to scan (zero-padded by the depthwise caller,
            unpadded for the 1x1 pointwise caller in this file).
        kern: 2-D kernel multiplied element-wise with each window.
        in_size: ``(h1, w1)``; window start rows/cols are taken from
            ``range(0, h1, stride)`` / ``range(0, w1, stride)``.
        out_size: ``(h2, w2)`` shape of the returned array; also caps the
            number of window positions visited along each axis.
        kern_size: side length of the square window cut from ``input2Ddata``.
        stride: step between consecutive window start positions.

    Returns:
        A ``(h2, w2)`` float array; positions that are never visited stay 0.
    """
    (h1, w1) = in_size
    (h2, w2) = out_size
    output2Ddata = np.zeros([h2, w2])
    # zip() pairs each input start index with its output index and stops at
    # whichever range is exhausted first.
    for i1, i2 in zip(range(0, h1, stride), range(h2)):
        for j1, j2 in zip(range(0, w1, stride), range(w2)):
            window = input2Ddata[i1:i1 + kern_size, j1:j1 + kern_size]  # local patch
            output2Ddata[i2, j2] = np.sum(window * kern)
    return output2Ddata
# Depthwise-separable convolution demo: a depthwise pass (one F x F kernel per
# input channel, no cross-channel sum) followed by a 1x1 pointwise pass.
# d1: input channels, S: stride, F: kernel size
h1, w1, d1, S, F = 32, 48, 12, 2, 3
input3Ddata = np.random.randn(h1, w1, d1)
P = (F - 1) // 2  # padding on each side
h2 = (h1 - F + 2 * P) // S + 1  # output feature height
w2 = (w1 - F + 2 * P) // S + 1  # output feature width
padding = np.zeros([h1 + P * 2, w1 + P * 2, d1])
# Explicit end indices: the slice P:-P would select nothing when P == 0.
padding[P:P + h1, P:P + w1, :] = input3Ddata
output3Ddata = np.zeros([h2, w2, d1])
kerns = np.random.randn(F, F, d1)
bias = np.random.randn(d1)
# Depthwise pass: each channel is convolved with its own kernel only.
for i in range(d1):
    input2Ddata = padding[:, :, i]
    kern = kerns[:, :, i]
    output3Ddata[:, :, i] = conv2D(
        input2Ddata, kern,
        in_size=(h1, w1), out_size=(h2, w2), kern_size=F, stride=S,
    )
    output3Ddata[:, :, i] += bias[i]
# Pointwise pass: an ordinary convolution with a 1x1 kernel over the depthwise output.
input2Ddata1 = output3Ddata
h3, w3, d3 = input2Ddata1.shape  # (16, 24, 12)
F1 = 1
d4 = 30
S1 = 1
kerns1 = np.random.randn(d4, F1, F1, d3)
bias1 = np.random.randn(d4)
output3Ddata1 = np.zeros([h3, w3, d4])
for m in range(d4):  # one pass per pointwise output feature (d4 of them)
    for k in range(d3):  # sum the 2-D results over the depthwise channels
        input2Ddata = input2Ddata1[:, :, k]
        kern = kerns1[m, :, :, k]
        output3Ddata1[:, :, m] += conv2D(
            input2Ddata, kern,
            in_size=(h3, w3), out_size=(h3, w3), kern_size=F1, stride=S1,
        )
    # Bias is added once per output channel, after all input channels are summed.
    output3Ddata1[:, :, m] += bias1[m]
通过反卷积还原到原来的尺寸。 把特征图上通过滑动窗口得到的数据放在矩阵里,然后通过矩阵相乘,得到输出。
## 卷积
# 1 x[b,h1,w1,d1] --> matrix[b*h2*w2,k*k*d1]
# 2 w[d2,k,k,d1] --> w[k*k*d1,d2]
# 3 out = matrix @ w ,out[b*h2*w2,d2] -->out[b,h2,w2,d2]
## 反卷积
# 1 out_new[b,h2,w2,d2]-->out_new[b*h2*w2,d2]
# 2 w_new[d2,k*k*d1] ## w_new的shape是w的shape的转置
# 3 matrix_new = out_new @ w_new
# 4 matrix[b*h2*w2,k*k*d1] --> x[b,h1,w1,d1]
分组卷积和普通卷积的区别是把输入特征数和输出特征数分组后分别卷积。假设输入特征有10个通道,要求输出20个特征。用分组卷积,把输入特征分为两组,每组5个特征。第一组进行卷积后输出10个特征。同理,第二组进行卷积后再输出10个特征,两组一共20个特征。
input_data[h1,w1,c1]
groups = n
kernel = [c2/n,k,k,c1/n]
out_data[h2,w2,c2]
分组卷积所需的参数量:(c2/n)xkxkx(c1/n)xn=c2xkxkxc1/n
把输入特征分组,每组卷积后放在一起。
indata [h1,w1,d1]
outdata [h2,w2,d2]
group = n
indata_i[h1,w1,i*d1/n:(i+1)*d1/n]卷积生成 outdata_i[h2,w2,d2/n] ,i=0,1,2,…,n-1
# Inner product between one 2-D feature plane and a kernel at every window position.
def conv2D(input2Ddata, kern, in_size, out_size, kern_size=3, stride=1):
    """Slide ``kern`` over a 2-D array and return the per-window weighted sums.

    Args:
        input2Ddata: 2-D array to scan. The caller in this file passes a plane
            that has already been zero-padded.
        kern: 2-D kernel multiplied element-wise with each window.
        in_size: ``(h1, w1)``; window start rows/cols are taken from
            ``range(0, h1, stride)`` / ``range(0, w1, stride)``.
        out_size: ``(h2, w2)`` shape of the returned array; also caps the
            number of window positions visited along each axis.
        kern_size: side length of the square window cut from ``input2Ddata``.
        stride: step between consecutive window start positions.

    Returns:
        A ``(h2, w2)`` float array; positions that are never visited stay 0.
    """
    (h1, w1) = in_size
    (h2, w2) = out_size
    output2Ddata = np.zeros([h2, w2])
    # zip() pairs each input start index with its output index and stops at
    # whichever range is exhausted first.
    for i1, i2 in zip(range(0, h1, stride), range(h2)):
        for j1, j2 in zip(range(0, w1, stride), range(w2)):
            window = input2Ddata[i1:i1 + kern_size, j1:j1 + kern_size]  # local patch
            output2Ddata[i2, j2] = np.sum(window * kern)
    return output2Ddata
# Grouped convolution demo: split input and output channels into `groups`
# equal slices and convolve each input slice into its own output slice.
# d1: input channels, S: stride, F: kernel size, d2: output channels
h1, w1, d1, S, F, d2 = 32, 48, 12, 2, 3, 24
input3Ddata = np.random.randn(h1, w1, d1)
P = (F - 1) // 2  # padding on each side
h2 = (h1 - F + 2 * P) // S + 1  # output feature height
w2 = (w1 - F + 2 * P) // S + 1  # output feature width
padding = np.zeros([h1 + P * 2, w1 + P * 2, d1])
# Explicit end indices: the slice P:-P would select nothing when P == 0.
padding[P:P + h1, P:P + w1, :] = input3Ddata
output3Ddata = np.zeros([h2, w2, d2])
groups = 3
# Derive the per-group channel counts instead of hard-coding them, so they
# stay correct when d1, d2 or groups is edited.
if d1 % groups or d2 % groups:
    raise ValueError("d1 and d2 must both be divisible by groups")
g1, g2 = d1 // groups, d2 // groups  # 12/3 = 4 input, 24/3 = 8 output channels per group
for g in range(groups):
    pad = padding[:, :, g * g1:(g + 1) * g1]  # this group's input channels
    # Accumulate into a separate buffer so the write-back below is a real
    # copy rather than a self-assignment of a view.
    output3Ddata_ = np.zeros([h2, w2, g2])
    kerns = np.random.randn(g2, F, F, g1)
    bias = np.random.randn(g2)
    for m in range(g2):  # one pass per output feature of this group
        for k in range(g1):  # sum the 2-D results over this group's input channels
            input2Ddata = pad[:, :, k]
            kern = kerns[m, :, :, k]
            output3Ddata_[:, :, m] += conv2D(
                input2Ddata, kern,
                in_size=(h1, w1), out_size=(h2, w2), kern_size=F, stride=S,
            )
        # Bias is added once per output channel, after the channel sum.
        output3Ddata_[:, :, m] += bias[m]
    output3Ddata[:, :, g * g2:(g + 1) * g2] = output3Ddata_