暂时针对二维卷积
def numpy_conv(inputs, myfilter):
    """Slide a 2-D kernel over a 2-D input and sum the element-wise products.

    No padding is applied and the window moves one element at a time, so the
    result has shape ``(h_ori - h_k + 1, w_ori - w_k + 1)``. The kernel is
    applied as given (it is not flipped).

    Args:
        inputs: 2-D array to be filtered.
        myfilter: 2-D kernel.

    Returns:
        2-D float array holding one windowed sum per output position.
    """
    in_h, in_w = inputs.shape
    k_h, k_w = myfilter.shape
    out = np.zeros((in_h - k_h + 1, in_w - k_w + 1))
    # Visit every output position in row-major order.
    for top, left in np.ndindex(*out.shape):
        # Kernel-sized patch of the input anchored at (top, left).
        window = inputs[top:top + k_h, left:left + k_w]
        # Multiply the patch by the kernel and collapse it to a single value.
        out[top, left] = np.sum(window * myfilter)
    return out
def my_img2col(img, filter_shape, padding='valid', strides=1):
    """Unfold the sliding windows of a 2-D image into the columns of a matrix.

    Each kernel-sized window of the (optionally zero-padded) image is
    flattened and stored as one column, in row-major order of the window
    positions. Multiplying a flattened kernel by this matrix then yields the
    filtered image in a single matrix product.

    Args:
        img: 2-D array; only the first two entries of ``img.shape`` are read.
        filter_shape: Kernel shape; only its first two entries
            (rows, cols) are used.
        padding: ``'valid'`` (no padding) or ``'same'``. ``'same'`` zero-pads
            just enough to cancel the shrinkage caused by the kernel size; it
            does not compensate for ``strides``.
        strides: Positive integer step between consecutive windows, used for
            both axes.

    Returns:
        A tuple ``(im2col, out_shape)`` where ``im2col`` has shape
        ``(k_rows * k_cols, out_rows * out_cols)`` and ``out_shape`` is
        ``np.array([out_rows, out_cols])``.

    Raises:
        ValueError: If ``padding`` is not ``'valid'``/``'same'`` or
            ``strides`` is smaller than 1.
    """
    if strides < 1:
        raise ValueError("strides must be a positive integer, got %r" % (strides,))

    row, col = img.shape[:2]
    tp_row, tp_col = filter_shape[:2]

    if padding == 'valid':
        span_rows, span_cols = row - tp_row + 1, col - tp_col + 1
        img_new = img
    elif padding == 'same':
        # Pad only for the kernel-induced shrinkage, not for the stride.
        span_rows, span_cols = row, col
        pad_top = (tp_row - 1) // 2
        pad_left = (tp_col - 1) // 2
        img_new = np.zeros((row + tp_row - 1, col + tp_col - 1))
        img_new[pad_top:pad_top + row, pad_left:pad_left + col] = img
    else:
        raise ValueError("padding must be 'valid' or 'same', got %r" % (padding,))

    # Derive the output size from the very ranges that are iterated, so the
    # number of allocated columns always matches the number of windows, even
    # when the stride does not divide the span evenly (or the span is <= 0).
    row_starts = range(0, span_rows, strides)
    col_starts = range(0, span_cols, strides)
    out_rows, out_cols = len(row_starts), len(col_starts)

    im2col = np.zeros((tp_row * tp_col, out_rows * out_cols))
    count = 0
    for y in row_starts:
        for x in col_starts:
            im2col[:, count] = img_new[y:y + tp_row, x:x + tp_col].reshape(-1)
            count += 1
    return im2col, np.array([out_rows, out_cols])
def myconv(img, template, padding="valid", strides=1):
    """Filter ``img`` with ``template`` using a single matrix product.

    The image windows are unfolded by ``my_img2col``; the kernel is flattened
    into one row, multiplied with that matrix, and the resulting row vector
    is folded back into the 2-D output shape reported by ``my_img2col``.

    Args:
        img: Image array passed through to ``my_img2col``.
        template: Kernel array; its shape determines the window size.
        padding: Padding mode forwarded to ``my_img2col``.
        strides: Window step forwarded to ``my_img2col``.

    Returns:
        2-D array with the filtered result.
    """
    columns, out_shape = my_img2col(
        img, template.shape, padding=padding, strides=strides
    )
    # One row holding every kernel weight, in the same order as each column.
    kernel_row = template.reshape(1, -1)
    n_rows, n_cols = out_shape[0], out_shape[1]
    return np.matmul(kernel_row, columns).reshape(n_rows, n_cols)
import time
import cv2
# Benchmark: run three filtering implementations on the same random 100x100
# input with a 3x3 all-ones kernel and print each elapsed time in milliseconds.
img = np.random.rand(100, 100)
template = np.ones((3,3))
# Method 1: explicit double loop over output positions (numpy_conv).
start1 = time.perf_counter()
conv_out1 = numpy_conv(img, template)
end1 = time.perf_counter()
time_consume1 = (end1-start1)*1000  # perf_counter() is in seconds -> ms
print('methold 1 %.4f ms'%time_consume1)
# Method 2: im2col unfolding followed by one matrix product (myconv).
start2 = time.perf_counter()
conv_out2= myconv(img, template, padding='valid', strides=1)
end2 = time.perf_counter()
time_consume2 = (end2-start2)*1000
print('methold 2 %.4f ms'%time_consume2)
# Method 3: OpenCV. Note that start2/end2 are reused here, overwriting the
# timestamps taken for method 2 (time_consume2 has already been computed).
# NOTE(review): cv2.filter2D presumably returns an output of the same size as
# img, unlike the 'valid' results above -- confirm before comparing values.
start2 = time.perf_counter()
conv_out3= cv2.filter2D(img,-1, template) # call cv2's built-in filtering
end2 = time.perf_counter()
time_consume3 = (end2-start2)*1000
print('methold 3 %.4f ms'%time_consume3)
结果如下:可见 img2col 比常规的逐点循环卷积快约 3 倍;但相较底层为 C/C++ 实现的 cv2.filter2D,img2col 仍慢了约 115 倍(逐点循环版本则慢约 360 倍)。Python 的循环计算还是很低效。
methold 1 62.4741 ms
methold 2 19.9291 ms
methold 3 0.1726 ms