前面我们已经讨论过双线性插值在图像放大中的使用(图像放大及双线性插值算法),并给出了具体的C++实现。本文主要讨论在FCN中如何使用双线性插值来初始化转置卷积(反卷积)的权重。
import numpy as np
import cv2
def _bilinear_axis_taps(src_len, dst_len):
    """Return the two source indices and the blend weight for one axis.

    Destination index ``d`` maps to the source position given by the
    pixel-centre alignment ``src + 0.5 == (dst + 0.5) * scale``.

    Returns:
        (lo, hi, frac): integer arrays ``lo`` and ``hi`` holding the two
        neighbouring source indices, and a float array ``frac`` in [0, 1]
        holding the weight of ``hi`` (``lo`` gets ``1 - frac``).
    """
    scale = float(src_len) / dst_len
    pos = (np.arange(dst_len) + 0.5) * scale - 0.5
    # Clamp into the valid range: without this the first taps floor to -1
    # (which numpy would treat as "last element") and the last taps end up
    # with weights that cancel out to zero.
    pos = np.clip(pos, 0.0, src_len - 1)
    lo = np.floor(pos).astype(np.intp)
    hi = np.minimum(lo + 1, src_len - 1)  # stay inside the image
    return lo, hi, pos - lo


def bilinear_kernel(src, dst_size):
    """Resize an image with bilinear interpolation.

    NOTE: despite the name, this function resizes an image. A later
    definition in this file reuses the name ``bilinear_kernel`` and rebinds
    it once executed.

    Args:
        src: source image as an array of shape (H, W, C) or (H, W).
        dst_size: target size as ``(height, width)``, e.g. ``(512, 640)``.

    Returns:
        A copy of ``src`` when the size is unchanged; otherwise a new
        ``uint8`` array of shape ``(height, width[, C])``. Interpolated
        values are truncated towards zero, not rounded.
    """
    dst_h, dst_w = dst_size
    src_h, src_w = src.shape[0], src.shape[1]

    # Nothing to do when the target size equals the source size.
    if src_h == dst_h and src_w == dst_w:
        return src.copy()

    row_lo, row_hi, row_frac = _bilinear_axis_taps(src_h, dst_h)
    col_lo, col_hi, col_frac = _bilinear_axis_taps(src_w, dst_w)

    # Shape the weights so they broadcast over any trailing channel axis.
    extra_axes = (1,) * (src.ndim - 2)
    row_frac = row_frac.reshape((dst_h, 1) + extra_axes)
    col_frac = col_frac.reshape((1, dst_w) + extra_axes)

    # Work in float so uint8 inputs neither overflow nor truncate early.
    pixels = src.astype(np.float64)
    upper = pixels[row_lo]
    lower = pixels[row_hi]

    # Interpolate along x on the two neighbouring rows, then along y.
    value0 = upper[:, col_lo] * (1.0 - col_frac) + upper[:, col_hi] * col_frac
    value1 = lower[:, col_lo] * (1.0 - col_frac) + lower[:, col_hi] * col_frac
    blended = value0 * (1.0 - row_frac) + value1 * row_frac

    return np.clip(blended, 0, 255).astype(np.uint8)
# Demo: load an image, resize it to 512x512 with the function above and show
# the input and the result side by side until a key is pressed.
img = cv2.imread("1.png")
# cv2.imread reports failure by returning None instead of raising, so check
# here rather than failing later with an unrelated AttributeError.
if img is None:
    raise OSError("failed to read image: 1.png")
output = bilinear_kernel(img, (512, 512))
cv2.imshow("input-img", img)
cv2.imshow("output-img", output)
cv2.waitKey(0)  # 0 = wait indefinitely for a key press
def bilinear_kernel(in_channels, out_channels, kernerl_size):
    """Build bilinear-interpolation weights for a transposed convolution.

    NOTE: this definition reuses the name of the image-resizing function
    defined earlier in this file and replaces it once executed.

    Args:
        in_channels: size of the first weight axis.
        out_channels: size of the second weight axis.
        kernerl_size: side length of the square kernel (the spelling is kept
            because it is part of the existing signature).

    Returns:
        A ``torch.float32`` tensor of shape
        ``(in_channels, out_channels, kernerl_size, kernerl_size)``. It is
        zero everywhere except at the channel pairs selected by
        ``(range(in_channels), range(out_channels))``, which hold the 2-D
        bilinear filter. The two ranges are paired element-wise by numpy, so
        they must have equal length or one of them must have length 1.
    """
    # Imported here because the imports visible at the top of this file only
    # bring in numpy and cv2; without this the final line raises NameError.
    import torch

    factor = (kernerl_size + 1) // 2
    # Peak position of the triangle: an exact pixel for odd sizes, halfway
    # between the two middle pixels for even sizes.
    if kernerl_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5

    og = np.ogrid[:kernerl_size, :kernerl_size]
    # Separable triangle ("tent") filter: product of a column and a row profile.
    bilinear_filter = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)

    weight = np.zeros((in_channels, out_channels, kernerl_size, kernerl_size), dtype=np.float32)
    weight[range(in_channels), range(out_channels), :, :] = bilinear_filter
    return torch.from_numpy(weight)
这段代码来自FCN官方的代码库。
假如你在网络的初始化中定义了转置卷积,其后就可以直接使用bilinear_kernel函数对其权重进行初始化。
# Transposed convolution mapping num_classes channels to num_classes channels;
# the positional arguments 16, 8, 4 are kernel_size, stride and padding, and
# the layer has no bias term.
self.unsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
# Replace the layer's initial weights with the bilinear kernel, built with the
# same channel counts and kernel size (16) as the layer above.
self.unsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)