CNNs play a pivotal role in deep learning and are used mainly for feature extraction. The relevant TensorFlow function is tf.nn.conv2d.
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=True, data_format="NHWC", dilations=[1, 1, 1, 1], name=None)
The output has shape [batch, out_height, out_width, out_channels]: batch is determined by the input, out_channels by the filter, and out_height/out_width by all of the parameters together. With padding='VALID' no padding is added, so out_height = ceil((in_height - filter_height + 1) / strides[1]) and out_width = ceil((in_width - filter_width + 1) / strides[2]). With padding='SAME', out_height = ceil(in_height / strides[1]) and out_width = ceil(in_width / strides[2]), and the input is zero-padded as follows:
Rows padded: pad_along_height = max((out_height - 1) * strides[1] + filter_height - in_height, 0)
Columns padded: pad_along_width = max((out_width - 1) * strides[2] + filter_width - in_width, 0)
pad_top = pad_along_height // 2
pad_bottom = pad_along_height - pad_top
pad_left = pad_along_width // 2
pad_right = pad_along_width - pad_left
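As a sanity check, here is a minimal plain-Python sketch of the SAME-padding arithmetic above (the function name is my own):

import math

def same_padding(in_size, filter_size, stride):
    # One spatial dimension of TF's SAME padding.
    out_size = math.ceil(in_size / stride)
    pad_along = max((out_size - 1) * stride + filter_size - in_size, 0)
    pad_beg = pad_along // 2          # pad_top / pad_left
    pad_end = pad_along - pad_beg     # pad_bottom / pad_right
    return out_size, pad_beg, pad_end

print(same_padding(16, 3, 2))  # (8, 0, 1): even input pads asymmetrically
print(same_padding(7, 3, 2))   # (4, 1, 1): odd input pads symmetrically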
Test example:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf

input = tf.Variable(tf.random_normal([1, 16, 64, 3]))
filter = tf.Variable(tf.random_normal([3, 5, 3, 32]))
op = tf.nn.conv2d(input, filter, strides=[1, 2, 2, 1], padding='VALID')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    res = sess.run(op)
    print(res.shape)
# (1, 7, 30, 32)
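The shape checks out: with VALID padding, out_height = ceil((16 - 3 + 1) / 2) = 7, out_width = ceil((64 - 5 + 1) / 2) = 30, and out_channels = 32 comes from the filter.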
TensorFlow also provides atrous (dilated) convolution:
tf.nn.atrous_conv2d(value, filters, rate, padding, name=None)
The rate argument is documented as follows:
A positive int32. The stride with which we sample input values across the height and width dimensions. Equivalently, the rate by which we upsample the filter values by inserting zeros across the height and width dimensions. In the literature, the same parameter is sometimes called input stride or dilation.
The output shape is:
VALID: [batch, height - rate * (filter_height - 1), width - rate * (filter_width - 1), out_channels]
SAME: [batch, height, width, out_channels]
Intuitively, dilation inflates the kernel to an effective size of filter_size + (filter_size - 1) * (rate - 1), so VALID padding shrinks each spatial dimension by rate * (filter_size - 1).
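A quick check of the VALID formula, in the same TF 1.x style as the example above:

import tensorflow as tf

value = tf.ones([1, 16, 16, 3])
filters = tf.ones([3, 3, 3, 8])
out = tf.nn.atrous_conv2d(value, filters, rate=2, padding='VALID')
with tf.Session() as sess:
    print(sess.run(out).shape)  # (1, 12, 12, 8): 16 - 2 * (3 - 1) = 12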
Digging deeper: transposed convolution (often called deconvolution) runs convolution in reverse.
tf.nn.conv2d_transpose(value, filter, output_shape, strides, padding='SAME', data_format='NHWC', name=None)
The output_shape parameter deserves a word of explanation. Transposed convolution is the reverse of convolution, and in the forward convolution the output dimensions are rounded up (ceiling). That rounding means several different input sizes can produce the same output size, so the transposed convolution cannot infer the original size on its own and the user must supply it via output_shape.
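For example (a minimal sketch, with shapes chosen only for illustration): with stride 2 and SAME padding, both a 7x7 and an 8x8 input convolve down to 4x4, so conv2d_transpose accepts either size in output_shape:

import tensorflow as tf

value = tf.ones([1, 4, 4, 8])    # the 4x4 result of some forward conv
filter = tf.ones([3, 3, 3, 8])   # [height, width, out_channels, in_channels]
for h in (7, 8):                 # both are valid forward-conv input sizes
    op = tf.nn.conv2d_transpose(value, filter, output_shape=[1, h, h, 3],
                                strides=[1, 2, 2, 1], padding='SAME')
    with tf.Session() as sess:
        print(sess.run(op).shape)  # (1, 7, 7, 3), then (1, 8, 8, 3)

The script below probes the same rounding behavior from the forward side, comparing the built-in SAME and VALID padding with a manual conv2d_same that pads explicitly and then convolves with VALID: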
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf

tf.enable_eager_execution()
tf.set_random_seed(1234)

def conv2d_same(inputs, kernel, stride, rate=1):
    # Emulate SAME padding explicitly: pad first, then convolve with VALID.
    if stride == 1:
        return tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding='SAME')
    else:
        kernel_size = kernel.shape.as_list()[0]
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        paddings = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
        print(paddings)
        inputs_ = tf.pad(inputs, paddings)
        print(tf.squeeze(inputs_))
        same_conv = tf.nn.conv2d(inputs_, kernel, [1, stride, stride, 1], padding='VALID')
        print("=============conv2d_same=============")
        print(tf.squeeze(same_conv))

def conv2d_fun(inputs, kernel, stride, padding):
    conv = tf.nn.conv2d(inputs, kernel, [1, stride, stride, 1], padding=padding)
    print("============" + padding + "==" + "S(" + str(stride) + ")" + "============")
    print(tf.squeeze(conv))

def test(k_size):
    src = tf.random_uniform((1, k_size, k_size, 1), 0, 5, tf.int32, seed=0)
    src = tf.cast(src, tf.float32)
    print("=============inputs{}==============".format(src.shape.as_list()))
    print(tf.squeeze(src))
    kernel = tf.random_uniform((3, 3, 1, 1), -1, 2, tf.int32, seed=0)
    kernel = tf.cast(kernel, tf.float32)
    print("=============kernel{}==============".format(kernel.shape.as_list()))
    print(tf.squeeze(kernel))
    conv2d_fun(src, kernel, 1, "SAME")
    conv2d_fun(src, kernel, 2, "SAME")
    conv2d_fun(src, kernel, 1, "VALID")
    conv2d_fun(src, kernel, 2, "VALID")
    conv2d_same(src, kernel, 2)

if __name__ == "__main__":
    test(7)
    print("=" * 80)
    test(8)
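Running test(7) and test(8) makes the point: for the odd-sized input, TF's SAME padding is symmetric ([1, 1] per spatial dimension) and conv2d_same reproduces tf.nn.conv2d(..., padding='SAME') exactly; for the even-sized input with stride 2, SAME pads asymmetrically (0 on the top/left, 1 on the bottom/right) while conv2d_same still pads [1, 1], so the two outputs have the same shape but different values. This is the same trick the TF-Slim ResNet utilities use (resnet_utils.conv2d_same) to make strided convolutions insensitive to input size.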