When PointNet actually builds its network, it treats the $N \times 3$ point cloud as an image, i.e. height = $N$ and width = $3$. The author wraps the various layers used to build the network in his own helpers, collected in `tf_util.py`. This post analyzes the source of the most commonly used ones: `conv2d`, `fully_connected` (fc), and `max_pool2d`.
```python
def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True):
  """Helper to create an initialized Variable with weight decay.

  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    use_xavier: bool, whether to use xavier initializer

  Returns:
    Variable Tensor
  """
  if use_xavier:
    initializer = tf.contrib.layers.xavier_initializer()
  else:
    initializer = tf.truncated_normal_initializer(stddev=stddev)
  var = _variable_on_cpu(name, shape, initializer)
  if wd is not None:
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var
```
This helper is used to initialize parameter variables: Xavier initialization by default, otherwise a truncated normal. When `wd` is given, it also attaches an L2 weight-decay term for the variable to the `'losses'` collection.
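To see where those weight-decay terms end up, here is a minimal TF 1.x sketch (my own illustration, not code from the repo) of how the `'losses'` collection is typically summed into the training objective:

```python
import tensorflow as tf

logits = tf.placeholder(tf.float32, [None, 40])  # hypothetical class scores
labels = tf.placeholder(tf.int32, [None])

# The task loss goes into the same 'losses' collection that
# _variable_with_weight_decay populates with its L2 terms.
task_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
tf.add_to_collection('losses', task_loss)

# Summing the collection yields the regularized total loss.
total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
```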
```python
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=0.0,
           activation_fn=tf.nn.relu,
           bn=False,
           bn_decay=None,
           is_training=None):
  """ 2D convolution with non-linear operation.

  Args:
    inputs: 4-D tensor variable BxHxWxC
    num_output_channels: int
    kernel_size: a list of 2 ints
    scope: string
    stride: a list of 2 ints
    padding: 'SAME' or 'VALID'
    use_xavier: bool, use xavier_initializer if true
    stddev: float, stddev for truncated_normal init
    weight_decay: float
    activation_fn: function
    bn: bool, whether to use batch norm
    bn_decay: float or float tensor variable in [0,1]
    is_training: bool Tensor variable

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope) as sc:
    kernel_h, kernel_w = kernel_size
    num_in_channels = inputs.get_shape()[-1].value
    kernel_shape = [kernel_h, kernel_w,
                    num_in_channels, num_output_channels]
    kernel = _variable_with_weight_decay('weights',
                                         shape=kernel_shape,
                                         use_xavier=use_xavier,
                                         stddev=stddev,
                                         wd=weight_decay)
    stride_h, stride_w = stride
    outputs = tf.nn.conv2d(inputs, kernel,
                           [1, stride_h, stride_w, 1],
                           padding=padding)
    biases = _variable_on_cpu('biases', [num_output_channels],
                              tf.constant_initializer(0.0))
    outputs = tf.nn.bias_add(outputs, biases)
    if bn:
      outputs = batch_norm_for_conv2d(outputs, is_training,
                                      bn_decay=bn_decay, scope='bn')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return outputs
```
This function expects a 4-D tensor as input, while the data we process is the $B \times N \times 3$ point cloud `point_cloud`. The actual code therefore expands its dimensions with `input_image = tf.expand_dims(point_cloud, -1)`, turning the point cloud into a BHWC-format image. `input_image` then has shape $B \times N \times 3 \times 1$ and can be fed straight into TensorFlow's built-in network ops, which is exactly what the code does. Let's read it together with the calling code:
```python
batch_size = point_cloud.get_shape()[0].value
num_point = point_cloud.get_shape()[1].value
end_points = {}
input_image = tf.expand_dims(point_cloud, -1)
# Point functions (MLP implemented as conv2d)
net = tf_util.conv2d(input_image, 64, [1,3],
                     padding='VALID', stride=[1,1],
                     bn=True, is_training=is_training,
                     scope='conv1', bn_decay=bn_decay)
```
`batch_size` is the number of point clouds in the batch and `num_point` is the number of points in each cloud; `input_image` is the dimension-expanded point cloud, so each cloud in the batch can be viewed as an $N \times 3$ image. `input_image` is passed into `tf_util.conv2d` as a 4-D tensor with `num_output_channels=64`, `kernel_size=[1,3]`, `padding='VALID'`, `stride=[1,1]`, `bn=True`, `is_training=is_training`, `scope='conv1'`, and `bn_decay=bn_decay`.
Back inside `tf_util.conv2d`, the `with tf.variable_scope(scope) as sc:` block ensures its internal namespace does not conflict with other layers.
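As a quick illustration (my own snippet, not from the repo), the scope prefixes every variable created inside the layer, so two conv layers can each own a `weights` variable without clashing:

```python
import tensorflow as tf

with tf.variable_scope('conv1'):
  w1 = tf.get_variable('weights', [1, 3, 1, 64])
with tf.variable_scope('conv2'):
  w2 = tf.get_variable('weights', [1, 1, 64, 64])
print(w1.name, w2.name)  # prints: conv1/weights:0 conv2/weights:0 (no collision)
```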
```python
kernel_h, kernel_w = kernel_size
num_in_channels = inputs.get_shape()[-1].value
kernel_shape = [kernel_h, kernel_w,
                num_in_channels, num_output_channels]
kernel = _variable_with_weight_decay('weights',
                                     shape=kernel_shape,
                                     use_xavier=use_xavier,
                                     stddev=stddev,
                                     wd=weight_decay)
stride_h, stride_w = stride
```
The code above sets up the dimensions needed to pass the image-like point cloud into `tf.nn.conv2d`. In this call, `kernel_h=1` and `kernel_w=3`, i.e. the kernel size is $1 \times 3$; `num_in_channels=1`, so `kernel_shape=[1,3,1,64]`, and the `kernel` parameters are initialized through `_variable_with_weight_decay`. `stride_h=1` and `stride_w=1` mean the kernel slides one cell at a time. `tf_util.conv2d` then passes the input `inputs` into `tf.nn.conv2d` for the convolution:
```python
outputs = tf.nn.conv2d(inputs, kernel,
                       [1, stride_h, stride_w, 1],
                       padding=padding)
biases = _variable_on_cpu('biases', [num_output_channels],
                          tf.constant_initializer(0.0))
outputs = tf.nn.bias_add(outputs, biases)
```
That is, each $N \times 3$ slice of the $B \times N \times 3 \times 1$ `inputs` is convolved with a $[1, 3]$ kernel and no padding, and `biases` is added to the result. With `'VALID'` padding the output height is $(N - 1)/1 + 1 = N$ and the output width is $(3 - 3)/1 + 1 = 1$, so the final `outputs` has shape $B \times N \times 1 \times 64$.
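A quick standalone shape check (my own sketch, not repo code) confirms this: a `[1,3]` `'VALID'` kernel collapses each point's $(x, y, z)$ row into one value per output channel, i.e. a 64-dim feature per point.

```python
import tensorflow as tf

B, N = 32, 1024
point_cloud = tf.zeros([B, N, 3])              # stand-in point cloud batch
input_image = tf.expand_dims(point_cloud, -1)  # B x N x 3 x 1
kernel = tf.zeros([1, 3, 1, 64])               # kernel_shape = [1, 3, 1, 64]
out = tf.nn.conv2d(input_image, kernel, [1, 1, 1, 1], padding='VALID')
print(out.shape)                               # (32, 1024, 1, 64)
```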
```python
def max_pool2d(inputs,
               kernel_size,
               scope,
               stride=[2, 2],
               padding='VALID'):
  """ 2D max pooling.

  Args:
    inputs: 4-D tensor BxHxWxC
    kernel_size: a list of 2 ints
    stride: a list of 2 ints

  Returns:
    Variable tensor
  """
  with tf.variable_scope(scope) as sc:
    kernel_h, kernel_w = kernel_size
    stride_h, stride_w = stride
    outputs = tf.nn.max_pool(inputs,
                             ksize=[1, kernel_h, kernel_w, 1],
                             strides=[1, stride_h, stride_w, 1],
                             padding=padding,
                             name=sc.name)
    return outputs
```
The `max_pool2d` layer is not substantially different from TensorFlow's built-in max pooling; it merely simplifies setting the `ksize` and `strides` arguments of `tf.nn.max_pool` and adds a `scope` argument to set its namespace.
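This thin wrapper is what implements PointNet's symmetric function. The classification model calls it roughly as follows (a sketch based on the repo's classification network; treat the exact arguments as an assumption): pooling with a `[num_point, 1]` window collapses the per-point features into one global feature per cloud, which is what makes the network invariant to point ordering.

```python
# net: B x N x 1 x 1024 per-point features from the preceding conv layers
net = tf_util.max_pool2d(net, [num_point, 1],
                         padding='VALID', scope='maxpool')  # B x 1 x 1 x 1024
net = tf.reshape(net, [batch_size, -1])                     # B x 1024 global feature
```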
```python
def fully_connected(inputs,
                    num_outputs,
                    scope,
                    use_xavier=True,
                    stddev=1e-3,
                    weight_decay=0.0,
                    activation_fn=tf.nn.relu,
                    bn=False,
                    bn_decay=None,
                    is_training=None):
  """ Fully connected layer with non-linear operation.

  Args:
    inputs: 2-D tensor BxN
    num_outputs: int

  Returns:
    Variable tensor of size B x num_outputs.
  """
  with tf.variable_scope(scope) as sc:
    num_input_units = inputs.get_shape()[-1].value
    weights = _variable_with_weight_decay('weights',
                                          shape=[num_input_units, num_outputs],
                                          use_xavier=use_xavier,
                                          stddev=stddev,
                                          wd=weight_decay)
    outputs = tf.matmul(inputs, weights)
    biases = _variable_on_cpu('biases', [num_outputs],
                              tf.constant_initializer(0.0))
    outputs = tf.nn.bias_add(outputs, biases)
    if bn:
      outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return outputs
```
`fully_connected` builds a fully connected layer. The overall idea is to use `_variable_with_weight_decay` to create the $w$ in $w x + b$ and then add the bias `biases`; finally, batch norm, activation, and similar features are again provided through its parameters.
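For reference, the classification head uses it roughly like this (a sketch following the paper's 512-256-40 MLP; the exact code in the repo may differ slightly):

```python
# net: B x 1024 global feature after max pooling and reshape
net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                              scope='fc1', bn_decay=bn_decay)
net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                              scope='fc2', bn_decay=bn_decay)
net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')  # class logits
```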
Overall, the author simply adds another layer of wrapping over the tf.nn-level API; there is nothing special about it, and it could largely be replaced with the tf.layers-level API.
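For comparison, a rough tf.layers-level equivalent of the `conv1` call above might look like the following (my own sketch, not a drop-in replacement; note that tf.layers treats batch norm as a separate layer driven by a `training` flag, and in TF 1.x its update ops have to be run alongside the train op):

```python
net = tf.layers.conv2d(input_image, filters=64, kernel_size=[1, 3],
                       strides=[1, 1], padding='valid',
                       activation=None, name='conv1')
net = tf.layers.batch_normalization(net, momentum=0.9, training=is_training)
net = tf.nn.relu(net)
```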