The link below collects all of my personal notes on MVSNet (R-MVSNet), multi-view stereo depth inference and reconstruction. If you spot any mistakes, please point them out and I will correct them as soon as possible. Anyone interested is welcome to add me on WeChat (17575010159) to discuss the techniques. If this post helped you, please remember to give it a like; that is the biggest encouragement for me.
3D point cloud reconstruction 0-00: MVSNet (R-MVSNet) - table of contents - the most complete walkthrough: https://blog.csdn.net/weixin_43013761/article/details/102852209
What we will look at next is this line from the inference function in mvsnet/model.py:
ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False)
As you can see, this network takes a single image as input and produces a feature map. One thing to note is that the feature map has the same size (height and width) as the depth map. Stepping into the UNetDS2GN class we can see:
# Note: this class inherits from Network, which can be found in cnn_wrapper/network.py
class UNetDS2GN(Network):
"""2D U-Net with group normalization."""
......
......
The structure itself is simple, nothing more than convolution after convolution, so I will not walk through it layer by layer. But there is one thing worth looking at: class UNetDS2GN(Network) inherits from Network. What is that? We can find its implementation in cnn_wrapper/network.py (if this feels tedious, you can skip to the summary at the end):
#!/usr/bin/env python
"""
Copyright 2019, Zixin Luo & Yao Yao, HKUST.
CNN layer wrapper.
Please note that the center and scale parameters are disabled by default for all BN / GN layers
"""
from __future__ import print_function
import os
import sys
import numpy as np
import tensorflow as tf
from tools.common import Notify
DEFAULT_PADDING = 'SAME'
def layer(op):
"""Decorator for composable network layers."""
def layer_decorated(self, *args, **kwargs):
"""Layer decoration."""
# We allow constructing low-level layers directly instead of high-level networks:
# if self.inputs is None, or a tensor is passed explicitly as the first argument,
# just apply the op and return its raw output.
if self.inputs is None or (len(args) > 0 and isinstance(args[0], tf.Tensor)):
layer_output = op(self, *args, **kwargs)
return layer_output
# Automatically set a name if not provided.
name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
# Figure out the layer inputs.
# self.terminals stores the output(s) of the previously constructed layer(s).
if not self.terminals:
raise RuntimeError('No input variables found for layer %s.' % name)
# If only a single output has been stored so far, terminals[0] holds it
elif len(self.terminals) == 1:
# take the previous layer's output as this layer's input
layer_input = self.terminals[0]
# If multiple outputs have been fed (e.g. for layers with several inputs), pass them all as a list
else:
layer_input = list(self.terminals)
# Perform the operation and get the output: the previous layer's output becomes this layer's input, producing a new output.
layer_output = op(self, layer_input, *args, **kwargs)
# Add to the layer LUT (layer name -> output tensor)
self.layers[name] = layer_output
# This output is now the input for the next layer.
# i.e. it is pushed into self.terminals so the next layer can consume it.
self.feed(layer_output)
# Return self for chained calls.
return self
return layer_decorated
class Network(object):
"""Class NetWork."""
def __init__(self, inputs, is_training,
dropout_rate=0.5, seed=None, epsilon=1e-5, reuse=False, fcn=True, regularize=True,
**kwargs):
# The input nodes for this network.
self.inputs = inputs
# If true, the resulting variables are set as trainable, so gradients will be back-propagated through them.
self.trainable = is_training if isinstance(is_training, bool) else True
# If true, variables are shared between feature towers.
self.reuse = reuse
# If true, layers like batch normalization or dropout work in training mode.
self.training = is_training
# Dropout rate
self.dropout_rate = dropout_rate
# Seed for randomness.
self.seed = seed
# Add an L2 regularizer for the parameters.
self.regularizer = tf.contrib.layers.l2_regularizer(1.0) if regularize else None
# The epsilon parameter for BN / GN layers.
self.bn_epsilon = epsilon
self.extra_args = kwargs
if inputs is not None:
# The current list of terminal nodes
self.terminals = []
# Mapping from layer names to layers
self.layers = dict(inputs)
# If true, dense layers will be omitted in network construction (i.e. the network is fully convolutional).
self.fcn = fcn
self.setup()
def setup(self):
'''Construct the network. '''
# This method is meant to be overridden by subclasses, which plug their own model definition in here.
raise NotImplementedError('Must be implemented by the subclass.')
def load(self, data_path, session, ignore_missing=False, exclude_var=None):
'''Load network weights.
data_path: The path to the numpy-serialized network weights
session: The current TensorFlow session
ignore_missing: If true, serialized weights for missing layers are ignored.
'''
# Load the numpy-serialized weight file
data_dict = np.load(data_path).item()
if exclude_var is not None:
keyword = exclude_var.split(',')
assign_op = []
for op_name in data_dict:
# exclude_var lists keywords for layers whose weights should not be loaded
if exclude_var is not None:
find_keyword = False
for tmp_keyword in keyword:
if op_name.find(tmp_keyword) >= 0:
find_keyword = True
if find_keyword:
continue
# Assign the loaded values to the variables that do need weights
with tf.variable_scope(op_name, reuse=True):
for param_name, data in data_dict[op_name].iteritems():
try:
var = tf.get_variable(param_name)
assign_op.append(var.assign(data))
except ValueError:
if not ignore_missing:
raise
else:
print(Notify.WARNING, ':'.join(
[op_name, param_name]), "is omitted.", Notify.ENDC)
# Run all the collected assign ops
session.run(assign_op)
def feed(self, *args):
'''Set the input(s) for the next operation by replacing the terminal nodes.
The arguments can be either layer names or the actual layers; in effect the inputs are appended to self.terminals.
'''
assert args
self.terminals = []
for fed_layer in args:
if isinstance(fed_layer, str):
try:
fed_layer = self.layers[fed_layer]
except KeyError:
raise KeyError('Unknown layer name fed: %s' % fed_layer)
self.terminals.append(fed_layer)
return self
def get_output(self):
'''Returns the current network output.
That is, the output of the most recently constructed layer.
'''
return self.terminals[-1]
def get_output_by_name(self, layer_name):
'''
Get graph node by layer name
:param layer_name: layer name string
:return: tf node
'''
return self.layers[layer_name]
def get_unique_name(self, prefix):
'''Returns an index-suffixed unique name for the given prefix.
This is used for auto-generating layer names based on the type-prefix.
'''
ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
return '%s_%d' % (prefix, ident)
def change_inputs(self, input_tensors):
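'''Replace the stored input tensor by layer name (only a single input is supported).'''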
assert len(input_tensors) == 1
for key in input_tensors:
self.layers[key] = input_tensors[key]
# The methods below all use the @layer decorator to wrap convolution, pooling, activation and other ops
@layer
# plain convolution layer
def conv(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=True,
dilation_rate=1,
padding=DEFAULT_PADDING,
biased=True,
reuse=False,
separable=False):
"""2D/3D convolution."""
kwargs = {'filters': filters,
'kernel_size': kernel_size,
'strides': strides,
'activation': tf.nn.relu if relu else None,
'use_bias': biased,
'dilation_rate': dilation_rate,
'trainable': self.trainable,
'reuse': self.reuse or reuse,
'bias_regularizer': self.regularizer if biased else None,
'name': name,
'padding': padding}
if separable:
kwargs['depthwise_regularizer'] = self.regularizer
kwargs['pointwise_regularizer'] = self.regularizer
else:
kwargs['kernel_regularizer'] = self.regularizer
if len(input_tensor.get_shape()) == 4:
if not separable:
return tf.layers.conv2d(input_tensor, **kwargs)
else:
return tf.layers.separable_conv2d(input_tensor, **kwargs)
elif len(input_tensor.get_shape()) == 5:
if not separable:
return tf.layers.conv3d(input_tensor, **kwargs)
else:
raise NotImplementedError('No official implementation for separable_conv3d')
else:
raise ValueError('Improper input rank for layer: ' + name)
@layer
# convolution layer followed by group normalization (GN)
def conv_gn(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=True,
center=False,
scale=False,
dilation_rate=1,
channel_wise=True,
group=32,
group_channel=8,
padding=DEFAULT_PADDING,
biased=False,
separable=False):
assert len(input_tensor.get_shape()) == 4
conv = self.conv(input_tensor, kernel_size, filters, strides, name, relu=False,
dilation_rate=dilation_rate, padding=padding,
biased=biased, reuse=self.reuse, separable=separable)
# transpose: [bs, h, w, c] to [bs, c, h, w] following the paper
x = tf.transpose(conv, [0, 3, 1, 2])
shape = tf.shape(x)
N = shape[0]
C = x.get_shape()[1]
H = shape[2]
W = shape[3]
if channel_wise:
G = max(1, C // group_channel)
else:
G = min(group, C)
# normalization
x = tf.reshape(x, [N, G, C // G, H, W])
mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
x = (x - mean) / tf.sqrt(var + self.bn_epsilon)
# per channel scale and bias (gamma and beta)
with tf.variable_scope(name + '/gn', reuse=self.reuse):
if scale:
gamma = tf.get_variable('gamma', [C], dtype=tf.float32, initializer=tf.ones_initializer())
else:
gamma = tf.constant(1.0, shape=[C])
if center:
beta = tf.get_variable('beta', [C], dtype=tf.float32, initializer=tf.zeros_initializer())
else:
beta = tf.constant(0.0, shape=[C])
gamma = tf.reshape(gamma, [1, C, 1, 1])
beta = tf.reshape(beta, [1, C, 1, 1])
output = tf.reshape(x, [-1, C, H, W]) * gamma + beta
# transpose: [bs, c, h, w] to [bs, h, w, c] following the paper
output = tf.transpose(output, [0, 2, 3, 1])
if relu:
output = self.relu(output, name + '/relu')
return output
@layer
# convolution layer followed by batch normalization (BN)
def conv_bn(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=True,
center=False,
scale=False,
dilation_rate=1,
padding=DEFAULT_PADDING,
biased=False,
separable=False,
reuse=False):
conv = self.conv(input_tensor, kernel_size, filters, strides, name, relu=False,
dilation_rate=dilation_rate, padding=padding,
biased=biased, reuse=reuse, separable=separable)
conv_bn = self.batch_normalization(conv, name + '/bn',
center=center, scale=scale, relu=relu, reuse=reuse)
return conv_bn
@layer
# deconvolution (transposed convolution)
def deconv(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=True,
padding=DEFAULT_PADDING,
biased=True,
reuse=False):
"""2D/3D deconvolution."""
kwargs = {'filters': filters,
'kernel_size': kernel_size,
'strides': strides,
'activation': tf.nn.relu if relu else None,
'use_bias': biased,
'trainable': self.trainable,
'reuse': self.reuse or reuse,
'kernel_regularizer': self.regularizer,
'bias_regularizer': self.regularizer if biased else None,
'name': name,
'padding': padding}
if len(input_tensor.get_shape()) == 4:
return tf.layers.conv2d_transpose(input_tensor, **kwargs)
elif len(input_tensor.get_shape()) == 5:
return tf.layers.conv3d_transpose(input_tensor, **kwargs)
else:
raise ValueError('Improper input rank for layer: ' + name)
@layer
# deconvolution followed by batch normalization
def deconv_bn(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=True,
center=False,
scale=False,
padding=DEFAULT_PADDING,
biased=False,
reuse=False):
deconv = self.deconv(input_tensor, kernel_size, filters, strides, name,
relu=False, padding=padding, biased=biased, reuse=reuse)
deconv_bn = self.batch_normalization(deconv, name + '/bn',
center=center, scale=scale, relu=relu, reuse=reuse)
return deconv_bn
@layer
# deconvolution followed by group normalization
def deconv_gn(self,
input_tensor,
kernel_size,
filters,
strides,
name,
relu=False,
center=False,
scale=False,
channel_wise=True,
group=32,
group_channel=8,
padding=DEFAULT_PADDING,
biased=False):
assert len(input_tensor.get_shape()) == 4
# deconvolution
deconv = self.deconv(input_tensor, kernel_size, filters, strides, name,
relu=False, padding=padding, biased=biased, reuse=self.reuse)
# group normalization
x = tf.transpose(deconv, [0, 3, 1, 2])
shape = tf.shape(x)
N = shape[0]
C = x.get_shape()[1]
H = shape[2]
W = shape[3]
if channel_wise:
G = max(1, C // group_channel)
else:
G = min(group, C)
# normalization
x = tf.reshape(x, [N, G, C // G, H, W])
mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
x = (x - mean) / tf.sqrt(var + self.bn_epsilon)
# per channel scale and bias (gamma and beta)
with tf.variable_scope(name + '/gn', reuse=self.reuse):
if scale:
gamma = tf.get_variable('gamma', [C], dtype=tf.float32, initializer=tf.ones_initializer())
else:
gamma = tf.constant(1.0, shape=[C])
if center:
beta = tf.get_variable('beta', [C], dtype=tf.float32, initializer=tf.zeros_initializer())
else:
beta = tf.constant(0.0, shape=[C])
gamma = tf.reshape(gamma, [1, C, 1, 1])
beta = tf.reshape(beta, [1, C, 1, 1])
output = tf.reshape(x, [-1, C, H, W]) * gamma + beta
# transpose: [bs, c, h, w] to [bs, h, w, c] following the paper
output = tf.transpose(output, [0, 2, 3, 1])
if relu:
output = self.relu(output, name + '/relu')
return output
@layer
def relu(self, input_tensor, name=None):
"""ReLu activation."""
return tf.nn.relu(input_tensor, name=name)
@layer
def max_pool(self, input_tensor, pool_size, strides, name, padding=DEFAULT_PADDING):
"""Max pooling."""
return tf.layers.max_pooling2d(input_tensor,
pool_size=pool_size,
strides=strides,
padding=padding,
name=name)
.......
# the remaining methods are more @layer-decorated wrappers of the same kind
As you can see, there is a decorator, def layer(op). What does it do? It stores each layer's output in a lookup table and feeds that output in as the input of the next layer, which conveniently cuts down the amount of code. There is also one method that is left unimplemented:
def setup(self):
'''Construct the network. '''
# This method is meant to be overridden by subclasses, which plug their own model definition in here.
raise NotImplementedError('Must be implemented by the subclass.')
It is meant to be implemented by the inheriting subclass: whatever network structure a subclass wants, it defines it there itself. A minimal sketch of such a subclass is given right below.
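To make the @layer decorator and the setup() override concrete, here is a minimal sketch of a toy subclass (written against TensorFlow 1.x and assuming the MVSNet repository is on the Python path). ToyTower, its layer names, kernel sizes and filter counts are made up for illustration and are not the real UNetDS2GN definition, but the chained style is exactly how the real feature towers are built:

import tensorflow as tf
from cnn_wrapper.network import Network

class ToyTower(Network):
    """Toy feature tower built from the @layer-decorated wrappers (illustrative only)."""
    def setup(self):
        # feed('data') pushes the input tensor into self.terminals; every decorated call
        # below takes self.terminals as its input and pushes its own output back,
        # which is what makes this chained style possible.
        (self.feed('data')
             .conv_gn(3, 8, 1, name='conv0')                # conv + group norm + relu
             .conv_gn(3, 16, 2, name='conv1')               # stride-2 downsampling
             .conv(3, 32, 1, relu=False, name='feature'))   # final feature map

image = tf.placeholder(tf.float32, shape=[1, 128, 160, 3])
tower = ToyTower({'data': image}, is_training=True, reuse=False)
feature_map = tower.get_output()               # output of the last layer ('feature')
conv1_out = tower.get_output_by_name('conv1')  # any intermediate layer, looked up by name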
One final note: feature extraction for both the reference image and the source images uses UNetDS2GN, and the weight parameters are shared across all of these feature towers.
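To see where that weight sharing comes from, here is a simplified sketch of how the feature towers are constructed (paraphrased, not copied, from mvsnet/model.py; the import path, tensor shapes and variable names are illustrative). The first tower is built with reuse=False, which makes tf.layers create the variables; every subsequent tower passes reuse=True, so the same variables are looked up again instead of new ones being created:

import tensorflow as tf
from cnn_wrapper.mvsnet import UNetDS2GN   # module path assumed; adjust to your checkout

view_num = 3
# stand-in for the image stack that inference() receives: [batch, view_num, H, W, 3]
images = tf.placeholder(tf.float32, shape=[1, view_num, 512, 640, 3])

# reference image tower: reuse=False, so the convolution variables are created here
ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False)

view_towers = []
for view in range(1, view_num):
    view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
    # reuse=True: tf.layers reuses the variables created for ref_tower,
    # so every feature tower shares the same weights
    view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True)
    view_towers.append(view_tower)

# in MVSNet the extracted feature map is 1/4 resolution with 32 channels,
# the same spatial size as the predicted depth map
ref_feature = ref_tower.get_output()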