1. Extract deep features with 2-D convolutions. First, a conv2d with filter size 5×5 and stride 2 down-samples the input to (1/2 H, 1/2 W).
imgl0 = F.relu(self.bn0(self.conv0(imgLeft)))    # left features: (B, 32, H/2, W/2)
imgr0 = F.relu(self.bn0(self.conv0(imgRight)))   # right features, same shared weights
self.conv0 = nn.Conv2d(3, 32, 5, 2, 2)           # 5x5 kernel, stride 2, padding 2
self.bn0 = nn.BatchNorm2d(32)
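A quick shape check confirms the halving (a minimal standalone sketch; the 256×512 input size is just an example):

import torch
import torch.nn as nn

conv0 = nn.Conv2d(3, 32, 5, 2, 2)
x = torch.randn(1, 3, 256, 512)   # (B, C, H, W)
print(conv0(x).shape)             # torch.Size([1, 32, 128, 256]) -> (1/2 H, 1/2 W)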
self.res_block = self._make_layer(block, self.in_planes, 32, num_block[0], stride=1)   # num_block[0] = 8 residual blocks
def _make_layer(self, block, in_planes, planes, num_block, stride):
    # only the first block gets the requested stride; the rest use stride 1
    # note: in_planes is not updated between iterations, so callers keep
    # in_planes == planes (or num_block == 1)
    strides = [stride] + [1] * (num_block - 1)
    layers = []
    for step in strides:
        layers.append(block(in_planes, planes, step))
    return nn.Sequential(*layers)
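For example, with num_block = 4 and stride = 2 the stride list becomes [2, 1, 1, 1], so only the first block in the stack down-samples:

strides = [2] + [1] * (4 - 1)
print(strides)   # [2, 1, 1, 1]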
def GcNet(height, width, maxdisp):
    # [8, 1]: eight 2-D residual blocks, one block per 3-D stage
    return GC_NET(BasicBlock, ThreeDConv, [8, 1], height, width, maxdisp)
class BasicBlock(nn.Module):   # basic residual block for the 2-D feature extractor
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()   # identity shortcut

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)           # residual connection
        out = F.relu(out)
        return out
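Because the shortcut is a bare identity, the block only works when in_planes == planes and stride == 1, which is exactly how GcNet uses it (eight 32-channel, stride-1 blocks). A quick sanity check, assuming the usual torch imports:

block = BasicBlock(32, 32)
x = torch.randn(1, 32, 64, 128)
print(block(x).shape)   # torch.Size([1, 32, 64, 128]) -- shape preserved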
self.conv1 = nn.Conv2d(32, 32, 3, 1, 1)   # final 3x3 conv after the residual tower, producing the unary features
def cost_volume(self, imgl, imgr):
    B, C, H, W = imgl.size()
    # (B, 2C, maxdisp, H, W): left and right features concatenated at every candidate disparity
    cost_vol = torch.zeros(B, C * 2, self.maxdisp, H, W).type_as(imgl)
    for i in range(self.maxdisp):
        if i > 0:
            # at disparity i, pair each left pixel x with right pixel x - i
            cost_vol[:, :C, i, :, i:] = imgl[:, :, :, i:]
            cost_vol[:, C:, i, :, i:] = imgr[:, :, :, :-i]
        else:
            cost_vol[:, :C, i, :, :] = imgl
            cost_vol[:, C:, i, :, :] = imgr
    return cost_vol
cost_vol = self.cost_volume(imgl1, imgr1)   # (B, 64, maxdisp, H/2, W/2)
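One way to convince yourself the indexing is right: at disparity d, the right half of the volume at column x should hold the right features from column x - d (a minimal sketch with illustrative sizes):

import torch

B, C, D, H, W = 1, 4, 8, 16, 32
imgl = torch.randn(B, C, H, W)
imgr = torch.randn(B, C, H, W)
vol = torch.zeros(B, 2 * C, D, H, W)
for i in range(D):
    if i > 0:
        vol[:, :C, i, :, i:] = imgl[:, :, :, i:]
        vol[:, C:, i, :, i:] = imgr[:, :, :, :-i]
    else:
        vol[:, :C, i] = imgl
        vol[:, C:, i] = imgr
d, x = 3, 10
assert torch.equal(vol[:, C:, d, :, x], imgr[:, :, :, x - d])   # right features shifted by d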
Concatenating the unary features of the two images preserves them, so the network can learn an absolute representation and combine it with context; the paper reports that this concatenation outperforms distance metrics (L1, L2, cosine).

1. The concatenated cost volume has a feature size of 64; two conv3d layers reduce the feature size to 32.
self.conv3d_1 = nn.Conv3d(64, 32, 3, 1, 1)
self.bn3d_1 = nn.BatchNorm3d(32)
self.conv3d_2 = nn.Conv3d(32, 32, 3, 1, 1)
self.bn3d_2 = nn.BatchNorm3d(32)
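With stride 1 and padding 1 these 3-D convolutions leave D, H, W unchanged and only shrink the channel count (a sketch with illustrative sizes):

x = torch.randn(1, 64, 8, 16, 32)            # a small stand-in for the cost volume
print(nn.Conv3d(64, 32, 3, 1, 1)(x).shape)   # torch.Size([1, 32, 8, 16, 32])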
2. The first sub-sampling layer brings the volume from 1/2 down to 1/4 resolution.
self.block_3d_1 = self._make_layer(block_3d, 64, 64, num_block[1], stride=2)   # stride 2: 1/2 -> 1/4
class ThreeDConv(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(ThreeDConv, self).__init__()
        # three conv3d+BN+ReLU stages; only the first one can down-sample
        self.conv1 = nn.Conv3d(in_planes, planes, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm3d(planes)
        self.conv2 = nn.Conv3d(planes, planes, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = nn.Conv3d(planes, planes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm3d(planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.relu(self.bn3(self.conv3(out)))
        return out
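With stride = 2 the first convolution halves all three volume dimensions at once (a sketch with illustrative sizes):

block3d = ThreeDConv(64, 64, stride=2)
x = torch.randn(1, 64, 8, 16, 32)   # (B, C, D, H, W)
print(block3d(x).shape)             # torch.Size([1, 64, 4, 8, 16])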
self.conv3d_3 = nn.Conv3d(64, 64, 3, 2, 1)
self.bn3d_3 = nn.BatchNorm3d(64)
3. The second and third down-sampling layers follow the same pattern, so we jump straight to the fourth one; each further stride-2 stage halves the volume again (1/8, 1/16, 1/32).
self.block_3d_4 = self._make_layer(block_3d, 64, 128, num_block[1], stride=2)
1. As the paper describes, down-sampling speeds up computation and enlarges the receptive field, but it also loses fine detail. The authors therefore use residual connections that fuse the higher-resolution feature maps with the up-sampled lower-resolution ones. The higher-resolution maps are recovered with transposed convolutions (nn.ConvTranspose3d()); let's look at how this residual structure is formed.
# deconv3d: up-sample the deepest volume and add the skip connection from the previous scale
self.deconv1 = nn.ConvTranspose3d(128, 64, 3, 2, 1, 1)   # kernel 3, stride 2, padding 1, output_padding 1
self.debn1 = nn.BatchNorm3d(64)
deconv3d = F.relu(self.debn1(self.deconv1(conv3d_block_4)) + conv3d_block_3)
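The stride-2 / output_padding-1 combination makes the transposed convolution exactly double D, H, and W, so the addition with the skip tensor lines up (a sketch with illustrative sizes):

deconv = nn.ConvTranspose3d(128, 64, 3, 2, 1, 1)
x = torch.randn(1, 128, 6, 8, 16)
print(deconv(x).shape)   # torch.Size([1, 64, 12, 16, 32]) -- matches conv3d_block_3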
original_size = [1, self.maxdisp * 2, imgLeft.size(2), imgLeft.size(3)]   # self.maxdisp counts disparities at 1/2 resolution
self.deconv5 = nn.ConvTranspose3d(32, 1, 3, 2, 1, 1)   # last up-sampling step: back to full resolution, single channel
deconv3d = self.deconv5(deconv3d)                      # apply it before reshaping
out = deconv3d.view(original_size)                     # squeeze the channel dim: (1, maxdisp*2, H, W)
prob = F.softmax(-out, 1)                              # negate the cost so low cost -> high probability
disp1 = self.regression(prob)                          # soft argmin over the disparity dimension
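The excerpt does not show self.regression; the paper's soft argmin takes the probability-weighted sum of all candidate disparities. A minimal sketch of what such a module might look like (the class name and details are assumptions):

class DisparityRegression(nn.Module):   # hypothetical name; not shown in the original excerpt
    def __init__(self, maxdisp):
        super(DisparityRegression, self).__init__()
        # candidate disparity values 0..maxdisp-1, shaped for broadcasting
        self.register_buffer('disp', torch.arange(maxdisp, dtype=torch.float32).view(1, -1, 1, 1))

    def forward(self, prob):
        # prob: (B, maxdisp, H, W); expected disparity = sum_d d * P(d)
        return torch.sum(prob * self.disp, dim=1)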
criterion = nn.SmoothL1Loss().to(device)   # supervised smooth-L1 loss on the predicted disparity
optimizer = optim.Adam(model.parameters(), lr=0.001)
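Putting the pieces together, a single training step might look like this (a minimal sketch; train_loader and the valid-pixel mask are assumptions, not shown in the original):

for imgL, imgR, disp_gt in train_loader:   # hypothetical DataLoader yielding left/right images and ground truth
    imgL, imgR, disp_gt = imgL.to(device), imgR.to(device), disp_gt.to(device)
    optimizer.zero_grad()
    disp_pred = model(imgL, imgR)
    mask = disp_gt > 0                     # ignore pixels without ground-truth disparity (assumption)
    loss = criterion(disp_pred[mask], disp_gt[mask])
    loss.backward()
    optimizer.step()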