特斯拉自动驾驶
深度估计，就是获取图像场景中每个点到相机的距离信息；由这些距离信息组成的图称为深度图（英文：Depth map）。
代码实现:
class deepFeatureExtractor_ResNext101(nn.Module):
    """ResNeXt-101 (32x8d) backbone that exposes intermediate feature maps.

    ``forward`` runs the torchvision encoder child by child and collects the
    output of every child whose name contains one of the entries of
    ``self.layerList``.
    """

    def __init__(self, args, lv6=False):
        """Create the encoder, trim unused heads and freeze early parameters.

        Args:
            args: Stored unchanged on ``self.args``; not used otherwise here.
            lv6: Only the literal ``True`` keeps ``layer4`` and adds it to the
                tapped layers. Any other value deletes ``layer4`` and ``fc``
                from the encoder.
        """
        super().__init__()
        self.args = args
        # Spatial size of each tapped output relative to the input H x W:
        #   relu   -> H/2  x W/2
        #   layer1 -> H/4  x W/4
        #   layer2 -> H/8  x W/8
        #   layer3 -> H/16 x W/16
        self.encoder = models.resnext101_32x8d(pretrained=True)
        self.fixList = ['layer1.0', 'layer1.1', '.bn']
        self.lv6 = lv6

        tapped_layers = ['relu', 'layer1', 'layer2', 'layer3']
        tapped_widths = [64, 256, 512, 1024]
        if lv6 is True:
            tapped_layers.append('layer4')
            tapped_widths.append(2048)
        else:
            # The deepest stage and the classifier head are not needed.
            del self.encoder.layer4
            del self.encoder.fc
        self.layerList = tapped_layers
        self.dimList = tapped_widths

        # Freeze the stem convolution and every parameter whose name contains
        # one of the fixList fragments.
        for param_name, param in self.encoder.named_parameters():
            is_stem = param_name == 'conv1.weight'
            is_fixed = any(tag in param_name for tag in self.fixList)
            if is_stem or is_fixed:
                param.requires_grad = False

    def forward(self, x):
        """Return the list of tapped feature maps for input ``x``.

        Children of the encoder are applied in registration order; iteration
        stops as soon as the child named ``'avgpool'`` is reached (it is not
        applied).
        """
        collected = []
        current = x
        for child_name, child in self.encoder._modules.items():
            if child_name == 'avgpool':
                break
            current = child(current)
            if any(tag in child_name for tag in self.layerList):
                collected.append(current)
        return collected
代码实现:
def forward(self, x):
    """Encode ``x``, build its Laplacian pyramid and run the decoder.

    The input is bilinearly downsampled by 0.5 five times. For each of the
    five finer levels, the next-coarser level is resized back to that level's
    spatial size and subtracted, giving one residual per level.

    Args:
        x: Input batch. Its spatial size is read from ``x.shape[2:]``, so it
            is assumed to be laid out as (N, C, H, W) -- TODO confirm with
            the caller.

    Returns:
        The ``(d_res_list, depth)`` pair produced by
        ``self.decoder(encoder_features, rgb_list)``, where ``rgb_list`` is
        ``[coarsest_image, lap5, lap4, lap3, lap2, lap1]`` (coarse to fine).
    """
    out_featList = self.encoder(x)

    # pyramid[0] is the input; pyramid[k] is the input downsampled k times.
    # align_corners=False is what bilinear mode uses when the argument is
    # omitted; passing it explicitly keeps results identical.
    pyramid = [x]
    for _ in range(5):
        pyramid.append(
            F.interpolate(pyramid[-1], scale_factor=0.5, mode='bilinear',
                          align_corners=False)
        )

    # Residual at each level: the level minus its coarser neighbour resized to
    # the level's exact spatial size (so odd sizes line up).
    residuals = [
        fine - F.interpolate(coarse, fine.shape[2:], mode='bilinear',
                             align_corners=False)
        for fine, coarse in zip(pyramid[:-1], pyramid[1:])
    ]

    # Decoder input order is coarse to fine: 1/32 image, then lap5 ... lap1.
    rgb_list = [pyramid[-1]] + residuals[::-1]
    d_res_list, depth = self.decoder(out_featList, rgb_list)
    return d_res_list, depth
代码实现:
def forward(self, x):
    """Convolve ``x`` with a per-filter standardised copy of ``self.weight``.

    Each output filter is shifted to zero mean (mean taken over dims 1, 2, 3)
    and divided by its sample standard deviation plus 1e-5 before being used
    in ``F.conv2d``. ``self.weight`` itself is not modified.
    """
    kernel = self.weight

    # Mean over dims 1, 2 and 3 in turn, keeping dims for broadcasting.
    filter_mean = kernel
    for axis in (1, 2, 3):
        filter_mean = filter_mean.mean(dim=axis, keepdim=True)
    centred = kernel - filter_mean

    # One standard deviation per output filter; the epsilon avoids division
    # by zero for constant filters.
    filter_std = centred.flatten(start_dim=1).std(dim=1).reshape(-1, 1, 1, 1) + 1e-5
    standardised = centred / filter_std

    return F.conv2d(
        x,
        standardised,
        self.bias,
        self.stride,
        self.padding,
        self.dilation,
        self.groups,
    )
代码实现:
class Dilated_bottleNeck(nn.Module):
    """Densely connected stack of dilated-convolution branches.

    The input is reduced by ``reduction1``; four branches with dilation
    3, 6, 12 and 18 are then applied in sequence, each one receiving the
    channel-wise concatenation of the reduced input and all earlier branch
    outputs. ``reduction2`` fuses the reduced input with the four branch
    outputs.
    """

    def __init__(self, norm, act, in_feat):
        """Build the reduction layers and the four dilated branches.

        Args:
            norm: Forwarded unchanged to every ``myConv`` as ``norm``.
            act: Forwarded unchanged to every ``myConv`` as ``act``.
            in_feat: Channel count of the incoming feature map (the original
                notes give 1024 for ResNext101 and ResNet101).
        """
        super().__init__()
        half = in_feat // 2
        quarter = in_feat // 4

        def aspp_branch(in_ch, dilation):
            # 1x1 conv down to `quarter` channels followed by a 3x3 dilated
            # conv; padding equals the dilation, as in the original layers.
            return nn.Sequential(
                myConv(in_ch, quarter, kSize=1, stride=1, padding=0,
                       dilation=1, bias=False, norm=norm, act=act,
                       num_groups=in_ch // 16),
                myConv(quarter, quarter, kSize=3, stride=1, padding=dilation,
                       dilation=dilation, bias=False, norm=norm, act=act,
                       num_groups=quarter // 16),
            )

        self.reduction1 = conv_ws(in_feat, half, kernel_size=1, stride=1,
                                  bias=False, padding=0)
        # Branch input widths grow as earlier outputs are concatenated.
        self.aspp_d3 = aspp_branch(half, 3)
        self.aspp_d6 = aspp_branch(half + quarter, 6)
        self.aspp_d12 = aspp_branch(in_feat, 12)
        self.aspp_d18 = aspp_branch(in_feat + quarter, 18)

        fused = quarter * 4 + half
        self.reduction2 = myConv(fused, half, kSize=3, stride=1, padding=1,
                                 bias=False, norm=norm, act=act,
                                 num_groups=fused // 16)

    def forward(self, x):
        """Apply the reduction, the four chained branches and the fusion.

        Returns:
            Output of ``reduction2``. The original note gives its size as
            512 x H/16 x W/16, which presumably corresponds to
            ``in_feat == 1024`` -- confirm against the caller.
        """
        x = self.reduction1(x)
        d3 = self.aspp_d3(x)
        cat1 = torch.cat([x, d3], dim=1)
        d6 = self.aspp_d6(cat1)
        cat2 = torch.cat([cat1, d6], dim=1)
        d12 = self.aspp_d12(cat2)
        cat3 = torch.cat([cat2, d12], dim=1)
        d18 = self.aspp_d18(cat3)
        # cat3 already holds [x, d3, d6, d12] in order, so appending d18 gives
        # the same tensor as concatenating all five pieces from scratch.
        out = self.reduction2(torch.cat([cat3, d18], dim=1))
        return out
五、coarse-to-fine特征拼接
实现代码:
# NOTE(review): this is the tail of a decoder forward pass. The enclosing
# `def` and the definitions of dense_feat, cat1..cat3 and rgb_lv1..rgb_lv4
# are outside this excerpt.
# decoder 1 - Pyramid level 5
# Coarsest prediction: decoder1 output squashed into (0, 1) by a sigmoid.
lap_lv5 = torch.sigmoid(self.decoder1(dense_feat))#R5
print(lap_lv5.shape)
# Upscaled by a factor of 2 (bilinear) for use at the next finer level.
lap_lv5_up = self.upscale(lap_lv5, scale_factor = 2, mode='bilinear')
print(lap_lv5_up.shape)
# decoder 2 - Pyramid level 4
# Transform dense_feat, then fuse it with the skip tensor cat3 along channels.
dec2 = self.decoder2_up1(dense_feat)
print(dec2.shape)
dec2 = self.decoder2_reduc1(torch.cat([dec2,cat3],dim=1))#252
print(dec2.shape)
# Condition on the upscaled level-5 prediction and the level-4 RGB input.
# dec2_up is kept because the level-3 stage below starts from it.
dec2_up = self.decoder2_1(torch.cat([dec2,lap_lv5_up,rgb_lv4],dim=1))
print(dec2_up.shape)
dec2 = self.decoder2_2(dec2_up)
print(dec2.shape)
dec2 = self.decoder2_3(dec2)
print(dec2.shape)
# Level-4 residual: tanh of the decoder output plus 0.1 times the
# channel-mean of rgb_lv4.
lap_lv4 = torch.tanh(self.decoder2_4(dec2) + (0.1*rgb_lv4.mean(dim=1,keepdim=True)))
print(lap_lv4.shape)
# if depth range is (0,1), laplacian of image range is (-1,1)
lap_lv4_up = self.upscale(lap_lv4, scale_factor = 2, mode='bilinear')
print(lap_lv4_up.shape)
# Pyramid level 3 (decoder2_1_* modules) - same pattern, with skip tensor cat2
dec3 = self.decoder2_1_up2(dec2_up)
dec3 = self.decoder2_1_reduc2(torch.cat([dec3,cat2],dim=1))
dec3_up = self.decoder2_1_1(torch.cat([dec3,lap_lv4_up,rgb_lv3],dim=1))
dec3 = self.decoder2_1_2(dec3_up)
lap_lv3 = torch.tanh(self.decoder2_1_3(dec3) + (0.1*rgb_lv3.mean(dim=1,keepdim=True)))
# if depth range is (0,1), laplacian of image range is (-1,1)
lap_lv3_up = self.upscale(lap_lv3, scale_factor = 2, mode='bilinear')
# Pyramid level 2 (decoder2_1_1_* modules) - same pattern, with skip tensor cat1
dec4 = self.decoder2_1_1_up3(dec3_up)
dec4 = self.decoder2_1_1_reduc3(torch.cat([dec4,cat1],dim=1))
dec4_up = self.decoder2_1_1_1(torch.cat([dec4,lap_lv3_up,rgb_lv2],dim=1))
lap_lv2 = torch.tanh(self.decoder2_1_1_2(dec4_up) + (0.1*rgb_lv2.mean(dim=1,keepdim=True)))
# if depth range is (0,1), laplacian of image range is (-1,1)
lap_lv2_up = self.upscale(lap_lv2, scale_factor = 2, mode='bilinear')
# Pyramid level 1 (decoder2_1_1_1_* modules) - no skip tensor is concatenated here
dec5 = self.decoder2_1_1_1_up4(dec4_up)
dec5 = self.decoder2_1_1_1_1(torch.cat([dec5,lap_lv2_up,rgb_lv1],dim=1))
dec5 = self.decoder2_1_1_1_2(dec5)
lap_lv1 = torch.tanh(self.decoder2_1_1_1_3(dec5) + (0.1*rgb_lv1.mean(dim=1,keepdim=True)))
# if depth range is (0,1), laplacian of image range is (-1,1)
# Laplacian restoration
# Coarse to fine: each level's residual is added to the 2x-upscaled running
# sum from the coarser level.
lap_lv4_img = lap_lv4 + lap_lv5_up
lap_lv3_img = lap_lv3 + self.upscale(lap_lv4_img, scale_factor = 2, mode = 'bilinear')
lap_lv2_img = lap_lv2 + self.upscale(lap_lv3_img, scale_factor = 2, mode = 'bilinear')
final_depth = lap_lv1 + self.upscale(lap_lv2_img, scale_factor = 2, mode = 'bilinear')
# The sigmoid maps the summed result into (0, 1) before scaling by max_depth.
final_depth = torch.sigmoid(final_depth)
print(final_depth.shape)
# Returns (list of the five per-level outputs, final depth), every tensor
# multiplied by self.max_depth.
return [(lap_lv5)*self.max_depth, (lap_lv4)*self.max_depth, (lap_lv3)*self.max_depth, (lap_lv2)*self.max_depth, (lap_lv1)*self.max_depth], final_depth*self.max_depth
# fit laplacian image range (-80,80), depth image range(0,80)
# NOTE(review): the 80 above presumably assumes self.max_depth == 80 - confirm.
def scale_invariant_loss(valid_out, valid_gt, variance_focus=0.85, scale=10.0):
    """Return the scale-invariant log loss between prediction and target.

    With ``d = log(valid_out) - log(valid_gt)`` the result is
    ``scale * sqrt(mean(d ** 2) - variance_focus * mean(d) ** 2)``.

    Args:
        valid_out: Tensor of predicted values. The logarithm is taken, so
            entries must be strictly positive for a finite result.
        valid_gt: Tensor of target values, combined element-wise with
            ``valid_out``; must also be strictly positive.
        variance_focus: Weight of the squared-mean term. Defaults to 0.85,
            the value that was previously hard-coded.
        scale: Multiplier applied to the square root. Defaults to 10.0, the
            value that was previously hard-coded.

    Returns:
        A zero-dimensional tensor holding the loss.
    """
    log_diff = torch.log(valid_out) - torch.log(valid_gt)
    mean_of_squares = (log_diff ** 2).mean()
    square_of_mean = log_diff.mean() ** 2
    return torch.sqrt(mean_of_squares - variance_focus * square_of_mean) * scale