Faster RCNN模型如何自定义损失函数

Faster RCNN模型如何自定义损失函数

  • 1. 代码分析:
    • 1.1 _fasterRCNN类:最基础的模型类
    • 1.2 resnet类:继承_fasterRCNN类
    • 1.3 vgg类:继承_fasterRCNN类
    • 1.4 如果要修改loss,一共需要修改哪些内容:
      • 1.4.1 模型代码:/lib/model/faster_rcnn/faster_rcnn.py,/lib/model/faster_rcnn/resnet.py,/lib/model/faster_rcnn/vgg16.py
      • 1.4.2 训练代码:trainval_net.py
      • 1.4.3 测试代码:test_net.py
      • 1.4.4 运行参数代码:/lib/model/utils/parser_func.py
  • 2. 修改模型代码:(以添加confidence loss为例)
    • 2.1 修改_fasterRCNN类:
    • 2.2 修改resnet类:
    • 2.3 修改vgg16类:
  • 3. 修改训练代码:
  • 4. 修改测试代码:
  • 5. 修改运行参数代码:

1. 代码分析:

1.1 _fasterRCNN类:最基础的模型类

/lib/model/faster_rcnn/faster_rcnn.py

class _fasterRCNN(nn.Module):

1.2 resnet类:继承_fasterRCNN类

/lib/model/faster_rcnn/resnet.py

class resnet(_fasterRCNN):

1.3 vgg类:继承_fasterRCNN类

与1.2同理

1.4 如果要修改loss,一共需要修改哪些内容:

1.4.1 模型代码:/lib/model/faster_rcnn/faster_rcnn.py,/lib/model/faster_rcnn/resnet.py,/lib/model/faster_rcnn/vgg16.py

1.4.2 训练代码:trainval_net.py

1.4.3 测试代码:test_net.py

1.4.4 运行参数代码:/lib/model/utils/parser_func.py

2. 修改模型代码:(以添加confidence loss为例)

2.1 修改_fasterRCNN类:

修改/lib/model/faster_rcnn/faster_rcnn.py

class _fasterRCNN(nn.Module):
	# Tutorial excerpt: "......" and "..." stand for code elided from the original file.
	# Old __init__:
	# def __init__(self, classes, class_agnostic):
	# New __init__ (gains the `conf` argument):
	def __init__(self, classes, class_agnostic, conf):
		......
		# Old loss initialisation:
	    # self.RCNN_loss_cls = 0
	    # self.RCNN_loss_bbox = 0
	    # New loss initialisation; add state for your own loss here, e.g.:
	    self.RCNN_loss_cls = 0
	    self.RCNN_loss_bbox = 0
	    # Flag that switches the confidence branch on in forward().
	    self.conf = conf
		......
	# Old forward:
	# def forward(self, im_data, im_info, gt_boxes, num_boxes):
	# New forward (extra keyword arguments, all with defaults):
	def forward(
        self, 
        im_data,
        im_info,
        gt_boxes,
        num_boxes,
        target=False,
        test=False,
        eta=1.0,
        hints=False,
    ):
    	...
		# Old "compute object classification probability":
        # cls_score = self.RCNN_cls_score(pooled_feat)
        # cls_prob = F.softmax(cls_score, 1)
        # New "compute object classification probability":
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)
        if self.conf:
            # Detach so gradients from the confidence branch do not reach pooled_feat.
            pooled_feat_conf = pooled_feat.detach()
            # Confidence: sigmoid over the output of the netD_confidence head.
            confidence = F.sigmoid(self.netD_confidence(pooled_feat_conf))
            # Make sure we don't have any numerical instability
            eps = 1e-12
            # NOTE(review): pred_original is not used anywhere in the visible excerpt.
            pred_original = torch.clamp(cls_prob, 0.0 + eps, 1.0 - eps)
            confidence = torch.clamp(confidence, 0.0 + eps, 1.0 - eps)
            # Element-wise negative log of the clamped confidence.
            confidence_loss = -torch.log(confidence)
		...
		# Old return value:
		# return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
		# New return value: a 12-tuple in both branches. Positions 9-10 are None
		# placeholders; the last two carry confidence_loss and confidence when
		# self.conf is truthy and are None otherwise.
		if self.conf:
            return (
                rois,
                cls_prob,
                bbox_pred,
                rpn_loss_cls,
                rpn_loss_bbox,
                RCNN_loss_cls,
                RCNN_loss_bbox,
                rois_label,
                None,
                None,
                confidence_loss,
                confidence,
            )
        else:
            return (
                rois,
                cls_prob,
                bbox_pred,
                rpn_loss_cls,
                rpn_loss_bbox,
                RCNN_loss_cls,
                RCNN_loss_bbox,
                rois_label,
                None,
                None,
                None,
                None,
            )

2.2 修改resnet类:

/lib/model/faster_rcnn/resnet.py

# 整个netD_confidence都是新加的,是用来计算confidence loss的层结构:
class netD_confidence(nn.Module):
    """Small MLP head that maps a feature vector to a single confidence logit.

    Layout: Linear(feat_d, 128) -> BatchNorm1d(128) -> dropout -> Linear(128, 1).
    The output is a raw logit; no sigmoid is applied inside this module.
    """

    def __init__(self, feat_d):
        """Create the layers.

        Args:
            feat_d: Size of the last dimension of the input features.
        """
        super(netD_confidence, self).__init__()
        self.fc1 = nn.Linear(feat_d, 128)
        self.bn = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return one confidence logit per input row.

        Args:
            x: Feature tensor whose last dimension is ``feat_d``; expected to
                be 2-D ``(batch, feat_d)`` so that ``BatchNorm1d(128)`` sees
                128 channels.

        Returns:
            Tensor with a trailing dimension of size 1.
        """
        hidden = self.bn(self.fc1(x))
        # The functional dropout defaults to training=True, which would keep
        # dropping units after model.eval(). Tie it to the module's mode so
        # evaluation is deterministic.
        hidden = F.dropout(hidden, training=self.training)
        return self.fc2(hidden)

class resnet(_fasterRCNN):
	# Old __init__:
	#def __init__(self, classes, num_layers=101, pretrained=False, class_agnostic=False):
	
	# New __init__: one extra keyword argument, conf=None
	def __init__( self, classes, num_layers=101, pretrained=False, class_agnostic=False, conf=None):
		self.model_path = 'data/pretrained_model/resnet101_caffe.pth'
    	self.dout_base_model = 1024
    	self.pretrained = pretrained
    	self.class_agnostic = class_agnostic
    	# One extra line: remember the conf flag on the instance.
    	self.conf = conf
    	...
		# Old _fasterRCNN.__init__ call:
		# _fasterRCNN.__init__(self, classes, class_agnostic)
		# New _fasterRCNN.__init__ call: also forwards conf
		_fasterRCNN.__init__(self, classes, class_agnostic, conf)
		...
	def _init_modules(self):
		...
		# Old "build resnet":
		# self.RCNN_base = nn.Sequential(resnet.conv1, resnet.bn1,resnet.relu,
      	# resnet.maxpool,resnet.layer1,resnet.layer2,resnet.layer3)
    	# self.RCNN_top = nn.Sequential(resnet.layer4)
    	# self.RCNN_cls_score = nn.Linear(2048, self.n_classes)
    	# if self.class_agnostic:
      	# 	self.RCNN_bbox_pred = nn.Linear(2048, 4)
    	# else:
      	#	self.RCNN_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

		# New "build resnet":
		# NOTE(review): `resnet` below is presumably a local backbone instance
		# created in the elided code above, not this class - confirm.
		self.RCNN_base = nn.Sequential(resnet.conv1, resnet.bn1,resnet.relu,
      	resnet.maxpool,resnet.layer1,resnet.layer2,resnet.layer3)
    	self.RCNN_top = nn.Sequential(resnet.layer4)
        # The feature width is named once and shared by every head below.
        feat_d = 2048
        self.RCNN_cls_score = nn.Linear(feat_d, self.n_classes)
        # The confidence head is created unconditionally, whatever self.conf is.
        self.netD_confidence = netD_confidence(feat_d)
        if self.class_agnostic:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4)
        else:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4 * self.n_classes)
		...
		

2.3 修改vgg16类:

同2.2

3. 修改训练代码:

trainval_net.py

...
if __name__ == '__main__':
	...
	# Old "initialize the network":
	# if args.net == 'vgg16':
    # 	fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
 	#elif args.net == 'res101':
    #	fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic)
  	#elif args.net == 'res50':
    #	fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
  	#elif args.net == 'res152':
    #	fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
  	#else:
    #	print("network is not defined")
    #	pdb.set_trace()
    
    # New "initialize the network": every constructor also receives conf=args.conf
	if args.net == 'vgg16':
		fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic, conf=args.conf)
	elif args.net == 'res101':
		fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic, conf=args.conf)
	elif args.net == 'res50':
		fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic, conf=args.conf)
	elif args.net == 'res152':
		fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic, conf=args.conf)
	else:
		print("network is not defined")
		pdb.set_trace()
	...
	# Added line: conf_gamma, the weight of the confidence loss in the total loss.
	conf_gamma = args.conf_gamma
	...
	fasterRCNN.zero_grad()
    (
		rois,
		cls_prob,
		bbox_pred,
		rpn_loss_cls,
		rpn_loss_box,
		RCNN_loss_cls,
		RCNN_loss_bbox,
		rois_label,
        out_d_pixel, # newly added
		out_d, # newly added
		confidence_loss, # newly added
		_, # newly added (last returned value is discarded)
	) = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, hints=True)
    loss = rpn_loss_cls.mean() + rpn_loss_box.mean() + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
	# Confidence loss: reduce to a scalar when the branch is enabled.
	# NOTE(review): the visible excerpt never adds conf_loss (or conf_gamma) to
	# `loss`; presumably something like `loss += conf_gamma * conf_loss` lives in
	# the elided code below - confirm, otherwise the new loss is never optimised.
	if args.conf:
		conf_loss = confidence_loss.mean()
	...
	if args.mGPUs:
		...
	else:
		loss_rpn_cls = rpn_loss_cls.item()
		loss_rpn_box = rpn_loss_box.item()
		loss_rcnn_cls = RCNN_loss_cls.item()
		loss_rcnn_box = RCNN_loss_bbox.item()
		fg_cnt = torch.sum(rois_label.data.ne(0))
		bg_cnt = rois_label.data.numel() - fg_cnt
		# Also extract the confidence loss as a Python number.
		if args.conf:
			loss_conf = conf_loss.item()
			

4. 修改测试代码:

test_net.py

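(原文此处没有给出代码。)测试代码的改动与训练代码同理:构建网络时同样传入 conf=args.conf;调用 fasterRCNN(...) 时,要按新的 12 个返回值来解包(最后四项依次为 None、None、confidence_loss、confidence),否则会因返回值个数不匹配而报错。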

5. 修改运行参数代码:

/lib/model/utils/parser_func.py

def parse_args():
	...
	# Added options: conf_gamma (via --ga) and conf (via --conf).
	# --ga: float weight for the confidence loss, 0.1 when not given.
	parser.add_argument(
        "--ga",
        dest="conf_gamma",
        help="the weight for confidence loss.",
        default=0.1,
        type=float,
    )
    # NOTE(review): with action="store_false", args.conf is True by default and
    # passing --conf sets it to False, i.e. the flag turns the confidence branch
    # off. If the flag is meant to enable the branch, this should be
    # "store_true" - confirm the intended default.
    parser.add_argument(
        "--conf",
        dest="conf",
        help="whether use the confidence branch to help teaching process",
        action="store_false",
    )
	...

你可能感兴趣的:(论文复现,深度学习,深度学习,目标检测,计算机视觉)