# Download the code
git clone https://github.com/thuml/Transfer-Learning-Library.git
# For the remaining dependencies, follow the installation steps in the repository linked above
The datasets use the VOC format: I put the source-domain data in the VOC2007 folder and the target-domain data in the VOC2012 folder.
Transfer-Learning-Library/examples/domain_adaptation/object_detection/datasets/
├── VOC2007
│   ├── Annotations
│   ├── ImageSets
│   └── JPEGImages
└── VOC2012
    ├── Annotations
    ├── ImageSets
    └── JPEGImages
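Before touching the code, it can help to sanity-check this layout. A minimal sketch, assuming it is run from the object_detection example directory (the paths are the ones shown above):

import os

# Verify that the VOC-style folders above exist; adjust ROOT if you run it elsewhere.
ROOT = "datasets"
for domain in ("VOC2007", "VOC2012"):
    for sub in ("Annotations", "ImageSets", "JPEGImages"):
        path = os.path.join(ROOT, domain, sub)
        print(path, "OK" if os.path.isdir(path) else "MISSING")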
First, modify the class information in the code:
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/config/faster_rcnn_R_101_C4_voc.yaml'''
NUM_CLASSES: 20 # change this to the number of classes in your own dataset
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/tllib/vision/datasets/object_detection/__init__.py'''
class VOCBase:
    # replace with the class names of your own dataset
    class_names = (
        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
        "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    )
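NUM_CLASSES in the YAML above must equal the number of entries in this tuple; a trivial sanity check (values copied from the defaults shown above):

# Quick consistency check between the config and the dataset class list.
NUM_CLASSES = 20  # the value set in faster_rcnn_R_101_C4_voc.yaml
class_names = (
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
)
assert NUM_CLASSES == len(class_names), f"NUM_CLASSES={NUM_CLASSES}, but {len(class_names)} class names"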
The original code only loads images in .jpg format; adjust it as follows (no modification is needed if all of your images are .jpg):
'''Transfer-Learning-Library/tllib/vision/datasets/object_detection/__init__.py'''
def __init__(self, root, split="trainval", year=2007, ext=('.jpg', '.jpeg', '.bmp', '.png'), download=True):
# def __init__(self, root, split="trainval", year=2007, ext='.jpg', download=True):

# note: ext must be a list or tuple; a bare string like '.jpg' would be iterated character by character
for e in ext:
    if os.path.exists(os.path.join(dirname, "JPEGImages", fileid + e)):
        jpeg_file = os.path.join(dirname, "JPEGImages", fileid + e)
        break
# jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ext)
Step 1: Pre-train a model on the source domain using Faster R-CNN with a ResNet-101 backbone
Training parameters in this step's configuration file that can be modified as needed:
# Transfer-Learning-Library/examples/domain_adaptation/object_detection/config/faster_rcnn_R_101_C4_voc.yaml
SOLVER:
  STEPS: (12000, )        # iterations at which the learning rate is decayed; values must be smaller than MAX_ITER
  MAX_ITER: 16000         # total number of training iterations
  WARMUP_ITERS: 100       # number of learning-rate warmup iterations at the start of training
  CHECKPOINT_PERIOD: 2000 # save a checkpoint every 2000 iterations
  IMS_PER_BATCH: 4        # training batch size
TEST:
  EVAL_PERIOD: 2000       # evaluate the model on the test dataset every 2000 iterations
  VIS_PERIOD: 500         # the period to run visualization. Set to 0 to disable.
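For reference, a sketch of how these solver keys interact under detectron2's standard warmup + multi-step schedule (base_lr, warmup_factor=0.001, and gamma=0.1 are assumed defaults, not values from this config):

# Sketch of detectron2-style warmup followed by step decay at STEPS milestones.
def lr_at(iteration, base_lr=0.001, warmup_iters=100, warmup_factor=0.001,
          steps=(12000,), gamma=0.1):
    if iteration < warmup_iters:
        # linear warmup from base_lr * warmup_factor up to base_lr
        alpha = iteration / warmup_iters
        return base_lr * (warmup_factor * (1 - alpha) + alpha)
    # after warmup, multiply by gamma at every milestone in STEPS that has passed
    return base_lr * gamma ** sum(iteration >= s for s in steps)

print(lr_at(0), lr_at(100), lr_at(12000))  # warmup start, warmup end, after first decay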
Training command:
# see source_only.sh
CUDA_VISIBLE_DEVICES=0 python source_only.py \
--config-file config/faster_rcnn_R_101_C4_voc.yaml \
-s VOC2007 datasets/VOC2007 -t VOC2012 datasets/VOC2012 \
--test VOC2012 datasets/VOC2012 --finetune \
OUTPUT_DIR logs/source_only/faster_rcnn_R_101_C4/voc
Step 2: Domain-adaptation training on the source and target domains
Modify the class information in the domain-adaptation configuration file:
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/d_adapt/config/faster_rcnn_R_101_C4_voc.yaml'''
NUM_CLASSES: 20 # change this to the number of classes in your own dataset
Some of this step's training parameters (the -c / -b suffixes below refer to the category and bbox adaptors respectively):
args.epochs-c          # number of epochs for training the category adaptor
args.epochs-b          # number of epochs for training the bbox adaptor
args.iters_per_epoch-c # number of iterations per epoch when training the category adaptor
args.iters_per_epoch-b # number of iterations per epoch when training the bbox adaptor
args.pretrain_epochs-b # number of epochs for pre-training the bbox adaptor on the source domain
cfg.SOLVER.MAX_ITER    # total number of iterations for training the object detector
Training command:
# see d_adapt.sh
pretrained_models=../logs/source_only/faster_rcnn_R_101_C4/voc/model_final.pth
CUDA_VISIBLE_DEVICES=0 python d_adapt.py \
--config-file config/faster_rcnn_R_101_C4_voc.yaml \
-s VOC2007 ../datasets/VOC2007 \
-t VOC2012 ../datasets/VOC2012 --test VOC2012 ../datasets/VOC2012 \
--finetune --bbox-refine \
OUTPUT_DIR logs/faster_rcnn_R_101_C4/voc/phase1 MODEL.WEIGHTS ${pretrained_models} SEED 0
After training the final model, output the evaluation results on the test set:
CUDA_VISIBLE_DEVICES=0 python source_only.py \
--config-file config/faster_rcnn_R_101_C4_voc.yaml \
-s VOC2007 datasets/VOC2007 -t VOC2012 datasets/VOC2012 \
--test VOC2012 datasets/VOC2012 --eval-only \
MODEL.WEIGHTS d_adapt/logs/faster_rcnn_R_101_C4/voc/phase1/model_final.pth
The --test argument specifies the test dataset. For VOC2007 and VOC2012, the code (Transfer-Learning-Library/tllib/vision/datasets/object_detection/__init__.py) currently provides only three options:
VOC2007Test reads the test.txt split of VOC2007
VOC2007 reads the trainval.txt split of VOC2007
VOC2012 reads the trainval.txt split of VOC2012
Visualize the detection results on the test set:
CUDA_VISIBLE_DEVICES=0 python visualize.py --config-file config/faster_rcnn_R_101_C4_voc.yaml \
--test VOC2012 datasets/VOC2012 --save-path visualizations/d_adapt/voc \
MODEL.WEIGHTS d_adapt/logs/faster_rcnn_R_101_C4/voc/phase1/model_final.pth
The author's original code reports the model's detection performance on the target domain, so by default it assumes the target domain has annotation files. If your target domain has no annotations, you need to modify a few places in the code before training.
For the datasets, the source-domain data still lives in the VOC2007 folder and the target-domain data (without annotation files) in the VOC2012 folder; the VOC2012/Annotations directory is simply left empty.
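Even without annotations, the loader still expects VOC2012/ImageSets/Main/trainval.txt to list the image IDs. A minimal sketch for generating it from the images themselves (the root path and extension set are assumptions matching the layout above):

import os

# Build ImageSets/Main/trainval.txt for the unlabeled target domain by listing
# every image ID found under JPEGImages. Adjust root to your datasets/ location.
root = "datasets/VOC2012"
exts = ('.jpg', '.jpeg', '.bmp', '.png')
os.makedirs(os.path.join(root, "ImageSets", "Main"), exist_ok=True)
ids = sorted(os.path.splitext(f)[0]
             for f in os.listdir(os.path.join(root, "JPEGImages"))
             if f.lower().endswith(exts))
with open(os.path.join(root, "ImageSets", "Main", "trainval.txt"), "w") as f:
    f.write("\n".join(ids) + "\n")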
Step 1: Two changes are needed; everything else is the same as above, and you again obtain a model at logs/source_only/faster_rcnn_R_101_C4/voc/model_final.pth
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/config/faster_rcnn_R_101_C4_voc.yaml'''
TEST:
  EVAL_PERIOD: 0 # set to 0 so the model is never evaluated on the test dataset
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/source_only.py'''
# return utils.validate(model, logger, cfg, args) # comment out this line
Step 2: Five changes are needed; the rest is the same as above, yielding the final model d_adapt/logs/faster_rcnn_R_101_C4/voc/phase1/model_final.pth
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/d_adapt/config/faster_rcnn_R_101_C4_voc.yaml'''
TEST:
  EVAL_PERIOD: 0
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/tllib/vision/datasets/object_detection/__init__.py'''
# make sure the file imports cv2 (the target-domain branch below reads image sizes with OpenCV);
# os, numpy as np, xml.etree.ElementTree as ET, PathManager, and BoxMode are already imported here
def load_voc_instances(dirname: str, split: str, class_names, ext=('.jpg', '.jpeg', '.bmp', '.png'), bbox_zero_based=False):
    with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
        fileids = np.loadtxt(f, dtype=str)  # np.str is removed in recent NumPy; plain str works
    annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
    dicts = []
    skip_classes = set()
    for fileid in fileids:
        anno_file = os.path.join(annotation_dirname, fileid + ".xml")
        for e in ext:
            if os.path.exists(os.path.join(dirname, "JPEGImages", fileid + e)):
                jpeg_file = os.path.join(dirname, "JPEGImages", fileid + e)
                break
        # when the dataset folder is VOC2012, i.e. the target domain, use an empty annotation list
        if dirname.split('/')[-1] == "VOC2012":
            image = cv2.imread(jpeg_file)  # read the image only to obtain its size
            height, width = image.shape[:2]
            r = {
                "file_name": jpeg_file,
                "image_id": fileid,
                "height": height,
                "width": width,
                "annotations": []
            }
        else:
            with PathManager.open(anno_file) as f:
                tree = ET.parse(f)
            r = {
                "file_name": jpeg_file,
                "image_id": fileid,
                "height": int(tree.findall("./size/height")[0].text),
                "width": int(tree.findall("./size/width")[0].text),
            }
            instances = []
            for obj in tree.findall("object"):
                cls = obj.find("name").text
                if cls not in class_names:
                    skip_classes.add(cls)
                    continue
                bbox = obj.find("bndbox")
                bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
                if bbox_zero_based is False:
                    # VOC coordinates are 1-based by default; shift to 0-based
                    bbox[0] -= 1.0
                    bbox[1] -= 1.0
                instances.append(
                    {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
                )
            r["annotations"] = instances
        dicts.append(r)
    print("Skip classes:", list(skip_classes))
    return dicts
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/d_adapt/d_adapt.py'''
# data_loader_validation = category_adaptor.prepare_validation_data(prop_t_fg + prop_t_bg)  # comment out
# category_adaptor.fit(data_loader_source, data_loader_target, data_loader_validation)
category_adaptor.fit(data_loader_source, data_loader_target)
# data_loader_validation = bbox_adaptor.prepare_validation_data(prop_t_fg)  # comment out
# bbox_adaptor.validate_baseline(data_loader_validation)  # comment out
# bbox_adaptor.fit(data_loader_source, data_loader_target, data_loader_validation)
bbox_adaptor.fit(data_loader_source, data_loader_target)
# train_target_dataset = get_detection_dataset_dicts(args.targets, proposals_list=prop_t_fg+prop_t_bg)
train_target_dataset = get_detection_dataset_dicts(args.targets, filter_empty=False, proposals_list=prop_t_fg+prop_t_bg)
# return utils.validate(model, logger, cfg, args)  # comment out
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/d_adapt/category_adaptation.py'''
# print("best_acc1 = {:3.1f}".format(best_acc1))  # comment out
#------------------------------------------------------------------------------------------
'''Transfer-Learning-Library/examples/domain_adaptation/object_detection/d_adapt/bbox_adaptation.py'''
# ious_t = AverageMeter("IoU (t)", ":.4e")  # comment out
# ious_t_adv = AverageMeter("IoU (t, adv)", ":.4e")  # comment out
# progress = ProgressMeter(
#     args.iters_per_epoch,
#     [batch_time, data_time, losses, trans_losses, ious, ious_t, ious_s_adv, ious_t_adv],
#     prefix="Epoch: [{}]".format(epoch))
progress = ProgressMeter(
    args.iters_per_epoch,
    [batch_time, data_time, losses, trans_losses, ious, ious_s_adv],
    prefix="Epoch: [{}]".format(epoch))
# gt_boxes_t = labels_t['gt_boxes'].to(device).float()  # comment out
# ious_t.update(iou_between(pred_boxes_t.cpu(), gt_boxes_t.cpu()).mean().item(), x_s.size(0))  # comment out
# ious_t_adv.update(iou_between(pred_boxes_t_adv.cpu(), gt_boxes_t.cpu()).mean().item(), x_s.size(0))  # comment out
# print("best_iou = {:3.1f}".format(best_iou))  # comment out
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/proposal/.._datasets_VOC2012_trainval_fg.json
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/proposal/.._datasets_VOC2012_trainval_bg.json
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/proposal/.._datasets_VOC2007_trainval_fg.json
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/proposal/.._datasets_VOC2007_trainval_bg.json
'''
The model pre-trained by source_only.py generates foreground/background proposals on the target/source datasets and writes them to the cache.
'''
----------------------------------------------------
dict_keys([0, 1, 2, 3, 4, 5, 6, 7]) # indices of every category in the dataset; the last one denotes bg (background)
----------------------------------------------------
0 5 0
1 0 0
2 53 0
3 0 0
4 0 0
5 0 0
6 0 0
7 799 0
'''
print(c, len(s), int(self.confidence_ratio * len(s)))
c is the category index and s the confidence scores of all proposals for that category; len(s) is the number of proposals the pre-trained model predicted for that category on the target domain, and self.confidence_ratio=0.0
'''
----------------------------------------------------
confidence threshold for each category:
0 0.422
1 1.0
2 0.763
3 1.0
4 1.0
5 1.0
6 1.0
7 1.0
'''
s.sort(reverse=True)
per_category_thresholds[c] = s[int(self.confidence_ratio * len(s))] if len(s) else 1.
print('\t', c, round(per_category_thresholds[c], 3))
The second column is each category's confidence threshold: the highest confidence among the proposals predicted for that category on the target domain. If no proposals were predicted for a category, its confidence threshold is 1.0
'''
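To make the quoted logic concrete, here is a self-contained sketch with made-up confidence scores (the numbers are illustrative, not taken from the run above):

# Standalone demo of the per-category threshold rule quoted above.
# With confidence_ratio = 0.0, the threshold is simply the highest confidence;
# categories with no proposals fall back to 1.0.
confidence_ratio = 0.0
proposals_by_category = {0: [0.12, 0.422, 0.31], 2: [0.5, 0.763], 7: []}  # made-up scores
per_category_thresholds = {}
for c, s in proposals_by_category.items():
    s.sort(reverse=True)
    per_category_thresholds[c] = s[int(confidence_ratio * len(s))] if len(s) else 1.
    print('\t', c, round(per_category_thresholds[c], 3))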
----------------------------------------------------
lr: 0.001 # learning rate for this epoch
----------------------------------------------------
Epoch: [0][0/5] Time 135.6 (135.6) Data 13.8 (13.8) Loss 2.57 (2.57) Loss(t) 0.00 (0.00) Trans Loss 0.82 (0.82) Cls Acc 0.0 (0.0) Domain Acc 50.0 (50.0)
'''
Time: time taken per iteration
Data: data-loading time per iteration
Loss: total loss of the category adaptor, loss = cls_loss + transfer_loss * args.trade_off + cls_loss_t
Loss(t): loss between the classes the object detector predicts for target-domain proposals and the classes the category adaptor predicts, RobustCrossEntropyLoss()
Trans Loss: discriminator loss, Eq. (3) of the paper, ConditionalDomainAdversarialLoss()
Cls Acc: accuracy of the category adaptor's class predictions on source-domain proposals
Domain Acc: accuracy of the domain discriminator
'''
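A minimal sketch of how that total loss is assembled (shapes, values, and trade_off are illustrative stand-ins; the real terms come from RobustCrossEntropyLoss and ConditionalDomainAdversarialLoss in category_adaptation.py):

import torch
import torch.nn.functional as F

# Illustrative composition of the category adaptor's total loss described above.
trade_off = 1.0                                                    # assumed value for args.trade_off
logits_s, labels_s = torch.randn(4, 8), torch.randint(0, 8, (4,))  # source proposals (8 classes incl. bg)
logits_t, labels_t = torch.randn(4, 8), torch.randint(0, 8, (4,))  # target proposals, labels from the detector
cls_loss = F.cross_entropy(logits_s, labels_s)                     # stands in for the source classification loss
cls_loss_t = F.cross_entropy(logits_t, labels_t)                   # stands in for RobustCrossEntropyLoss
transfer_loss = torch.tensor(0.82)                                 # stands in for the domain-adversarial term
loss = cls_loss + transfer_loss * trade_off + cls_loss_t
print(loss.item())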
----------------------------------------------------
Test: [ 0/30] Time 5.396 ( 5.396) Loss 3.7713e-03 (3.7713e-03) Acc@1 100.00 (100.00)
'''
After each epoch: classification loss and accuracy of the category adaptor on target-domain proposals (both foreground and background)
'''
----------------------------------------------------
* Acc@1 83.333 # average accuracy
----------------------------------------------------
global correct: 83.3
mean correct:nan
mean IoU: nan
+---------------+-------+-------------------+
| class | acc | iou |
+---------------+-------+-------------------+
| class 0 | nan | nan |
| class 1 | nan | nan |
| class 2 | nan | nan |
| class 3 | nan | nan |
| class 4 | 0.0 | 0.0 |
| class 5 | nan | nan |
| class 6 | 0.0 | 0.0 |
| bg | 100.0 | 83.33332824707031 |
+---------------+-------+-------------------+
'''
Accuracy and IoU for each class, in table form.
If the category adaptor keeps producing output like the above throughout training, with only the background class (bg) getting acc and IoU values, the run will crash later when the bbox adaptor starts training. Delete everything under d_adapt/logs/faster_rcnn_R_101_C4/voc/phase1 and rerun (consider increasing the number of category-adaptor training epochs); the subsequent steps only run normally once other classes also obtain acc and IoU values.
'''
----------------------------------------------------
best_acc1 = 83.3 # highest accuracy the category adaptor achieved on target-domain proposals
100%|##########| 499/499 [00:35<00:00, 13.87it/s] # the category adaptor generates and saves predicted class labels for target-domain foreground proposals
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/feedback/.._datasets_VOC2012_trainval_fg.json
100%|##########| 800/800 [00:44<00:00, 17.90it/s] # the category adaptor generates and saves predicted class labels for target-domain background proposals
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/feedback/.._datasets_VOC2012_trainval_bg.json
----------------------------------------------------
Calculate baseline IoU:
100%|##########| 2/2 [01:33<00:00, 46.85s/it]
* Baseline IoU 0.000
'''
After each epoch: mean IoU between the boxes the bbox adaptor predicts for target-domain proposals (foreground only) and the ground-truth boxes
'''
----------------------------------------------------
Epoch: [0][0/5] Time 42.0 (42.0) Data 32.7 (32.7) Loss 0.70 (0.70) IoU 6.0772e-01 (6.0772e-01)
'''
Pre-training of the bbox adaptor on the source domain
Time: time taken per iteration
Data: data-loading time per iteration
Loss: bounding-box regression loss of the bbox adaptor on source-domain proposals, smooth_l1_loss()
IoU: mean IoU between the boxes the bbox adaptor predicts for source-domain proposals and the ground-truth boxes
'''
----------------------------------------------------
Epoch: [0][0/5] Time 0.9 (0.9) Data 0.0 (0.0) Loss 0.20 (0.20) Trans Loss -1.16 (-1.16) IoU 8.0306e-01 (8.0306e-01) IoU (t) 0.0000e+00 (0.0000e+00) IoU (s, adv) 8.2482e-01 (8.2482e-01) IoU (t, adv) 0.0000e+00 (0.0000e+00)
'''
Training of the bbox adaptor on both domains
Time: time taken per iteration
Data: data-loading time per iteration
Loss: total loss of the bbox adaptor, loss = reg_loss - transfer_loss * args.trade_off
Trans Loss: loss of the adversarial regressor G^reg_{adv}, Eq. (6) of the paper, RegressionMarginDisparityDiscrepancy()
IoU: mean IoU between the boxes the regressor G^reg predicts for source-domain proposals and the ground-truth boxes
IoU (t): mean IoU between the boxes the regressor G^reg predicts for target-domain proposals and the ground-truth boxes
IoU (s, adv): mean IoU between the boxes the adversarial regressor G^reg_{adv} predicts for source-domain proposals and the ground-truth boxes
IoU (t, adv): mean IoU between the boxes the adversarial regressor G^reg_{adv} predicts for target-domain proposals and the ground-truth boxes
'''
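Likewise, a minimal sketch of the bbox adaptor's minimax-style objective (values are illustrative; the real terms come from smooth_l1_loss and RegressionMarginDisparityDiscrepancy in bbox_adaptation.py):

import torch
import torch.nn.functional as F

# Illustrative composition of the bbox adaptor's total loss described above.
# The minus sign implements the adversarial objective: the regressor G^reg is
# trained against the discrepancy that the adversarial head G^reg_{adv} exposes.
trade_off = 1.0                                    # assumed value for args.trade_off
pred_boxes, gt_boxes = torch.randn(4, 4), torch.randn(4, 4)
reg_loss = F.smooth_l1_loss(pred_boxes, gt_boxes)  # box regression on source proposals
transfer_loss = torch.tensor(-1.16)                # stands in for the margin-disparity term
loss = reg_loss - transfer_loss * trade_off
print(loss.item())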
----------------------------------------------------
best_iou = 0.0 # best IoU on the target domain achieved during bbox-adaptor training
----------------------------------------------------
100%|##########| 139/139 [00:05<00:00, 27.23it/s] # the bbox adaptor generates and saves predicted boxes for target-domain foreground proposals
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/feedback_bbox/.._datasets_VOC2012_trainval_fg.json
100%|##########| 712/712 [00:22<00:00, 31.12it/s] # the bbox adaptor generates and saves predicted boxes for target-domain background proposals
Write to cache: logs/faster_rcnn_R_101_C4/voc/phase1/cache/feedback_bbox/.._datasets_VOC2012_trainval_bg.json
----------------------------------------------------
iter: 19 total_loss_s: 1.469 loss_cls_s: 1.071 loss_box_reg_s: 0.3292 loss_rpn_cls_s: 0.01793 loss_rpn_loc_s: 0.03063 loss_cls_t: 2.185 lr: 1.6141e-05 max_mem: 4074M
'''
total_loss_s: loss_cls_s + loss_box_reg_s + loss_rpn_cls_s + loss_rpn_loc_s
loss_cls_s: classification loss of the object detector on source-domain proposals
loss_box_reg_s: bounding-box regression loss of the object detector on source-domain proposals
loss_cls_t: classification loss of the object detector on the target domain (treating the pseudo labels generated by the adaptors as ground truth)
lr: learning rate
'''
----------------------------------------------------
OrderedDict([('bbox', {'AP': nan, 'AP50': nan, 'AP75': nan, 'class 0': 2.7624309392265194, 'class 1': nan, 'class 2': 0.0, 'class 3': 0.0, 'class 4': 0.641025641025641, 'class 5': nan, 'class 6': 2.2222222222222223})])
+---------------+--------------------+
| class | AP |
+---------------+--------------------+
| AP | nan |
| AP50 | nan |
| AP75 | nan |
| class 0 | 2.7624309392265194 |
| class 1 | nan |
| class 2 | 0.0 |
| class 3 | 0.0 |
| class 4 | 0.641025641025641 |
| class 5 | nan |
| class 6 | 2.2222222222222223 |
+---------------+--------------------+
'''
Detection performance of the object detector on the target domain
'''