Faster RCNN fine-tune时的error

1. KeyError: 'max_overlaps'

File "./tools/train_net.py", line 112, in <module>
max_iters=args.max_iters)
File "/usr/local/caffes/xlw/faster-rcnn-third/tools/../lib/fast_rcnn/train.py", line 155, in train_net
roidb = filter_roidb(roidb)
File "/usr/local/caffes/xlw/faster-rcnn-third/tools/../lib/fast_rcnn/train.py", line 145, in filter_roidb
filtered_roidb = [entry for entry in roidb if is_valid(entry)]
File "/usr/local/caffes/xlw/faster-rcnn-third/tools/../lib/fast_rcnn/train.py", line 134, in is_valid
overlaps = entry['max_overlaps']
KeyError: 'max_overlaps'

解决方法:

清理 FRCN_ROOT/data/cache/ 目录下的缓存文件

2. AssertionError assert (boxes[:, 2] >= boxes[:, 0]).all()

File "/py-faster-rcnn/tools/../lib/datasets/imdb.py", line 108, in append_flipped_images
    assert (boxes[:, 2] >= boxes[:, 0]).all()
AssertionError

这个问题的产生是由于自己制作的数据集中,bounding box的位置正好处于图片的边缘,此时坐标值(x, y)中其中一个为0,而faster rcnn会对Xmin, Ymin, Xmax, Ymax进行减一操作。0减一后在无符号整型(uint16)的boxes数组中会溢出成65535,水平翻转之后就会出现Xmax小于Xmin的情况,从而触发上面的断言。

解决方法

  1. 修改lib/datasets/imdb.py中的append_flipped_images()函数。
    boxes[:, 2] = widths[i] - oldx1 - 1这一行下面加入代码:
# Reset x1 to 0 for every box whose x2 ended up smaller than its x1.
for box in boxes:
  if box[0] > box[2]:
    box[0] = 0
  2. 修改lib/datasets/pascal_voc.py中的_load_pascal_annotation()函数。将Xmin, Ymin, Xmax, Ymax的减一操作去掉,变为:
# Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Read the coordinates exactly as annotated: the "- 1" shift to
            # 0-based pixel indexes is intentionally omitted here.
            x1 = float(bbox.find('xmin').text)
            y1 = float(bbox.find('ymin').text)
            x2 = float(bbox.find('xmax').text)
            y2 = float(bbox.find('ymax').text)

3. Waiting for Faster-RCNN_TF/output/faster_rcnn_end2end/voc_2007_trainval/VGGnet_fast_rcnn_iter_70000.ckpt to exist...

解决方法

1. 修改experiments/scripts/faster_rcnn_end2end.sh

#!/bin/bash
# Usage:
# ./experiments/scripts/faster_rcnn_end2end.sh DEVICE DEVICE_ID NET DATASET [options args to {train,test}_net.py]
# DEVICE is forwarded to --device and DEVICE_ID to --device_id.
# DATASET is either pascal_voc or coco.
#
# Example:
# ./experiments/scripts/faster_rcnn_end2end.sh gpu 0 VGG_CNN_M_1024 pascal_voc \
#   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400, 500, 600, 700]"

set -x
set -e

export PYTHONUNBUFFERED="True"

DEV=$1
DEV_ID=$2
NET=$3
DATASET=$4

# Everything after the four positional arguments is passed through to the
# Python tools unchanged.
array=( $@ )
len=${#array[@]}
EXTRA_ARGS=${array[@]:4:$len}
# Spaces are replaced by underscores so the extra args can be part of the log name.
EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}

case $DATASET in
  pascal_voc)
    TRAIN_IMDB="voc_2007_trainval"
    TEST_IMDB="voc_2007_test"
    PT_DIR="pascal_voc"
    ITERS=70000
    ;;
  coco)
    # This is a very long and slow training schedule
    # You can probably use fewer iterations and reduce the
    # time to the LR drop (set in the solver to 350,000 iterations).
    TRAIN_IMDB="coco_2014_train"
    TEST_IMDB="coco_2014_minival"
    PT_DIR="coco"
    ITERS=490000
    ;;
  *)
    echo "No dataset given"
    # Exit with a non-zero status so callers can detect the usage error.
    exit 1
    ;;
esac

LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'%Y-%m-%d_%H-%M-%S'`"
# Mirror all further stdout/stderr into the log file.
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

# The model has already been trained, so the training step is commented out.
#time python ./tools/train_net.py --device ${DEV} --device_id ${DEV_ID} \
#  --weights data/pretrain_model/VGG_imagenet.npy \
#  --imdb ${TRAIN_IMDB} \
#  --iters ${ITERS} \
#  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
#  --network VGGnet_train \
#  ${EXTRA_ARGS}

# NET_FINAL is not used by the test command below.
set +x
NET_FINAL=`grep -B 1 "done solving" "${LOG}" | grep "Wrote snapshot" | awk '{print $4}'`
set -x

# Change the --weights path to the directory where your own model was saved.
# NOTE: this comment must stay outside the command: a "#" line between
# backslash-continued lines ends the command, and the remaining options
# would then be executed as a separate (failing) command.
time python ./tools/test_net.py --device ${DEV} --device_id ${DEV_ID} \
  --weights yourPath/Faster-RCNN_TF/output/faster_rcnn_end2end/voc_2007_trainval \
  --imdb ${TEST_IMDB} \
  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
  --network VGGnet_test \
  ${EXTRA_ARGS}

2. 修改tools/test_net.py

#!/usr/bin/env python

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Test a Fast R-CNN network on an image database."""

import _init_paths
from fast_rcnn.test import test_net
from fast_rcnn.config import cfg, cfg_from_file
from datasets.factory import get_imdb
from networks.factory import get_network
import argparse
import pprint
import time, os, sys
import tensorflow as tf

def parse_args():
    """Parse the command-line arguments of the test script.

    When the script is started without any argument, the usage text is
    printed and the process exits with status 1.

    Returns:
        argparse.Namespace with the attributes ``device``, ``device_id``,
        ``prototxt``, ``model``, ``cfg_file``, ``wait``, ``imdb_name``,
        ``comp_mode`` and ``network_name``.
    """
    def str_to_bool(value):
        """Interpret a command-line string as a boolean flag value."""
        # ``type=bool`` would turn every non-empty string (including
        # 'False' and '0') into True, so the text is checked explicitly.
        # Anything that is not a recognised "false" word stays True, which
        # keeps previously accepted values working.
        return value.strip().lower() not in ('', '0', 'false', 'f', 'no',
                                             'n', 'off')

    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--device', dest='device', help='device to use',
                        default='cpu', type=str)
    parser.add_argument('--device_id', dest='device_id', help='device id to use',
                        default=0, type=int)
    parser.add_argument('--def', dest='prototxt',
                        help='prototxt file defining the network',
                        default=None, type=str)
    parser.add_argument('--weights', dest='model',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--wait', dest='wait',
                        help='wait until net file exists',
                        default=True, type=str_to_bool)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--network', dest='network_name',
                        help='name of the network',
                        default=None, type=str)

    # Only the program name was given: show the help text instead of
    # running with defaults.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()

if __name__ == '__main__':
    # Script entry point: load the configuration, restore the most recent
    # checkpoint and run the test on the selected image database.
    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    print('Using config:')
    pprint.pprint(cfg)

    # Commented out: waiting for a single model file is replaced by the
    # checkpoint-directory loop further below.
    # while not os.path.exists(args.model) and args.wait:
    #     print('Waiting for {} to exist...'.format(args.model))
    #     time.sleep(10)


    imdb = get_imdb(args.imdb_name)
    imdb.competition_mode(args.comp_mode)

    device_name = '/{}:{:d}'.format(args.device,args.device_id)
    # print() with a single argument gives the same output on Python 2 and 3.
    print(device_name)

    network = get_network(args.network_name)
    print('Use network `{:s}` in training'.format(args.network_name))

    if args.device == 'gpu':
        cfg.USE_GPU_NMS = True
        cfg.GPU_ID = args.device_id
    else:
        cfg.USE_GPU_NMS = False

    # start a session
    saver = tf.train.Saver()
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    # Newer TensorFlow versions no longer write a single .ckpt file when
    # saving a model but several checkpoint files, so the restore code works
    # on the checkpoint directory instead of on one file.
    # A directory passed via --weights takes precedence; otherwise the
    # original hard-coded location is used.
    if args.model is not None and os.path.isdir(args.model):
        checkpoint_dir = args.model
    else:
        checkpoint_dir = '/home/nfdw/nfdw/Faster-RCNN_TF/output/faster_rcnn_end2end/voc_2007_trainval'
    while True:
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            break
        print('Waiting for checkpoint in directory {} to exist...'.format(checkpoint_dir))
        time.sleep(10)
    # Restore from the checkpoint path instead of args.model.
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Loading model weights from {:s}'.format(ckpt.model_checkpoint_path))
    # weights_filename has to be derived from the checkpoint path as well.
    weights_filename = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0]

    test_net(sess, network, imdb, weights_filename)

4. display error

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

matplotlib.use() must be called before pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

你可能感兴趣的:(Faster RCNN fine-tune时的error)