Faster R-CNN source code walkthrough (4): the imdb.py and pascal_voc.py data types (mainly an explanation of the imdb and roidb data structures)

Reposted from: Faster R-CNN source code walkthrough (4): the imdb.py and pascal_voc.py data types (mainly an explanation of the imdb and roidb data structures) - 野孩子的专栏 (blog column) - CSDN.NET

http://blog.csdn.net/u010668907/article/details/51945719

This walkthrough covers the Python implementation of Faster R-CNN: https://github.com/rbgirshick/py-faster-rcnn

imdb.py source: https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py

imdb.py source code:

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
import os.path as osp
import PIL
from utils.cython_bbox import bbox_overlaps
import numpy as np
import scipy.sparse
from fast_rcnn.config import cfg

class imdb(object):
    """Image database."""

    def __init__(self, name):
        self._name = name
        self._num_classes = 0  # number of classes
        self._classes = []  # class definitions
        self._image_index = []  # a list of image names (read from e.g. root/data + /VOCdevkit2007/VOC2007/ImageSets/Main/{image_set}.txt)
        self._obj_proposer = 'selective_search'
        self._roidb = None  # filled by gt_roidb (cfg.TRAIN.PROPOSAL_METHOD=gt triggers this)
        self._roidb_handler = self.default_roidb
        # Use this dict for storing dataset specific config options
        self.config = {}

    @property
    def name(self):
        return self._name

    @property
    def num_classes(self):
        return len(self._classes)

    @property
    def classes(self):
        return self._classes

    @property
    def image_index(self):
        return self._image_index

    @property
    def roidb_handler(self):
        return self._roidb_handler

    @roidb_handler.setter
    def roidb_handler(self, val):
        self._roidb_handler = val

    def set_proposal_method(self, method):
        method = eval('self.' + method + '_roidb')
        self.roidb_handler = method

    @property
    def roidb(self):
        # A roidb is a list of dictionaries, each with the following keys:
        #   boxes
        #   gt_overlaps
        #   gt_classes
        #   flipped
        if self._roidb is not None:
            return self._roidb
        self._roidb = self.roidb_handler()
        return self._roidb

    @property
    def cache_path(self):
        cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def num_images(self):
      return len(self.image_index)

    def image_path_at(self, i):
        raise NotImplementedError

    def default_roidb(self):
        raise NotImplementedError

    def evaluate_detections(self, all_boxes, output_dir=None):
        """
        all_boxes is a list of length number-of-classes.
        Each list element is a list of length number-of-images.
        Each of those list elements is either an empty list []
        or a numpy array of detection.

        all_boxes[class][image] = [] or np.array of shape #dets x 5
        """
        raise NotImplementedError

    def _get_widths(self):
      return [PIL.Image.open(self.image_path_at(i)).size[0]
              for i in xrange(self.num_images)]

    def append_flipped_images(self):
        num_images = self.num_images
        widths = self._get_widths()
        for i in xrange(num_images):
            boxes = self.roidb[i]['boxes'].copy()
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            entry = {'boxes' : boxes,
                     'gt_overlaps' : self.roidb[i]['gt_overlaps'],
                     'gt_classes' : self.roidb[i]['gt_classes'],
                     'flipped' : True}
            self.roidb.append(entry)
        self._image_index = self._image_index * 2

    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                  '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [ [0**2, 1e5**2],    # all
                        [0**2, 32**2],     # small
                        [32**2, 96**2],    # medium
                        [96**2, 1e5**2],   # large
                        [96**2, 128**2],   # 96-128
                        [128**2, 256**2],  # 128-256
                        [256**2, 512**2],  # 256-512
                        [512**2, 1e5**2],  # 512-inf
                      ]
        assert areas.has_key(area), 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in xrange(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            overlaps = bbox_overlaps(boxes.astype(np.float),
                                     gt_boxes.astype(np.float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in xrange(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in xrange(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                            gt_boxes.astype(np.float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

    @staticmethod
    def merge_roidbs(a, b):
        assert len(a) == len(b)
        for i in xrange(len(a)):
            a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
            a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                            b[i]['gt_classes']))
            a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
                                                       b[i]['gt_overlaps']])
            a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
                                           b[i]['seg_areas']))
        return a

    def competition_mode(self, on):
        """Turn competition mode on or off."""
        pass

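Before going through the data-structure notes below, here is a minimal usage sketch of this class, assuming the stock py-faster-rcnn layout (get_imdb from datasets.factory, Python 2 like the rest of the codebase). It shows how set_proposal_method binds roidb_handler, and how the roidb property lazily calls that handler on first access and caches the result:

# Minimal usage sketch (assumes the stock py-faster-rcnn repository layout).
from fast_rcnn.config import cfg
from datasets.factory import get_imdb

imdb = get_imdb('voc_2007_trainval')                 # a pascal_voc instance, which inherits from imdb
imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)  # e.g. 'gt' binds roidb_handler to self.gt_roidb
roidb = imdb.roidb                                   # first access calls roidb_handler() and caches the list
print imdb.name, imdb.num_classes, imdb.num_images, len(roidb)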

Call chain: get_imdb -> factory -> pascal_voc -> (inherits from) imdb

factory

  year = ['2007', '2012']

  split = ['train', 'val', 'trainval', 'test']
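
The dataset names that get_imdb accepts are built from these two lists in factory.py; the registration logic is roughly the following sketch (paraphrased, not a verbatim copy):

# Sketch of the dataset registration in factory.py (paraphrased).
from datasets.pascal_voc import pascal_voc

__sets = {}
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)  # e.g. 'voc_2007_trainval'
        __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

def get_imdb(name):
    """Return an imdb (image database) instance by name."""
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()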

 

imdb

  image_set: the split

  devkit_path: config.DATA_DIR (root/data/) + VOCdevkit + year

  data_path: devkit_path + '/' + 'VOC' + year

  image_index: a list of image names, read from

      e.g. root/data + /VOCdevkit2007/VOC2007/ImageSets/Main/{image_set}.txt

  roidb: produced by gt_roidb (cfg.TRAIN.PROPOSAL_METHOD=gt triggers this)

  classes: the class definitions

  num_classes: the number of classes

  class_to_ind: a {class name: class index} dict


  num_images(): the length of image_index, i.e. the number of images in the dataset

  image_path_at(index): the path of the index-th image, data_path + '/' + 'JPEGImages' + image_index[index] + image_ext (.jpg)
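
A sketch of how pascal_voc fills in these attributes (paraphrased from pascal_voc.py; the 21-class tuple is abbreviated, and pascal_voc_sketch is a hypothetical stand-in name):

# Sketch of the relevant parts of pascal_voc.__init__ / image_path_at (paraphrased).
import os

class pascal_voc_sketch(object):
    def __init__(self, image_set, year, devkit_path):
        self._image_set = image_set                       # 'train' / 'val' / 'trainval' / 'test'
        self._devkit_path = devkit_path                   # e.g. root/data/VOCdevkit2007
        self._data_path = os.path.join(devkit_path, 'VOC' + year)
        self._classes = ('__background__',                # background is always index 0
                         'aeroplane', 'bicycle', 'bird',  # ...the remaining VOC classes...
                         'tvmonitor')
        self._class_to_ind = dict(zip(self._classes, range(len(self._classes))))
        self._image_ext = '.jpg'
        # one image name per line of ImageSets/Main/{image_set}.txt
        image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                      image_set + '.txt')
        with open(image_set_file) as f:
            self._image_index = [x.strip() for x in f.readlines()]

    def image_path_at(self, i):
        # data_path + '/JPEGImages/' + image_index[i] + '.jpg'
        return os.path.join(self._data_path, 'JPEGImages',
                            self._image_index[i] + self._image_ext)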

 

  In train_faster_rcnn_alt_opt.py, once imdb.set_proposal_method has been called, every access to imdb.roidb goes through gt_roidb, which reads part of its information from the XML annotations.

Path of the XML file: data_path + '/' + 'Annotations' + '/' + index + '.xml'

          i.e. (root/data/) + VOCdevkit + year + '/' + 'VOC' + year + '/' + 'Annotations' + '/' + index + '.xml'
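
A sketch of how one such XML file becomes a roidb entry (modeled on pascal_voc._load_pascal_annotation; class_to_ind is the {class name: class index} dict described above, and the function name here is hypothetical):

# Sketch of loading Annotations/<index>.xml into a roidb entry
# (modeled on pascal_voc._load_pascal_annotation; not a verbatim copy).
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse

def load_pascal_annotation_sketch(xml_path, class_to_ind):
    tree = ET.parse(xml_path)
    objs = tree.findall('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs,), dtype=np.int32)
    overlaps = np.zeros((num_objs, len(class_to_ind)), dtype=np.float32)
    seg_areas = np.zeros((num_objs,), dtype=np.float32)

    for ix, obj in enumerate(objs):
        bbox = obj.find('bndbox')
        # VOC coordinates are 1-based; make pixel indexes 0-based
        x1 = float(bbox.find('xmin').text) - 1
        y1 = float(bbox.find('ymin').text) - 1
        x2 = float(bbox.find('xmax').text) - 1
        y2 = float(bbox.find('ymax').text) - 1
        cls = class_to_ind[obj.find('name').text.lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0    # a ground-truth box overlaps its own class with value 1.0
        seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': scipy.sparse.csr_matrix(overlaps),  # stored compressed
            'flipped': False,
            'seg_areas': seg_areas}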

get_training_roidb: optionally flips the RoIs just obtained (see the flipped field of roidb below; this doubles the dataset) and then calls prepare_roidb in roidb.py to compute the remaining roidb fields, as sketched below.
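
A sketch of that flow (paraphrased from get_training_roidb in fast_rcnn/train.py, assuming the stock module paths):

# Sketch of get_training_roidb (paraphrased from fast_rcnn/train.py).
from fast_rcnn.config import cfg
from roi_data_layer import roidb as rdl_roidb

def get_training_roidb_sketch(imdb):
    """Return a roidb for training, optionally augmented with flipped images."""
    if cfg.TRAIN.USE_FLIPPED:
        imdb.append_flipped_images()   # doubles roidb and image_index (see imdb.py above)
    rdl_roidb.prepare_roidb(imdb)      # adds image/width/height/max_classes/max_overlaps
    return imdb.roidb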

 

There is one roidb entry per image, and each entry is a dictionary:

roidb:

  boxes: an array with four columns per box, the top-left and bottom-right corners of the proposal

  gt_overlaps: (number of boxes) x (number of classes), i.e. the per-class overlap of each box. When initialized from the XML annotations, the entry for the box's own class is 1.0; the matrix is stored in compressed sparse form.

  gt_classes: the class index of each box

  flipped: true means the image is horizontally flipped, which changes the first and third columns of boxes (every original image gets this treatment, and imdb.image_index is doubled; cfg.TRAIN.USE_FLIPPED triggers it, see train.py line 116)

  seg_areas: the area of each box

  (the following fields are computed in prepare_roidb in roidb.py; see the sketch after this list)

  image: image_path_at(index), the path of the image this entry belongs to

  width: the width of that image

  height: the height of that image

  max_classes: the class of each box (= label, the column index of the row-wise maximum of gt_overlaps)

  max_overlaps: the row-wise maximum of gt_overlaps (when max_overlaps == 0, max_classes must also be 0, i.e. background; anything else is inconsistent)
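
A sketch of how prepare_roidb derives those last fields from the ones above (paraphrased from roi_data_layer/roidb.py; Python 2, like the rest of the code):

# Sketch of prepare_roidb (paraphrased from roi_data_layer/roidb.py; not verbatim).
import PIL
import numpy as np

def prepare_roidb_sketch(imdb):
    sizes = [PIL.Image.open(imdb.image_path_at(i)).size
             for i in xrange(imdb.num_images)]
    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        roidb[i]['width'] = sizes[i][0]
        roidb[i]['height'] = sizes[i][1]
        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
        roidb[i]['max_overlaps'] = gt_overlaps.max(axis=1)    # best overlap of each box
        roidb[i]['max_classes'] = gt_overlaps.argmax(axis=1)  # class achieving that overlap
        # sanity checks: zero overlap must mean background, non-zero must not
        zero_inds = np.where(roidb[i]['max_overlaps'] == 0)[0]
        assert all(roidb[i]['max_classes'][zero_inds] == 0)
        nonzero_inds = np.where(roidb[i]['max_overlaps'] > 0)[0]
        assert all(roidb[i]['max_classes'][nonzero_inds] != 0)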

output_dir: ROOT_DIR + 'output' + EXP_DIR ('faster_rcnn_alt_opt') + imdb.name ('voc_2007_trainval' or 'voc_2007_test')
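
A sketch of how that path is assembled (modeled on get_output_dir in fast_rcnn/config.py; the real helper may append further components such as the network name):

# Sketch of the output directory construction (modeled on fast_rcnn/config.py).
import os.path as osp
from fast_rcnn.config import cfg

def output_dir_sketch(imdb):
    # e.g. ROOT_DIR/output/faster_rcnn_alt_opt/voc_2007_trainval
    return osp.abspath(osp.join(cfg.ROOT_DIR, 'output', cfg.EXP_DIR, imdb.name))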

