paddleDetection框架如何将模型预测结果保存成json文件

之前我用mmdetection的时候,发现它自带的脚本可以把预测结果转为json文件。后来我用的yolov4代码只依赖pytorch框架,于是我根据它的预测代码改写了一份将模型预测结果保存成json文件的代码,具体可以参考我的这篇博客《将yolov4预测结果保存为json文件》。

最近我学了一下paddleDetection框架,里面集成了很多常用的网络模型,用起来很方便,但是我想将模型预测出来的bbox结果保存成json文件,找了一下官方文档,似乎没找到已经写好的脚本,所以我又根据tools/infer.py以及它所引用的代码,改写了一份可以把模型预测结果保存成json文件的代码。

具体思路

具体思路如下:
先从tools/infer.py看起,从run这个函数可以看到调用了ppdet/engine文件夹里面的trainer.py里面Trainer类的predict方法将检测框画在原图上,保存在output文件夹下。有点坑的是,这里的ppdet并不是代码里面的ppdet文件夹,而是安装paddleDetection环境下的ppdet文件夹。所以如果你修改了ppdet文件夹里面的代码运行没有效果记得去找paddle21/lib/python3.8/site-packages/paddledet-2.0.1-py3.8.egg/ppdet/文件夹修改。

from ppdet.engine import Trainer

def run(FLAGS, cfg):
    """Draw detections for the images selected on the command line.

    A test-mode ``Trainer`` is built from ``cfg`` and ``cfg.weights`` is
    loaded into it. The input images are resolved from ``FLAGS.infer_dir``
    and ``FLAGS.infer_img`` and handed to ``Trainer.predict`` together with
    the threshold, output directory and ``save_txt`` switch from ``FLAGS``.
    """
    detector = Trainer(cfg, mode='test')
    detector.load_weights(cfg.weights)

    image_list = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)

    # Collect the options first so the predict call itself stays short.
    predict_options = {
        'draw_threshold': FLAGS.draw_threshold,
        'output_dir': FLAGS.output_dir,
        'save_txt': FLAGS.save_txt,
    }
    detector.predict(image_list, **predict_options)

接着去看predict函数的具体实现:

from ppdet.utils.visualizer import visualize_results, save_result
def predict(self,
            images,
            draw_threshold=0.5,
            output_dir='output',
            save_txt=False):
    """Run inference on `images` and save each image with its detections drawn.

    Args:
        images: image paths, handed to ``self.dataset.set_images``.
        draw_threshold: score threshold forwarded to ``visualize_results``
            and ``save_result``.
        output_dir: directory used to build the name of each saved image.
        save_txt: when true, a ``.txt`` file with the box results is written
            next to every saved image via ``save_result``.
    """
    self.dataset.set_images(images)
    loader = create('TestReader')(self.dataset, 0)

    imid2path = self.dataset.get_imid2path()

    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)
        # Carry the per-image metadata of the batch along with the outputs.
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        for key, value in outs.items():
            outs[key] = value.numpy()

        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']
        # Results of the whole batch are stored flat; bbox_num[i] is the
        # number of entries that belong to the i-th image.
        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            bbox_res = batch_res['bbox'][start:end] \
                if 'bbox' in batch_res else None
            mask_res = batch_res['mask'][start:end] \
                if 'mask' in batch_res else None
            segm_res = batch_res['segm'][start:end] \
                if 'segm' in batch_res else None

            # Pass the id of the current image, not the whole batch array:
            # int(outs['im_id']) fails as soon as a batch holds more than
            # one image.
            image = visualize_results(image, bbox_res, mask_res, segm_res,
                                      int(im_id), catid2name,
                                      draw_threshold)
            self.status['result_image'] = np.array(image.copy())
            if self._compose_callback:
                self._compose_callback.on_step_end(self.status)
            # save image with detection
            save_name = self._get_save_image_name(output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(
                save_name))
            image.save(save_name, quality=95)
            if save_txt:
                save_path = os.path.splitext(save_name)[0] + '.txt'
                save_result(save_path, bbox_res, catid2name, draw_threshold)
            start = end

我们可以看到,如果将save_txt参数设置为True,将会调用ppdet/utils/visualizer.py里面的save_result函数,在output文件夹下为每张图片生成对应的预测信息txt文件。

def save_result(save_path, bbox_res, catid2name, threshold):
    """
    Save box results as a text file, one result per line.

    Args:
        save_path: path of the text file to write (overwritten if present).
        bbox_res: iterable of dicts with the keys ``category_id``, ``bbox``
            and ``score``; ``None`` is treated as "no results" and yields
            an empty file.
        catid2name: mapping from category id to the class name written out.
        threshold: results whose score is below this value are skipped.
    """
    # The caller passes None when the model produced no 'bbox' output;
    # iterating None would raise a TypeError.
    if bbox_res is None:
        bbox_res = []
    with open(save_path, 'w') as f:
        for dt in bbox_res:
            catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
            if score < threshold:
                continue
            # each bbox result as a line
            # for rbox: classname score x1 y1 x2 y2 x3 y3 x4 y4
            # for bbox: classname score x1 y1 w h
            bbox_pred = '{} {} '.format(catid2name[catid], score) + ' '.join(
                [str(e) for e in bbox])
            f.write(bbox_pred + '\n')

由此,我们可以添加一个参数save_json,在trainer.py中参考predict函数写一个save_json函数,并把visualizer.py里的save_result修改一下,写成save_result_json函数。想法是:在infer.py的run函数里,从测试集的标注json文件(test.json,代码中为instances_test2017.json)把所有测试集的图片信息读进来,然后根据file_name每次取一张图片,通过save_json函数和save_result_json函数把预测框信息保存在content_json列表里,最后在run函数中把content_json列表里面的值写进一个json文件里面。

run函数

def run(FLAGS, cfg):
    """Run inference on every image of an annotation file and dump one JSON.

    The image list is read from the ``images`` section of the annotation
    file. Each image is fed to ``Trainer.save_json`` on its own, and all
    returned records are written as a single list to ``result/result.json``.

    Args:
        FLAGS: parsed command-line flags. ``infer_dir``, ``draw_threshold``,
            ``output_dir`` and ``save_json`` are read; ``infer_img`` is
            overwritten with the path of each image in turn.
        cfg: configuration passed to ``Trainer``; ``cfg.weights`` is loaded.
    """
    import os

    # build trainer
    trainer = Trainer(cfg, mode='test')

    # load weights
    trainer.load_weights(cfg.weights)

    # get inference images
    # images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    path = "./dataset/coco/test2017/"
    path_json = "./dataset/coco/annotations/instances_test2017.json"
    save_path = 'result/result.json'

    # Create the output directory before the long inference loop, so a
    # missing directory cannot throw away all results at the very end.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Only the image list is needed; close the file before inference starts.
    with open(path_json, 'r', encoding='utf-8') as f:
        rest = json.load(f)

    content_json = []
    # Manual resume hack (disabled): skip the first images of a previous,
    # interrupted run and append the remaining results to the same file.
    # a = -1
    for i in rest['images']:
        # a += 1
        # if a < 1501:
        #     continue
        file_name = i["file_name"]
        image_id = i["id"]
        FLAGS.infer_img = path + file_name
        image = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
        content_json.extend(trainer.save_json(
            image,
            image_id,
            draw_threshold=FLAGS.draw_threshold,
            output_dir=FLAGS.output_dir,
            save_json=FLAGS.save_json))
        # if a == 1998:   # appending leaves an extra "]" after entry 1998;
        #                 # remove it by hand before submitting
        #     with open(save_path, 'a') as f:
        #         json.dump(content_json, f)
        #     break

    with open(save_path, 'w') as f:
        json.dump(content_json, f)

save_json函数

def save_json(self,
              image,
              image_id,
              draw_threshold=0.5,
              output_dir='result',
              save_json=True):
    """Run inference on `image` and return the box results as a list of dicts.

    Every returned record is tagged with the given `image_id`, regardless
    of which image of a batch it came from, so pass one image per call.

    Args:
        image: image paths, handed to ``self.dataset.set_images``.
        image_id: value stored in the ``image_id`` field of each record.
        draw_threshold: score threshold forwarded to ``save_result_json``.
        output_dir: not used by this method; kept so existing callers that
            pass it keep working.
        save_json: when false, inference still runs but nothing is
            collected and an empty list is returned.

    Returns:
        The concatenated lists returned by ``save_result_json``.
    """
    self.dataset.set_images(image)
    loader = create('TestReader')(self.dataset, 0)
    anno_file = self.dataset.get_anno()
    # Only the class-id -> category-id mapping is needed here.
    clsid2catid, _ = get_categories(self.cfg.metric, anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    content_json = []
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        for key, value in outs.items():
            outs[key] = value.numpy()

        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']
        # Results of the whole batch are stored flat; bbox_num[i] is the
        # number of entries that belong to the i-th image.
        start = 0
        for i in range(len(outs['im_id'])):
            end = start + bbox_num[i]
            # No image is decoded here: nothing is drawn and no callback
            # runs, so loading every picture only cost time and memory.
            # Models without a 'bbox' output contribute no records.
            if save_json and 'bbox' in batch_res:
                content_json.extend(
                    save_result_json(batch_res['bbox'][start:end],
                                     image_id, draw_threshold))
            start = end
    return content_json

save_result_json函数

def save_result_json(bbox_res, image_id, threshold):
    """
    Convert the box results of one image into JSON-ready records.

    Args:
        bbox_res: iterable of dicts with the keys ``category_id``, ``bbox``
            and ``score``; ``None`` is treated as "no results".
        image_id: value stored in the ``image_id`` field of every record.
        threshold: results whose score is below this value are dropped.

    Returns:
        A list of dicts with the keys ``image_id``, ``category_id``,
        ``bbox`` and ``score``; the last three are copied unchanged from
        the input. Nothing is written to disk here.
    """
    # The caller passes None when the model produced no 'bbox' output.
    if bbox_res is None:
        return []
    return [
        {
            "image_id": image_id,
            "category_id": dt['category_id'],
            "bbox": dt['bbox'],
            "score": dt['score'],
        }
        for dt in bbox_res
        if not dt['score'] < threshold
    ]

这样运行python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml -o weights=output/ppyolov2_r50vd_dcn_365e_coco/1000.pdparams就可以生成json结果文件了。

若还要在json文件最后加上类别信息,在content_json列表后面加上即可:

#cat={}
#info = [{"id": 1, "name": "rect_eye", "supercategory": "none"}, {"id": 2, "name": "sphere_eye", "supercategory": "none"}, {"id": 3, "name": "box_eye", "supercategory": "none"}]
#cat["categories"]=info
#content_json.extend(cat)
#content_json.extend(cat["categories"])

但是程序运行一会可能会报内存不够的错误。我的做法是直接打开保存的json文件,看看写到哪里了,然后在run函数里面加个计数器(即代码中被注释掉的变量a),跳过前面已经处理过的图片信息,并把写入方式改成追加模式a,这样再次运行程序就可以接着把后面的图片信息补上(追加写入会在文件中间多出一个],提交前记得手动删除)。内存不够大,又懒得改框架里面具体检测的代码,只能用这种笨方法了。我测1999张图片,跑了三次终于跑完了。如果有更好的实现方式,欢迎交流指正。

你可能感兴趣的:(目标检测,python,深度学习,paddleDetection)