When I used mmdetection before, I noticed that its built-in scripts can convert prediction results into a json file. The yolov4 code I used earlier relies only on the PyTorch framework, and based on its prediction code I modified a version that saves the model predictions as a json file; see my earlier post on saving yolov4 prediction results as a json file.
Recently I have been learning the PaddleDetection framework. It integrates many commonly used network models and is very convenient, but I wanted to save the predicted bbox results to a json file. After searching the official documentation I could not find a ready-made script for this, so I rewrote one based on tools/infer.py and the code it references that saves the model predictions as a json file.
The approach is as follows:
Start from tools/infer.py: in the run function you can see that it calls the predict method of the Trainer class in trainer.py under the ppdet/engine folder, which draws the detection boxes on the original images and saves them to the output folder. One pitfall: the ppdet used here is not the ppdet folder inside the repository, but the ppdet folder of the installed PaddleDetection environment. So if you modify the code in the repository's ppdet folder and nothing changes, remember to edit the copy under paddle21/lib/python3.8/site-packages/paddledet-2.0.1-py3.8.egg/ppdet/ instead.
from ppdet.engine import Trainer

def run(FLAGS, cfg):
    # build trainer
    trainer = Trainer(cfg, mode='test')
    # load weights
    trainer.load_weights(cfg.weights)
    # get inference images
    images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    # inference
    trainer.predict(
        images,
        draw_threshold=FLAGS.draw_threshold,
        output_dir=FLAGS.output_dir,
        save_txt=FLAGS.save_txt)
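Before modifying anything under site-packages, it helps to confirm which ppdet copy the script actually imports; a minimal check (the printed path is just an example, yours will differ):

import ppdet
print(ppdet.__file__)
# e.g. .../site-packages/paddledet-2.0.1-py3.8.egg/ppdet/__init__.py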
Next, look at the concrete implementation of the predict function:
from ppdet.utils.visualizer import visualize_results, save_result

def predict(self,
            images,
            draw_threshold=0.5,
            output_dir='output',
            save_txt=False):
    self.dataset.set_images(images)
    loader = create('TestReader')(self.dataset, 0)
    imid2path = self.dataset.get_imid2path()
    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        for key, value in outs.items():
            outs[key] = value.numpy()

        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']
        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            bbox_res = batch_res['bbox'][start:end] \
                if 'bbox' in batch_res else None
            mask_res = batch_res['mask'][start:end] \
                if 'mask' in batch_res else None
            segm_res = batch_res['segm'][start:end] \
                if 'segm' in batch_res else None
            image = visualize_results(image, bbox_res, mask_res, segm_res,
                                      int(outs['im_id']), catid2name,
                                      draw_threshold)
            self.status['result_image'] = np.array(image.copy())
            if self._compose_callback:
                self._compose_callback.on_step_end(self.status)
            # save image with detection
            save_name = self._get_save_image_name(output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
            if save_txt:
                save_path = os.path.splitext(save_name)[0] + '.txt'
                save_result(save_path, bbox_res, catid2name, draw_threshold)
            start = end
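One detail worth spelling out: get_infer_results returns a single flat list of detections for the whole batch, and predict walks it per image using bbox_num and the start/end offsets. A toy illustration with made-up stand-in values (not real PaddleDetection output):

import numpy as np

# batch of 2 images: image 0 produced 2 boxes, image 1 produced 1 box
bbox_num = np.array([2, 1])
flat_bbox = ['det0_of_img0', 'det1_of_img0', 'det0_of_img1']  # stand-ins for result dicts

start = 0
for i in range(len(bbox_num)):
    end = start + bbox_num[i]
    print(i, flat_bbox[start:end])
    start = end
# 0 ['det0_of_img0', 'det1_of_img0']
# 1 ['det0_of_img1']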
As you can see, if the save_txt flag is set, the save_result function in ppdet/utils/visualizer.py is called, which writes each image's prediction information to a corresponding txt file under the output folder.
def save_result(save_path, bbox_res, catid2name, threshold):
    """
    save result as txt
    """
    with open(save_path, 'w') as f:
        for dt in bbox_res:
            catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
            if score < threshold:
                continue
            # each bbox result as a line
            # for rbox: classname score x1 y1 x2 y2 x3 y3 x4 y4
            # for bbox: classname score x1 y1 w h
            bbox_pred = '{} {} '.format(catid2name[catid], score) + ' '.join(
                [str(e) for e in bbox])
            f.write(bbox_pred + '\n')
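For reference, a line produced by save_result in the plain bbox case looks like "classname score x1 y1 w h". A small hypothetical helper (not part of PaddleDetection) to read such a line back, assuming class names contain no spaces:

def parse_txt_line(line):
    # "classname score x1 y1 w h" -> (classname, score, [x1, y1, w, h])
    parts = line.strip().split()
    return parts[0], float(parts[1]), [float(v) for v in parts[2:]]

print(parse_txt_line("person 0.87 12.0 34.0 56.0 78.0"))
# ('person', 0.87, [12.0, 34.0, 56.0, 78.0])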
From this, we can add a save_json option: write a save_json function in trainer.py modeled on predict, and adapt save_result in visualizer.py into a save_result_json function. The idea is that in the run function of infer.py we read the information of all test-set images from the test.json file, take one image at a time by its file name, call save_json (which in turn calls save_result_json) to collect the predicted box information into a content_json list, and finally have run write the contents of content_json into a json file.
import json  # needed for reading the annotation file and writing the results

def run(FLAGS, cfg):
    # build trainer
    trainer = Trainer(cfg, mode='test')
    # load weights
    trainer.load_weights(cfg.weights)
    # get inference images
    # images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    path = "./dataset/coco/test2017/"
    path_json = "./dataset/coco/annotations/instances_test2017.json"
    save_path = 'result/result.json'
    content_json = []
    f = open(path_json, 'r', encoding='utf-8')
    s = f.read()
    rest = json.loads(s)
    # a = -1
    for i in rest['images']:
        # a += 1
        # if a < 1501:
        #     continue
        file_name = i["file_name"]
        image_id = i["id"]
        FLAGS.infer_img = path + file_name
        image = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
        content_json.extend(trainer.save_json(
            image,
            image_id,
            draw_threshold=FLAGS.draw_threshold,
            output_dir=FLAGS.output_dir,
            save_json=FLAGS.save_json))
        # if a == 1998:  # an extra ] ends up after entry 1998; remember to delete it before submitting
        #     with open(save_path, 'a') as f:
        #         json.dump(content_json, f)
        #     break
    f.close()
    with open(save_path, 'w') as f:
        json.dump(content_json, f)
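Note that this run function reads FLAGS.save_json, so a matching flag has to be added to the argument parser in tools/infer.py. A hedged sketch of what that could look like (the surrounding parse_args code in your version may differ):

from ppdet.utils.cli import ArgsParser

def parse_args():
    parser = ArgsParser()
    # ... the existing arguments (--infer_dir, --infer_img, --draw_threshold, ...)
    parser.add_argument(
        "--save_json",
        action='store_true',
        default=False,
        help="Whether to save bbox predictions of each image as json.")
    return parser.parse_args()

The save_json method added to Trainer in trainer.py is shown next: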
def save_json(self,
              image,
              image_id,
              draw_threshold=0.5,
              output_dir='result',
              save_json=True):
    self.dataset.set_images(image)
    loader = create('TestReader')(self.dataset, 0)
    imid2path = self.dataset.get_imid2path()
    anno_file = self.dataset.get_anno()
    clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)

    # Run Infer
    self.status['mode'] = 'test'
    self.model.eval()
    content_json = []
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        # forward
        outs = self.model(data)
        for key in ['im_shape', 'scale_factor', 'im_id']:
            outs[key] = data[key]
        for key, value in outs.items():
            outs[key] = value.numpy()

        batch_res = get_infer_results(outs, clsid2catid)
        bbox_num = outs['bbox_num']
        start = 0
        for i, im_id in enumerate(outs['im_id']):
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            self.status['original_image'] = np.array(image.copy())

            end = start + bbox_num[i]
            bbox_res = batch_res['bbox'][start:end] \
                if 'bbox' in batch_res else None
            mask_res = batch_res['mask'][start:end] \
                if 'mask' in batch_res else None
            segm_res = batch_res['segm'][start:end] \
                if 'segm' in batch_res else None
            if save_json:
                content_json.extend(
                    save_result_json(bbox_res, image_id, draw_threshold))
            start = end
    return content_json
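Since save_json calls save_result_json, trainer.py also needs to import it from wherever you define it; assuming it lives next to save_result in visualizer.py, the import at the top of trainer.py would become something like:

from ppdet.utils.visualizer import visualize_results, save_result, save_result_json

The save_result_json function itself, added in visualizer.py: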
def save_result_json(bbox_res, image_id, threshold):
    """
    save result as json
    """
    content_json = []
    for dt in bbox_res:
        catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
        if score < threshold:
            continue
        # each bbox result becomes one dict in COCO detection-result style
        # bbox is [x1, y1, w, h]
        content_dic = {
            "image_id": image_id,
            "category_id": catid,
            "bbox": bbox,
            "score": score
        }
        # print(content_dic)
        content_json.append(content_dic)
    # with open(save_path, 'w') as f:
    #     json.dump(content_json, f)
    return content_json
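To see what save_result_json produces, here is a quick check with made-up detections (values are purely illustrative):

fake_bbox_res = [
    {'category_id': 1, 'bbox': [10.0, 20.0, 30.0, 40.0], 'score': 0.9},
    {'category_id': 2, 'bbox': [5.0, 5.0, 15.0, 25.0], 'score': 0.3},
]
print(save_result_json(fake_bbox_res, image_id=42, threshold=0.5))
# [{'image_id': 42, 'category_id': 1, 'bbox': [10.0, 20.0, 30.0, 40.0], 'score': 0.9}]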
With that in place, running

python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml -o weights=output/ppyolov2_r50vd_dcn_365e_coco/1000.pdparams

generates the json result file.
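As a side note (not part of the original workflow): because each entry follows the COCO detection-result format, the generated file can also be fed straight to pycocotools for evaluation, provided result.json contains only the list of detection dicts and your test annotations include ground-truth boxes:

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("./dataset/coco/annotations/instances_test2017.json")
coco_dt = coco_gt.loadRes("result/result.json")
coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()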
If you also want to append the category information at the end of the json file, just add it to the content_json list:

# cat = {}
# info = [{"id": 1, "name": "rect_eye", "supercategory": "none"}, {"id": 2, "name": "sphere_eye", "supercategory": "none"}, {"id": 3, "name": "box_eye", "supercategory": "none"}]
# cat["categories"] = info
# content_json.append(cat)                  # append the whole {"categories": [...]} dict, or
# content_json.extend(cat["categories"])    # extend with the individual category dicts
However, the program may hit an out-of-memory error after running for a while. My workaround was to open the saved json file to see how far the writing had gotten, add a counter in the run function to skip the images already processed, and rely on the append-mode ('a') writes shown in the commented-out code so that rerunning the program fills in the remaining images. With limited memory, and being too lazy to change the framework's detection code itself, I settled for this clumsy method; for my 1999 test images it took three runs to finish. If you have a better implementation, feel free to share it and correct me.
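If you end up with several runs appended into one file (the back-to-back "[...][...]" situation described above), a rough cleanup helper like the following can merge the concatenated lists instead of editing the brackets by hand; this is only a sketch of the workaround, not something from PaddleDetection:

import json
import re

def merge_appended_lists(path_in, path_out):
    # a result file written in append mode holds several JSON lists back to back,
    # e.g. "[...][...]"; split them at the "][" seams and re-parse each piece
    with open(path_in, 'r') as f:
        text = f.read().strip()
    pieces = re.split(r'\]\s*\[', text)
    merged = []
    for piece in pieces:
        if not piece.startswith('['):
            piece = '[' + piece
        if not piece.endswith(']'):
            piece = piece + ']'
        merged.extend(json.loads(piece))
    with open(path_out, 'w') as f:
        json.dump(merged, f)

merge_appended_lists('result/result.json', 'result/result_merged.json')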