高效标注数据:这里以一个特定的文字检测任务(交通标志文字检测)为例。
以下是调用百度开放平台 API 进行文字检测与识别的 Python 代码:
from glob import glob
from tqdm import tqdm
import shutil
import os
# Run the Baidu OCR API over every image that has not been labelled yet and
# save, for each one, a labelme JSON file plus a preview image with the boxes.
json_dir = './test/baidu_det_json/'
preview_dir = './test/baidu_det/'
# Create the output folders up front so that writing the first result does
# not fail just because a folder is missing.
for out_dir in (json_dir, preview_dir):
    os.makedirs(out_dir, exist_ok=True)

image_test = glob('/home/zj/OCR/projects/AdvancedEAST/icpr/test/nodetimage/*.jpg')
for imagepath in tqdm(image_test):
    result_dict = baidu_det_api_high(imagepath)
    det2json(imagepath, result_dict, json_dir)
    draw(imagepath, result_dict, preview_dir)
from urllib.parse import urlencode
import urllib3,base64
def baidu_det_api_high(imagepath):
    """Send one image to Baidu's "accurate" OCR endpoint and return the reply.

    Args:
        imagepath: Path of the image file to upload.

    Returns:
        The JSON reply decoded into a dict. The other functions in this file
        read the ``words_result`` list and ``words_result_num`` from it.

    Raises:
        OSError: If the image file cannot be opened or read.
        ValueError: If the reply body is not valid UTF-8 encoded JSON.
    """
    import json  # local import so the function does not rely on import order

    # Placeholder token -- obtain a real one from the Baidu AI website.
    access_token = '24.3e825a7d...'
    http = urllib3.PoolManager()
    # General OCR (with position information); limited to 500 calls per day.
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate?access_token=' + access_token

    # The ``image`` parameter is the base64-encoded file content.
    with open(imagepath, 'rb') as image_file:
        img = base64.b64encode(image_file.read())

    # Form-encode the parameters (this also URL-escapes the base64 data).
    params = urlencode({
        'image': img,
        'language_type': 'CHN_ENG',
        'vertexes_location': 'true',
    })
    response = http.request(
        'POST',
        url,
        body=params,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )

    # The body arrives as bytes. It is parsed with json.loads instead of
    # eval(): eval would execute whatever the server sends back and fails on
    # the JSON literals true / false / null.
    return json.loads(response.data.decode('utf-8'))
#print(result_dict)
#print(result_dict['words_result'])##得到的是一个列表
注意:我修改了 labelme 的源码,注释掉了 app.py 中的 `if line_color:` 和 `if fill_color:` 两处判断。另外,下面的代码针对的是 Windows 下的 labelme;如果在 Ubuntu 下使用,请修改 encoding='gbk'(例如改为 'utf-8')。
import json
import cv2
import os
from collections import OrderedDict
def det2json(image_path, result_dict, savepath, encoding='gbk'):
    """Write the OCR detections of one image as a labelme annotation file.

    The annotation is built as one mapping and serialised in a single
    ``json.dump`` call, so the file is valid JSON even when no detection is
    kept (``"shapes": []``), and ``line_color`` / ``fill_color`` are written
    as real JSON ``null`` values rather than the string ``"null"``.

    Args:
        image_path: Path of the source image,
            e.g. ``'./test/image_test/43_788_533.jpg'``.
        result_dict: OCR reply. ``result_dict['words_result']`` is a list of
            dicts, each with ``'words'`` and ``'vertexes_location'`` (at
            least four ``{'x': ..., 'y': ...}`` corners).
        savepath: Directory that receives ``<image stem>.json``.
        encoding: Text encoding of the output file. Defaults to ``'gbk'``,
            which the original author needed for labelme on Windows.

    Raises:
        IOError: If OpenCV cannot read ``image_path``.
    """
    image_name = os.path.basename(image_path)
    # splitext copes with extensions of any length (".jpg", ".jpeg", ...).
    json_name = os.path.splitext(image_name)[0] + '.json'  # 43_788_533.json

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read image: ' + image_path)
    imageHeight, imageWidth = img.shape[:2]

    # labelme embeds the image itself as base64 text.
    with open(image_path, 'rb') as image_file:
        imageData = base64.b64encode(image_file.read()).decode('utf-8')

    shapes = []
    if 'words_result' in result_dict and result_dict['words_result_num'] > 0:
        for dict_ocr in result_dict['words_result']:
            if not is_chinese_or_num(dict_ocr['words']):
                continue
            box = dict_ocr['vertexes_location']
            # OrderedDict keeps the keys in the order they are listed here.
            shapes.append(OrderedDict([
                ("label", dict_ocr['words']),
                ("line_color", None),
                ("fill_color", None),
                ("points", [[box[i]['x'], box[i]['y']] for i in range(4)]),
                ("shape_type", "polygon"),
                ("flags", {}),
            ]))

    annotation = OrderedDict([
        ("version", "3.16.0"),
        ("flags", {}),
        ("shapes", shapes),
        ("lineColor", [0, 255, 0, 128]),
        ("fillColor", [255, 0, 0, 128]),
        ("imagePath", '..\\TextImages\\' + image_name),
        ("imageData", imageData),
        ("imageHeight", imageHeight),
        ("imageWidth", imageWidth),
    ])

    with open(os.path.join(savepath, json_name), 'w', encoding=encoding) as json_file:
        json.dump(annotation, json_file, ensure_ascii=False, indent=2)
#det2json('./test/image_test/43_788_533.jpg',result_dict,'./test/baidu_det_json/')
###将识别结果画在原图上瞅瞅
import cv2
import os
import numpy as np
def draw(imagepath, result_dict, newpath):
    """Outline the accepted OCR detections on an image and save the result.

    Only detections whose text passes ``is_chinese_or_num`` are outlined.
    The image is written even when nothing is outlined.

    Args:
        imagepath: Path of the source image.
        result_dict: OCR reply; ``result_dict['words_result']`` is a list of
            dicts, each with ``'words'`` and ``'vertexes_location'`` (at
            least four ``{'x': ..., 'y': ...}`` corners).
        newpath: Directory that receives the annotated image, saved under
            the same file name as the source.

    Raises:
        IOError: If OpenCV cannot read ``imagepath``.
    """
    im = cv2.imread(imagepath)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cannot read image: ' + imagepath)
    picname = os.path.basename(imagepath)

    if 'words_result' in result_dict and result_dict['words_result_num'] > 0:
        for dict_ocr in result_dict['words_result']:
            if not is_chinese_or_num(dict_ocr['words']):
                continue
            box = dict_ocr['vertexes_location']
            pts = np.array([[box[i]['x'], box[i]['y']] for i in range(4)], np.int32)
            # Reshape the four corners to (4, 1, 2) and draw them as one
            # closed polygon, 1 pixel wide.
            cv2.polylines(im, [pts.reshape((-1, 1, 2))], True, (0, 255, 255), 1)

    cv2.imwrite(os.path.join(newpath, picname), im)