使用百度OCR之前,需要先申请 APPID、API_KEY 和 SECRET_KEY。
首先用一个工厂类(懒加载单例)来构建 AipOcr:
代码如下:
/**
 * Lazily creates and caches one shared {@link AipOcr} client built from the
 * credentials in {@code AIConstant}.
 */
public class BDFactory {
    /**
     * Cached client. Declared {@code volatile} so that a thread reading the
     * field outside the lock can never observe a partially constructed
     * instance.
     */
    private static volatile AipOcr aipOcr;

    /**
     * Returns the shared client, creating it on first use.
     *
     * @return the cached {@link AipOcr} instance
     */
    public static AipOcr getAipOcr() {
        // Read the volatile field once into a local for the fast path.
        AipOcr client = aipOcr;
        if (client == null) {
            // Lock on this factory's own class rather than on the SDK class,
            // so unrelated code synchronizing on AipOcr.class cannot contend.
            synchronized (BDFactory.class) {
                client = aipOcr;
                if (client == null) {
                    client = new AipOcr(AIConstant.BD_OCR_APPID, AIConstant.BD_OCR_APPKEY, AIConstant.BD_OCR_SECRETKEY);
                    aipOcr = client;
                }
            }
        }
        return client;
    }
}
AipOcr的一些常用方法:
//通用文字识别
public static String basicGeneral (String file){
HashMap options = new HashMap<>(4);
options.put("language_type", "CHN_ENG");
options.put("detect_direction", "true"); // 检测图片朝上
options.put("detect_language", "true"); // 检测语言,默认是不检查
options.put("probability", "true"); //是否返回识别结果中每一行的置信度
JSONObject jsonObject = aipOcr.basicGeneral(file, options);
return jsonObject.toString();
请求结果实例:
String result = basicGeneral("D:\\test.jpg");
{"log_id":23564476787080234428,"wordsResult":[
{"probability":{"average":0.968971,"min":0.955538,"variance":1.8E-4},"words":"目录"},
{"probability":{"average":0.871825,"min":0.744326,"variance":0.016256},"words":"可能你可能"}],
"words_result_num":2,"language":-1,"direction":0}
/**
 * General text recognition, high-accuracy variant.
 *
 * <p>The image is read into memory and sent as bytes. The client also has a
 * String overload, {@code aipOcr.basicAccurateGeneral(file, options)}, which
 * can be used instead; only one request is issued here so each invocation
 * results in a single remote call.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject basicAccurateGeneral(String file) {
    HashMap<String, String> options = new HashMap<>(4);
    options.put("language_type", "CHN_ENG");
    options.put("detect_direction", "true"); // detect image orientation
    options.put("detect_language", "true"); // detect the language; off by default
    options.put("probability", "true"); // return a confidence value for every recognized line

    // Best effort: if the file cannot be read the failure is printed and an
    // empty image is submitted, exactly as before.
    byte[] bytes = new byte[0];
    try {
        bytes = FileCopyUtils.copyToByteArray(new FileInputStream(file));
    } catch (IOException e) {
        e.printStackTrace();
    }
    return aipOcr.basicAccurateGeneral(bytes, options);
}
请求结果实例:
//高精度版
JSONObject result = basicAccurateGeneral("D:\\Test2.jpeg");
String s = result.toString();
{"log_id":76589744465573053,"wordsResult":[
{"probability":{"average":0.985876,"min":0.940408,"variance":4.62E-4},"words":"克隆可怜和"},
{"probability":{"average":0.999704,"min":0.99927,"variance":0},"words":"科技馆黄金矿工"},
{"probability":{"average":0.996163,"min":0.964984,"variance":7.3E-5},"words":"也发个黄金还是集合春节后技能"},
{"probability":{"average":0.993361,"min":0.960509,"variance":1.84E-4},"words":"公交卡高科技"},
{"probability":{"average":0.989808,"min":0.951065,"variance":3.72E-4},"words":"号流量计欧普偶皮"}],
"words_result_num":5,"language":-1,"direction":0}
/**
 * General text recognition that also returns position information.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject general(String file) {
    HashMap<String, String> requestOptions = new HashMap<>();
    requestOptions.put("language_type", "CHN_ENG");
    // detect image orientation
    requestOptions.put("detect_direction", "true");
    requestOptions.put("vertexes_location", "true");
    requestOptions.put("recognize_granularity", "big");
    // detect the language; off by default
    requestOptions.put("detect_language", "true");
    // return a confidence value for every recognized line
    requestOptions.put("probability", "true");

    byte[] imageData;
    try {
        imageData = FileCopyUtils.copyToByteArray(new FileInputStream(file));
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.general(imageData, requestOptions);
}
请求结果实例:
JSONObject jsonObject = general("D:\\Test3.jpg");
{"log_id":456353476587574564675,"words_result":[
{"probability":{"average":0.955421,"min":0.81863,"variance":0.004858},"words":"将很反感几号放假"},
{"probability":{"average":0.887704,"min":0.364053,"variance":0.054941},"words":"规划局发过火发"},
{"probability":{"average":0.357126,"min":0.599519,"variance":0.015032},"words":"33"},
{"probability":{"average":0.764071,"min":0.764071,"variance":0},"words":"i"}],
"words_result_num":4,"language":-1,"direction":0}
/**
 * Text recognition through the client's {@code enhancedGeneral} endpoint
 * (described by the original author as the variant that also covers
 * traditional characters).
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject enhancedGeneral(String file) {
    HashMap<String, String> options = new HashMap<>(6);
    options.put("language_type", "CHN_ENG");
    options.put("detect_direction", "true");
    options.put("detect_language", "true");
    options.put("probability", "true");

    // Best effort: a read failure is printed and an empty image is submitted.
    byte[] bytes = new byte[0];
    try {
        bytes = Util.readFileByBytes(file);
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Call the SDK client. The previous code invoked this very method again
    // with a hard-coded path, which recursed until the stack overflowed.
    return aipOcr.enhancedGeneral(bytes, options);
}
/**
 * ID card recognition.
 *
 * @param file path of the image file to read
 * @param side which side of the card the image shows: {@code "front"} or {@code "back"}
 * @return the recognition response
 */
public static JSONObject idCard(String file, String side) {
    HashMap<String, String> options = new HashMap<>(6);
    options.put("detect_direction", "true");
    // Enables the ID-card risk-type check (photocopy, temporary card,
    // re-photographed card, altered card). Off by default, i.e. "false".
    options.put("detect_risk", "true");

    // Best effort: a read failure is printed and an empty image is submitted.
    byte[] bytes = new byte[0];
    try {
        bytes = Util.readFileByBytes(file);
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Use the same client reference as every other recognition method here.
    return aipOcr.idcard(bytes, side, options);
}
结果实例:
JSONObject front = idCard("D:\\card.jpg", "front");
{"log_id":**********************,"words_result":
{"姓名":{"words":"***","location":{"top":444,"left":138,"width":172,"height":63}}, "民族":{"words":"汉","location":{"top":574,"left":419,"width":36,"height":44}},
"住址":{"words":"****************","location":{"top":788,"left":125,"width":568,"height":130}},
"公民身份号码":{"words":"************00122","location":{"top":1045,"left":361,"width":707,"height":76}},
"出生":{"words":"******8","location":{"top":0,"left":0,"width":0,"height":0}},
"性别":{"words":"男","location":{"top":0,"left":0,"width":0,"height":0}}},"words_result_num":6,"image_status":"normal","direction":0}
//背面 "back"
JSONObject back = idCard("D:\\card2.jpg","back");
{"log_id":1473111127994119341,"words_result":
{"失效日期":{"words":"20220502","location":{"top":1029,"left":732,"width":190,"height":42}},
"签发机关":{"words":"***县公安局","location":{"top":945,"left":504,"width":236,"height":45}},
"签发日期":{"words":"20120502","location":{"top":1037,"left":508,"width":198,"height":42}}},
"words_result_num":3,"image_status":"normal","direction":0}
/**
 * Image orientation; present when detect_direction=true.
 * -1: undefined,
 * - 0: upright,
 * - 1: rotated 90 degrees counter-clockwise,
 * - 2: rotated 180 degrees counter-clockwise,
 * - 3: rotated 270 degrees counter-clockwise
 */
private Integer direction;
/**
 * normal - recognized normally
 * reversed_side - front and back of the ID card are swapped
 * non_idcard - the uploaded image does not contain an ID card
 * blurred - the ID card is blurred
 * other_type_card - some other kind of card or certificate
 * over_exposure - key fields of the ID card are reflective or over-exposed
 * unknown - unknown status
 */
private String image_status;
/**
 * Number of recognition results, i.e. the element count of words_result
 */
private Integer words_result_num;
/**
 * normal - regular ID card; copy - photocopy; temporary - temporary ID card;
 * screen - re-photographed; unknow - other unknown case
 * (the value is spelled "unknow" in the original note; presumably that is the literal API value - TODO confirm)
 */
private String risk_type;
/**
 * Bank card recognition.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject bankCard(String file) {
    HashMap<String, String> requestOptions = new HashMap<>(6);

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.bankcard(imageData, requestOptions);
}
//银行卡识别
//bank_card_type 银行卡类型,0:不能识别; 1: 借记卡; 2: 信用卡
{"result":{"bank_card_number":"621**********13","bank_card_type":1,"bank_name":"邮储银行"},"log_id":32564645765876708921}
/**
 * Driving licence recognition.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject drivingLicense(String file) {
    HashMap<String, String> requestOptions = new HashMap<>(6);
    // detect image orientation
    requestOptions.put("detect_direction", "true");

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.drivingLicense(imageData, requestOptions);
}
结果:
{"log_id":2986902282992381075,"words_result":
{"姓名":{"words":"***"},"至":{"words":"20211124"},
"证号":{"words":"5********"},
"出生日期":{"words":"19921004"},
"住址":{"words":"**********号"},
"国籍":{"words":"中国"},
"初次领证日期":{"words":"20151124"},
"准驾车型":{"words":"C1"},
"有效期限":{"words":"20151124"},
"性别":{"words":"女"}},
"words_result_num":10,"direction":-1}
/**
 * Vehicle licence recognition.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject vehicleLicense(String file) {
    HashMap<String, String> requestOptions = new HashMap<>(6);
    // do not detect image orientation
    requestOptions.put("detect_direction", "false");
    // "normal" selects the fast service (about 1200 ms latency); omitting the
    // option or passing any other value selects the high-accuracy service
    // (about 1600 ms latency).
    requestOptions.put("accuracy", "normal");

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.vehicleLicense(imageData, requestOptions);
}
/**
 * Licence plate recognition.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject plateLicense(String file) {
    HashMap<String, String> requestOptions = new HashMap<>(6);
    // Whether to detect several plates; defaults to false. When true, multiple
    // plates within one image can be recognized.
    requestOptions.put("multi_detect", "true");

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.plateLicense(imageData, requestOptions);
}
//车牌识别
{"log_id":5545684252572090954,"words_result":[
{"number":"京A****",
"vertexes_location":[{"x":44,"y":106},{"x":243,"y":107},{"x":243,"y":172},{"x":44,"y":170}],
"color":"blue",
"probability":[1,0.9999933242797852,0.9999986886978149,0.9999988079071045,0.9999998807907104,0.9999427795410156,0.9997766613960266]}]}
/**
 * Business licence recognition.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject businessLicense(String file) {
    HashMap<String, String> requestOptions = new HashMap<>(6);

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.businessLicense(imageData, requestOptions);
}
/**
 * Custom-template text recognition.
 *
 * <p>Intended for card or document types for which Baidu offers no official
 * template (for example property certificates, officer IDs, train tickets):
 * a template can be built quickly with this product and then used to extract
 * structured content.
 *
 * @param file path of the image file to read
 * @return the recognition response
 */
public static JSONObject custom(String file) {
    // ID of the template to apply
    String templateId = "Nsdax2424asaAS791823112";
    HashMap<String, String> requestOptions = new HashMap<>(6);

    byte[] imageData;
    try {
        imageData = Util.readFileByBytes(file);
    } catch (IOException readFailure) {
        // Report the failure and fall back to an empty image.
        readFailure.printStackTrace();
        imageData = new byte[0];
    }
    return aipOcr.custom(imageData, templateId, requestOptions);
}