百度开发者文字识别(身份证+图片+护照)

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.json.JSONObject;

/**
 * http 工具类
 * 
 * @since 1.6
 */
public class BaiduOCRHttpUtil {

	private final static String BAIDU_APP_CLIENT_ID = "IDSPpc10YHtORRV9T5ia9grN";
	private final static String BAIDU_APP_CLIENT_SECRET = "IDSPpc10YHtORRV9T5ia9grN";
	private final static String PASSPORT_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/passport";
	private final static String IMG_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/webimage";

	private final static String ACCESS_TOKE_URL = "https://aip.baidubce.com/oauth/2.0/token";

	private final static String ID_CARD_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard";

	private final static Logger log = Logger.getLogger(BaiduOCRHttpUtil.class);

	/**
	 * 获取百度的AccessToken 参考 百度Access
	 * Token获取
	 * 
	 * @return 百度的AccessToken
	 * @exception RuntimeException
	 *                1.unknown client id(API Key不正确)
* 2.Client authentication failed(Secret Key不正确)
* 3 other(访问不通等等)
*/
public static String getAccessToken() { try { String getAccessTokenUrl = String .format("%s?grant_type=client_credentials&client_id=%s&client_secret=%s", ACCESS_TOKE_URL, BAIDU_APP_CLIENT_ID, BAIDU_APP_CLIENT_SECRET); String accessTokenInfo = getUrlResultData(getAccessTokenUrl); JSONObject accessTokenJson = new JSONObject(accessTokenInfo); if (accessTokenJson.has("error")) { throw new RuntimeException( accessTokenJson.getString("error_description")); } return accessTokenJson.getString("access_token"); } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new RuntimeException(e.getMessage()); } } /** * InputStream会取一次会关闭,使用下面这些InputStream要注意文件是否要保存
* MultipartFile的InputStream
* req.getInputStream
* httpconn.getInputStream
* * @return * @throws IOException * @throws UnsupportedEncodingException *
* URLEncoder.encode(imgStr, "UTF-8") */
public static String inputStreamToString(InputStream in) throws UnsupportedEncodingException, IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len; while ((len = in.read(buffer)) > -1) { baos.write(buffer, 0, len); } baos.flush(); String imgStr = Base64Util.encode(baos.toByteArray()); String imgParam = URLEncoder.encode(imgStr, "UTF-8"); return imgParam; } /** * 通过护照图片获取护照信息
* 支持对中国大陆居民护照的资料页进行结构化识别,包含国家码、姓名、姓名拼音、性别、护照号、出生日期、出生地点、签发日期、有效期至、签发地点。
* 参考 百度文字识别-护照识别 * 百度文字识别-通用文字识别 * * @param imgParam * 图像数据,base64编码后进行urlencode,要求base64编码和urlencode后大小不超过4M, * 最短边至少15px,最长边最大4096px,支持jpg/jpeg/png/bmp格式 * @param accessToken * 百度的AccessToken,通过{@link #getAccessToken()}获取 * @exception RuntimeException * 参数校验 accessToken为空,护照图片为空,图片超出4M,护照未扫描出信息,访问不通...
* * @return 1 */
public static String passport(String accessToken, String imgParam) throws RuntimeException { try { if (accessToken == null || accessToken.length() == 0) { throw new RuntimeException("accessToken为空"); } if (imgParam == null || imgParam.length() == 0) { throw new RuntimeException("护照图片为空"); } if (imgParam.getBytes().length > 4 * 1024 * 1024) { throw new RuntimeException("图片超出4M"); } String passportInfo = post(PASSPORT_URL, accessToken, "image=" + imgParam); if (passportInfo == null || passportInfo.length() == 0) { throw new RuntimeException("护照识别失败"); } JSONObject passportJson = new JSONObject(passportInfo); if (passportJson.getInt("words_result_num") < 1) {// 识别结果数,表示words_result的元素个数 throw new RuntimeException("护照未扫描出信息"); } return passportInfo; } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new RuntimeException(e.getMessage()); } } /** * 身份证识别 * 支持对大陆居民二代身份证正反面的所有字段进行结构化识别,包括姓名、性别、民族、出生日期、住址、身份证号、签发机关、有效期限; * 同时,支持对用户上传的身份证图片进行图像风险和质量检测,可识别图片是否为复印件或临时身份证,是否被翻拍或编辑,是否存在正反颠倒、模糊、欠曝、过曝等质量问题。 *参考 百度文字识别-身份证识别 * 百度文字识别-通用文字识别 * * * @param imgParam * 图像数据,base64编码后进行urlencode,要求base64编码和urlencode后大小不超过4M, * 最短边至少15px,最长边最大4096px,支持jpg/jpeg/png/bmp格式 * @param accessToken * 百度的AccessToken,通过{@link #getAccessToken()}获取 * @exception RuntimeException * 参数校验 accessToken为空,护照图片为空,图片超出4M,身份证未扫描出信息,访问不通...
* * @return */
public static String idcard(String accessToken, String imgParam) { // 请求url try { if (accessToken == null || accessToken.length() == 0) { throw new RuntimeException("accessToken为空"); } if (imgParam == null || imgParam.length() == 0) { throw new RuntimeException("护照图片为空"); } if (imgParam.getBytes().length > 4 * 1024 * 1024) { throw new RuntimeException("图片超出4M"); } String param = "id_card_side=" + "front" + "&image=" + imgParam; String idcardInfo = post(ID_CARD_URL, accessToken, param); if (idcardInfo == null || idcardInfo.length() == 0) { throw new RuntimeException("身份证识别失败"); } JSONObject jsonObject = new JSONObject(idcardInfo); if (jsonObject.getInt("words_result_num") < 1) {// 识别结果数,表示words_result的元素个数 throw new RuntimeException("身份证未扫描出信息"); } return idcardInfo; } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new RuntimeException(e.getMessage()); } } /** * 身份证识别 * 支持对大陆居民二代身份证正反面的所有字段进行结构化识别,包括姓名、性别、民族、出生日期、住址、身份证号、签发机关、有效期限; * 同时,支持对用户上传的身份证图片进行图像风险和质量检测,可识别图片是否为复印件或临时身份证,是否被翻拍或编辑,是否存在正反颠倒、模糊、欠曝、过曝等质量问题。 *参考 百度文字识别-身份证识别 * 百度文字识别-通用文字识别 * * * @param imgParam * 图像数据,base64编码后进行urlencode,要求base64编码和urlencode后大小不超过4M, * 最短边至少15px,最长边最大4096px,支持jpg/jpeg/png/bmp格式 * @param accessToken * 百度的AccessToken,通过{@link #getAccessToken()}获取 * @exception RuntimeException * 参数校验 accessToken为空,护照图片为空,图片超出4M,图片未扫描出信息,访问不通...
* * @return */
public static String img(String accessToken, String imgParam) { // 请求url try { // 本地文件路径 if (accessToken == null || accessToken.length() == 0) { throw new RuntimeException("accessToken为空"); } if (imgParam == null || imgParam.length() == 0) { throw new RuntimeException("护照图片为空"); } if (imgParam.getBytes().length > 4 * 1024 * 1024) { throw new RuntimeException("图片超出4M"); } String param = "image=" + imgParam; String result = post(IMG_URL, accessToken, param); if (result == null || result.length() == 0) { throw new RuntimeException("图片识别失败"); } JSONObject jsonObject = new JSONObject(result); if (jsonObject.getInt("words_result_num") < 1) {// 识别结果数,表示words_result的元素个数 throw new RuntimeException("图片未扫描出信息"); } return result; } catch (Exception e) { e.printStackTrace(); log.error(e.getMessage(), e); throw new RuntimeException(e.getMessage()); } } private static String post(String requestUrl, String accessToken, String params) throws Exception { String contentType = "application/x-www-form-urlencoded"; return post(requestUrl, accessToken, contentType, params); } private static String post(String requestUrl, String accessToken, String contentType, String params) throws Exception { String encoding = "UTF-8"; if (requestUrl.contains("nlp")) { encoding = "GBK"; } return post(requestUrl, accessToken, contentType, params, encoding); } private static String post(String requestUrl, String accessToken, String contentType, String params, String encoding) throws Exception { String url = requestUrl + "?access_token=" + accessToken; return postGeneralUrl(url, contentType, params, encoding); } private static String postGeneralUrl(String generalUrl, String contentType, String params, String encoding) throws Exception { URL url = new URL(generalUrl); // 打开和URL之间的连接 HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("POST"); // 设置通用的请求属性 connection.setRequestProperty("Content-Type", contentType); connection.setRequestProperty("Connection", "Keep-Alive"); connection.setUseCaches(false); connection.setDoOutput(true); connection.setDoInput(true); // 得到请求的输出流对象 DataOutputStream out = new DataOutputStream( connection.getOutputStream()); out.write(params.getBytes(encoding)); out.flush(); out.close(); // 建立实际的连接 connection.connect(); // 获取所有响应头字段 Map<String, List<String>> headers = connection.getHeaderFields(); // // 遍历所有的响应头字段 // for (String key : headers.keySet()) { // System.err.println(key + "--->" + headers.get(key)); // } // 定义 BufferedReader输入流来读取URL的响应 BufferedReader in = null; in = new BufferedReader(new InputStreamReader( connection.getInputStream(), encoding)); String result = ""; String getLine; while ((getLine = in.readLine()) != null) { result += getLine; } in.close(); // System.err.println("result:" + result); return result; } private static String getUrlResultData(String url) { HttpURLConnection http = null; try { URL ur = new URL(url); http = (HttpURLConnection) ur.openConnection(); http.setConnectTimeout(10000); http.setRequestMethod("POST");// 设定请求的方法为"POST" http.setDoInput(true);// 设置是否从httpUrlConnection读入 // 设置是否向httpUrlConnection输出,因为这个是post请求,参数要放在 http正文内,因此需要设为true, // 默认情况下是false; http.setDoOutput(true); http.setUseCaches(false); http.connect(); BufferedWriter out = new BufferedWriter(new OutputStreamWriter( http.getOutputStream(), "UTF-8")); out.flush(); // 接收返回数据 InputStream is = http.getInputStream(); byte[] b = new byte[1024]; int c = -1; StringBuilder ret = new StringBuilder(); while ((c = is.read(b)) != -1) { ret.append(new String(b, 0, c, "UTF-8")); } String decode = URLDecoder.decode(ret.toString(), "UTF-8"); return decode; } catch (Exception e) { e.printStackTrace(); } return null; } } class Base64Util { private static final char last2byte = (char) Integer .parseInt("00000011", 2); private static final char last4byte = (char) Integer .parseInt("00001111", 2); private static final char last6byte = (char) Integer .parseInt("00111111", 2); private static final char lead6byte = (char) Integer .parseInt("11111100", 2); private static final char lead4byte = (char) Integer .parseInt("11110000", 2); private static final char lead2byte = (char) Integer .parseInt("11000000", 2); private static final char[] encodeTable = new char[] { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; public Base64Util() { } public static String encode(byte[] from) { StringBuilder to = new StringBuilder( (int) ((double) from.length * 1.34D) + 3); int num = 0; char currentByte = 0; int i; for (i = 0; i < from.length; ++i) { for (num %= 8; num < 8; num += 6) { switch (num) { case 0: currentByte = (char) (from[i] & lead6byte); currentByte = (char) (currentByte >>> 2); case 1: case 3: case 5: default: break; case 2: currentByte = (char) (from[i] & last6byte); break; case 4: currentByte = (char) (from[i] & last4byte); currentByte = (char) (currentByte << 2); if (i + 1 < from.length) { currentByte = (char) (currentByte | (from[i + 1] & lead2byte) >>> 6); } break; case 6: currentByte = (char) (from[i] & last2byte); currentByte = (char) (currentByte << 4); if (i + 1 < from.length) { currentByte = (char) (currentByte | (from[i + 1] & lead4byte) >>> 4); } } to.append(encodeTable[currentByte]); } } if (to.length() % 4 != 0) { for (i = 4 - to.length() % 4; i > 0; --i) { to.append("="); } } return to.toString(); } }

你可能感兴趣的:(java,百度开发者)