为了简便财务总是要对照着别人发来的表格图片制作成自己的表格
图片识别 识别成表格 表格识别 ocr
使用阿里云api
购买(印刷文字识别-表格识别) https://market.aliyun.com/products/57124001/cmapi024968.html
获得阿里云图片识别表格的appcode
效果图如下
整合的代码
package com.xai.wuye.controller.api;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.xai.wuye.common.JsonResult;
import com.xai.wuye.exception.ResultException;
import com.xai.wuye.model.AParam
import com.xai.wuye.service.CarService;
import com.xai.wuye.util.HttpUtils;
import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import static org.apache.tomcat.util.codec.binary.Base64.encodeBase64;
@Controller
@EnableAsync
@RequestMapping("/api/ocr")
public class AliOCRImages {
@Autowired
CarService carService;
private String OcrPath = "/home/runApp/car/orc/";
@ResponseBody
@RequestMapping("table")
public JsonResult getFirstLicence(@RequestParam(value = "file", required = false) MultipartFile file) {
if (file == null || file.isEmpty()||file.getSize() > 1204*1204*3)
throw new ResultException(0,"文件为null,且不能大于3M");
String filename = file.getOriginalFilename();
String filepath = OcrPath+"temp/"+filename;
File newFile = new File(filepath);
try {
file.transferTo(newFile);
String host = "https://form.market.alicloudapi.com";
String path = "/api/predict/ocr_table_parse";
// 输入阿里的code
String appcode = "4926a667ee6c41329c278361*****";
String imgFile = "图片路径";
Boolean is_old_format = false;//如果文档的输入中含有inputs字段,设置为True, 否则设置为False
//请根据线上文档修改configure字段
JSONObject configObj = new JSONObject();
configObj.put("format", "xlsx");
configObj.put("finance", false);
configObj.put("dir_assure", false);
String config_str = configObj.toString();
// configObj.put("min_size", 5);
//String config_str = "";
String method = "POST";
Map headers = new HashMap();
//最后在header中的格式(中间是英文空格)为Authorization:APPCODE 83359fd73fe94948385f570e3c139105
headers.put("Authorization", "APPCODE " + appcode);
Map querys = new HashMap();
// 对图像进行base64编码
String imgBase64 = "";
try {
byte[] content = new byte[(int) newFile.length()];
FileInputStream finputstream = new FileInputStream(newFile);
finputstream.read(content);
finputstream.close();
imgBase64 = new String(encodeBase64(content));
} catch (IOException e) {
e.printStackTrace();
return null;
}
// 拼装请求body的json字符串
JSONObject requestObj = new JSONObject();
try {
if(is_old_format) {
JSONObject obj = new JSONObject();
obj.put("image", getParam(50, imgBase64));
if(config_str.length() > 0) {
obj.put("configure", getParam(50, config_str));
}
JSONArray inputArray = new JSONArray();
inputArray.add(obj);
requestObj.put("inputs", inputArray);
}else{
requestObj.put("image", imgBase64);
if(config_str.length() > 0) {
requestObj.put("configure", config_str);
}
}
} catch (JSONException e) {
e.printStackTrace();
}
String bodys = requestObj.toString();
try {
/**
* 重要提示如下:
* HttpUtils请从
* https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
* 下载
*
* 相应的依赖请参照
* https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
*/
HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
int stat = response.getStatusLine().getStatusCode();
if(stat != 200){
System.out.println("Http code: " + stat);
System.out.println("http header error msg: "+ response.getFirstHeader("X-Ca-Error-Message"));
System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
return null;
}
String res = EntityUtils.toString(response.getEntity());
JSONObject res_obj = JSON.parseObject(res);
Long fileName = System.currentTimeMillis();
if(is_old_format) {
JSONArray outputArray = res_obj.getJSONArray("outputs");
String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
JSONObject out = JSON.parseObject(output);
System.out.println(out.toJSONString());
}else{
String tmp_base64path = OcrPath + fileName;
File tmp_base64file = new File(tmp_base64path);
if(!tmp_base64file.exists()){
tmp_base64file.getParentFile().mkdirs();
}
tmp_base64file.createNewFile();
// write
FileWriter fw = new FileWriter(tmp_base64file, true);
BufferedWriter bw = new BufferedWriter(fw);
bw.write(res_obj.getString("tables"));
bw.flush();
bw.close();
fw.close();
String exelFilePath = OcrPath + fileName + "_1.xlsx";
Runtime.getRuntime().exec("touch "+exelFilePath).destroy();
Process exec = Runtime.getRuntime().exec("sed -i -e 's/\\\\n/\\n/g' " + tmp_base64path);
exec.waitFor();
exec.destroy();
Process exec1 = null;
String[] cmd = { "/bin/sh", "-c", "base64 -d " + tmp_base64path + " > " + exelFilePath };
exec1 = Runtime.getRuntime().exec(cmd);
exec1.waitFor();
exec1.destroy();
return JsonResult.success(fileName);
}
} catch (Exception e) {
e.printStackTrace();
}
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
@ResponseBody
@RequestMapping("getId")
public ResponseEntity getFirstLicence(String id) {
String exelFilePath = OcrPath + id + "_1.xlsx";
return export(new File(exelFilePath));
}
public ResponseEntity export(File file) {
if (file == null) {
return null;
}
HttpHeaders headers = new HttpHeaders();
headers.add("Cache-Control", "no-cache, no-store, must-revalidate");
headers.add("Content-Disposition", "attachment; filename=" + System.currentTimeMillis() + ".xls");
headers.add("Pragma", "no-cache");
headers.add("Expires", "0");
headers.add("Last-Modified", new Date().toString());
headers.add("ETag", String.valueOf(System.currentTimeMillis()));
return ResponseEntity
.ok()
.headers(headers)
.contentLength(file.length())
.contentType(MediaType.parseMediaType("application/octet-stream"))
.body(new FileSystemResource(file));
}
public static JSONObject getParam(int type, String dataValue) {
JSONObject obj = new JSONObject();
try {
obj.put("dataType", type);
obj.put("dataValue", dataValue);
} catch (JSONException e) {
e.printStackTrace();
}
return obj;
}
}
大功告成
table
将文件拖到此处,或点击上传
只能上传jpg/png文件,且不超过500kb