使用阿里云的图片识别成表格ocr(将图片表格转换成excel)

财务人员经常需要对照别人发来的表格图片,手工录入成自己的表格。为了简化这一过程,可以使用阿里云的表格识别 OCR 接口,把图片中的表格直接转换成 Excel 文件。

  • 关键词:图片识别、图片识别成表格、表格识别、OCR
  • 使用阿里云api
  • 购买(印刷文字识别-表格识别) https://market.aliyun.com/products/57124001/cmapi024968.html
  • 获得阿里云图片识别表格的appcode
效果图如下

使用阿里云的图片识别成表格ocr(将图片表格转换成excel)_第1张图片

整合的代码
package com.xai.wuye.controller.api;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.xai.wuye.common.JsonResult;
import com.xai.wuye.exception.ResultException;
import com.xai.wuye.model.AParam;
import com.xai.wuye.service.CarService;
import com.xai.wuye.util.HttpUtils;
import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import static org.apache.tomcat.util.codec.binary.Base64.encodeBase64;

@Controller
@EnableAsync
@RequestMapping("/api/ocr")
public class AliOCRImages {


    @Autowired
    CarService carService;


    private String OcrPath = "/home/runApp/car/orc/";


    @ResponseBody
    @RequestMapping("table")
    public JsonResult getFirstLicence(@RequestParam(value = "file", required = false) MultipartFile file) {
        if (file == null || file.isEmpty()||file.getSize() > 1204*1204*3)
            throw new ResultException(0,"文件为null,且不能大于3M");

        String filename = file.getOriginalFilename();
        String filepath = OcrPath+"temp/"+filename;
        File newFile = new File(filepath);
        try {
            file.transferTo(newFile);

            String host = "https://form.market.alicloudapi.com";
            String path = "/api/predict/ocr_table_parse";
            
            // 输入阿里的code
            String appcode = "4926a667ee6c41329c278361*****";
            String imgFile = "图片路径";
            Boolean is_old_format = false;//如果文档的输入中含有inputs字段,设置为True, 否则设置为False
            //请根据线上文档修改configure字段
            JSONObject configObj = new JSONObject();
            configObj.put("format", "xlsx");
            configObj.put("finance", false);
            configObj.put("dir_assure", false);
            String config_str = configObj.toString();
            //            configObj.put("min_size", 5);
            //String config_str = "";

            String method = "POST";
            Map headers = new HashMap();
            //最后在header中的格式(中间是英文空格)为Authorization:APPCODE 83359fd73fe94948385f570e3c139105
            headers.put("Authorization", "APPCODE " + appcode);

            Map querys = new HashMap();

            // 对图像进行base64编码
            String imgBase64 = "";
            try {

                byte[] content = new byte[(int) newFile.length()];
                FileInputStream finputstream = new FileInputStream(newFile);
                finputstream.read(content);
                finputstream.close();
                imgBase64 = new String(encodeBase64(content));
            } catch (IOException e) {
                e.printStackTrace();
                return null;
            }
            // 拼装请求body的json字符串
            JSONObject requestObj = new JSONObject();
            try {
                if(is_old_format) {
                    JSONObject obj = new JSONObject();
                    obj.put("image", getParam(50, imgBase64));
                    if(config_str.length() > 0) {
                        obj.put("configure", getParam(50, config_str));
                    }
                    JSONArray inputArray = new JSONArray();
                    inputArray.add(obj);
                    requestObj.put("inputs", inputArray);
                }else{
                    requestObj.put("image", imgBase64);
                    if(config_str.length() > 0) {
                        requestObj.put("configure", config_str);
                    }
                }
            } catch (JSONException e) {
                e.printStackTrace();
            }
            String bodys = requestObj.toString();

            try {
                /**
                 * 重要提示如下:
                 * HttpUtils请从
                 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
                 * 下载
                 *
                 * 相应的依赖请参照
                 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
                 */
                HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
                int stat = response.getStatusLine().getStatusCode();
                if(stat != 200){
                    System.out.println("Http code: " + stat);
                    System.out.println("http header error msg: "+ response.getFirstHeader("X-Ca-Error-Message"));
                    System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
                    return null;
                }

                String res = EntityUtils.toString(response.getEntity());
                JSONObject res_obj = JSON.parseObject(res);
                Long fileName = System.currentTimeMillis();
                if(is_old_format) {



                    JSONArray outputArray = res_obj.getJSONArray("outputs");
                    String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
                    JSONObject out = JSON.parseObject(output);
                    System.out.println(out.toJSONString());


                }else{

                    String tmp_base64path = OcrPath + fileName;
                    File tmp_base64file = new File(tmp_base64path);
                    if(!tmp_base64file.exists()){
                        tmp_base64file.getParentFile().mkdirs();
                    }
                    tmp_base64file.createNewFile();

                    // write
                    FileWriter fw = new FileWriter(tmp_base64file, true);
                    BufferedWriter bw = new BufferedWriter(fw);
                    bw.write(res_obj.getString("tables"));
                    bw.flush();
                    bw.close();
                    fw.close();

                    String exelFilePath = OcrPath + fileName + "_1.xlsx";
                    Runtime.getRuntime().exec("touch "+exelFilePath).destroy();
                    Process exec = Runtime.getRuntime().exec("sed -i -e 's/\\\\n/\\n/g' " + tmp_base64path);
                    exec.waitFor();
                    exec.destroy();

                    Process exec1 = null;
                    String[] cmd = { "/bin/sh", "-c", "base64 -d " + tmp_base64path + " > " + exelFilePath };
                    exec1 = Runtime.getRuntime().exec(cmd);
                    exec1.waitFor();
                    exec1.destroy();


                    return JsonResult.success(fileName);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }



        } catch (IOException e) {
            e.printStackTrace();
        }

        return null;
    }


    @ResponseBody
    @RequestMapping("getId")
    public ResponseEntity getFirstLicence(String id) {
        String exelFilePath = OcrPath + id + "_1.xlsx";
        return export(new File(exelFilePath));
    }


    public ResponseEntity export(File file) {
        if (file == null) {
            return null;
        }
        HttpHeaders headers = new HttpHeaders();
        headers.add("Cache-Control", "no-cache, no-store, must-revalidate");
        headers.add("Content-Disposition", "attachment; filename=" + System.currentTimeMillis() + ".xls");
        headers.add("Pragma", "no-cache");
        headers.add("Expires", "0");
        headers.add("Last-Modified", new Date().toString());
        headers.add("ETag", String.valueOf(System.currentTimeMillis()));

        return ResponseEntity
                .ok()
                .headers(headers)
                .contentLength(file.length())
                .contentType(MediaType.parseMediaType("application/octet-stream"))
                .body(new FileSystemResource(file));
    }

    public static JSONObject getParam(int type, String dataValue) {
        JSONObject obj = new JSONObject();
        try {
            obj.put("dataType", type);
            obj.put("dataValue", dataValue);
        } catch (JSONException e) {
            e.printStackTrace();
        }
        return obj;
    }

}

大功告成

  • 以下是静态页面代码



  
  
  
    table


  
将文件拖到此处,或点击上传
只能上传jpg/png文件,且不超过500kb
{{ item.name }}
修改名字

转载于:https://www.cnblogs.com/rolandlee/p/10671544.html

你可能感兴趣的:(使用阿里云的图片识别成表格ocr(将图片表格转换成excel))