Java调用 Tesseract 实现 图片中文字识别

1.下载语言包(简体中文)

地址:https://github.com/tesseract-ocr/tessdata/blob/master/chi_sim.traineddata

2.将语言包(chi_sim.traineddata)放入IDEA项目 resources 目录下的 tessdata 文件夹中(代码通过 LoadLibs.extractTessResources("tessdata") 读取该目录)

Java调用 Tesseract 实现 图片中文字识别_第1张图片

3.加入maven依赖


<dependency>
   <groupId>net.java.dev.jna</groupId>
   <artifactId>jna</artifactId>
   <version>4.1.0</version>
</dependency>
<dependency>
   <groupId>net.sourceforge.tess4j</groupId>
   <artifactId>tess4j</artifactId>
   <version>2.0.1</version>
   <exclusions>
      <exclusion>
         <groupId>com.sun.jna</groupId>
         <artifactId>jna</artifactId>
      </exclusion>
   </exclusions>
</dependency>

4.实现代码

package com.xinjian.x.modules;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.util.ImageHelper;
import net.sourceforge.tess4j.util.LoadLibs;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;

/**
 * Demo entry point: crops a region out of an image, preprocesses it
 * (grayscale, binarize, upscale) and runs Tesseract OCR on it through Tess4J
 * using the Simplified Chinese ("chi_sim") language data, then prints the
 * recognized text to standard output.
 */
public class Test {
    /** Path of the image to recognize. */
    private static final String IMAGE_PATH = "D:/imagesImage/6/1.jpg";
    /** Crop origin, expressed as a fraction of the image width / height. */
    private static final double CROP_X_RATIO = 0.2;
    private static final double CROP_Y_RATIO = 0.05;
    /** Preferred size of the cropped region, in pixels. */
    private static final int CROP_WIDTH = 250;
    private static final int CROP_HEIGHT = 100;
    /** Upscale factor applied before OCR. */
    private static final int SCALE_FACTOR = 5;

    /**
     * Runs the OCR demo.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File imageFile = new File(IMAGE_PATH);
        try {
            BufferedImage img = ImageIO.read(imageFile);
            // ImageIO.read returns null (instead of throwing) when no registered
            // reader understands the file format.
            if (img == null) {
                System.err.println("Unsupported or unreadable image format: " + imageFile);
                return;
            }

            // Region to scan: starts at 20% of the width and 5% of the height.
            int x = (int) (img.getWidth() * CROP_X_RATIO);
            int y = (int) (img.getHeight() * CROP_Y_RATIO);
            // Clamp the crop size so a small image does not trigger a
            // RasterFormatException; x < width and y < height always hold here,
            // so both dimensions stay >= 1.
            int cropWidth = Math.min(CROP_WIDTH, img.getWidth() - x);
            int cropHeight = Math.min(CROP_HEIGHT, img.getHeight() - y);
            img = ImageHelper.getSubImage(img, x, y, cropWidth, cropHeight);

            // Convert to grayscale.
            img = ImageHelper.convertImageToGrayscale(img);
            // Binarize (black/white).
            img = ImageHelper.convertImageToBinary(img);
            // Upscale to improve the recognition rate: per the original author,
            // many images that cannot be recognized at native size become easy
            // to recognize once enlarged; 5x was chosen with low-spec client
            // machines and broken dot-matrix printouts in mind.
            img = ImageHelper.getScaledInstance(
                    img, img.getWidth() * SCALE_FACTOR, img.getHeight() * SCALE_FACTOR);
            // Debug aid: uncomment to dump the preprocessed image.
            //ImageIO.write(img, "jpg", new File("D:/imagesImage/jcaptcha0.jpg"));

            ITesseract instance = new Tesseract();
            // Extract the bundled "tessdata" resources to a folder on disk.
            File tessDataFolder = LoadLibs.extractTessResources("tessdata");
            // Select the language data.
            instance.setLanguage("chi_sim");
            // Point Tesseract at the extracted language data folder.
            instance.setDatapath(tessDataFolder.getAbsolutePath());

            String result = instance.doOCR(img);
            System.out.println(result);
        } catch (Exception e) {
            // Top-level boundary of a demo program: report the full failure
            // (type + stack trace) rather than only a possibly-null message.
            System.err.println("OCR failed for " + imageFile);
            e.printStackTrace();
        }
    }
}

你可能感兴趣的:(ocr识别)