Java 图片提取验证码

2019独角兽企业重金招聘Python工程师标准>>> hot3.png

安装 Tesseract-OCR

Windows 版本

  1. tesseract-ocr-setup-xx.xx.exe       
  2. chi_sim.traineddata.gz   中文语言包  

这两个请自行百度即可,然后我们将其安装在D:下,其中将语言包放在安装目录下的tessdata的目录下。

Linux 版本

我使用的是centos7,下面给出安装tesseract的前提条件。

  1. 安装编译库
    yum install autoconf automake libtool
    yum install libjpeg-devel libpng-devel libtiff-devel zlib-devel

    请注意,上面每条 yum 命令都一次安装了多个库(库名之间以空格分隔);这些库在一般系统中大多已经存在。

  2. 安装依赖的leptonica库

    wget http://www.leptonica.com/source/leptonica-1.72.tar.gz
    tar -xvf leptonica-1.72.tar.gz  
    cd leptonica-1.72
    ./configure --with-libpng && make && make install

    这个依赖库要注意,一定是1.71以上的版本。

  3. 下载编译tesseract

    wget https://github.com/tesseract-ocr/tesseract/archive/3.04.00.tar.gz
    mv 3.04.00.tar.gz  Tesseract3.04.00.tar.gz
    tar -xvf Tesseract3.04.00.tar.gz  
    cd tesseract-3.04.00/
    ./autogen.sh
    ./configure
    make && make install
    ldconfig

    tesseract我安装在了 /usr/local 这个目录下,名称为 tesseract-3.04.00 。如果你使用的是3.01的版本,需要在./autogen.sh 后面执行mkdir m4;这条命令,否则他会提示m4这个目录不存在。

  4. 下载识别库(语言包)
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_sim.traineddata 
    wget --no-check-certificate https://github.com/tesseract-ocr/tessdata/raw/master/chi_tra.traineddata
    wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.eng.tar.gz
    

    注意此处的语言包以及解压出的语言包都要放在 /usr/local/share/tessdata/ 目录下。

Java 读取数据 

  1. 启动命令程序
    package com.zefun.common.utils;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.log4j.Logger;
    import org.jdesktop.swingx.util.OS;
    
    /**
     * Runs the external {@code tesseract} command-line program on an image
     * and returns the text it wrote to its output file.
     *
     * @author 高国藩
     * @date 2016-12-05 10:27:16
     */
    public class Ocr {

        /** Language option flag: the lowercase letter "l", not the digit 1. */
        private static final String LANG_OPTION = "-l";
        /** Platform line separator. */
        private static final String EOL = System.getProperty("line.separator");
        /** Platform file separator. */
        private static final String GNL = System.getProperty("file.separator");
        /** log */
        private Logger logger = Logger.getLogger(Ocr.class);

        /**
         * Tesseract installation directory. The active value is the Windows one;
         * on Linux the executable is invoked by name, so this path is not used.
         */
    //    private String tessPath = GNL + "usr" + GNL + "local" + GNL + "tesseract-3.04.00";
        private String tessPath = new File("D:\\Tesseract-OCR").getAbsolutePath();

        /**
         * Converts the image to a temporary TIFF, runs tesseract on it inside the
         * image's parent directory and returns the recognised text.
         *
         * @author 高国藩
         * @date 2016-12-05 10:28:47
         * @param imageFile          image file to recognise
         * @param imageFormat        format name of the source image (for example "jpg")
         * @return ver_code          recognised text, each line terminated by the platform line separator
         * @throws Exception         if the image cannot be converted, the process cannot be
         *                           started, or tesseract exits with a non-zero code
         *                           (reported as a {@link RuntimeException})
         */
        public String recognizeText(File imageFile, String imageFormat) throws Exception {

            File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
            if (tempImage == null) {
                // createImage signals a failed conversion with null; fail with a clear message
                // instead of a NullPointerException further down.
                throw new java.io.IOException("Could not convert image to TIFF: " + imageFile);
            }

            File workDir = imageFile.getParentFile();
            File outputFile = new File(workDir, "output");
            // tesseract is given the base name "output"; the result is read from "output.txt".
            File resultFile = new File(outputFile.getAbsolutePath() + ".txt");

            List<String> cmd = new ArrayList<String>();
            cmd.add(OS.isLinux() ? "tesseract" : tessPath + "//tesseract");
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            cmd.add(LANG_OPTION);
            // cmd.add("chi_sim");  switch the language pack here
            cmd.add("eng");

            ProcessBuilder pb = new ProcessBuilder(cmd);
            pb.directory(workDir);
            pb.redirectErrorStream(true);

            StringBuilder recognized = new StringBuilder();
            try {
                logger.info(cmd.toString());
                // e.g. tesseract.exe 1.jpg 1 -l chi_sim
                Process process = pb.start();

                // The merged stdout/stderr must be consumed before waiting; otherwise the
                // child can block on a full pipe and waitFor() never returns.
                String toolOutput = drain(process);
                int exitCode = process.waitFor();

                if (exitCode != 0) {
                    logger.error("tesseract exited with code " + exitCode + ": " + toolOutput);
                    throw new RuntimeException(describeExitCode(exitCode));
                }

                try (BufferedReader in = new BufferedReader(new InputStreamReader(
                        new FileInputStream(resultFile), "UTF-8"))) {
                    String line;
                    while ((line = in.readLine()) != null) {
                        recognized.append(line).append(EOL);
                    }
                }
            }
            finally {
                // Remove the working files on both the success and the failure path.
                tempImage.delete();
                resultFile.delete();
            }

            logger.info("图形识别结果 ====>>> " + recognized.toString());
            return recognized.toString();
        }

        /** Reads everything the process writes to its (merged) output stream. */
        private static String drain(Process process) throws java.io.IOException {
            StringBuilder out = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    out.append(line).append(EOL);
                }
            }
            return out.toString();
        }

        /** Maps the exit codes this class distinguishes to an error message. */
        private static String describeExitCode(int exitCode) {
            switch (exitCode) {
                case 1:
                    return "Errors accessing files.There may be spaces in your image's filename.";
                case 29:
                    return "Cannot recongnize the image or its selected region.";
                case 31:
                    return "Unsupported image format.";
                default:
                    return "Errors occurred.";
            }
        }

    }
    

    此处要注意一下tesseract的命令目录,Windows和Linux的目录不同,尤其分隔符。

  2. 解析图片程序
    package com.zefun.common.utils;
    
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.Locale;
    
    import javax.imageio.IIOImage;
    import javax.imageio.ImageIO;
    import javax.imageio.ImageReader;
    import javax.imageio.ImageWriteParam;
    import javax.imageio.ImageWriter;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.stream.ImageInputStream;
    import javax.imageio.stream.ImageOutputStream;
    
    import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
    
    /**
     * ver_image helper that converts an image file into an uncompressed TIFF file.
     *
     * @author 高国藩
     * @date 2016-12-05 10:31:09
     */
    public class ImageIOHelper {
        /**
         * Re-encodes an image file as an uncompressed TIFF written next to the original.
         *
         * @param imageFile   source image file
         * @param imageFormat format name of the source image (for example "jpg")
         * @return the TIFF file that was written, or {@code null} if reading or writing failed
         * @throws java.util.NoSuchElementException if no reader for {@code imageFormat}
         *         or no TIFF writer is registered
         */
        public static File createImage(File imageFile, String imageFormat) {
            File tempFile = null;
            ImageReader reader = null;
            ImageWriter writer = null;
            try {
                Iterator<ImageReader> readers = ImageIO
                        .getImageReadersByFormatName(imageFormat);
                if (!readers.hasNext()) {
                    throw new java.util.NoSuchElementException(
                            "No ImageReader registered for format: " + imageFormat);
                }
                reader = readers.next();

                // Get tif writer
                Iterator<ImageWriter> writers = ImageIO
                        .getImageWritersByFormatName("tiff");
                if (!writers.hasNext()) {
                    throw new java.util.NoSuchElementException(
                            "No ImageWriter registered for format: tiff");
                }
                writer = writers.next();

                // Set up the writeParam
                TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(
                        Locale.CHINESE);
                tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

                try (ImageInputStream iis = ImageIO.createImageInputStream(imageFile)) {
                    if (iis == null) {
                        throw new IOException("Cannot open image for reading: " + imageFile);
                    }
                    reader.setInput(iis);
                    // Read the stream metadata
                    IIOMetadata streamMetadata = reader.getStreamMetadata();

                    BufferedImage bi = reader.read(0);
                    IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

                    tempFile = tempImageFile(imageFile);
                    try (ImageOutputStream ios = ImageIO.createImageOutputStream(tempFile)) {
                        if (ios == null) {
                            throw new IOException("Cannot open file for writing: " + tempFile);
                        }
                        writer.setOutput(ios);
                        writer.write(streamMetadata, image, tiffWriteParam);
                    }
                }
            }
            catch (IOException e) {
                e.printStackTrace();
                // Do not hand a partially written file back to the caller.
                if (tempFile != null) {
                    tempFile.delete();
                    tempFile = null;
                }
            }
            finally {
                if (writer != null) {
                    writer.dispose();
                }
                if (reader != null) {
                    reader.dispose();
                }
            }
            return tempFile;
        }

        /**
         * Derives the temporary TIFF path for an image: a "0" is appended to the
         * file's base name and the extension (if any) is replaced by "tif",
         * e.g. {@code a.jpg -> a0.tif}.
         *
         * @author 高国藩
         * @date 2016-12-05 10:31:41
         * @param imageFile  source image file
         * @return           path of the temporary TIFF file
         */
        private static File tempImageFile(File imageFile) {
            String path = imageFile.getPath();
            int dot = path.lastIndexOf('.');
            int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
            // Only a dot inside the file name (after the last separator) marks an extension.
            String base = dot > separator ? path.substring(0, dot) : path;
            return new File(base + "0.tif");
        }
    }
    

    该程序会首先将图片转换为tif类型文件,再从中读取出数据。

  3. 测试加载

    // Resolve the captcha image to an absolute path, then run OCR on it as a jpg.
    File captchaImage = new File(path4).getAbsoluteFile();
    String valCode = new Ocr().recognizeText(captchaImage, "jpg");
    logger.info(valCode);

    注意在测试中的文件路径问题,Linux和Windows区别很大。

  4. Maven 包管理
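    <dependency>
        <groupId>net.java.dev.jna</groupId>
        <artifactId>jna</artifactId>
        <version>4.2.1</version>
    </dependency>
    <dependency>
        <groupId>net.sourceforge.tess4j</groupId>
        <artifactId>tess4j</artifactId>
        <version>2.0.1</version>
        <exclusions>
            <exclusion>
                <groupId>com.sun.jna</groupId>
                <artifactId>jna</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>com.kenai.nbpwr</groupId>
        <artifactId>org-jdesktop-swingx</artifactId>
        <version>1.6-201002261215</version>
    </dependency>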

     

使用Java模拟系统登录

package com.zefun.wechat.controller;

import java.io.File;
import java.io.InputStream;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.servlet.ModelAndView;

import com.zefun.common.consts.Url;
import com.zefun.common.utils.HttpClientUtil;
import com.zefun.common.utils.Ocr;
import com.zefun.web.controller.BaseController;
import net.sf.json.JSONObject;

/**
 * ImageUtilsController: downloads a captcha image, recognises it with
 * {@link Ocr} and submits a login form using the recognised code.
 *
 * @author 高国藩
 * @date 2016-12-05 12:01:32
 */
@Controller
public class ImageUtilsController extends BaseController {

    /** logger */
    private Logger logger = Logger.getLogger(ImageUtilsController.class);

    /**
     * Fetches the captcha image, runs OCR on it and posts the login form,
     * reusing the cookies obtained with the captcha response.
     *
     * @author 高国藩
     * @date 2016-12-06 18:49:10
     * @param request    request
     * @return           ModelAndView (always {@code null}; failures are logged, not rethrown)
     */
    @RequestMapping(value = Url.MessagePushMember.VIEW_IMAGE, method = RequestMethod.GET)
    public ModelAndView appointView(HttpServletRequest request){
        String verPath = "http://vip1.sentree.com.cn/shair/vc";
        String loginAction = "http://vip1.sentree.com.cn/shair/loginAction!ajaxLogin.action";

        File tempImagePathFile = null;
        // Both HTTP clients are closed by try-with-resources so their connections are released.
        try (CloseableHttpClient httpImageClientStore = HttpClientBuilder.create().build()) {
            HttpGet imagePath = new HttpGet(verPath);
            HttpResponse imageResponse = httpImageClientStore.execute(imagePath);

            BasicCookieStore cookieStore = new BasicCookieStore();
            cookieStore = HttpClientUtil.setCookieStore(imageResponse, cookieStore, "vip1.sentree.com.cn");

            // Use the platform temp directory rather than a hard-coded "D:/" so this
            // also works where no D: drive exists.
            tempImagePathFile = File.createTempFile("captcha-", ".jpg");
            try (InputStream is = imageResponse.getEntity().getContent()) {
                FileUtils.copyInputStreamToFile(is, tempImagePathFile);
            }
            String valCode = new Ocr().recognizeText(tempImagePathFile, "jpg");

            try (CloseableHttpClient httpClientLogin =
                    HttpClients.custom().setDefaultCookieStore(cookieStore).build()) {
                HttpPost httpPost = new HttpPost(loginAction);
                // NOTE(review): type arguments look like they were lost from this listing;
                // restore them once HttpClientUtil.geneNameValPairs' signature is confirmed.
                // NOTE(review): credentials are hard-coded; move them to configuration.
                Map params = new HashMap<>();
                params.put("login", "fs");
                params.put("passwd", "ab82443397");
                params.put("rand", valCode.trim());
                List pairs = HttpClientUtil.geneNameValPairs(params);
                httpPost.setEntity(new UrlEncodedFormEntity(pairs, "UTF-8"));
                RequestConfig reqConf = RequestConfig.DEFAULT;
                httpPost.setConfig(reqConf);
                HttpResponse loginResult = httpClientLogin.execute(httpPost);
                String loginCode = EntityUtils.toString(loginResult.getEntity());

                // Constant-first comparison: a response without "code" no longer throws.
                Object code = JSONObject.fromObject(loginCode).get("code");
                if ("7".equals(String.valueOf(code))){
                    logger.info("系统侵入成功 ...");
                }
            }
        }
        catch (Exception e) {
            logger.error("Captcha login request failed", e);
        }
        finally {
            // Always remove the downloaded captcha, including when OCR or login fails.
            FileUtils.deleteQuietly(tempImagePathFile);
        }
        return null;
    }

}

 

转载于:https://my.oschina.net/gaoguofan/blog/800197

你可能感兴趣的:(Java 图片提取验证码)