Java实现验证码识别

在 Java 中识别验证码比较简单,使用的软件是 Tesseract-OCR。这个软件需要安装在本地,傻瓜式安装即可(方便调用)。

github下载地址 
https://github.com/tesseract-ocr/tessdata

博主是在官网下载的。

该软件默认识别的是英文。如果需要识别中文,需要将中文的训练数据文件 chi_sim.traineddata 存放到 C:\Program Files (x86)\Tesseract-OCR\tessdata 目录下。

简单的验证码识别,直接调用 Tesseract 的 doOCR(image) 方法即可。如果验证码的噪点多并且有干扰线,这时候就需要对图像进行处理了。

图片处理大致思路:先做灰度化,再二值化,最后去除干扰线。

话不多说上代码。

实现代码

/**
 * Demo entry point: downloads a captcha image to a local file and prints the
 * text recognised in it.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    String url = "验证图片地址";
    // Local path the captcha image is saved to.
    String path = "C:\\Users\\Administrator\\Desktop\\1.jpg";
    // Download the captcha image.
    downloadPicture(url, path);
    Demo demo = new Demo();
    // FindVCode is the recognition method shown in this file; the original
    // call used the name FindOCR, which is not defined in the visible source.
    // 'false' keeps the default OCR language instead of switching to chi_sim.
    String code = demo.FindVCode(path, false);
    System.out.println(code);
}

下载验证码很简单,就是用 HTTPClient 获取验证码图片的链接,然后下载就可以了。我这里只放一个下载的代码;至于如何获取链接,每个网站的请求都不一样,就不放出了。

    /**
     * Downloads the resource at {@code urlList} and stores it in the file {@code path}.
     *
     * <p>The payload is read completely into memory first and the target file is
     * only opened once the download has succeeded, so a failed download does not
     * truncate an existing file. Failures are reported via a printed stack trace
     * and are not propagated to the caller (best-effort, as before).
     *
     * @param urlList address of the image to download
     * @param path    local file the image bytes are written to
     */
    private static void downloadPicture(String urlList,String path) {
        try {
            URL url = new URL(urlList);
            ByteArrayOutputStream output = new ByteArrayOutputStream();

            // try-with-resources closes the network stream even when reading fails.
            try (DataInputStream dataInputStream = new DataInputStream(url.openStream())) {
                byte[] buffer = new byte[1024];
                int length;
                // read() signals end of stream with -1.
                while ((length = dataInputStream.read(buffer)) != -1) {
                    output.write(buffer, 0, length);
                }
            }

            try (FileOutputStream fileOutputStream = new FileOutputStream(new File(path))) {
                fileOutputStream.write(output.toByteArray());
            }
        } catch (IOException e) {
            // Also covers MalformedURLException, which is a subclass of IOException.
            e.printStackTrace();
        }
    }

识别验证码的工具类

/**
 * Recognises the text of the captcha image stored at {@code srcImg}.
 *
 * <p>The image is cleaned with {@code cleanLinesInImage} before it is handed to
 * Tesseract.
 *
 * @param srcImg   path of the image file to recognise
 * @param language {@code true} to switch the OCR language to {@code chi_sim};
 *                 {@code false} to keep the engine default
 * @return the recognised text, {@code "图片路径有误或不存在"} when the file does not
 *         exist, or {@code "未知错误"} when any exception occurs
 */
public  String FindVCode(String srcImg, boolean language) {
        try {
            File imgFile = new File(srcImg);
            // The existence check belongs on the File; srcImg is only the path string.
            if (!imgFile.exists()) {
                return "图片路径有误或不存在";
            }
            BufferedImage testImage = ImageIO.read(imgFile);
            Tesseract tesseract = new Tesseract();
            // Default location of the trained-data files.
            tesseract.setDatapath("/usr/local/share/tessdata/");
            if (language) {
                tesseract.setLanguage("chi_sim");
            }
            // Image clean-up step; if it is not needed, call
            // tesseract.doOCR(testImage) directly instead.
            BufferedImage cleanedImg = cleanLinesInImage(testImage);
            return tesseract.doOCR(cleanedImg);
        } catch (Exception e) {
            e.printStackTrace();
            return "未知错误";
        }
    }

图片处理过程

/**
 * Cleans a captcha image and returns a black/white copy of it.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>every non-black pixel is painted white unless all four diagonal
 *       neighbours are black;</li>
 *   <li>every pixel that is not pure white is painted black;</li>
 *   <li>the image is converted to a grey matrix, thresholded with
 *       {@code ostu} and written into a {@code TYPE_BYTE_BINARY} image;</li>
 *   <li>{@code cleanImage} is run on the binary image.</li>
 * </ol>
 *
 * <p>Note that steps 1 and 2 modify {@code image} in place.
 *
 * @param image the source image; modified in place
 * @return a new binary image of the same size
 * @throws IOException declared for compatibility with existing callers
 */
private BufferedImage cleanLinesInImage(BufferedImage image) throws IOException{
    int h = image.getHeight();
    int w = image.getWidth();

    // Step 1: whiten non-black pixels that are not enclosed by black diagonals.
    for (int x = 0; x < w; x++) {
        for (int y = 0; y < h; y++) {
            int blackDiagonals = 0;
            if (isBlackColor(image, x + 1, y + 1)) {
                blackDiagonals++;
            }
            if (isBlackColor(image, x + 1, y - 1)) {
                blackDiagonals++;
            }
            if (isBlackColor(image, x - 1, y + 1)) {
                blackDiagonals++;
            }
            if (isBlackColor(image, x - 1, y - 1)) {
                blackDiagonals++;
            }
            boolean enclosedByBlack = blackDiagonals >= 4;
            if (!isBlackColor(image, x, y) && !enclosedByBlack) {
                image.setRGB(x, y, 0xFFFFFFFF); // argb: AARRGGBB
            }
        }
    }

    // Step 2: anything that is not pure white becomes black.
    for (int x = 0; x < w; x++) {
        for (int y = 0; y < h; y++) {
            if ((image.getRGB(x, y) & 0xFFFFFF) != 0xFFFFFF) {
                image.setRGB(x, y, 0xFF000000);
            }
        }
    }

    // Step 3a: grey matrix indexed [x][y], as expected by ostu().
    // NOTE(review): the original text used an undefined 'gray' variable; the
    // channel average below is a reconstruction - confirm against the
    // intended grey-scale formula.
    int[][] gray = new int[w][h];
    for (int x = 0; x < w; x++) {
        for (int y = 0; y < h; y++) {
            int argb = image.getRGB(x, y);
            int r = (argb >> 16) & 0xFF;
            int g = (argb >> 8) & 0xFF;
            int b = argb & 0xFF;
            gray[x][y] = (r + g + b) / 3;
        }
    }

    // Step 3b: binarise around the computed threshold.
    int threshold = ostu(gray, w, h);
    BufferedImage binaryBufferedImage = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);
    for (int x = 0; x < w; x++) {
        for (int y = 0; y < h; y++) {
            int argb = gray[x][y] > threshold ? 0xFFFFFFFF : 0xFF000000;
            binaryBufferedImage.setRGB(x, y, argb);
        }
    }

    // Step 4: remove interference lines from the binary image.
    cleanImage(binaryBufferedImage, h, w);

    return binaryBufferedImage;
}
/**
 * Tells whether the pixel at ({@code x}, {@code y}) is close to black, i.e. each
 * of its red, green and blue components is below 30.
 *
 * @param image the image to inspect
 * @param x     pixel column
 * @param y     pixel row
 * @return {@code true} for a near-black pixel; {@code false} otherwise, including
 *         when the coordinates lie outside the image
 */
private boolean isBlackColor(BufferedImage image, int x, int y) {
    // Coordinates outside the image never count as black.
    boolean columnInside = x >= 0 && x < image.getWidth();
    boolean rowInside = y >= 0 && y < image.getHeight();
    if (!columnInside || !rowInside) {
        return false;
    }

    int argb = image.getRGB(x, y);
    int red = (argb >> 16) & 0xFF;
    int green = (argb >> 8) & 0xFF;
    int blue = argb & 0xFF;

    return red < 30 && green < 30 && blue < 30;
}
/**
 * Removes thin interference lines from a binary image, in place.
 *
 * <p>Every interior pixel (the one-pixel border is skipped) that {@code isBlack}
 * accepts is set to {@code -1} when the two neighbours opposite each other along
 * any one of the four directions (horizontal, vertical, both diagonals) are both
 * accepted by {@code isWhite}. Pixels are visited row by row, so earlier changes
 * are visible to later checks.
 *
 * @param binaryBufferedImage the image to clean; modified in place
 * @param h                   number of rows to scan
 * @param w                   number of columns to scan
 */
public void cleanImage(BufferedImage binaryBufferedImage,int h ,int w ){
    for (int row = 1; row < h - 1; row++) {
        for (int col = 1; col < w - 1; col++) {
            if (!isBlack(binaryBufferedImage.getRGB(col, row))) {
                continue;
            }

            // Left and right neighbours both empty.
            boolean horizontalGap = isWhite(binaryBufferedImage.getRGB(col - 1, row))
                    && isWhite(binaryBufferedImage.getRGB(col + 1, row));
            // Lower and upper neighbours both empty.
            boolean verticalGap = isWhite(binaryBufferedImage.getRGB(col, row + 1))
                    && isWhite(binaryBufferedImage.getRGB(col, row - 1));
            // Opposite diagonal neighbours both empty.
            boolean antiDiagonalGap = isWhite(binaryBufferedImage.getRGB(col - 1, row + 1))
                    && isWhite(binaryBufferedImage.getRGB(col + 1, row - 1));
            boolean mainDiagonalGap = isWhite(binaryBufferedImage.getRGB(col + 1, row + 1))
                    && isWhite(binaryBufferedImage.getRGB(col - 1, row - 1));

            if (horizontalGap || verticalGap || antiDiagonalGap || mainDiagonalGap) {
                binaryBufferedImage.setRGB(col, row, -1);
            }
        }
    }
}

    // NOTE(review): this span looks damaged. The loop header below stops at
    // "x 300)", so the rest of this method and the start of whatever followed it
    // appear to be missing (possibly text between '<' and '>' was stripped when
    // the article was extracted). Restore it from the original source before use.
    public Mat bufferedImageToMat(BufferedImage bi) {
        // Creates a Mat sized from bi's height and width with type CvType.CV_8UC1
        // (presumably OpenCV's single-channel 8-bit type - confirm).
        Mat mat = new Mat(bi.getHeight(), bi.getWidth(), CvType.CV_8UC1);

        // One-element byte buffers holding the values 255 and 0.
        byte[] white = new byte[] { (byte) 255 };
        byte[] black = new byte[] { (byte) 0 };

        // NOTE(review): truncated here. The "300) ... return true/false" tail
        // presumably belongs to a separate boolean helper (cleanImage calls isBlack
        // and isWhite, which have no visible definition) - confirm.
        for (int x=0; x 300)
        {
            return true;
        }
        return false;
    }

    /**
     * Classifies a colour as "nearly black or nearly white".
     *
     * @param colorInt packed RGB colour value
     * @return {@code 1} when the brightness reported by {@code getColorBright} is
     *         below 30 or above 730, otherwise {@code 0}
     */
    public int isBlackOrWhite(int colorInt)
    {
        final int brightness = getColorBright(colorInt);
        final boolean nearBlack = brightness < 30;
        final boolean nearWhite = brightness > 730;
        return (nearBlack || nearWhite) ? 1 : 0;
    }

    /**
     * Returns the brightness of a colour as the sum of its red, green and blue
     * components (0 to 765). Any alpha bits in {@code colorInt} are ignored.
     *
     * @param colorInt packed colour with red in bits 16-23, green in bits 8-15
     *                 and blue in bits 0-7
     * @return red + green + blue
     */
    public int getColorBright(int colorInt)
    {
        final int red = (colorInt >> 16) & 0xFF;
        final int green = (colorInt >> 8) & 0xFF;
        final int blue = colorInt & 0xFF;
        return red + green + blue;
    }

    /**
     * Computes a binarisation threshold for a grey image with Otsu's method,
     * i.e. the level that maximises the between-class variance.
     *
     * <p>Only the low 8 bits of every entry of {@code gray} are used as the grey
     * level.
     *
     * @param gray grey values indexed as {@code gray[x][y]} with
     *             {@code 0 <= x < w} and {@code 0 <= y < h}
     * @param w    image width
     * @param h    image height
     * @return the selected threshold in the range 0..255; 0 when all pixels
     *         share one grey level
     */
    public int ostu(int[][] gray, int w, int h)
    {
        // One bucket per 8-bit grey level. Sizing this by w * h (as before)
        // overflows the array for images with fewer than 256 pixels.
        int[] histData = new int[256];
        // Calculate histogram
        for (int x = 0; x < w; x++)
        {
            for (int y = 0; y < h; y++)
            {
                int level = 0xFF & gray[x][y];
                histData[level]++;
            }
        }

        // Total number of pixels
        int total = w * h;

        // Sum of all grey levels, weighted by their frequency
        float sum = 0;
        for (int t = 0; t < 256; t++)
        {
            sum += t * histData[t];
        }

        float sumB = 0;
        int wB = 0;
        int wF = 0;

        float varMax = 0;
        int threshold = 0;

        for (int t = 0; t < 256; t++)
        {
            wB += histData[t]; // Weight Background
            if (wB == 0)
            {
                continue;
            }

            wF = total - wB; // Weight Foreground
            if (wF == 0)
            {
                // Every pixel is already in the background class.
                break;
            }

            sumB += (float) (t * histData[t]);

            float mB = sumB / wB; // Mean Background
            float mF = (sum - sumB) / wF; // Mean Foreground

            // Calculate Between Class Variance
            float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF);

            // Check if new maximum found
            if (varBetween > varMax)
            {
                varMax = varBetween;
                threshold = t;
            }
        }

        return threshold;
    }

其中部分是借鉴网上的源代码。如果有编写不对,或者可以修改的更好的建议请指出。

 

 

你可能感兴趣的:(Java实现验证码识别)