package image.images;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

/**
 * Downloads a batch of captcha images and stores them as numbered PNG files.
 *
 * @author FMm
 */
public class HttpClientImage {

    /** Endpoint that returns a captcha image on each GET. */
    private static final String CAPTCHA_URL =
            "http://con.monyun.cn:9960/aut_checkCode.hts?iden=1422083454409442236&t=0.06292891333169814";

    /** Directory the downloaded images are written to. */
    private static final String OUTPUT_DIR = "/Users/mac/Desktop/image/";

    /** Number of captcha images fetched per run. */
    private static final int IMAGE_COUNT = 20;

    /** Delay between two requests, in milliseconds. */
    private static final long REQUEST_DELAY_MS = 2000;

    /**
     * Fetches {@link #IMAGE_COUNT} captcha images and saves them as {@code 0.png}, {@code 1.png}, ...
     *
     * @param args unused
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException if a request fails or an image cannot be written
     * @throws InterruptedException if the thread is interrupted while pausing between requests
     */
    public static void main(String[] args) throws ClientProtocolException, IOException, InterruptedException {
        // try-with-resources guarantees the client is closed even when a request throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            for (int i = 0; i < IMAGE_COUNT; i++) {
                // Pause between requests; per the original author's note the image fails to load otherwise.
                Thread.sleep(REQUEST_DELAY_MS);

                HttpGet httpGet = new HttpGet(CAPTCHA_URL);
                // Closing the response releases the underlying connection on every path.
                try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                    HttpEntity httpEntity = response.getEntity();
                    if (httpEntity == null) {
                        continue;
                    }

                    // Print the content type so the operator can confirm an image was returned.
                    // The header may be absent, so guard against null before reading its value.
                    System.out.println("Content-Type:"
                            + (httpEntity.getContentType() == null ? null : httpEntity.getContentType().getValue()));

                    // FileUtils.copyToFile leaves the source stream open, so close it here.
                    try (InputStream inputStream = httpEntity.getContent()) {
                        FileUtils.copyToFile(inputStream, new File(OUTPUT_DIR + i + ".png"));
                    }
                }
            }
        }
    }
}
package image.images;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.imageio.ImageIO;
import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream;
import javax.net.ssl.HttpsURLConnection;

/**
 * Cleans up captcha image backgrounds to improve OCR recognition.
 *
 * <p>Pixels in the outer border, very dark pixels and very light pixels are painted pure white.
 *
 * @author FMm
 */
public class ImageConverter {

    /** Width of the left/top border (in pixels) that is always painted white. */
    private static final int BORDER = 4;

    /** Columns with an x coordinate greater than this are painted white. */
    private static final int MAX_X = 100;

    /** Rows with a y coordinate greater than this are painted white. */
    private static final int MAX_Y = 35;

    /** Pixels whose three channels are all below this value are painted white. */
    private static final int DARK_LIMIT = 107;

    /** Pixels whose three channels are all at or above this value are painted white (red: strictly above). */
    private static final int LIGHT_LIMIT = 200;

    /** ARGB value of opaque white. */
    private static final int WHITE_RGB = Color.WHITE.getRGB();

    /** Image files collected by {@link #loadImages(File)}; accumulates across calls. */
    private static List<File> fileList = new ArrayList<File>();

    /**
     * Converts every image below the hard-coded source directory into the hard-coded target directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // First argument: directory holding the source images; second: directory for the cleaned images.
        convertAllImages("/Users/mac/Desktop/image/", "/Users/mac/Desktop/images/");
    }

    /**
     * Converts all png/jpg files found (recursively) under {@code dir} and writes them to the same
     * relative location under {@code saveDir}.
     */
    private static void convertAllImages(String dir, String saveDir) {
        dir = new File(dir).getAbsolutePath();
        saveDir = new File(saveDir).getAbsolutePath();
        loadImages(new File(dir));
        for (File file : fileList) {
            String filePath = file.getAbsolutePath();
            // Re-root the path: strip the source directory prefix and prepend the target directory.
            String dstPath = saveDir + filePath.substring(filePath.indexOf(dir) + dir.length());
            System.out.println("converting: " + filePath);
            replaceColor(filePath, dstPath);
        }
    }

    /**
     * Recursively collects files whose name ends with "png" or "jpg" into the shared file list.
     *
     * @param f a file or directory; {@code null} is ignored
     */
    public static void loadImages(File f) {
        if (f == null) {
            return;
        }
        if (f.isDirectory()) {
            File[] children = f.listFiles();
            if (children != null) {
                for (File child : children) {
                    loadImages(child);
                }
            }
        } else if (isImageFile(f)) {
            fileList.add(f);
        }
    }

    /** Converts, in place, every png/jpg file directly inside {@code dir}. */
    @SuppressWarnings("unused")
    private static void replaceFolderImages(String dir) {
        File[] files = new File(dir).listFiles(new FileFilter() {
            @Override
            public boolean accept(File file) {
                return isImageFile(file);
            }
        });
        // listFiles returns null when dir does not exist or is not a directory.
        if (files == null) {
            return;
        }
        for (File img : files) {
            replaceColor(img.getAbsolutePath(), img.getAbsolutePath());
        }
    }

    /** Returns whether the file name ends with "png" or "jpg" (case-sensitive). */
    private static boolean isImageFile(File file) {
        String name = file.getName();
        return name.endsWith("png") || name.endsWith("jpg");
    }

    /** Converts a single image, reporting (but not propagating) I/O failures. */
    private static void replaceColor(String srcFile, String dstFile) {
        try {
            Color color = new Color(255, 195, 195);
            replaceImageColor(srcFile, dstFile, color, Color.WHITE);
        } catch (IOException e) {
            System.err.println("Failed to convert " + srcFile + " -> " + dstFile);
            e.printStackTrace();
        }
    }

    /**
     * Reads an image from a local path or an http(s) URL, whitens its background and writes the
     * result to {@code dstFile}. The output format is taken from the text after the last '.' in
     * {@code file}. Nothing is written when the source cannot be decoded as an image.
     *
     * @param file source image: a local file path, or a URL starting with {@code http://} / {@code https://}
     * @param dstFile path of the file to write; missing parent directories are created and an
     *        existing file is replaced
     * @param srcColor currently ignored (kept for interface compatibility)
     * @param targetColor currently ignored; the replacement colour is always pure white
     * @throws IOException if reading or writing fails, or no writer exists for the output format
     */
    public static void replaceImageColor(String file, String dstFile, Color srcColor, Color targetColor)
            throws IOException {
        String trimmed = file.trim();
        URL source = (trimmed.startsWith("http://") || trimmed.startsWith("https://"))
                ? new URL(file)
                : new File(file).toURI().toURL();

        BufferedImage bi;
        // ImageIO.read does not close the stream it is given, so close it here.
        try (InputStream in = source.openStream()) {
            bi = ImageIO.read(in);
        }
        if (bi == null) {
            return;
        }

        for (int x = 0; x < bi.getWidth(); x++) {
            for (int y = 0; y < bi.getHeight(); y++) {
                if (shouldWhiten(x, y, bi.getRGB(x, y))) {
                    bi.setRGB(x, y, WHITE_RGB);
                }
            }
        }

        String type = file.substring(file.lastIndexOf(".") + 1);
        File out = new File(dstFile);
        File parent = out.getParentFile();
        // A bare file name has no parent directory to create.
        if (parent != null) {
            parent.mkdirs();
        }
        try {
            // ImageIO.write discards the contents of an existing file, closes its stream and
            // disposes the writer; it returns false when no suitable writer is registered.
            if (!ImageIO.write(bi, type, out)) {
                throw new IOException("No ImageIO writer available for format: " + type);
            }
        } finally {
            bi.flush();
        }
    }

    /**
     * Decides whether the pixel at (x, y) with the given packed RGB value is background:
     * it lies in the outer border, is very dark, or is very light.
     */
    private static boolean shouldWhiten(int x, int y, int rgb) {
        if (x < BORDER || x > MAX_X || y < BORDER || y > MAX_Y) {
            return true;
        }
        int red = (rgb >> 16) & 0xFF;
        int green = (rgb >> 8) & 0xFF;
        int blue = rgb & 0xFF;
        if (red < DARK_LIMIT && green < DARK_LIMIT && blue < DARK_LIMIT) {
            return true;
        }
        // Red uses a strict comparison while green and blue are inclusive, as in the original thresholds.
        return red > LIGHT_LIMIT && green >= LIGHT_LIMIT && blue >= LIGHT_LIMIT;
    }
}
package image.images;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

/**
 * Runs OCR on a captcha image and prints the sum of its first and third characters.
 *
 * @author FMm
 */
public class ReadImage {

    /** Matches exactly one ASCII digit; compiled once instead of on every call. */
    private static final Pattern SINGLE_DIGIT = Pattern.compile("[0-9]");

    /**
     * Recognises the hard-coded captcha image and prints the OCR text, the elapsed time and the
     * computed result. The text is expected to look like {@code <digit><operator><digit>}; the
     * two digits are always added, whatever the middle character is.
     *
     * @param args unused
     * @throws TesseractException if OCR fails
     */
    public static void main(String[] args) throws TesseractException {
        ITesseract instance = new Tesseract();
        // The tessdata directory (tess4j trained language data) must be given as an absolute path
        // when it is not located in the project root.
        instance.setDatapath("/Users/mac/Desktop/tessdata");
        // Recognition language; the matching traineddata file must be present in tessdata.
        instance.setLanguage("eng");
        // Image to recognise.
        File imageFile = new File("/Users/mac/Desktop/images/8.png");
        long startTime = System.currentTimeMillis();
        String ocrResult = instance.doOCR(imageFile);
        System.out.println(ocrResult);

        // Strip surrounding whitespace so a leading or trailing line break does not shift the
        // character positions read below.
        String text = ocrResult == null ? "" : ocrResult.trim();

        // Both operands must be digits and all three characters must exist; otherwise
        // substring/parseInt below would throw.
        if (text.length() >= 3 && isStartWithNumber(text) && isStartWithNumber(text.substring(2))) {
            String start = text.substring(0, 1);
            String meg = text.substring(1, 2);
            String end = text.substring(2, 3);
            // Print the recognition result and the elapsed time.
            System.out.println("OCR Result: \n" + ocrResult + "\n 耗时:" + (System.currentTimeMillis() - startTime) + "ms");
            // Print the computed answer.
            System.out.println(start + meg + end + "=" + (Integer.parseInt(start) + Integer.parseInt(end)));
        } else {
            System.out.println("仅支持数字");
        }
    }

    /**
     * Tells whether the first character of {@code str} is an ASCII digit.
     *
     * @param str text to inspect; may be {@code null} or empty
     * @return {@code true} if the first character is one of {@code 0-9};
     *         {@code false} otherwise, including for {@code null} and empty input
     */
    public static boolean isStartWithNumber(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        Matcher firstChar = SINGLE_DIGIT.matcher(str.substring(0, 1));
        return firstChar.matches();
    }
}
maven文件
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>image</groupId>
  <artifactId>images</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>images</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.5.8</version>
    </dependency>
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.12.1</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.5</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.47</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>net.sourceforge.tess4j</groupId>
      <artifactId>tess4j</artifactId>
      <version>4.4.1</version>
    </dependency>
  </dependencies>
</project>
非 Maven 项目所需的 jar 包:
org/apache/httpcomponents/httpclient/4.5.8/httpclient-4.5.8.jar
org/apache/httpcomponents/httpcore/4.4.11/httpcore-4.4.11.jar
commons-logging/commons-logging/1.2/commons-logging-1.2.jar
commons-codec/commons-codec/1.11/commons-codec-1.11.jar
org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar
commons-io/commons-io/2.5/commons-io-2.5.jar
com/alibaba/fastjson/1.2.47/fastjson-1.2.47.jar
junit/junit/3.8.1/junit-3.8.1.jar
net/sourceforge/tess4j/tess4j/4.4.1/tess4j-4.4.1.jar
net/java/dev/jna/jna/5.4.0/jna-5.4.0.jar
com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar
org/ghost4j/ghost4j/1.0.1/ghost4j-1.0.1.jar
log4j/log4j/1.2.17/log4j-1.2.17.jar
commons-beanutils/commons-beanutils/1.9.2/commons-beanutils-1.9.2.jar
commons-collections/commons-collections/3.2.1/commons-collections-3.2.1.jar
org/apache/xmlgraphics/xmlgraphics-commons/1.4/xmlgraphics-commons-1.4.jar
com/lowagie/itext/2.1.7/itext-2.1.7.jar
org/apache/pdfbox/pdfbox/2.0.17/pdfbox-2.0.17.jar
org/apache/pdfbox/fontbox/2.0.17/fontbox-2.0.17.jar
org/apache/pdfbox/pdfbox-tools/2.0.17/pdfbox-tools-2.0.17.jar
org/apache/pdfbox/pdfbox-debugger/2.0.17/pdfbox-debugger-2.0.17.jar
org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar
net/sourceforge/lept4j/lept4j/1.12.3/lept4j-1.12.3.jar
org/jboss/jboss-vfs/3.2.14.Final/jboss-vfs-3.2.14.Final.jar
org/jboss/logging/jboss-logging/3.1.4.GA/jboss-logging-3.1.4.GA.jar
ch/qos/logback/logback-classic/1.2.3/logback-classic-1.2.3.jar
ch/qos/logback/logback-core/1.2.3/logback-core-1.2.3.jar
org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar
org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar
org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar
org/slf4j/log4j-over-slf4j/1.7.28/log4j-over-slf4j-1.7.28.jar
所需的tess4j文件,文件名称:tessdata/,这个文件你可以放在项目中,也可以放在其他位置但是路径不能写错
tessdata/
chi_sim.traineddata // 中文识别
eng.traineddata //数字识别 下载地址https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata
osd.traineddata
pdf.ttf
pdf.ttx
readme
tess4j官网文件下载:https://sourceforge.net/projects/tess4j/