使用 tesseract 进行识别,Windows 环境,64 位和 32 位均可
安装 tesseract:到 http://code.google.com/p/tesseract-ocr/ 下载 tesseract-ocr-setup-3.02.02.exe 并安装,使用默认安装目录。
tesseract = "C:\\Program Files (x86)\\Tesseract-OCR";默认目录
package com.cn.myservlet;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.ServletOutputStream;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import myjava.utils.strings.UnicodeEncoding;
import myjava.utils.url.SearchBarcodeDec;

import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;

import com.google.zxing.utils.ZxingEAN13DecoderHandler;
import commons.utils.UpLoadFileHelper;

/**
 * Servlet that accepts an uploaded image, decodes the barcode in it and writes
 * the result back to the client.
 *
 * <p>The decoded value is passed to {@code SearchBarcodeDec.search} and the
 * search result is run through {@code UnicodeEncoding.chinaToUnicode} before
 * being written. If the image cannot be decoded the literal text
 * {@code can't parse img} is written instead.
 */
public class Zxing_UpLoadFile extends HttpServlet {

    // Commons-FileUpload objects; created once for the class.
    private static FileItemFactory factory = new DiskFileItemFactory();
    private static ServletFileUpload upload = new ServletFileUpload(factory);

    /**
     * Handles one upload request.
     *
     * @param request  the request carrying the uploaded image
     * @param response the response the decoded text is written to (UTF-8)
     * @throws ServletException declared by the servlet contract
     * @throws IOException      declared by the servlet contract
     */
    @Override
    protected void service(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        System.err.println("新请求");
        request.setCharacterEncoding("UTF-8"); // encoding used for request parameters
        response.setContentType("text/html;charset=UTF-8");

        try {
            // Optional extra information sent by the client. The value is not
            // used yet; the parameter is still read as before.
            String type = URLEncoder.encode(request.getParameter("msg"), "UTF-8");
        } catch (Exception ignored) {
            // "msg" is optional: a missing parameter makes encode() throw, and
            // that must not abort the upload handling below.
        }

        File uploadFile = null;
        try {
            uploadFile = new UpLoadFileHelper(this).saveAndGetFile(request);
            String code = new ZxingEAN13DecoderHandler().decode(uploadFile);
            System.err.println("解码内容如下:");
            System.err.println(code);

            if (code == null) {
                code = "can't parse img";
            } else {
                System.err.println(code);
                try {
                    code = new SearchBarcodeDec().search(code).toString();
                    code = new UnicodeEncoding().chinaToUnicode(code);
                } catch (Exception e) {
                    // Best effort: keep whatever value "code" currently holds,
                    // but leave a trace of why the lookup/encoding failed.
                    e.printStackTrace();
                }
            }

            // Use the response's own writer so the body is encoded with the
            // charset declared in the Content-Type header, not the platform
            // default charset.
            PrintWriter pw = response.getWriter();
            pw.write(code);
            pw.flush();
            pw.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always remove the temporary upload, whatever happened above.
            if (uploadFile != null && uploadFile.exists()) {
                uploadFile.delete();
            }
        }
    }
}
package com.google.zxing.utils;

/*
 * Copyright 2009 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.awt.image.WritableRaster;

import com.google.zxing.LuminanceSource;

/**
 * A {@link LuminanceSource} backed by a {@link BufferedImage}; meant for J2SE
 * clients and blackbox unit tests.
 *
 * <p>Images that are not already {@code TYPE_BYTE_GRAY} are copied into a
 * grayscale image of the same size when the source is constructed.
 *
 * @author Daniel Switkin
 * @author Sean Owen
 * @author Wolfgang Jung
 */
public final class BufferedImageLuminanceSource extends LuminanceSource {

  private static final double MINUS_45_IN_RADIANS = -0.7853981633974483; // Math.toRadians(-45.0)

  /**
   * Selects the manual luminance computation; taken from the system property
   * {@code explicitLuminanceConversion}, or from the environment variable
   * {@code EXPLICIT_LUMINANCE_CONVERSION} when the property is not set.
   */
  private static final boolean EXPLICIT_LUMINANCE_CONVERSION = readExplicitConversionFlag();

  private final BufferedImage image;
  private final int left;
  private final int top;

  /** Wraps the whole of {@code image}. */
  public BufferedImageLuminanceSource(BufferedImage image) {
    this(image, 0, 0, image.getWidth(), image.getHeight());
  }

  /**
   * Wraps the given rectangle of {@code image}.
   *
   * @throws IllegalArgumentException if the image is not already grayscale and
   *         the rectangle extends past its right or bottom edge
   */
  public BufferedImageLuminanceSource(BufferedImage image, int left, int top, int width, int height) {
    super(width, height);
    this.image = (image.getType() == BufferedImage.TYPE_BYTE_GRAY)
        ? image
        : toGrayscale(image, left, top, width, height);
    this.left = left;
    this.top = top;
  }

  /** Reads the conversion switch from the system property, then the environment. */
  private static boolean readExplicitConversionFlag() {
    String setting = System.getProperty("explicitLuminanceConversion");
    if (setting == null) {
      setting = System.getenv("EXPLICIT_LUMINANCE_CONVERSION");
    }
    return Boolean.parseBoolean(setting);
  }

  /** Builds a {@code TYPE_BYTE_GRAY} image of the same size as {@code source}. */
  private static BufferedImage toGrayscale(BufferedImage source, int left, int top, int width, int height) {
    int sourceWidth = source.getWidth();
    int sourceHeight = source.getHeight();
    boolean fitsHorizontally = left + width <= sourceWidth;
    boolean fitsVertically = top + height <= sourceHeight;
    if (!(fitsHorizontally && fitsVertically)) {
      throw new IllegalArgumentException("Crop rectangle does not fit within image data.");
    }

    BufferedImage gray = new BufferedImage(sourceWidth, sourceHeight, BufferedImage.TYPE_BYTE_GRAY);
    int endRow = top + height;

    if (EXPLICIT_LUMINANCE_CONVERSION) {
      // Manual path: compute the luminance of every pixel in the rectangle.
      WritableRaster grayRaster = gray.getRaster();
      int[] rowPixels = new int[width];
      for (int row = top; row < endRow; row++) {
        source.getRGB(left, row, width, 1, rowPixels, 0, sourceWidth);
        for (int col = 0; col < width; col++) {
          int argb = rowPixels[col];
          // Fully transparent pixels are treated as white (see whitenTransparentPixels).
          boolean fullyTransparent = (argb & 0xFF000000) == 0;
          rowPixels[col] = luminanceOf(fullyTransparent ? 0xFFFFFFFF : argb);
        }
        grayRaster.setPixels(left, row, width, 1, rowPixels);
      }
      return gray;
    }

    if (source.getAlphaRaster() != null) {
      whitenTransparentPixels(source, left, top, width, endRow, sourceWidth);
    }
    // Create a grayscale copy, no need to calculate the luminance manually.
    gray.getGraphics().drawImage(source, 0, 0, null);
    return gray;
  }

  /** Weighted sum .299R + 0.587G + 0.114B (YUV/YIQ for PAL and NTSC), in 10-bit fixed point. */
  private static int luminanceOf(int pixel) {
    int red = (pixel >> 16) & 0xFF;
    int green = (pixel >> 8) & 0xFF;
    int blue = pixel & 0xFF;
    return (306 * red + 601 * green + 117 * blue + 0x200) >> 10;
  }

  /**
   * Overwrites fully transparent pixels of {@code source} with opaque white, in place.
   *
   * <p>The color of fully-transparent pixels is irrelevant. They are often, technically,
   * fully-transparent black (0 alpha, and then 0 RGB), yet they are often used as the
   * "white" area in a barcode image.
   */
  private static void whitenTransparentPixels(BufferedImage source, int left, int top, int width,
                                              int endRow, int sourceWidth) {
    int[] rowPixels = new int[width];
    for (int row = top; row < endRow; row++) {
      source.getRGB(left, row, width, 1, rowPixels, 0, sourceWidth);
      boolean touched = false;
      for (int col = 0; col < width; col++) {
        if ((rowPixels[col] & 0xFF000000) == 0) {
          rowPixels[col] = 0xFFFFFFFF; // = white
          touched = true;
        }
      }
      if (touched) {
        source.setRGB(left, row, width, 1, rowPixels, 0, sourceWidth);
      }
    }
  }

  @Override
  public byte[] getRow(int y, byte[] row) {
    if (!(y >= 0 && y < getHeight())) {
      throw new IllegalArgumentException("Requested row is outside the image: " + y);
    }
    int rowWidth = getWidth();
    byte[] target = (row != null && row.length >= rowWidth) ? row : new byte[rowWidth];
    // The underlying raster of image consists of bytes with the luminance values
    image.getRaster().getDataElements(left, top + y, rowWidth, 1, target);
    return target;
  }

  @Override
  public byte[] getMatrix() {
    int regionWidth = getWidth();
    int regionHeight = getHeight();
    byte[] luminances = new byte[regionWidth * regionHeight];
    // The underlying raster of image consists of area bytes with the luminance values
    image.getRaster().getDataElements(left, top, regionWidth, regionHeight, luminances);
    return luminances;
  }

  @Override
  public boolean isCropSupported() {
    return true;
  }

  @Override
  public LuminanceSource crop(int left, int top, int width, int height) {
    int absoluteLeft = this.left + left;
    int absoluteTop = this.top + top;
    return new BufferedImageLuminanceSource(image, absoluteLeft, absoluteTop, width, height);
  }

  /**
   * This is always true, since the image is a gray-scale image.
   *
   * @return true
   */
  @Override
  public boolean isRotateSupported() {
    return true;
  }

  @Override
  public LuminanceSource rotateCounterClockwise() {
    int sourceWidth = image.getWidth();
    int sourceHeight = image.getHeight();

    // Rotate 90 degrees counterclockwise.
    AffineTransform quarterTurn = new AffineTransform(0.0, -1.0, 1.0, 0.0, 0.0, sourceWidth);

    // Note width/height are flipped since we are rotating 90 degrees.
    BufferedImage turned = new BufferedImage(sourceHeight, sourceWidth, BufferedImage.TYPE_BYTE_GRAY);

    // Draw the original image into the rotated one, via the transformation.
    Graphics2D graphics = turned.createGraphics();
    graphics.drawImage(image, quarterTurn, null);
    graphics.dispose();

    // Maintain the cropped region, but rotate it too.
    int regionWidth = getWidth();
    int turnedTop = sourceWidth - (left + regionWidth);
    return new BufferedImageLuminanceSource(turned, top, turnedTop, getHeight(), regionWidth);
  }

  public LuminanceSource rotateCounterClockwise45() {
    int regionWidth = getWidth();
    int regionHeight = getHeight();

    int centerX = left + regionWidth / 2;
    int centerY = top + regionHeight / 2;

    // Rotate 45 degrees counterclockwise around the center of the region.
    AffineTransform eighthTurn = AffineTransform.getRotateInstance(MINUS_45_IN_RADIANS, centerX, centerY);

    int side = Math.max(image.getWidth(), image.getHeight());
    BufferedImage turned = new BufferedImage(side, side, BufferedImage.TYPE_BYTE_GRAY);

    // Draw the original image into the rotated one, via the transformation.
    Graphics2D graphics = turned.createGraphics();
    graphics.drawImage(image, eighthTurn, null);
    graphics.dispose();

    int reach = Math.max(regionWidth, regionHeight) / 2;
    int newLeft = Math.max(0, centerX - reach);
    int newTop = Math.max(0, centerY - reach);
    int newRight = Math.min(side - 1, centerX + reach);
    int newBottom = Math.min(side - 1, centerY + reach);

    return new BufferedImageLuminanceSource(turned, newLeft, newTop, newRight - newLeft, newBottom - newTop);
  }
}
package com.google.zxing.utils;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import javax.imageio.ImageIO;

import com.google.zxing.BinaryBitmap;
import com.google.zxing.LuminanceSource;
import com.google.zxing.MultiFormatReader;
import com.google.zxing.Result;
import com.google.zxing.common.HybridBinarizer;

/**
 * Decodes a barcode from an image using ZXing's {@link MultiFormatReader}.
 *
 * <p>Every {@code decode} overload returns the decoded text, or {@code null}
 * when the image cannot be read or decoded; failures are printed, not thrown.
 *
 * @blog http://sjsky.iteye.com
 * @author Michael
 */
public class ZxingEAN13DecoderHandler {

    /**
     * Decodes the image stored at the given path.
     *
     * @param imgPath path of the image file
     * @return the decoded text, or {@code null} on failure
     */
    public String decode(String imgPath) {
        return decode(new File(imgPath));
    }

    /**
     * Decodes the given image file. The file stream opened here is always
     * closed before returning, so the caller is free to delete the file.
     *
     * @param f the image file
     * @return the decoded text, or {@code null} on failure
     */
    public String decode(File f) {
        InputStream in = null;
        try {
            in = new FileInputStream(f);
            return decode(in);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Decodes an image read from the given stream. The stream is not closed
     * here; that stays the caller's responsibility.
     *
     * @param in stream providing the image data
     * @return the decoded text, or {@code null} on failure
     */
    public String decode(InputStream in) {
        BufferedImage image;
        try {
            image = ImageIO.read(in);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return decodeImage(image);
    }

    /**
     * Decodes an image fetched from the given URL.
     *
     * @param url location of the image
     * @return the decoded text, or {@code null} on failure
     */
    public String decode(URL url) {
        BufferedImage image;
        try {
            image = ImageIO.read(url);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        return decodeImage(image);
    }

    /** Runs the ZXing decode on an already loaded image; shared by the stream and URL variants. */
    private String decodeImage(BufferedImage image) {
        if (image == null) {
            // ImageIO.read produced no image; report it and stop here instead
            // of dereferencing the null image.
            System.out.println("the decode image may be not exit.");
            return null;
        }
        try {
            LuminanceSource source = new BufferedImageLuminanceSource(image);
            BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
            Result result = new MultiFormatReader().decode(bitmap, null);
            return result.getText();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
package tesseract;

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import myjava.utils.img.BufferedImageHandler;
import myjava.utils.img.ImageIOHelper;
import myjava.utils.img.ImageType;

import org.apache.commons.lang.StringUtils;

/**
 * Runs the tesseract command-line program on one image and hands the
 * recognized text to a callback.
 *
 * <p>{@link #run()} converts the image through {@code ImageIOHelper}, invokes
 * {@code tesseract} with the install directory as working directory, reads the
 * produced {@code .txt} file as UTF-8 and finally deletes both temporary files.
 */
public class CMD_OCR implements Runnable {

    /** Callback that receives the recognized text; may be {@code null}. */
    private CMD_CallBack callBack;

    /** Directory containing the tesseract executable. */
    public static String tesseract;

    static {
        // Hard-coded to the installer's default directory on 64-bit Windows.
        tesseract = "C:\\Program Files (x86)\\Tesseract-OCR";
        System.err.println("初始化 OCR目录--" + tesseract);
    }

    public String getTxt() {
        return txt;
    }

    public void setTxt(String txt) {
        this.txt = txt;
    }

    public String getResult() {
        return result;
    }

    public void setResult(String result) {
        this.result = result;
    }

    /** Base name passed to tesseract for its output file (".txt" is appended to it). */
    public String txt;

    /** Path of the image to process. */
    public String img;

    /** Recognized text; set by {@link #run()} on success. */
    public String result;

    private File temp_img = null;
    private File temp_txt = null;

    /** Value passed to tesseract's {@code -l} option. */
    private String encding = "chi_sim";

    private BufferedImageHandler bih;

    /**
     * @param img      path of the image to recognize
     * @param callback receives the recognized text; may be {@code null}
     */
    public CMD_OCR(String img, CMD_CallBack callback) {
        super();
        this.img = img;
        this.callBack = callback;
        this.txt = new File(img).getName();
    }

    /**
     * @param img      path of the image to recognize
     * @param callback receives the recognized text; may be {@code null}
     * @param encding  value for tesseract's {@code -l} option
     * @param bih      image handler handed to {@code ImageIOHelper}
     */
    public CMD_OCR(String img, CMD_CallBack callback, String encding, BufferedImageHandler bih) {
        super();
        this.img = img;
        this.callBack = callback;
        this.txt = new File(img).getName();
        this.encding = encding;
        this.bih = bih;
    }

    /**
     * Performs the whole recognition. Failures are printed, not thrown; the
     * callback is only invoked when recognition succeeded.
     */
    public void run() {
        try {
            // Convert the input image first.
            temp_img = getTiffImg(new File(img));
            // Run the tesseract command.
            temp_txt = getCmdFile(temp_img);
            result = getTxtContent(temp_txt);
            if (callBack != null) {
                callBack.callBack(result);
            }
            System.err.println("OCR code=" + result);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever owns this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            destory();
        }
    }

    /** Deletes the temporary image and text files, if they were created. */
    private void destory() {
        try {
            if (temp_img != null && temp_img.exists()) {
                temp_img.delete();
            }
            if (temp_txt != null && temp_txt.exists()) {
                temp_txt.delete();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole text file as UTF-8 and returns its lines concatenated
     * without any separator. The reader is closed even when reading fails.
     */
    private String getTxtContent(File temp_txt) throws IOException {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(temp_txt), "utf-8"));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
            return sb.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Runs tesseract on the given image and returns the text file it wrote.
     *
     * @throws Exception if the process cannot be started, the wait is
     *         interrupted, or tesseract exits with a non-zero code
     */
    private File getCmdFile(File temp_img) throws Exception {
        List<String> cmd = new ArrayList<String>();
        cmd.add(tesseract + "\\tesseract");
        cmd.add(temp_img.getPath());
        cmd.add(txt);
        cmd.add("-l");
        cmd.add(encding);
        System.out.println(temp_img.getPath());

        ProcessBuilder pb = new ProcessBuilder();
        // The output file name is relative, so it lands in the working directory.
        pb.directory(new File(tesseract));
        System.out.println(cmd.toString());
        pb.command(cmd);
        pb.redirectErrorStream(true);

        Process process = pb.start();
        int code;
        try {
            // Drain the process streams on separate threads.
            CMD_Print errorGobbler = new CMD_Print(process.getErrorStream(), "ERROR");
            CMD_Print outputGobbler = new CMD_Print(process.getInputStream(), "OUTPUT");
            errorGobbler.start();
            outputGobbler.start();

            code = process.waitFor();
        } finally {
            // Also reached when the wait is interrupted, so the child process
            // is not left running.
            process.destroy();
        }

        if (code != 0) {
            // Only report an error when tesseract actually failed.
            String msg = describeExitCode(code);
            System.out.println(msg);
            throw new IOException(msg);
        }
        return new File(tesseract + "\\" + txt + ".txt");
    }

    /** Maps a non-zero tesseract exit code to the message reported for it. */
    private static String describeExitCode(int code) {
        switch (code) {
        case 1:
            return "Errors accessing files. There may be spaces in your image's filename.";
        case 29:
            return "Cannot recognize the image or its selected region.";
        case 31:
            return "Unsupported image format.";
        default:
            return "Errors occurred.";
        }
    }

    /** Converts the input image through {@code ImageIOHelper} and returns the created file. */
    private File getTiffImg(File img) {
        return new ImageIOHelper(bih).createImage(img, ImageType.getFormatInFile(img));
    }

    /**
     * Manual smoke test: runs OCR on a fixed sample image in a new thread.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        CMD_OCR ocr = new CMD_OCR("C:\\Tesseract-OCR\\IMG_20130821_134512.jpg", null);
        new Thread(ocr).start();
    }
}