1. 在 Java 中集成需要先准备好 Java 开发环境,并引入厂商提供的 jar 包:com.abbyy.FREngine.jar
2. Java 集成代码示例(实现了以下功能:生成的 PDF 每页大小一致;支持汉字识别;生成的 PDF 为双层 PDF,文字可以复制):
package com.iboyaa.ocr;

import com.abbyy.FREngine.*;
import com.iboyaa.model.Material;
import com.iboyaa.ocr.BatchProcessing.ImageSourceImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Runs ABBYY FineReader Engine over a folder of scanned images and exports the
 * recognized result as a single PDF.
 *
 * <p>Folder layout, relative to {@code SamplesConfig.GetSamplesFolder()}:
 * <pre>
 *   saomiaoImg/&lt;yyyyMMdd&gt;/&lt;folderOnly&gt;/img              source images
 *   saomiaoImg/&lt;yyyyMMdd&gt;/&lt;folderOnly&gt;/&lt;folderOnly&gt;.pdf  exported PDF
 * </pre>
 * {@code folderOnly} is parsed as an epoch-millisecond timestamp, from which the
 * {@code yyyyMMdd} directory name is derived.
 *
 * <p>An instance holds one engine reference that is loaded and unloaded by every
 * {@code Run} call; nothing in this class synchronizes access to it.
 */
public class OcrUtil {

    private static final Logger logger = LoggerFactory.getLogger(OcrUtil.class);

    /** Root directory (below the samples folder) that holds all scan batches. */
    private static final String SCAN_ROOT = "saomiaoImg";

    /** Folder processed by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_FOLDER = "1535702091255";

    /** Recognition languages passed to the engine (English plus Simplified Chinese). */
    private static final String RECOGNITION_LANGUAGES = "English, ChinesePRC";

    private IEngine engine = null;

    /**
     * Command-line entry point: processes one scan folder and prints the elapsed
     * time in milliseconds.
     *
     * @param args optional; {@code args[0]} is the folder name (an epoch-millisecond
     *             timestamp). Falls back to {@link #DEFAULT_FOLDER} when absent.
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        try {
            Material material = new Material();
            material.setFolderOnly(args != null && args.length > 0 ? args[0] : DEFAULT_FOLDER);
            OcrUtil ocrUtil = new OcrUtil();
            ocrUtil.Run(material);
        } catch (Exception ex) {
            reportFailure("OCR run failed", ex);
        }
        System.out.println(System.currentTimeMillis() - start);
    }

    /** Writes a progress message to both standard output and the logger. */
    private static void displayMessage(String message) {
        System.out.println(message);
        logger.info(message);
    }

    /**
     * Reports a failure on standard output and logs it at ERROR level together
     * with the stack trace, so the cause is not reduced to a (possibly null) message.
     */
    private static void reportFailure(String context, Exception ex) {
        String message = context + ": " + ex;
        System.out.println(message);
        logger.error(message, ex);
    }

    /**
     * Loads the engine, processes the given folder and unloads the engine again.
     *
     * <p>Errors raised while processing are logged and not rethrown; only failures
     * to load or unload the engine propagate to the caller.
     *
     * @param folderdOly folder name, expected to be an epoch-millisecond timestamp
     * @throws Exception if the engine cannot be loaded or unloaded
     */
    public void Run(String folderdOly) throws Exception {
        loadEngine();
        try {
            processWithEngine(folderdOly);
        } finally {
            unloadEngine();
        }
    }

    /**
     * Loads the engine, processes the folder named by {@code material.getFolderOnly()}
     * and unloads the engine again. On success the page count and the relative PDF
     * address are stored on {@code material}.
     *
     * <p>Errors raised while processing are logged and not rethrown (in which case
     * {@code material} is left untouched); only failures to load or unload the
     * engine propagate to the caller.
     *
     * @param material carries the folder name in and receives page count and PDF address
     * @throws Exception if the engine cannot be loaded or unloaded
     */
    public void Run(Material material) throws Exception {
        loadEngine();
        try {
            processWithEngine(material);
        } finally {
            unloadEngine();
        }
    }

    /** Configures the engine and runs the {@code Material} batch; failures are logged, not thrown. */
    private void processWithEngine(Material material) {
        try {
            setupFREngine();
            batchProcessing(material);
        } catch (Exception ex) {
            reportFailure("Processing failed", ex);
        }
    }

    /** Configures the engine and runs the folder-name batch; failures are logged, not thrown. */
    private void processWithEngine(String folderdOly) {
        try {
            setupFREngine();
            batchProcessing(folderdOly);
        } catch (Exception ex) {
            reportFailure("Processing failed", ex);
        }
    }

    /**
     * Variant intended to keep the page content size consistent in the generated PDF:
     * skew correction is off, the PDF paper size follows the image size, and the
     * export uses the max-speed scenario. Updates {@code material} with the number
     * of pages and the relative address of the exported PDF.
     */
    private void batchProcessing(Material material) throws Exception {
        System.out.println("-------------");
        String folderdOly = material.getFolderOnly();
        String dateName = resolveDateName(folderdOly);
        String relativeFolder = SCAN_ROOT + "/" + dateName + "/" + folderdOly;

        String sourceFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder + "/img");
        String resultFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder);
        ImageSourceImpl imageSource = prepareImageSource(sourceFolder, resultFolder);

        // Same image-preparation mode is used for the batch processor and for the
        // images added to the document.
        IPrepareImageMode pim = engine.CreatePrepareImageMode();
        pim.setCorrectSkew(false);
        pim.setAutoOverwriteResolution(false);
        pim.setOverwriteResolution(true);

        String pdfExportPath = CombinePaths(resultFolder, folderdOly + ".pdf");
        int pageCount = recognizeAndExport(imageSource, pim, pim, pdfExportPath, true);

        material.setPage(pageCount);
        material.setAddress("/" + relativeFolder + "/" + folderdOly + ".pdf");
    }

    /**
     * Variant that does not guarantee a consistent page content size: skew correction
     * is on, images are added to the document with default preparation, and the
     * export uses the balanced scenario.
     */
    private void batchProcessing(String folderdOly) throws Exception {
        String dateName = resolveDateName(folderdOly);
        String relativeFolder = SCAN_ROOT + "/" + dateName + "/" + folderdOly;

        String sourceFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder + "/img");
        String resultFolder = CombinePaths(SamplesConfig.GetSamplesFolder(), relativeFolder);
        ImageSourceImpl imageSource = prepareImageSource(sourceFolder, resultFolder);

        IPrepareImageMode pim = engine.CreatePrepareImageMode();
        pim.setCorrectSkew(true);
        pim.setCorrectSkewMode(CorrectSkewModeEnum.CSM_CorrectSkewByHorizontalLines.getValue()
                + CorrectSkewModeEnum.CSM_CorrectSkewByVerticalText.getValue());
        pim.setAutoOverwriteResolution(true);

        // Platform-neutral path; the previous hard-coded "\\" only worked on Windows.
        String pdfExportPath = CombinePaths(resultFolder, folderdOly + ".pdf");
        recognizeAndExport(imageSource, pim, null, pdfExportPath, false);
    }

    /**
     * Derives the {@code yyyyMMdd} directory name from the folder name. A null or
     * empty folder name falls back to the current time.
     *
     * @throws NumberFormatException if a non-empty folder name is not a valid long
     */
    private static String resolveDateName(String folderdOly) {
        boolean hasTimestamp = folderdOly != null && !"".equals(folderdOly);
        long timestamp = hasTimestamp ? Long.parseLong(folderdOly) : System.currentTimeMillis();
        // A fresh formatter per call: SimpleDateFormat must not be shared between threads.
        return new SimpleDateFormat("yyyyMMdd").format(new Date(timestamp));
    }

    /**
     * Verifies the source folder, makes sure the result folder exists and opens the
     * image source.
     *
     * @throws Exception if the source folder is missing or contains no images
     */
    private static ImageSourceImpl prepareImageSource(String sourceFolder, String resultFolder)
            throws Exception {
        if (!isDirectoryExist(sourceFolder)) {
            throw new Exception("Cannot find " + sourceFolder);
        }
        createDirectory(resultFolder);

        ImageSourceImpl imageSource = new ImageSourceImpl(sourceFolder);
        if (imageSource.IsEmpty()) {
            throw new Exception("No images in specified folder.");
        }
        return imageSource;
    }

    /**
     * Feeds the image source through a batch processor, adds every processed page's
     * source image to a new document, recognizes the document and exports it as PDF.
     * The document is always closed, including when the batch processor fails to start.
     *
     * <p>NOTE(review): pages returned by the batch processor are only used for their
     * source image path and for counting; the document re-adds the image files and
     * runs recognition itself, so the batch pass looks redundant. Confirm against the
     * FineReader Engine documentation before removing it.
     *
     * @param imageSource     images to process
     * @param batchMode       image preparation mode for the batch processor
     * @param documentMode    image preparation mode for images added to the document;
     *                        {@code null} is passed through to the engine unchanged
     * @param pdfExportPath   destination PDF file
     * @param uniformPageSize selects the export settings, see {@link #createPdfExportParams(boolean)}
     * @return the number of pages obtained from the batch processor
     */
    private int recognizeAndExport(ImageSourceImpl imageSource, IPrepareImageMode batchMode,
            IPrepareImageMode documentMode, String pdfExportPath, boolean uniformPageSize)
            throws Exception {
        IBatchProcessor batchProcessor = engine.CreateBatchProcessor();
        IFRDocument document = engine.CreateFRDocument();
        try {
            batchProcessor.Start(imageSource, null, batchMode, null, null);

            int pageCount = 0;
            IFRPage page = batchProcessor.GetNextProcessedPage();
            while (page != null) {
                pageCount++;
                page.Synthesize(null);
                String imagePath = page.getSourceImagePath();
                displayMessage("Process..." + imagePath + "-----");
                document.AddImageFile(imagePath, documentMode, null);
                page = batchProcessor.GetNextProcessedPage();
            }

            displayMessage("Process...");
            document.Process(createProcessingParams());

            displayMessage("Saving results...");
            document.Export(pdfExportPath, FileExportFormatEnum.FEF_PDF,
                    createPdfExportParams(uniformPageSize));
            return pageCount;
        } finally {
            document.Close();
        }
    }

    /**
     * Builds the document processing parameters shared by both batch variants:
     * English + Simplified Chinese recognition in low-resolution mode, text detection
     * on pictures with aggressive extraction, text-extraction analysis mode and
     * orientation correction.
     */
    private IDocumentProcessingParams createProcessingParams() throws Exception {
        IRecognizerParams recognizer = engine.CreateRecognizerParams();
        recognizer.setLowResolutionMode(true);
        recognizer.SetPredefinedTextLanguage(RECOGNITION_LANGUAGES);

        IObjectsExtractionParams extraction = engine.CreateObjectsExtractionParams();
        extraction.setDetectTextOnPictures(true);
        extraction.setEnableAggressiveTextExtraction(true);

        IPageAnalysisParams analysis = engine.CreatePageAnalysisParams();
        analysis.setEnableTextExtractionMode(true);

        IPagePreprocessingParams preprocessing = engine.CreatePagePreprocessingParams();
        preprocessing.setCorrectOrientation(true);

        IDocumentProcessingParams params = engine.CreateDocumentProcessingParams();
        params.getPageProcessingParams().setRecognizerParams(recognizer);
        params.getPageProcessingParams().setObjectsExtractionParams(extraction);
        params.getPageProcessingParams().setPageAnalysisParams(analysis);
        params.getPageProcessingParams().setPagePreprocessingParams(preprocessing);
        return params;
    }

    /**
     * Builds the PDF export parameters.
     *
     * @param uniformPageSize {@code true}: max-speed scenario with the paper size taken
     *                        from the image; {@code false}: balanced scenario with the
     *                        engine's default paper size handling
     */
    private IPDFExportParams createPdfExportParams(boolean uniformPageSize) throws Exception {
        IPDFExportParams pdfParams = engine.CreatePDFExportParams();
        if (uniformPageSize) {
            pdfParams.setScenario(PDFExportScenarioEnum.PES_MaxSpeed);
            // Alternative tried earlier: PaperSizeModeEnum.PSM_UserDefined with
            // setPaperWidth(11909) / setPaperHeight(16834).
            pdfParams.getPDFFeatures().getPaperSizeParams()
                    .setPaperSizeMode(PaperSizeModeEnum.PSM_ImageSize);
        } else {
            pdfParams.setScenario(PDFExportScenarioEnum.PES_Balanced);
        }
        return pdfParams;
    }

    /**
     * Loads the predefined engine profile used for recognition.
     *
     * <p>Possible profile names are:
     * "DocumentConversion_Accuracy", "DocumentConversion_Speed",
     * "DocumentArchiving_Accuracy", "DocumentArchiving_Speed",
     * "BookArchiving_Accuracy", "BookArchiving_Speed",
     * "TextExtraction_Accuracy", "TextExtraction_Speed",
     * "FieldLevelRecognition",
     * "BarcodeRecognition_Accuracy", "BarcodeRecognition_Speed",
     * "HighCompressedImageOnlyPdf", "BusinessCardsProcessing",
     * "EngineeringDrawingsProcessing", "Version9Compatibility", "Default".
     */
    private void setupFREngine() {
        displayMessage("Loading predefined profile...");
        engine.LoadPredefinedProfile("DocumentConversion_Accuracy");
    }

    /**
     * Joins a parent path and a child path using the platform's separator.
     *
     * @param path1 parent path
     * @param path2 child path, resolved against {@code path1}
     * @return the combined path as produced by {@link File#getPath()}
     */
    public static String CombinePaths(String path1, String path2) {
        File parent = new File(path1);
        return new File(parent, path2).getPath();
    }

    /** Returns whether {@code path} exists and is a directory (a regular file does not count). */
    private static boolean isDirectoryExist(String path) {
        return new File(path).isDirectory();
    }

    /**
     * Creates {@code path}, including missing parent directories, if it is not
     * already a directory.
     *
     * @throws IllegalStateException if the directory cannot be created
     */
    private static void createDirectory(String path) {
        File dir = new File(path);
        // The second isDirectory() covers another process creating it concurrently.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IllegalStateException("Cannot create " + path);
        }
    }

    /** Obtains the engine object using the DLL folder and developer serial number from SamplesConfig. */
    private void loadEngine() throws Exception {
        displayMessage("Initializing Engine...");
        engine = Engine.GetEngineObject(SamplesConfig.GetDllFolder(), SamplesConfig.GetDeveloperSN());
    }

    /** Drops the engine reference and deinitializes the engine. */
    private void unloadEngine() throws Exception {
        displayMessage("Deinitializing Engine...");
        engine = null;
        Engine.DeinitializeEngine();
    }
}