上篇有提到這段代碼:
// Ask the factory for the handler it associates with this file name.
DocHander docHander = DocHanderFactory.buildDocHander(fileName);
// Have the handler build the Document from the attachment (getDocument takes a byte[]).
attachDocument = docHander.getDocument(attach);
下面我們看一看實現細節。
抽象類DocHander的代碼:
/**
 * Base class for handlers that turn the raw bytes of a file into a {@code Document}.
 *
 * <p>Each subclass implements {@link #getDocument(byte[])} for one file format and
 * uses {@link #addContent(Document, String)} to attach the extracted text.
 */
public abstract class DocHander {

    /**
     * Name of the field that holds the extracted text.
     * Declared {@code final} so the field name cannot be reassigned at runtime.
     */
    public static final String FIELD_CONTENT = "contents";

    /**
     * Builds a document from the given file content.
     *
     * @param inputByte the complete content of the file
     * @return the document built from that content
     * @throws Exception if the content cannot be processed
     */
    public abstract Document getDocument(byte[] inputByte) throws Exception;

    /**
     * Adds {@code content} to {@code document} under {@link #FIELD_CONTENT}.
     * The field is created with {@code Field.Store.YES} and {@code Field.Index.TOKENIZED}.
     *
     * @param document the document to extend
     * @param content the text to add
     * @return the same {@code document} instance, to allow chaining
     */
    protected Document addContent(Document document, String content) {
        document.add(new Field(FIELD_CONTENT, content, Field.Store.YES, Field.Index.TOKENIZED));
        return document;
    }
}
現在看看工廠類DocHanderFactory的代碼:
/**
 * Chooses the {@link DocHander} implementation for a file based on its name's extension.
 */
public abstract class DocHanderFactory {

    /**
     * Returns a new handler matching the extension of {@code fileName}.
     *
     * <p>The comparison is case-insensitive. {@code .doc}, {@code .xls}, {@code .pdf} and
     * {@code .html}/{@code .htm} map to their dedicated handlers; every other name,
     * including {@code null}, falls back to {@link TxtDocHander}.
     *
     * @param fileName the name of the file to be handled; may be {@code null}
     * @return a new handler instance, never {@code null}
     */
    public static DocHander buildDocHander(String fileName) {
        if (fileName == null) {
            // No name means no recognizable extension: use the same fallback as unknown types.
            return new TxtDocHander();
        }

        // Lower-case once, independent of the default locale, instead of once per branch.
        String lowerName = fileName.toLowerCase(java.util.Locale.ROOT);

        if (lowerName.endsWith(".doc")) {
            return new WordDocHander();
        }
        if (lowerName.endsWith(".xls")) {
            return new ExcelDocHander();
        }
        if (lowerName.endsWith(".pdf")) {
            return new PdfDocHander();
        }
        if (lowerName.endsWith(".html") || lowerName.endsWith(".htm")) {
            return new HtmlDocHander();
        }
        return new TxtDocHander();
    }
}
以下貼出WordDocHander、ExcelDocHander、PdfDocHander的代碼。因為底層的解析工作已經由現成的開源庫(如POI、PDFBox)幫我們包好了,所以我們寫起來很簡單,非常感謝他們!
public class WordDocHander extends DocHander { public Document getDocument(byte[] inputByte) throws IOException { InputStream inputStream = new ByteArrayInputStream(inputByte); // TODO Auto-generated method stub Document document = new Document(); WordExtractor extractor = new WordExtractor(inputStream); addContent(document,extractor.getText()); return document; } }
public class ExcelDocHander extends DocHander { public Document getDocument(byte[] inputByte) throws IOException { // TODO Auto-generated method stub InputStream inputStream = new ByteArrayInputStream(inputByte); Document document = new Document(); HSSFWorkbook wb = new HSSFWorkbook(inputStream); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false); String content = extractor.getText(); return addContent(document, content); } }
public class PdfDocHander extends DocHander { public Document getDocument(byte[] inputByte) throws IOException { // Document document = LucenePDFDocument.getDocument(inputStream);//如何你不需要摘要顯示所搜索到的內容,就可以直接用這個簡單的方法 InputStream inputStream = new ByteArrayInputStream(inputByte); Document document = new Document(); PDDocument pdfDocument = PDDocument.load(inputStream ); try { if( pdfDocument.isEncrypted() ) { //Just try using the default password and move on pdfDocument.decrypt( "" ); }//create a writer where to append the text content. StringWriter writer = new StringWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.writeText( pdfDocument, writer ); String contents = writer.getBuffer().toString(); super.addContent(document, contents); } catch (CryptographyException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new IOException( "Error decrypting document: " + e ); } catch (InvalidPasswordException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new IOException( "Error decrypting document: " + e ); } return document; } }