/** * Project Name:docsearch * File Name:Index.java * Package Name:cn.tramp.docsearch.index * Date:2014年2月27日 下午5:10:15 * Copyright (c) 2014, [email protected] All Rights Reserved. * */ package cn.tramp.docsearch.index; import java.io.File; import java.io.IOException; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import cn.tramp.docsearch.dao.IDocumentMapper; import cn.tramp.docsearch.domain.DocumentInfo; import cn.tramp.docsearch.util.IndexPropertyUtil; import cn.tramp.docsearch.util.PdfUtil; /** * ClassName:Index <br/> * Function: Index. <br/> * Reason: Index. <br/> * Date: 2014年2月27日 下午5:10:15 <br/> * @author zhangzhaoyu * @version * @since JDK 1.7 * @see */ public class Index { private final static Log logger = LogFactory.getLog(Index.class); private Directory directory; private String indexPath; private String docmentPath; private IDocumentMapper mapper; /** * * 构造的时候初始化 indexPath 和 directory * */ public Index() { try { indexPath = IndexPropertyUtil.getKeyValueByName("indexPath"); docmentPath = IndexPropertyUtil.getKeyValueByName("docmentPath"); directory = FSDirectory.open(new File(indexPath)); } catch (IOException e) { e.printStackTrace(); logger.info("init Index class error"); } } private IndexWriter getIndexWriter() throws IOException { // 标准分词器 一元分词 //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_43); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); // 设置缓冲区大小 iwc.setRAMBufferSizeMB(3); return new IndexWriter(directory, iwc); } private IndexWriter getIndexWriter(Analyzer analyzer) throws IOException { IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); // 设置缓冲区大小 iwc.setRAMBufferSizeMB(3); return new IndexWriter(directory, iwc); } public void index(List<DocumentInfo> docList, boolean flag) { IndexWriter writer = null; try { writer = getIndexWriter(); if (flag) { writer.deleteAll(); logger.info("delete all"); writer.commit(); } for (DocumentInfo docInfo : docList) { Document doc = new Document(); doc.add(new StringField("doc_id", docInfo.getDoc_id().toString(), Store.YES)); doc.add(new TextField("doc_name", docInfo.getDoc_name(), Store.YES)); doc.add(new StringField("doc_type", docInfo.getDocType().getType_name(), Store.YES)); String docContent = ""; if (docInfo.getDocType().getType_name().equals(".pdf")) { //doc.add(new TextField("content", docInfo.getDoc_name(), Store.YES)); docContent = PdfUtil.getPdfContent(docmentPath + docInfo.getDoc_location()); } else { docContent = this.getFileContent(docInfo.getDoc_location()); //doc.add(new TextField("content", docContent, Store.YES)); } //String docContent = PdfUtil.getPdfContent(docmentPath + docInfo.getDoc_location()); doc.add(new StringField("content", docContent, Store.YES)); doc.add(new StringField("doc_location", docInfo.getDoc_location(), Store.YES)); doc.add(new StringField("add_datetime", DateTools.dateToString(docInfo.getAdd_datetime(), Resolution.DAY), Store.YES)); doc.add(new StringField("modify_datetime", DateTools.dateToString(docInfo.getModify_datetime(), Resolution.DAY), Store.YES)); doc.add(new StringField("upload_author", docInfo.getUpload_author(), Store.YES)); doc.add(new StringField("author", docInfo.getAuthor(), Store.YES)); writer.addDocument(doc); } writer.forceMerge(1); writer.commit(); } catch (IOException e) { e.printStackTrace(); } finally { try { writer.close(); } catch (IOException e) { e.printStackTrace(); } } } /** * * close:<br /> * 关闭Derectory * * @author zhangzhaoyu */ public void close() { try { this.directory.close(); } catch (IOException e) { e.printStackTrace(); } } private String getFileContent(String filePath) throws IOException { String fileAbPath = this.docmentPath + filePath; return FileUtils.readFileToString(new File(fileAbPath), "utf-8"); } public IDocumentMapper getMapper() { return mapper; } public void setMapper(IDocumentMapper mapper) { this.mapper = mapper; } }