Lucene 建立索引

/** 
 * Project Name:docsearch 
 * File Name:Index.java 
 * Package Name:cn.tramp.docsearch.index 
 * Date:2014年2月27日 下午5:10:15 
 * Copyright (c) 2014, [email protected] All Rights Reserved. 
 * 
*/  
  
package cn.tramp.docsearch.index;  

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import cn.tramp.docsearch.dao.IDocumentMapper;
import cn.tramp.docsearch.domain.DocumentInfo;
import cn.tramp.docsearch.util.IndexPropertyUtil;
import cn.tramp.docsearch.util.PdfUtil;

/**
 * Builds a Lucene index from {@link DocumentInfo} records.
 * <p>
 * The index directory and the document root directory are read from
 * {@link IndexPropertyUtil} under the keys {@code indexPath} and
 * {@code docmentPath}. Each {@link DocumentInfo} becomes one Lucene
 * {@link Document}; the text of the underlying file is extracted and indexed
 * in the analyzed {@code content} field.
 * <p>
 * Date: 2014-02-27 17:10:15
 *
 * @author   zhangzhaoyu
 * @since    JDK 1.7
 */
public class Index {
	private static final Log logger = LogFactory.getLog(Index.class);

	/** RAM buffer size, in megabytes, handed to every {@link IndexWriterConfig}. */
	private static final double RAM_BUFFER_SIZE_MB = 3;

	private Directory directory;
	private String indexPath;
	private String docmentPath;

	private IDocumentMapper mapper;

	/**
	 * Reads {@code indexPath} and {@code docmentPath} from the index properties
	 * and opens the index directory.
	 * <p>
	 * If the directory cannot be opened the failure is logged and
	 * {@code directory} is left {@code null}.
	 */
	public Index() {
		try {
			indexPath = IndexPropertyUtil.getKeyValueByName("indexPath");
			docmentPath = IndexPropertyUtil.getKeyValueByName("docmentPath");
			directory = FSDirectory.open(new File(indexPath));
		} catch (IOException e) {
			logger.error("init Index class error", e);
		}
	}

	/**
	 * Creates a writer that uses {@link SmartChineseAnalyzer}.
	 *
	 * @return a new writer on the index directory
	 * @throws IOException if the writer cannot be opened
	 */
	private IndexWriter getIndexWriter() throws IOException {
		// StandardAnalyzer would split Chinese text into single characters,
		// so the smart Chinese analyzer is used instead.
		return getIndexWriter(new SmartChineseAnalyzer(Version.LUCENE_43));
	}

	/**
	 * Creates a writer that uses the given analyzer.
	 *
	 * @param analyzer analyzer applied to analyzed fields
	 * @return a new writer on the index directory
	 * @throws IOException if the writer cannot be opened
	 */
	private IndexWriter getIndexWriter(Analyzer analyzer) throws IOException {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
		// Set the RAM buffer size.
		iwc.setRAMBufferSizeMB(RAM_BUFFER_SIZE_MB);
		return new IndexWriter(directory, iwc);
	}

	/**
	 * Adds the given documents to the index, then merges the index down to a
	 * single segment and commits.
	 * <p>
	 * {@link IOException}s are logged and not rethrown.
	 *
	 * @param docList documents to index; {@code null} is treated as empty
	 * @param flag    when {@code true}, every existing document is deleted
	 *                (and that deletion committed) before indexing starts
	 */
	public void index(List<DocumentInfo> docList, boolean flag) {
		IndexWriter writer = null;
		try {
			writer = getIndexWriter();
			if (flag) {
				writer.deleteAll();
				logger.info("delete all");
				writer.commit();
			}
			if (docList != null) {
				for (DocumentInfo docInfo : docList) {
					writer.addDocument(toDocument(docInfo));
				}
			}

			writer.forceMerge(1);
			writer.commit();

		} catch (IOException e) {
			logger.error("index documents error", e);
		} finally {
			// writer is still null when getIndexWriter() itself failed.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					logger.error("close IndexWriter error", e);
				}
			}
		}

	}

	/**
	 * Maps one {@link DocumentInfo} to a Lucene {@link Document}.
	 *
	 * @param docInfo record to convert
	 * @return the document to add to the index
	 * @throws IOException if the file content cannot be read
	 */
	private Document toDocument(DocumentInfo docInfo) throws IOException {
		String typeName = docInfo.getDocType().getType_name();
		String location = docInfo.getDoc_location();

		String docContent;
		if (".pdf".equals(typeName)) {
			docContent = PdfUtil.getPdfContent(docmentPath + location);
		} else {
			docContent = this.getFileContent(location);
		}

		Document doc = new Document();
		doc.add(new StringField("doc_id", docInfo.getDoc_id().toString(), Store.YES));
		doc.add(new TextField("doc_name", docInfo.getDoc_name(), Store.YES));
		doc.add(new StringField("doc_type", typeName, Store.YES));
		// The body must be an analyzed TextField: a StringField would index the
		// whole text as one term, which is unsearchable by word and fails for
		// text longer than Lucene's maximum term length.
		doc.add(new TextField("content", docContent, Store.YES));
		doc.add(new StringField("doc_location", location, Store.YES));
		doc.add(new StringField("add_datetime", DateTools.dateToString(docInfo.getAdd_datetime(), Resolution.DAY), Store.YES));
		doc.add(new StringField("modify_datetime", DateTools.dateToString(docInfo.getModify_datetime(), Resolution.DAY), Store.YES));
		doc.add(new StringField("upload_author", docInfo.getUpload_author(), Store.YES));
		doc.add(new StringField("author", docInfo.getAuthor(), Store.YES));
		return doc;
	}

	/**
	 * Closes the index directory. Does nothing if the directory was never
	 * opened; an {@link IOException} on close is logged and not rethrown.
	 *
	 * @author zhangzhaoyu
	 */
	public void close() {
		if (this.directory == null) {
			return;
		}
		try {
			this.directory.close();
		} catch (IOException e) {
			logger.error("close Directory error", e);
		}
	}

	/**
	 * Reads a file below the document root as UTF-8 text.
	 *
	 * @param filePath path appended to {@code docmentPath}
	 * @return the whole file content
	 * @throws IOException if the file cannot be read
	 */
	private String getFileContent(String filePath) throws IOException {
		String fileAbPath = this.docmentPath + filePath;
		return FileUtils.readFileToString(new File(fileAbPath), "utf-8");
	}

	/**
	 * @return the document mapper set on this instance, possibly {@code null}
	 */
	public IDocumentMapper getMapper() {
		return mapper;
	}

	/**
	 * @param mapper the document mapper to store on this instance
	 */
	public void setMapper(IDocumentMapper mapper) {
		this.mapper = mapper;
	}

}
 

你可能感兴趣的:(Lucene)