第一章:初识Lucene

Lucene实战程序示例:

建立索引(使用Indexer索引文本文件)

package lucene;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the plain-text files found directly inside a
 * data directory (sub-directories are not descended into).
 */
public class Indexer {
	/** Directory in which the index is created. */
	public static String indexDir = "h:/lucene";
	/** Directory whose (.txt) files are indexed. */
	public static String dataDir = "h:/listFile";
	/** Writer used to add documents to the index. */
	private IndexWriter writer;

	/**
	 * Opens an {@link IndexWriter} on the given index directory, using a
	 * {@link StandardAnalyzer}.
	 * 
	 * @param indexDir
	 *            directory in which the index is stored
	 * @throws IOException
	 *             if the directory or the writer cannot be opened
	 */
	public Indexer(String indexDir) throws IOException {
		Directory dir = FSDirectory.open(new File(indexDir));
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
				new StandardAnalyzer(Version.LUCENE_35));
		writer = new IndexWriter(dir, config);
	}

	/**
	 * Closes the underlying writer.
	 * 
	 * @throws Exception
	 *             if closing the writer fails
	 */
	public void close() throws Exception {
		writer.close();
	}

	/**
	 * Indexes every regular, visible, readable file directly inside
	 * {@code dir} that is accepted by {@code filter}.
	 * 
	 * @param dir
	 *            directory containing the files to index
	 * @param filter
	 *            filter selecting the files to index; {@code null} accepts
	 *            every file
	 * @return the value of {@code writer.numDocs()} after indexing
	 *         (NOTE(review): presumably this also counts documents that were
	 *         already in the index before this run - confirm against the
	 *         IndexWriter documentation)
	 * @throws Exception
	 *             if {@code dir} cannot be listed or a file cannot be indexed
	 */
	public int index(String dir, FileFilter filter) throws Exception {
		File[] files = new File(dir).listFiles();
		// listFiles() returns null when the path is not a directory or an
		// I/O error occurs; report that instead of failing with a bare NPE.
		if (files == null) {
			throw new IOException("Cannot list files in '" + dir
					+ "': not a directory or an I/O error occurred");
		}
		for (File f : files) {
			if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
					&& (filter == null || filter.accept(f))) {
				indexFile(f);
			}
		}
		return writer.numDocs();
	}

	/**
	 * Logs the file being processed and adds it to the index.
	 * 
	 * @param f
	 *            file that passed the filter
	 * @throws Exception
	 *             if the file cannot be read or added to the index
	 */
	private void indexFile(File f) throws Exception {
		// One progress message per line, so successive paths do not run
		// together on the console.
		System.out.println("Indexing " + f.getCanonicalPath());
		Document doc = getDocument(f);
		writer.addDocument(doc);
	}

	/**
	 * Builds the Lucene document for a file: a reader-backed "contents"
	 * field plus stored, un-analyzed "filename" and "fullpath" fields.
	 * 
	 * @param f
	 *            file to index
	 * @return the populated document
	 * @throws Exception
	 *             if the file cannot be opened or its path cannot be resolved
	 */
	public Document getDocument(File f) throws Exception {
		Document doc = new Document();
		// NOTE(review): FileReader decodes with the platform default charset;
		// confirm that matches the encoding of the text files being indexed.
		doc.add(new Field("contents", new FileReader(f)));
		doc.add(new Field("filename", f.getName(), Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		return doc;
	}

	/**
	 * Indexes the .txt files in {@link #dataDir} into {@link #indexDir} and
	 * prints how long it took.
	 * 
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		long start = System.currentTimeMillis();
		Indexer indexer = null;
		int numIndexed = 0;
		try {
			indexer = new Indexer(indexDir);
			numIndexed = indexer.index(dataDir, new TextFilesFilter());
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// The writer must be closed so the index is committed; an
			// unclosed writer leaves the index incomplete and later searches
			// fail. The indexer is null when the constructor itself threw.
			if (indexer != null) {
				try {
					indexer.close();
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		long end = System.currentTimeMillis();
		System.out.println("\n Indexing " + numIndexed + " files took "
				+ (end - start) + "milliseconds");

	}

	/** Accepts only files whose name ends in ".txt" (case-insensitive). */
	static class TextFilesFilter implements FileFilter {

		@Override
		public boolean accept(File pathname) {
			return pathname.getName().toLowerCase().endsWith(".txt");
		}

	}
}
第一章:初识lucene_第1张图片



搜索索引(Searcher)

package lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a single-term query against the "contents" field of an existing
 * Lucene index and prints the full path of each matching document.
 */
public class Searcher {
	/** Directory holding the index to search. */
	public static String indexDir = "h:/lucene";

	/**
	 * Searches the index in {@code indexDir} for documents whose "contents"
	 * field contains the exact term {@code q}, and prints up to ten matches.
	 * 
	 * @param indexDir
	 *            directory holding the index
	 * @param q
	 *            term to look up; it is used as-is, without analysis
	 * @throws IOException
	 *             if the index cannot be opened or read
	 * @throws ParseException
	 *             never thrown by the current term-based lookup; kept in the
	 *             signature so existing callers that catch it still compile
	 */
	public static void search(String indexDir, String q) throws IOException,
			ParseException {
		Directory dir = FSDirectory.open(new File(indexDir));
		try {
			IndexReader reader = IndexReader.open(dir);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);
				try {
					// A TermQuery matches the raw term exactly. To accept
					// free-form query syntax instead, build the query with a
					// QueryParser on the "contents" field and the analyzer
					// that was used at index time.
					Query query = new TermQuery(new Term("contents", q));
					long start = System.currentTimeMillis();
					TopDocs hits = searcher.search(query, 10);
					long end = System.currentTimeMillis();

					System.err.println("Found " + hits.totalHits
							+ " document(s) (in " + (end - start)
							+ "milliseconds) that matched query'" + q + "':");

					for (ScoreDoc scoreDoc : hits.scoreDocs) {
						Document doc = searcher.doc(scoreDoc.doc);
						System.out.println(doc.get("fullpath"));
					}
				} finally {
					searcher.close();
				}
			} finally {
				// Closing a searcher built from a caller-supplied reader
				// does not close that reader, so release it explicitly.
				reader.close();
			}
		} finally {
			dir.close();
		}
	}

	/**
	 * Searches {@link #indexDir} for the term "apache".
	 * 
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		try {
			search(indexDir, "apache");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}
}



你可能感兴趣的:(索引,Lucene)