Lucene实例(增删改查)

前面两篇博文已经介绍过 Lucene,大家对 Lucene 也有了初步的了解。在这里我给出自己项目中的一些实际例子,这些例子包含了对索引的增、删、改、查操作,还包括关键字高亮。

当然,这些例子都建立在 Lucene 3.0 的基础之上,索引是针对 txt 文件创建的。

好的以下是代码~~

 

创建索引类:

 

 

import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version;

public class  Index{

 /**   * @param args   * @throws IOException   */  public static void main(String[] args) throws IOException {   // 保存索引文件的地方   String indexDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";   // 将要搜索TXT文件的地方   String dateDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\data";   IndexWriter indexWriter = null;   // 创建Directory对象 ,FSDirectory代表待索引的文件存在磁盘上   Directory dir = new SimpleFSDirectory(new File(indexDir));   // 创建IndexWriter对象,第一个参数是Directory,第二个是分词器,第三个表示是否是创建,如果为false为在此基础上面修改,第四表示表示分词的最大值,比如说new   // MaxFieldLength(2),就表示两个字一分,一般用IndexWriter.MaxFieldLength.LIMITED   indexWriter = new IndexWriter(dir, new StandardAnalyzer(     Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);         File[] files = new File(dateDir).listFiles();   for (int i = 0; i < files.length; i++) {    Document doc = new Document();    // 创建Field对象,并放入doc对象中        doc.add(new Field("id", i+"", Field.Store.YES,      Field.Index.NOT_ANALYZED));    doc.add(new Field("contents", new FileReader(files[i])));        doc.add(new Field("path", files[i].getAbsolutePath(), Field.Store.YES,      Field.Index.ANALYZED));    doc.add(new Field("indexDate", DateTools.dateToString(new Date(),      DateTools.Resolution.DAY), Field.Store.YES,      Field.Index.NOT_ANALYZED));    // 写入IndexWriter    indexWriter.addDocument(doc);   }   // 查看IndexWriter里面有多少个索引   System.out.println("总共------》" + indexWriter.numDocs());      indexWriter.close();

 }

}

 

查找索引类:

 

 
import java.io.File;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * @author ht
 * 查询
 *
 */
public class Seacher {
	/** Path of the directory that holds the index. */
	private static final String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";
	/** Query string to search for; replace the placeholder with a real keyword. */
	private static final String KEYWORD = " ";
	/** Maximum number of hits to display. */
	private static final int TOP_NUM = 10;

	/**
	 * Verifies that the index directory exists and runs the search.
	 *
	 * @param args unused
	 * @throws Exception if the index directory is missing or the search fails
	 */
	public static void main(String[] args) throws Exception {
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);
	}

	/**
	 * Searches the "contents" field of the index and prints the stored path
	 * and index date of the top hits, followed by the total hit count and the
	 * elapsed time.
	 *
	 * @param indexDir directory containing the Lucene index
	 * @param q        query string in QueryParser syntax
	 * @throws Exception if the query cannot be parsed or the index cannot be read
	 */
	public static void search(File indexDir, String q) throws Exception {
		FSDirectory directory = FSDirectory.open(indexDir);
		try {
			IndexSearcher is = new IndexSearcher(directory, true);// read-only
			try {
				String field = "contents";
				QueryParser parser = new QueryParser(Version.LUCENE_30, field,
						new StandardAnalyzer(Version.LUCENE_30));
				Query query = parser.parse(q);

				TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);

				long start = System.currentTimeMillis();

				is.search(query, collector);
				ScoreDoc[] hits = collector.topDocs().scoreDocs;

				for (int i = 0; i < hits.length; i++) {
					// Load the stored fields of the matching document.
					Document doc = is.doc(hits[i].doc);
					System.out.println("路径:" + doc.get("path") + "---------------------"
							+ doc.getField("path") + "\n创建时间:" + doc.get("indexDate"));
				}
				long end = System.currentTimeMillis();

				System.out.println("\n找到" + collector.getTotalHits()
						+ "个结果,总共花费时间 : " + (end - start) + "毫秒");
			} finally {
				// The original never closed the searcher, leaking its index reader.
				is.close();
			}
		} finally {
			directory.close();
		}
	}
}


 

增加索引:

 

import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * 添加索引
 * 
 * 
 */

/**
 * Appends one hand-built document to the existing index.
 */
public class addIndex {

	/**
	 * Adds a single document, commits it, and then prints the index totals.
	 *
	 * @param s unused
	 * @throws IOException    if the index cannot be opened, written or read
	 * @throws ParseException declared for interface compatibility; not thrown here
	 */
	public static void main(String s[]) throws IOException, ParseException
	{
		Directory dir = FSDirectory.open(new File("C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		try {
			// create=false appends to the existing index; true would rebuild it
			// from scratch and discard every document already indexed.
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
					Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
			try {
				Document doc = new Document();
				doc.add(new Field("contents", "java", Field.Store.YES,
						Field.Index.ANALYZED));
				doc.add(new Field("path", "新添加的路径www.baidu.com", Field.Store.YES,
						Field.Index.ANALYZED));
				doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
						DateTools.Resolution.DAY), Field.Store.YES,
						Field.Index.NOT_ANALYZED));

				iw.addDocument(doc);
				iw.commit();
			} finally {
				// Release the write lock even when adding fails.
				iw.close();
			}

			// Open the reader only after the commit. A reader sees the index as of
			// the moment it was opened, so opening it earlier (as the original did)
			// reports a total that omits the document just added.
			IndexReader id = IndexReader.open(dir);
			try {
				System.out.println("总共索引数" + id.maxDoc());
				System.out.println("删除条数:" + id.numDeletedDocs());
			} finally {
				id.close();
			}
		} finally {
			dir.close();
		}
	}

}


删除索引:

 

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * 删除索引
 * 
 * 
 */

/**
 * Deletes every document matching a query from the existing index.
 */
public class deleteIndex {

	/**
	 * Deletes the documents whose "contents" field matches the query, commits,
	 * and then prints the index totals.
	 *
	 * @param s unused
	 * @throws IOException    if the index cannot be opened, written or read
	 * @throws ParseException if the query string cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException
	{
		Directory dir = FSDirectory.open(new File("C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		try {
			// create=false is required here: true would rebuild the index and
			// discard every document, not just the ones matching the query.
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
					Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
			try {
				QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
						new StandardAnalyzer(Version.LUCENE_30));
				Query p = qp.parse("新添加的contents");
				iw.deleteDocuments(p);
				iw.commit();
			} finally {
				// The original never closed the writer, leaving the write lock held.
				iw.close();
			}

			// Open the reader after the commit so that it can see the deletion; the
			// original opened it before the commit and therefore read stale counts.
			// NOTE(review): segment merges may already have purged deleted documents,
			// in which case numDeletedDocs() is 0 and maxDoc() has shrunk instead.
			IndexReader id = IndexReader.open(dir);
			try {
				System.out.println("总共索引数" + id.maxDoc());
				System.out.println("删除条数:" + id.numDeletedDocs());
			} finally {
				id.close();
			}
		} finally {
			dir.close();
		}
	}

}


 

更新索引:

 

import java.awt.font.OpenType;
import java.io.File;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/*
 * 更新索引
 * 
 * 
 */

/**
 * Updates the index by deleting every document that matches a query and
 * adding one replacement document per deleted document.
 */
public class updateIndex {

	/**
	 * Replaces every document whose "contents" field matches "java" with a
	 * copy of a new document, optimizes the index, and prints the totals.
	 *
	 * @param s unused
	 * @throws IOException    if the index cannot be opened, written or read
	 * @throws ParseException if the query string cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		try {
			// The replacement document.
			Document doc = new Document();
			doc.add(new Field("contents", "android", Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field("path", "新添加的路径www.baidu.com android", Field.Store.YES,
					Field.Index.ANALYZED));
			// NOT_ANALYZED, consistent with how the other classes index indexDate.
			doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
					DateTools.Resolution.DAY), Field.Store.YES,
					Field.Index.NOT_ANALYZED));

			/*
			 * The author found that IndexWriter.updateDocument only added a
			 * document without replacing anything, so a "delete, then add"
			 * approach is used instead.
			 * NOTE(review): updateDocument deletes by exact Term; presumably the
			 * term used did not match any indexed term. Updating by a unique,
			 * NOT_ANALYZED id field would avoid this workaround -- confirm.
			 */
			long start = System.currentTimeMillis();

			// Query selecting the documents to be replaced.
			QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
					new StandardAnalyzer(Version.LUCENE_30));
			Query p = qp.parse("java");

			// Count ALL matching documents. deleteDocuments(p) below removes every
			// match, so the number of replacements must come from the total hit
			// count, not from the collector's top-10 array (which silently lost
			// documents whenever more than 10 matched).
			int matches;
			IndexSearcher is = new IndexSearcher(dir, true);// read-only
			try {
				TopScoreDocCollector collector = TopScoreDocCollector.create(10, false);
				is.search(p, collector);
				matches = collector.getTotalHits();
			} finally {
				is.close();
			}

			// create=false keeps the existing index; true would rebuild it and
			// discard every document.
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
					Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
			try {
				// Only touch the index when there is something to replace.
				if (matches > 0) {
					// A single call deletes every document matching the query.
					iw.deleteDocuments(p);
					// Add one replacement per deleted document.
					for (int i = 0; i < matches; i++) {
						iw.addDocument(doc);
					}
				}
				iw.optimize();
			} finally {
				iw.close();
			}

			IndexReader id = IndexReader.open(dir);
			try {
				System.out.println("总共索引数" + id.maxDoc());
				long end = System.currentTimeMillis();
				// Report the number of documents actually deleted. The original
				// printed numDeletedDocs() after optimize(), which merges
				// deletions away and therefore does not show what was removed.
				System.out.println("删除条数:" + matches + "总共花费时间:"
						+ (end - start));
			} finally {
				id.close();
			}
		} finally {
			dir.close();
		}
	}

}


 

关键字高亮:

 

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches the index and prints each hit with the query terms highlighted.
 */
public class HighLighter {

	/** Path of the directory that holds the index. */
	private static final String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";
	/** Query string to search for. */
	private static final String KEYWORD = "android";
	/** Maximum number of hits to display. */
	private static final int TOP_NUM = 100;

	/**
	 * Wraps the query terms found in a stored field in red font tags.
	 *
	 * @param query the query whose terms should be highlighted
	 * @param doc   the document to read the field from
	 * @param field name of the stored field to highlight
	 * @return the best highlighted fragment; the unmodified field text when
	 *         nothing matches or highlighting fails; {@code null} when the
	 *         document has no stored value for the field
	 */
	private static String toHighlighter(Query query, Document doc, String field)
	{
		String text = doc.get(field);
		if (text == null) {
			// Nothing stored for this field; StringReader would throw on null.
			return null;
		}
		try {
			SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
					"<font color=\"red\">", "</font>");
			Highlighter highlighter = new Highlighter(simpleHtmlFormatter,
					new QueryScorer(query));
			StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
			// Tokenize under the requested field name rather than a hard-coded one.
			TokenStream tokenStream = analyzer.tokenStream(field,
					new StringReader(text));
			String highlighterStr = highlighter.getBestFragment(tokenStream, text);
			return highlighterStr == null ? text : highlighterStr;
		} catch (IOException e) {
			System.out.println(e);
		} catch (InvalidTokenOffsetsException e) {
			System.out.println(e);
		}
		// Highlighting failed: fall back to the raw text instead of null.
		return text;
	}

	/**
	 * Verifies that the index directory exists and runs the search.
	 *
	 * @param args unused
	 * @throws Exception if the index directory is missing or the search fails
	 */
	public static void main(String[] args) throws Exception
	{
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);
	}

	/**
	 * Searches the "contents" field and prints the highlighted "path" field
	 * and index date of each hit, followed by the total hit count and the
	 * elapsed time.
	 *
	 * @param indexDir directory containing the Lucene index
	 * @param q        query string in QueryParser syntax
	 * @throws Exception if the query cannot be parsed or the index cannot be read
	 */
	public static void search(File indexDir, String q) throws Exception
	{
		FSDirectory directory = FSDirectory.open(indexDir);
		try {
			IndexSearcher is = new IndexSearcher(directory, true);// read-only
			try {
				String field = "contents";

				QueryParser parser = new QueryParser(Version.LUCENE_30, field,
						new StandardAnalyzer(Version.LUCENE_30));
				Query query = parser.parse(q);
				TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM,
						false);

				long start = System.currentTimeMillis();

				is.search(query, collector);

				ScoreDoc[] hits = collector.topDocs().scoreDocs;
				for (int i = 0; i < hits.length; i++) {
					// Load the stored fields of the matching document.
					Document doc = is.doc(hits[i].doc);
					System.out.println("高亮----------------"
							+ toHighlighter(query, doc, "path") + "\n 创建时间:"
							+ doc.get("indexDate"));
				}

				long end = System.currentTimeMillis();

				System.out.println("\n找到" + collector.getTotalHits() + "个结果,总共花费时间 : "
						+ (end - start) + "毫秒");
			} finally {
				// The original never closed the searcher, leaking its index reader.
				is.close();
			}
		} finally {
			directory.close();
		}
	}

}


 

以上代码是我自己整理的,都亲自调试过,可以正常运行。对于更新索引操作,我目前还没有找到更好的办法(暂时采用“先删除,再添加”的方式),希望大家批评指正!

 

在这里把源代码、jar 包以及索引例子分享给大家:

http://download.csdn.net/detail/zeq9069/6571589

 

 

 

 

 

 

 

 

 

你可能感兴趣的:(Lucene,实例)