Lucene查询简述


本示例演示Lucene的查询,并对查询结果做了一些处理(基于Lucene 3.5):

  1、支持前导通配符(后缀匹配),如*国,可以搜索出中国、美国等以国字结尾的词的内容;
        也支持前缀匹配,如国* 则可以搜索出国家、国民等以国字开头的词,*:*可以查询所有索引。
  parser.setAllowLeadingWildcard(true);
  2、搜索带通配符的词时,不自动把查询词转换为小写(默认会转换为小写,必须在解析查询语句之前设置):
  // 有通配符时不转换大小写
  parser.setLowercaseExpandedTerms(false);
  3、结果进行多字段排序,详细见代码排序部分;

  4、结果高亮显示,详细见代码高亮部分。

 package cn.test.gxg.engine.query; 
  
 import java.io.File; 
 import java.io.IOException; 
 import java.io.StringReader; 
  
 import org.apache.lucene.analysis.Analyzer; 
 import org.apache.lucene.analysis.TokenStream; 
 import org.apache.lucene.analysis.standard.StandardAnalyzer; 
 import org.apache.lucene.document.Document; 
 import org.apache.lucene.document.Field; 
 import org.apache.lucene.document.Fieldable; 
 import org.apache.lucene.document.NumericField; 
 import org.apache.lucene.document.Field.Store; 
 import org.apache.lucene.index.CorruptIndexException; 
 import org.apache.lucene.index.IndexReader; 
 import org.apache.lucene.index.IndexWriter; 
 import org.apache.lucene.queryParser.ParseException; 
 import org.apache.lucene.queryParser.QueryParser; 
 import org.apache.lucene.search.IndexSearcher; 
 import org.apache.lucene.search.Query; 
 import org.apache.lucene.search.ScoreDoc; 
 import org.apache.lucene.search.Searcher; 
 import org.apache.lucene.search.Sort; 
 import org.apache.lucene.search.SortField; 
 import org.apache.lucene.search.TopDocs; 
 import org.apache.lucene.search.highlight.Highlighter; 
 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
 import org.apache.lucene.search.highlight.QueryScorer; 
 import org.apache.lucene.search.highlight.SimpleFragmenter; 
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
 import org.apache.lucene.store.FSDirectory; 
 import org.apache.lucene.store.LockObtainFailedException; 
 import org.apache.lucene.util.Version; 
  
 /** 
  * Example that builds a small Lucene index and then queries it, showing
  * leading-wildcard support, multi-field sorting and result highlighting.
  * 
  * @createTime: Feb 22, 2010 3:02:28 PM 
  * @author:  <a href="mailto:[email protected]">天涯 </a> 
  * @version: 0.1 
  * @lastVersion: 0.1 
  * @updateTime: 
  * @updateAuthor:  <a href="mailto:[email protected]">天涯 </a> 
  * @changesSum: 
  *  
  */ 
 public class QueryTest { 
 	/**
 	 * Builds the demo index and then runs the demo search against it.
 	 *
 	 * @param args ignored
 	 */
 	public static void main(String[] args) { 
 		// Index directory: D:\workspace\code\java\TestLucene3\index\txt\test
 		String indexPath = "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test"; 
 		createIndex(indexPath); 
 		search(indexPath); 
 	} 
 	 
 	/**
 	 * Creates (overwriting any existing index) an index of ten sample documents
 	 * under {@code indexPath}. Each document carries the text fields
 	 * {@code Code}, {@code Name} and {@code Content} and the numeric fields
 	 * {@code Id}, {@code Type}, {@code Price} and {@code Sex}.
 	 * Failures are reported via {@code printStackTrace()} and not rethrown.
 	 *
 	 * @param indexPath file-system directory that will hold the index
 	 */
 	public static void createIndex (String indexPath) { 
 		// The same analyzer must be used at query time, otherwise results will be inaccurate.
 		// NOTE(review): LUCENE_CURRENT appears to be deprecated in 3.x; consider pinning a concrete Version.
 		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 
 		IndexWriter writer = null; 
 		try { 
 			// 'true' => create a fresh index, replacing whatever is in the directory.
 			writer = new IndexWriter(FSDirectory.open(new File(indexPath)), 
 					analyzer, true, IndexWriter.MaxFieldLength.LIMITED); 
  
 			for (int i = 0; i < 10; i++) { 
 				Document doc = new Document(); 
 				doc.add(new Field("Code", "feinnocdb_App_info" + i, Field.Store.YES, 
 						Field.Index.ANALYZED)); 
 				doc.add(new NumericField("Id", Store.YES, true).setIntValue(i % 3)); 
 				// Previously the Id numeric field was added a second time here
 				// and the Name field was silently dropped.
 				doc.add(new Field("Name", "国家名字-" + i, Field.Store.YES, 
 						Field.Index.ANALYZED)); 
 				doc.add(new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES, 
 						Field.Index.ANALYZED)); 
 				doc.add(new NumericField("Type", Store.YES, true).setIntValue(i % 10)); 
 				doc.add(new NumericField("Price", Store.YES, true).setFloatValue(i % 3)); 
 				doc.add(new NumericField("Sex", Store.YES, true).setIntValue(i % 2)); 
 				writer.addDocument(doc); 
 			} 
 			writer.close(); 
 			writer = null; // closed successfully; nothing left for the finally block
 			System.out.println("Indexed success!"); 
 		} catch (Exception e) { 
 			e.printStackTrace(); 
 		} finally { 
 			// Make sure the writer is closed even when indexing failed part-way.
 			if (writer != null) { 
 				try { 
 					writer.close(); 
 				} catch (IOException e) { 
 					e.printStackTrace(); 
 				} 
 			} 
 		} 
 	} 
 	 
 	/**
 	 * Searches the index under {@code indexPath} for {@code Content:国}, sorts
 	 * the hits by {@code Id} (descending) then {@code Sex} (ascending), and
 	 * prints every stored field of every hit. The {@code Content} and
 	 * {@code Name} fields are passed through the highlighter first.
 	 * Failures are reported via {@code printStackTrace()} and not rethrown.
 	 *
 	 * @param indexPath file-system directory that holds the index
 	 */
 	public static void search(String indexPath) { 
 		// Standard analyzer; another analyzer may be used as long as the index
 		// was built with the same one.
 		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 
 		QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer); 
 		// Parser options must be set BEFORE parse() to have any effect.
 		// Allow a leading wildcard such as "*国".
 		parser.setAllowLeadingWildcard(true); 
 		// Keep wildcard terms in their original case instead of lowercasing them.
 		parser.setLowercaseExpandedTerms(false); 
 
 		Query query; 
 		try { 
 			query = parser.parse("Content:国"); 
 		} catch (ParseException e) { 
 			// Without a query there is nothing to search for.
 			e.printStackTrace(); 
 			return; 
 		} 
 		 
 		// Multi-field sort: earlier entries take precedence.
 		// For a single-field sort use: new Sort(new SortField("Id", SortField.INT, true))
 		Sort sort = new Sort(new SortField[] { 
 				new SortField("Id", SortField.INT, true), 
 				new SortField("Sex", SortField.INT, false) 
 		}); 
 
 		IndexReader reader = null; 
 		try { 
 			reader = IndexReader.open(FSDirectory.open(new File(indexPath))); 
 			Searcher searcher = new IndexSearcher(reader); 
 			// If sorting is not needed: searcher.search(query, 100)
 			TopDocs topDocs = searcher.search(query, null, 1000, sort); 
 			 
 			System.out.println("查询语句为:" + query.toString()); 
 			System.out.println("查询到数据条数为:" + topDocs.totalHits); 
 			if (topDocs.totalHits != 0) { 
 				// Query used for highlighting. Normally this is the search query
 				// itself; a different one is used here purely for demonstration.
 				Query highlightQuery; 
 				try { 
 					highlightQuery = parser.parse("Content:中"); 
 				} catch (ParseException e) { 
 					e.printStackTrace(); 
 					// Fall back to the search query rather than passing null on.
 					highlightQuery = query; 
 				} 
 				 
 				// Fields whose values should be highlighted.
 				String[] highlightCol = {"Content", "Name"}; 
 				// Markup placed before and after each highlighted term.
 				SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>"); 
 				Highlighter highlighter = new Highlighter(formatter, new QueryScorer(highlightQuery)); 
 				// Fragment size used when splitting the text.
 				highlighter.setTextFragmenter(new SimpleFragmenter(1000)); 
 				 
 				// Walk the hits and print every stored field.
 				for (ScoreDoc sd : topDocs.scoreDocs) { 
 					Document document = searcher.doc(sd.doc); 
 					for (Fieldable fa : document.getFields()) { 
 						String value = fa.stringValue(); 
 						if (value != null && isHighlightColumn(fa.name(), highlightCol)) { 
 							// Tokenize under the field's own name, not a hard-coded one.
 							TokenStream tokenStream = analyzer.tokenStream(fa.name(), new StringReader(value)); 
 							String fragment = highlighter.getBestFragment(tokenStream, value); 
 							// No fragment when nothing matched; keep the stored value then.
 							if (fragment != null) { 
 								value = fragment; 
 							} 
 						} 
 						System.out.print(fa.name() + ":" + value + "  "); 
 					} 
 					System.out.println(); 
 				} 
 			} 
 		} catch (IOException e) { 
 			// Also covers CorruptIndexException and LockObtainFailedException.
 			e.printStackTrace(); 
 		} catch (InvalidTokenOffsetsException e) { 
 			e.printStackTrace(); 
 		} finally { 
 			// Close the reader on every path, including failures.
 			if (reader != null) { 
 				try { 
 					reader.close(); 
 				} catch (IOException e) { 
 					e.printStackTrace(); 
 				} 
 			} 
 		} 
 	} 
 
 	/** Returns true when {@code fieldName} equals one of {@code columns}. */
 	private static boolean isHighlightColumn(String fieldName, String[] columns) { 
 		for (String col : columns) { 
 			if (col.equals(fieldName)) { 
 				return true; 
 			} 
 		} 
 		return false; 
 	} 
 } 



你可能感兴趣的:(exception,String,null,Lucene,query,import)