对于IndexReader而言,反复使用IndexReader.open打开会有很大的开销,所以一般在整个程序的生命周期中只会打开一个IndexReader,通过这个IndexReader来创建不同的IndexSearcher。如果使用单例模式,可能出现的问题有:
1、当使用Writer修改了索引之后,已经打开的IndexReader不会自动更新信息,所以需要使用IndexReader.openIfChanged方法重新获取
如果IndexWriter在创建完成之后没有关闭,需要执行commit操作,修改才会被提交并对IndexReader可见
/**
 * Returns a searcher over the shared {@code reader}, opening the reader on first use and
 * replacing it with a refreshed one when the index has changed since it was opened.
 *
 * @return a new IndexSearcher over the current reader, or {@code null} when opening or
 *         refreshing the reader failed (the exception is printed, not rethrown)
 */
public IndexSearcher getSearcher() {
    try {
        if (reader == null) {
            reader = IndexReader.open(directory);
        } else {
            // openIfChanged hands back a new reader only when the index changed.
            IndexReader refreshed = IndexReader.openIfChanged(reader);
            if (refreshed != null) {
                // Publish the new reader BEFORE closing the old one: if close() fails,
                // the new reader must not leak and the field must not keep pointing at
                // a reader that has already been closed.
                IndexReader stale = reader;
                reader = refreshed;
                try {
                    stale.close();
                } catch (IOException e) {
                    // The refreshed reader is usable; report the failed close and go on.
                    e.printStackTrace();
                }
            }
        }
        return new IndexSearcher(reader);
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
2、TermQuery查询
/**
 * Runs a TermQuery for {@code name} on {@code field} and prints the hits.
 *
 * @param field index field to search
 * @param name  term text to look up
 * @param num   maximum number of hits to fetch
 */
public void searchByTerm(String field, String name, int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        Term term = new Term(field, name);
        TopDocs hits = indexSearcher.search(new TermQuery(term), num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByTerm() {
    su.searchByTerm("content", "i", 3);
}
3、TermRangeQuery查询
/**
 * Runs a TermRangeQuery on {@code field} with both bounds inclusive and prints the hits.
 *
 * @param field index field to search
 * @param start lower bound term
 * @param end   upper bound term
 * @param num   maximum number of hits to fetch
 */
public void searchByTermRange(String field, String start, String end, int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        Query rangeQuery = new TermRangeQuery(field, start, end, true, true);
        TopDocs hits = indexSearcher.search(rangeQuery, num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByTermRange() {
    // Match name terms in the range from "a" to "s".
    su.searchByTermRange("name", "a", "s", 10);
    // attach is a numeric field, so a TermRangeQuery cannot query it.
    //su.searchByTermRange("attach","2","10", 5);
}
4、NumericRangeQuery查询
/**
 * Runs an int NumericRangeQuery on {@code field} with both bounds inclusive and prints
 * the hits.
 *
 * @param field numeric index field to search
 * @param start lower bound
 * @param end   upper bound
 * @param num   maximum number of hits to fetch
 */
public void searchByNumricRange(String field, int start, int end, int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        Query rangeQuery = NumericRangeQuery.newIntRange(field, start, end, true, true);
        TopDocs hits = indexSearcher.search(rangeQuery, num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByNumRange() {
    su.searchByNumricRange("attach", 2, 10, 5);
}
5、PrefixQuery查询
/**
 * Runs a PrefixQuery for {@code value} on {@code field} and prints the hits.
 *
 * @param field index field to search
 * @param value prefix the terms must start with
 * @param num   maximum number of hits to fetch
 */
public void searchByPrefix(String field, String value, int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        Term prefix = new Term(field, value);
        TopDocs hits = indexSearcher.search(new PrefixQuery(prefix), num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByPrefix() {
    su.searchByPrefix("content", "s", 10);
}
注:我们在添加索引的时候,content域使用的是Field.Store.NO(只建立索引而不存储原文),因此搜索结果中无法通过doc.get("content")取回内容
// Adds contents[i] as the "content" field: analyzed for indexing, but not stored (Field.Store.NO).
doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
6、WildcardQuery查询
public void searchByWildcard(String field,String value,int num) { try { IndexSearcher searcher = getSearcher(); //在传入的value中可以使用通配符:?和*,?表示匹配一个字符,*表示匹配任意多个字符 Query query = new WildcardQuery(new Term(field,value)); TopDocs tds = searcher.search(query, num); System.out.println("一共查询了:"+tds.totalHits); for(ScoreDoc sd:tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id")+"---->"+ doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+ doc.get("attach")+","+doc.get("date")); } searcher.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } //测试方法 @Test public void searchByWildcard() { //匹配@itat.org结尾的所有字符 su.searchByWildcard("email", "*@itat.org", 10); System.out.println("======================================="); //匹配j开头的有三个字符的name su.searchByWildcard("name", "j???", 10); }
7、BooleanQuery查询
/**
 * Runs a BooleanQuery that excludes name:zhangsan and optionally matches content:game,
 * then prints the hits.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByBoolean(int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        /*
         * A BooleanQuery combines several sub-queries:
         * Occur.MUST     - the clause must match
         * Occur.SHOULD   - the clause may match
         * Occur.MUST_NOT - the clause must not match
         */
        BooleanQuery combined = new BooleanQuery();
        TermQuery excludedName = new TermQuery(new Term("name", "zhangsan"));
        combined.add(excludedName, Occur.MUST_NOT);
        TermQuery optionalContent = new TermQuery(new Term("content", "game"));
        combined.add(optionalContent, Occur.SHOULD);
        TopDocs hits = indexSearcher.search(combined, num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByBoolean() {
    su.searchByBoolean(10);
}
将query.add(new TermQuery(new Term("name","zhangsan")), Occur.MUST_NOT);设置为query.add(new TermQuery(new Term("name","zhangsan")), Occur.SHOULD);
8、PhraseQuery查询
// Sample texts; contents[i] is what gets added to a document as its "content" field.
private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football", "I like football and I like basketball too", "I like movie and swim" };
public void searchByPhrase(int num) { try { IndexSearcher searcher = getSearcher(); PhraseQuery query = new PhraseQuery(); query.setSlop(3); query.add(new Term("content","pingpeng")); //第一个Term query.add(new Term("content","i")); //产生距离之后的第二个Term TopDocs tds = searcher.search(query, num); System.out.println("一共查询了:"+tds.totalHits); for(ScoreDoc sd:tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id")+"---->"+ doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+ doc.get("attach")+","+doc.get("date")); } searcher.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } //测试方法 @Test public void searchByPhrase() { su.searchByPhrase(10); }
9、FuzzyQuery查询
/**
 * Runs a FuzzyQuery for name:mase (minimum similarity 0.4, prefix length 0), prints the
 * query's prefix length and minimum similarity, then prints the hits.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByFuzzy(int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        Term approximate = new Term("name", "mase");
        FuzzyQuery fuzzy = new FuzzyQuery(approximate, 0.4f, 0);
        System.out.println(fuzzy.getPrefixLength());
        System.out.println(fuzzy.getMinSimilarity());
        TopDocs hits = indexSearcher.search(fuzzy, num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            Document hit = indexSearcher.doc(scored[i].doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByFuzzy() {
    su.searchByFuzzy(10);
}
10、QueryParser查询
/**
 * Runs an already-built query and prints each hit together with its score.
 *
 * @param query query to execute
 * @param num   maximum number of hits to fetch
 */
public void searchByQueryParse(Query query, int num) {
    try {
        IndexSearcher indexSearcher = getSearcher();
        TopDocs hits = indexSearcher.search(query, num);
        System.out.println("一共查询了:" + hits.totalHits);
        ScoreDoc[] scored = hits.scoreDocs;
        for (int i = 0; i < scored.length; i++) {
            ScoreDoc current = scored[i];
            Document hit = indexSearcher.doc(current.doc);
            StringBuilder line = new StringBuilder();
            line.append(hit.get("id")).append("---->");
            line.append(hit.get("name")).append("[").append(hit.get("email")).append("]-->");
            line.append(hit.get("id")).append(",");
            line.append(hit.get("attach")).append(",").append(hit.get("date"));
            line.append("==").append(current.score);
            System.out.println(line.toString());
        }
        indexSearcher.close();
    } catch (IOException e) {
        // CorruptIndexException is an IOException subclass, so one handler covers both.
        e.printStackTrace();
    }
}

// Test method
@Test
public void searchByQueryParse() throws ParseException {
    // 1. Create the QueryParser; the default search field is content.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content", analyzer);
    // Change the default operator applied to whitespace; the line below switches it to AND.
    //parser.setDefaultOperator(Operator.AND);
    // Allow a wildcard as the first character; off by default because it is inefficient.
    parser.setAllowLeadingWildcard(true);
    // Content containing "like".
    Query query = parser.parse("like");
    // Content with basketball or football; whitespace defaults to OR.
    query = parser.parse("basketball football");
    // A field prefix selects the search field explicitly.
    //query = parser.parse("content:like");
    // * and ? work as wildcards here too.
    // query = parser.parse("name:j*");
    // By default a wildcard may not be the first character.
    // query = parser.parse("email:*@itat.org");
    // name must not contain mike and content must contain like;
    // + and - are placed in front of the field specifier.
    query = parser.parse("- name:mike + like");
    // Range match; note that TO must be upper case.
    //query = parser.parse("id:[1 TO 6]");
    // Exclusive bounds: only 2 matches.
    //query = parser.parse("id:{1 TO 3}");
    // Exact phrase "I like football".
    //query = parser.parse("\"I like football\"");
    // "I" and "football" separated by a distance of one word.
    //query = parser.parse("\"I football\"~1");
    // Fuzzy query.
    //query = parser.parse("name:make~");
    // Numeric ranges cannot be matched this way (extend the parser yourself).
    //query = parser.parse("attach:[2 TO 10]");
    su.searchByQueryParse(query, 10);
}
11、分页查询
/** * 根据页码和分页大小获取上一次的最后一个ScoreDoc */ private ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher searcher) throws IOException { if(pageIndex==1)return null;//如果是第一页就返回空 int num = pageSize*(pageIndex-1);//获取上一页的数量 TopDocs tds = searcher.search(query, num); return tds.scoreDocs[num-1]; }
public void searchPageByAfter(String query,int pageIndex,int pageSize) { try { Directory dir = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(dir); QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35)); Query q = parser.parse(query); //先获取上一页的最后一个元素 ScoreDoc lastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher); //通过最后一个元素搜索下页的pageSize个元素 TopDocs tds = searcher.searchAfter(lastSd,q, pageSize); for(ScoreDoc sd:tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(sd.doc+":"+doc.get("path")+"-->"+doc.get("filename")); } searcher.close(); } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }
/** Prints pages 1 through 6 of the results for "java", ten hits per page. */
@Test
public void testSearchPage02() {
    int page = 1;
    while (page < 7) {
        su.searchPageByAfter("java", page, 10);
        System.out.println("==========================================");
        page++;
    }
}