1、使用QueryParser执行查询
package com.hb.lucence.test; import java.io.IOException; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser.Operator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.junit.Test; public class ParseQueryTest { private Directory directory; private IndexReader indexReader; private String[] ids = { "1", "2", "3", "4", "5", "6" }; private String[] emails = { "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]" }; private String[] contents = { "welcome to visited the space,I like book heye", "hello boye, I like pingpeng ball", "my name is cc I like game", "I like football", "I like football and I like basketball too", "I like movie and swim" }; private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" }; private Map<String, Float> scores = new HashMap<String, Float>(); private int[] attachs = { 2, 3, 1, 4, 5, 5 }; private Date[] dates = null; /** * 初始化日期数据 */ public void initDate() { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); dates = new Date[6]; int[] attachs = { 2, 3, 1, 4, 5, 5 }; try { dates[0] = sdf.parse("2010-02-19"); dates[1] = sdf.parse("2012-01-11"); dates[2] = sdf.parse("2011-09-19"); dates[3] = sdf.parse("2010-12-22"); dates[4] = sdf.parse("2012-01-01"); dates[5] = sdf.parse("2011-05-19"); } catch (ParseException e) { e.printStackTrace(); } } public ParseQueryTest(){ initDate(); } public void index() { IndexWriter iw = null; try { //1、创建目录 directory = new RAMDirectory(); //2、创建IndexWriter对象 iw = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); Document doc = null; for (int i = 0; i < 6; i++) { //3、创建document对象 doc = new Document(); //4、添加Field对象 doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new NumericField("date", Store.YES, true).setLongValue(dates[i].getTime())); doc.add(new NumericField("number", Field.Store.YES, true).setIntValue(attachs[i])); String et = emails[i].substring(emails[i].lastIndexOf("@") + 1); // System.out.println(et); if (scores.containsKey(et)) { //配置索引的加权值 doc.setBoost(scores.get(et)); } else { doc.setBoost(0.5f); } iw.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (iw != null) { try { //6、关闭indexwriter //iw.commit(); iw.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } } /** * 由于打开indexReader需要较大的资源 * @return */ public IndexSearcher getSercher() { try { if (indexReader == null) { indexReader = IndexReader.open(directory); } else { //如果IndexReader对象有变化,则返回IndexReader对象,否则返回null IndexReader newIndexReader = IndexReader.openIfChanged(indexReader); if (newIndexReader != null) { indexReader = newIndexReader; } } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return new IndexSearcher(indexReader); } public void printer(TopDocs tds, IndexSearcher searcher) { ScoreDoc[] sds = tds.scoreDocs; try { for (ScoreDoc sd : sds) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id") + "---->" + doc.get("name") + "[" + doc.get("email") + "]-->" + doc.get("id") + "," + doc.get("number")+","+doc.get("date")); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * 根据默认域查询 */ public void defaultParserSearch(){ try { this.index(); IndexSearcher searcher = this.getSercher(); //创建QueryParser对象 QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); //查找默认域content中包含football的document Query query = parser.parse("football"); TopDocs tds = searcher.search(query, 10); this.printer(tds, searcher); } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @Test public void defaultParserSearchTest(){ ParseQueryTest main = new ParseQueryTest(); main.defaultParserSearch(); } /** * 查询不同的默认域 */ public void changeFieldSearch(){ try { this.index(); IndexSearcher searcher = this.getSercher(); //创建QueryParser对象 QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); //查找域name中包含lisi的document Query query = parser.parse("name:lisi"); TopDocs tds = searcher.search(query, 10); this.printer(tds, searcher); } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @Test public void changeFieldSearchTest(){ ParseQueryTest main = new ParseQueryTest(); main.changeFieldSearch(); } /** * 通配符查询测试 */ public void wildcardSearch(){ try { this.index(); IndexSearcher searcher = this.getSercher(); //创建QueryParser对象 QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); /** * 查找域name中以z开头的document * Query query = parser.parse("name:z*"); * 默认是不支持*在最前面 * parser.setAllowLeadingWildcard(true); * Query query = parser.parse("name:*e"); */ /** * 默认是OR操作,parser.setDefaultOperator(Operator.AND); * Query query = parser.parse("football AND basketball"); * 等价于 * Query query = parser.parse("football basketball"); */ /** * 默认是OR操作,parser.setDefaultOperator(Operator.OR); * Query query = parser.parse("football OR basketball"); * 等价于 * Query query = parser.parse("football basketball"); */ /** * 只能在单个分词中使用通配符,例如heye, 即 使用 “hello boye”就匹配不到 * Query query = parser.parse("h*y?"); */ /** * 默认属性content存在football不存在basketball * Query query = parser.parse("+football -basketball"); * name属性包含mike 并且 content属性包含 swim(测试一定要有+,如果单独使用-则无法查询数据) * Query query = parser.parse("+ name:mike AND + swim"); * id是1到5 ,并且 content域中不包含football 和 welcome * Query query = parser.parse("id:[1 TO 5] -content:football -content:welcome"); */ /** * 查询id属性值在 1 到 3 之间的,TO一定要大写 * 包含1 和3 * Query query = parser.parse("id:[1 TO 3)"); * 或者 * 不包含1 和3 * Query query = parser.parse("id:{1 TO 3}"); */ /** * 查询content域中包含 I like pingpeng * Query query = parser.parse("content:\"I like pingpeng\""); */ /** * 在content属性中查询I pingpeng之间含一个单词的document * Query query = parser.parse("content:\"I pingpeng\"~1"); */ /** * 查询name域中像jaee的,例如jake * Query query = parser.parse("name:jaee~"); */ // Query query = null; // try { // DateFormat dateFormat1 = new SimpleDateFormat("yyyy-MM-dd"); // Date myDate1 = dateFormat1.parse("2012-01-01"); // Date myDate2 = dateFormat1.parse("2013-01-01"); // System.out.println("date:["+myDate1.getTime()+" TO "+myDate2.getTime()+"]"); // query = parser.parse("date:["+myDate1.getTime()+" TO "+myDate2.getTime()+"]"); // } catch (ParseException e) { // e.printStackTrace(); // } MyQueryParser myQueryParser = new MyQueryParser(Version.LUCENE_35,"content", new StandardAnalyzer(Version.LUCENE_35)); Query query = myQueryParser.getRangeQuery("date", "20120101", "20130101", true); /** * 没有办法匹配数字范围,需要自己拓展parser * MyQueryParser myQueryParser = new MyQueryParser(Version.LUCENE_35,"content", new StandardAnalyzer(Version.LUCENE_35)); * Query query = myQueryParser.getRangeQuery("number", "4", "6", true); */ TopDocs tds = searcher.search(query, 10); this.printer(tds, searcher); } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @Test public void wildcardSearchTest(){ ParseQueryTest main = new ParseQueryTest(); main.wildcardSearch(); } @Test public void wildcardSearchTest1(){ String regex = "\\d{8}"; String dateType = "yyyyMMdd"; System.out.println(Pattern.matches(regex, "20120101")); } }
2、自定义QueryParser类
package com.hb.lucence.test; import java.text.SimpleDateFormat; import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.util.Version; import org.apache.lucene.search.Query; public class MyQueryParser extends QueryParser { public MyQueryParser(Version matchVersion, String f, Analyzer a) { super(matchVersion, f, a); } @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { throw new ParseException("由于性能原因,已禁用通配符搜索,请输入更精确的信息进行搜索 ^_^ ^_^"); } @Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { throw new ParseException("由于性能原因,已禁用模糊搜索,请输入更精确的信息进行搜索 ^_^ ^_^"); } @Override protected Query getRangeQuery(String field, String start, String end, boolean inclusive) throws ParseException { if (field.equals("number")) { // 默认的QueryParser.parse(String query)表达式中并不支持'size:[20 TO 80]'数字的域值 // 这样一来,针对数字的域值进行特殊处理,那么QueryParser表达式就支持数字了 return NumericRangeQuery.newIntRange(field, Integer.parseInt(start), Integer.parseInt(end), inclusive, inclusive); } else if (field.equals("date")) { String regex = "\\d{8}"; String dateType = "yyyyMMdd"; // Pattern pattern = Pattern.compile(regex); if (Pattern.matches(regex, start) && Pattern.matches(regex, end)) { SimpleDateFormat sdf = new SimpleDateFormat(dateType); try { long min = sdf.parse(start).getTime(); long max = sdf.parse(end).getTime(); // 使之支持日期的检索,应用时直接QueryParser.parse("date:[20130407 TO 20130701]") return NumericRangeQuery.newLongRange(field, min, max, inclusive, inclusive); } catch (java.text.ParseException e) { e.printStackTrace(); } } else { throw new ParseException("Unknown date format, please use '" + dateType + "'"); } } // 如没找到匹配的Field域,那么返回默认的TermRangeQuery return super.getRangeQuery(field, start, end, inclusive); } }