Lucene QueryParser 查询

1、使用QueryParser执行查询

 

package com.hb.lucence.test;

import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.QueryParser.Operator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Demonstrates Lucene 3.5 {@link QueryParser} query syntax against a small
 * in-memory index of six sample documents.
 *
 * <p>Each {@code *Search} method rebuilds the index in a fresh
 * {@link RAMDirectory}, runs one query and prints the matching documents to
 * standard output. The JUnit {@code *Test} methods simply drive those methods
 * on a new instance.
 */
public class ParseQueryTest {

	/** In-memory index storage; replaced on every call to {@link #index()}. */
	private Directory directory;
	/** Cached reader, opened lazily by {@link #getSercher()}. */
	private IndexReader indexReader;
	private String[] ids = { "1", "2", "3", "4", "5", "6" };
	private String[] emails = { "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]" };
	private String[] contents = { "welcome to visited the space,I like book heye", "hello boye, I like pingpeng ball", "my name is cc I like game", "I like football", "I like football and I like basketball too", "I like movie and swim" };
	private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
	/**
	 * Boost per e-mail suffix. Nothing in this class adds entries, so every
	 * document currently receives the fallback boost in {@link #index()}.
	 */
	private Map<String, Float> scores = new HashMap<String, Float>();
	private int[] attachs = { 2, 3, 1, 4, 5, 5 };

	/** One date per sample document; filled by {@link #initDate()}. */
	private Date[] dates = null;

	/**
	 * Initializes the sample dates, one per document.
	 *
	 * @throws IllegalStateException if one of the built-in date literals cannot
	 *         be parsed (failing here is clearer than a later
	 *         {@code NullPointerException} while indexing)
	 */
	public void initDate() {
		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
		String[] literals = { "2010-02-19", "2012-01-11", "2011-09-19", "2010-12-22", "2012-01-01", "2011-05-19" };
		Date[] parsed = new Date[literals.length];
		try {
			for (int i = 0; i < literals.length; i++) {
				parsed[i] = sdf.parse(literals[i]);
			}
		} catch (ParseException e) {
			throw new IllegalStateException("Invalid built-in sample date", e);
		}
		dates = parsed;
	}

	/** Creates the fixture and prepares the sample dates. */
	public ParseQueryTest(){
		initDate();
	}

	/**
	 * Builds the sample index in a new {@link RAMDirectory}.
	 *
	 * <p>Per document: {@code id}, {@code email} and {@code name} are stored and
	 * indexed un-analyzed; {@code content} is analyzed but not stored;
	 * {@code date} (epoch milliseconds) and {@code number} are stored, indexed
	 * numeric fields. I/O failures are printed and otherwise ignored.
	 */
	public void index() {
		IndexWriter iw = null;
		try {
			// 1. Create the directory.
			directory = new RAMDirectory();
			// 2. Create the IndexWriter.
			iw = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
			Document doc = null;
			// Iterate over the sample data itself instead of a hard-coded count.
			for (int i = 0; i < ids.length; i++) {
				// 3. Create the document.
				doc = new Document();
				// 4. Add the fields.
				doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
				doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
				doc.add(new NumericField("date", Store.YES, true).setLongValue(dates[i].getTime()));
				doc.add(new NumericField("number", Field.Store.YES, true).setIntValue(attachs[i]));
				// Everything after the last '@' selects the boost.
				String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
				if (scores.containsKey(et)) {
					// Apply the configured index-time boost.
					doc.setBoost(scores.get(et));
				} else {
					doc.setBoost(0.5f);
				}
				// 5. Add the document to the index.
				iw.addDocument(doc);
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (iw != null) {
				try {
					// 6. Close the IndexWriter (this also commits pending changes).
					iw.close();
				} catch (CorruptIndexException e) {
					e.printStackTrace();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Returns a searcher over the current index.
	 *
	 * <p>Opening an {@link IndexReader} is expensive, so the reader is cached and
	 * only reopened when the index has changed. When a newer reader is obtained
	 * the superseded one is closed; searchers handed out earlier must therefore
	 * not be used after a later call to this method.
	 *
	 * @return a new searcher wrapping the cached reader
	 */
	public IndexSearcher getSercher() {
		try {
			if (indexReader == null) {
				indexReader = IndexReader.open(directory);
			} else {
				// openIfChanged returns a new reader if the index changed, otherwise null.
				IndexReader newIndexReader = IndexReader.openIfChanged(indexReader);
				if (newIndexReader != null) {
					// openIfChanged does not close the old reader; release it
					// here so it is not leaked.
					IndexReader staleReader = indexReader;
					indexReader = newIndexReader;
					staleReader.close();
				}
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return new IndexSearcher(indexReader);
	}

	/**
	 * Prints the stored fields of every hit to standard output.
	 *
	 * @param tds      the search result to print
	 * @param searcher the searcher used to load the hit documents
	 */
	public void printer(TopDocs tds, IndexSearcher searcher) {
		ScoreDoc[] sds = tds.scoreDocs;
		try {
			for (ScoreDoc sd : sds) {
				Document doc = searcher.doc(sd.doc);
				System.out.println(doc.get("id") + "---->" + doc.get("name") + "[" + doc.get("email") + "]-->" + doc.get("id") + "," + doc.get("number")+","+doc.get("date"));
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Searches the parser's default field ({@code content}).
	 */
	public void defaultParserSearch(){
		try {
			this.index();
			IndexSearcher searcher = this.getSercher();
			// Create the QueryParser with "content" as the default field.
			QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
			// Find documents whose default field contains "football".
			Query query = parser.parse("football");

			TopDocs tds = searcher.search(query, 10);
			this.printer(tds, searcher);

		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}

	}

	@Test
	public void defaultParserSearchTest(){
		ParseQueryTest main = new ParseQueryTest();
		main.defaultParserSearch();
	}

	/**
	 * Searches a field other than the default one by prefixing the term with
	 * the field name.
	 */
	public void changeFieldSearch(){
		try {
			this.index();
			IndexSearcher searcher = this.getSercher();
			// Create the QueryParser with "content" as the default field.
			QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
			// Find documents whose "name" field is "lisi".
			Query query = parser.parse("name:lisi");

			TopDocs tds = searcher.search(query, 10);
			this.printer(tds, searcher);

		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	@Test
	public void changeFieldSearchTest(){
		ParseQueryTest main = new ParseQueryTest();
		main.changeFieldSearch();
	}

	/**
	 * Query-syntax playground. The active query is a date range built through
	 * {@link MyQueryParser}; the block comments list other query strings that
	 * can be tried with the plain {@code parser}.
	 */
	public void wildcardSearch(){
		try {
			this.index();
			IndexSearcher searcher = this.getSercher();
			// Plain QueryParser, kept for the commented examples below.
			QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
			/*
			 * Documents whose name starts with "z":
			 * Query query = parser.parse("name:z*");
			 * A leading wildcard is not allowed by default:
			 * parser.setAllowLeadingWildcard(true);
			 * Query query = parser.parse("name:*e");
			 */

			/*
			 * The default operator is OR. After
			 * parser.setDefaultOperator(Operator.AND);
			 * Query query = parser.parse("football AND basketball");
			 * is equivalent to
			 * Query query = parser.parse("football  basketball");
			 */

			/*
			 * With the default operator left as (or set to) OR,
			 * parser.setDefaultOperator(Operator.OR);
			 * Query query = parser.parse("football OR basketball");
			 * is equivalent to
			 * Query query = parser.parse("football  basketball");
			 */

			/*
			 * Wildcards only apply within a single term, e.g. this matches
			 * "heye" but cannot match across "hello boye":
			 * Query query = parser.parse("h*y?");
			 */

			/*
			 * content contains football but not basketball:
			 * Query query = parser.parse("+football -basketball");
			 * name is mike and content contains swim (a '+' clause is required;
			 * a query consisting only of '-' clauses returns nothing):
			 * Query query = parser.parse("+ name:mike AND + swim");
			 * id from 1 to 5, content containing neither football nor welcome:
			 * Query query = parser.parse("id:[1 TO 5] -content:football -content:welcome");
			 */

			/*
			 * id between 1 and 3; TO must be upper case.
			 * Inclusive of 1 and 3:
			 * Query query = parser.parse("id:[1 TO 3]");
			 * Exclusive of 1 and 3:
			 * Query query = parser.parse("id:{1 TO 3}");
			 */

			/*
			 * Phrase query: content contains "I like pingpeng":
			 * Query query = parser.parse("content:\"I like pingpeng\"");
			 */

			/*
			 * Proximity query: "I" and "pingpeng" with one word in between:
			 * Query query = parser.parse("content:\"I pingpeng\"~1");
			 */

			/*
			 * Fuzzy query: names similar to "jaee", e.g. "jake":
			 * Query query = parser.parse("name:jaee~");
			 */

			/*
			 * Date range expressed directly in epoch milliseconds:
			 * Query query = null;
			 * try {
			 *     DateFormat dateFormat1 = new SimpleDateFormat("yyyy-MM-dd");
			 *     Date myDate1 = dateFormat1.parse("2012-01-01");
			 *     Date myDate2 = dateFormat1.parse("2013-01-01");
			 *     System.out.println("date:["+myDate1.getTime()+" TO "+myDate2.getTime()+"]");
			 *     query = parser.parse("date:["+myDate1.getTime()+" TO "+myDate2.getTime()+"]");
			 * } catch (ParseException e) {
			 *     e.printStackTrace();
			 * }
			 */

			// Active query: inclusive date range handled by the custom parser.
			MyQueryParser myQueryParser = new MyQueryParser(Version.LUCENE_35,"content", new StandardAnalyzer(Version.LUCENE_35));
			Query query = myQueryParser.getRangeQuery("date", "20120101", "20130101", true);

			/*
			 * The stock parser cannot match numeric ranges; the extended parser can:
			 * MyQueryParser myQueryParser = new MyQueryParser(Version.LUCENE_35,"content", new StandardAnalyzer(Version.LUCENE_35));
			 * Query query = myQueryParser.getRangeQuery("number", "4", "6", true);
			 */

			TopDocs tds = searcher.search(query, 10);
			this.printer(tds, searcher);

		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	@Test
	public void wildcardSearchTest(){
		ParseQueryTest main = new ParseQueryTest();
		main.wildcardSearch();
	}

	/** Prints whether an eight-digit date string matches the date regex. */
	@Test
	public void wildcardSearchTest1(){
		String regex = "\\d{8}";
		System.out.println(Pattern.matches(regex, "20120101"));
	}

}

 

2、自定义QueryParser类

package com.hb.lucence.test;

import java.text.SimpleDateFormat;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.Query; 

/**
 * {@link QueryParser} extension that disables wildcard and fuzzy queries and
 * adds numeric range support for the {@code number} (int) and {@code date}
 * (epoch-millisecond long) fields.
 */
public class MyQueryParser extends QueryParser {

	/** Field whose range bounds are parsed as integers. */
	private static final String NUMBER_FIELD = "number";
	/** Field whose range bounds are parsed as {@code yyyyMMdd} dates. */
	private static final String DATE_FIELD = "date";
	/** Date format accepted for range bounds on {@link #DATE_FIELD}. */
	private static final String DATE_FORMAT = "yyyyMMdd";
	/** Compiled once; a bound must be exactly eight digits. */
	private static final Pattern DATE_PATTERN = Pattern.compile("\\d{8}");

	public MyQueryParser(Version matchVersion, String f, Analyzer a) {
		super(matchVersion, f, a);
	}

	/**
	 * Always rejects wildcard queries.
	 *
	 * @throws ParseException always
	 */
	@Override
	protected Query getWildcardQuery(String field, String termStr) throws ParseException {
		throw new ParseException("由于性能原因,已禁用通配符搜索,请输入更精确的信息进行搜索 ^_^ ^_^");
	}

	/**
	 * Always rejects fuzzy queries.
	 *
	 * @throws ParseException always
	 */
	@Override
	protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
		throw new ParseException("由于性能原因,已禁用模糊搜索,请输入更精确的信息进行搜索 ^_^ ^_^");
	}

	/**
	 * Builds a range query, using numeric ranges for the numeric fields.
	 *
	 * <p>The stock parser treats range bounds as terms, which does not match
	 * numeric fields. For {@code number} the bounds are parsed as integers; for
	 * {@code date} they are parsed as {@code yyyyMMdd} and converted to epoch
	 * milliseconds (midnight, default time zone), so a query such as
	 * {@code date:[20130407 TO 20130701]} works. Any other field falls back to
	 * the default term range query.
	 *
	 * @param field     the field being queried
	 * @param start     lower bound as written in the query
	 * @param end       upper bound as written in the query
	 * @param inclusive whether both bounds are inclusive
	 * @return the range query for the field
	 * @throws ParseException if a bound of a numeric field is not a valid
	 *         integer or a valid {@code yyyyMMdd} date
	 */
	@Override
	protected Query getRangeQuery(String field, String start, String end, boolean inclusive) throws ParseException {
		if (NUMBER_FIELD.equals(field)) {
			try {
				return NumericRangeQuery.newIntRange(field, Integer.parseInt(start), Integer.parseInt(end), inclusive, inclusive);
			} catch (NumberFormatException e) {
				// Report bad input as a query syntax problem instead of leaking
				// an unchecked exception out of the parser.
				throw asParseException("Invalid integer range for field '" + field + "': [" + start + " TO " + end + "]", e);
			}
		}
		if (DATE_FIELD.equals(field)) {
			if (!DATE_PATTERN.matcher(start).matches() || !DATE_PATTERN.matcher(end).matches()) {
				throw new ParseException("Unknown date format, please use '" + DATE_FORMAT + "'");
			}
			// SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
			SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
			// Strict parsing: reject impossible dates such as 20121399 instead
			// of silently rolling them over.
			sdf.setLenient(false);
			try {
				long min = sdf.parse(start).getTime();
				long max = sdf.parse(end).getTime();
				return NumericRangeQuery.newLongRange(field, min, max, inclusive, inclusive);
			} catch (java.text.ParseException e) {
				// Do not fall through to a term range on a numeric field.
				throw asParseException("Invalid date range for field '" + field + "': [" + start + " TO " + end + "], please use '" + DATE_FORMAT + "'", e);
			}
		}
		// No special handling for this field: use the default TermRangeQuery.
		return super.getRangeQuery(field, start, end, inclusive);
	}

	/** Creates a query {@link ParseException} that keeps the original cause. */
	private static ParseException asParseException(String message, Throwable cause) {
		ParseException wrapped = new ParseException(message);
		wrapped.initCause(cause);
		return wrapped;
	}
}

 

你可能感兴趣的:(QueryParser)