Summary:
1) Execute precise query using TermQuery
2) Execute fuzzy String type range query using TermRangeQuery
3) Execute precise Numeric type range query using NumericRangeQuery
1. We can use TermQuery to execute precise query.
2. Example as below:
1) Main Function
package edu.xmu.lucene.Lucene_ModuleOne; import java.io.IOException; import java.util.List; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; public class SearcherUtil { private Directory directory; private IndexReader reader; public SearcherUtil() { directory = new RAMDirectory(); } public IndexSearcher getSearcher() { try { if (null == reader) { reader = IndexReader.open(directory); } else { IndexReader tempReader = IndexReader.openIfChanged(reader); if (null != tempReader) { reader.close(); reader = tempReader; } } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return new IndexSearcher(reader); } public void buildIndex(List<Student> studentList) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new SimpleAnalyzer(Version.LUCENE_35)); IndexWriter writer = null; Document doc = null; try { writer = new IndexWriter(directory, config); for (Student student : studentList) { doc = new Document(); doc.add(new Field("id", student.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("name", student.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("password", student.getPassword(), Field.Store.YES, 
Field.Index.NOT_ANALYZED)); doc.add(new Field("gender", student.getGender(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new NumericField("score", Field.Store.YES, true) .setIntValue(student.getScore())); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } /** * Precise query using TermQuery * * @param field * @param name */ public void searchByTerm(String fieldName, String fieldValue) { IndexSearcher searcher = getSearcher(); Query query = new TermQuery(new Term(fieldName, fieldValue)); try { TopDocs tds = searcher.search(query, 100); System.out.println("Total Hits: " + tds.totalHits); for (ScoreDoc sd : tds.scoreDocs) { Document document = searcher.doc(sd.doc); System.out.println("id = " + document.get("id") + ", name = " + document.get("name") + ", password = " + document.get("password") + ", gender = " + document.get("gender") + ", score = " + document.get("score")); } } catch (IOException e) { e.printStackTrace(); } finally { try { searcher.close(); } catch (IOException e) { e.printStackTrace(); } } } }
2) Test Case
package edu.xmu.lucene.Lucene_ModuleOne;

import java.util.ArrayList;
import java.util.List;

import org.junit.Before;
import org.junit.Test;

/** Exercises the exact-match term search of {@code SearcherUtil}. */
public class SearcherUtilTest {
    private SearcherUtil searcherUtil = null;

    /** Gives every test its own, empty {@code SearcherUtil}. */
    @Before
    public void setUp() {
        searcherUtil = new SearcherUtil();
    }

    /** Indexes the five sample students used by the search test. */
    private void testBuildIndex() {
        List<Student> students = new ArrayList<Student>();
        students.add(new Student("1", "Davy", "Jones", "Male", 100));
        students.add(new Student("1", "Davy", "Jones", "Male", 110));
        students.add(new Student("2", "Jones", "Davy", "Male", 120));
        students.add(new Student("3", "Calyp", "Jones", "Female", 130));
        students.add(new Student("4", "Pso", "Caly", "Female", 140));
        searcherUtil.buildIndex(students);
    }

    /** Searches the gender field for the exact term "Female". */
    @Test
    public void testSearch() {
        testBuildIndex();
        searcherUtil.searchByTerm("gender", "Female");
    }
}
3) Console Output
Total Hits: 2 id = 3, name = Calyp, password = Jones, gender = Female, score = 130 id = 4, name = Pso, password = Caly, gender = Female, score = 140
Comments:
1) When we execute query using searchByTerm("gender", "Fema");
The result set is empty because there is no gender whose value equals "Fema". This is a precise (exact-match) query.
The difference between a precise query and a fuzzy (partial-match) query is that if we executed the query above as a partial-match query, for example a prefix query, the result set would contain 2 documents, because "Female" starts with "Fema".
3. We can use TermRangeQuery to execute range query
4. Example as below
1) Main Function
/**
 * Prints every document (up to resultSize) whose field value lies in the
 * String range [fieldValueStart, fieldValueEnd], both ends included.
 *
 * @param fieldName       field to search
 * @param fieldValueStart lower term of the range
 * @param fieldValueEnd   upper term of the range
 * @param resultSize      maximum number of hits to fetch
 */
public void searchByTermRange(String fieldName, String fieldValueStart,
        String fieldValueEnd, int resultSize) {
    IndexSearcher searcher = getSearcher();
    // Arguments: field, lowerTerm, upperTerm, includeLower, includeUpper.
    boolean includeLower = true;
    boolean includeUpper = true;
    Query rangeQuery = new TermRangeQuery(fieldName, fieldValueStart,
            fieldValueEnd, includeLower, includeUpper);
    try {
        ScoreDoc[] hits = searcher.search(rangeQuery, resultSize).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document found = searcher.doc(hits[i].doc);
            StringBuilder line = new StringBuilder();
            line.append("id = ").append(found.get("id"));
            line.append(", name = ").append(found.get("name"));
            line.append(", password = ").append(found.get("password"));
            line.append(", gender = ").append(found.get("gender"));
            line.append(", score = ").append(found.get("score"));
            System.out.println(line.toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
2) Test Case
/** Exercises the String range search of {@code SearcherUtil}. */
public class SearcherUtilTest {
    private SearcherUtil searcherUtil = null;

    /** Gives every test its own, empty {@code SearcherUtil}. */
    @Before
    public void setUp() {
        searcherUtil = new SearcherUtil();
    }

    /** Indexes five sample students with the single-digit ids "1" to "5". */
    private void testBuildIndex() {
        List<Student> students = new ArrayList<Student>();
        students.add(new Student("1", "Davy", "Jones", "Male", 100));
        students.add(new Student("2", "Davy", "Jones", "Male", 110));
        students.add(new Student("3", "Jones", "Davy", "Male", 120));
        students.add(new Student("4", "Calyp", "Jones", "Female", 130));
        students.add(new Student("5", "Pso", "Caly", "Female", 140));
        searcherUtil.buildIndex(students);
    }

    /** Queries the id field for the inclusive String range "1" to "3". */
    @Test
    public void testSearchByTermRange() {
        testBuildIndex();
        searcherUtil.searchByTermRange("id", "1", "3", 100);
    }
}
3) Console Output
id = 1, name = Davy, password = Jones, gender = Male, score = 100 id = 2, name = Davy, password = Jones, gender = Male, score = 110 id = 3, name = Jones, password = Davy, gender = Male, score = 120
Comments:
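1) This is a fuzzy (range) query, not a precise query: it matches every term that sorts, as a String, between the lower and the upper bound.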
2) When we rebuild index as below
/** Indexes five sample students with the two-digit ids "11" to "55". */
private void testBuildIndex() {
    List<Student> students = new ArrayList<Student>();
    students.add(new Student("11", "Davy", "Jones", "Male", 100));
    students.add(new Student("22", "Davy", "Jones", "Male", 110));
    students.add(new Student("33", "Jones", "Davy", "Male", 120));
    students.add(new Student("44", "Calyp", "Jones", "Female", 130));
    students.add(new Student("55", "Pso", "Caly", "Female", 140));
    searcherUtil.buildIndex(students);
}
And execute the same query, the output is as below
id = 11, name = Davy, password = Jones, gender = Male, score = 100 id = 22, name = Davy, password = Jones, gender = Male, score = 110
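Because String terms are compared lexicographically, "1" < "11" < "2" < "22" < "3" < "33", and the range is "1" <= value <= "3". So "11" and "22" fall inside the range, while "33", "44" and "55" sort after "3" and are excluded.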
3) How can we query the students whose score is in the range of 100<= value <= 120? --> This is about int value query not String value.
1) Attempt-01: Execute query for int value just like String
/** Attempt-01: queries the numeric score field with a String range. */
@Test
public void testSearchByTermRange() {
    testBuildIndex();
    searcherUtil.searchByTermRange("score", "100", "120", 100);
}
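Output --> Empty, because the score was indexed as a NumericField, whose terms are stored in an encoded numeric form rather than as the plain strings "100" to "120", so a String range never matches them.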
Attempt-01: Failed!
2) Attempt-02: Execute query for int value using NumericRangeQuery --> Please pay attention to this as it is precise range query.
/**
 * Prints every document (up to resultSize) whose int field value lies in the
 * range [fieldValueStart, fieldValueEnd], both ends included.
 *
 * @param fieldName       numeric field to search
 * @param fieldValueStart lower bound of the range (inclusive)
 * @param fieldValueEnd   upper bound of the range (inclusive)
 * @param resultSize      maximum number of hits to fetch
 */
public void searchByNumericRange(String fieldName, int fieldValueStart,
        int fieldValueEnd, int resultSize) {
    IndexSearcher searcher = getSearcher();
    Query query = NumericRangeQuery.newIntRange(fieldName, fieldValueStart,
            fieldValueEnd, true, true);
    try {
        TopDocs tds = searcher.search(query, resultSize);
        for (ScoreDoc scoreDoc : tds.scoreDocs) {
            Document document = searcher.doc(scoreDoc.doc);
            System.out.println("id = " + document.get("id") + ", name = "
                    + document.get("name") + ", password = "
                    + document.get("password") + ", gender = "
                    + document.get("gender") + ", score = "
                    + document.get("score"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the searcher on every path, as the other search methods do.
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/** Attempt-02: queries the score field with an inclusive int range. */
@Test
public void testSearchByNumericRange() {
    testBuildIndex();
    searcherUtil.searchByNumericRange("score", 100, 120, 100);
}
id = 11, name = Davy, password = Jones, gender = Male, score = 100 id = 22, name = Davy, password = Jones, gender = Male, score = 110 id = 33, name = Jones, password = Davy, gender = Male, score = 120