要实现搜索功能,首先要做的就是建立索引(Indexing)。在这里分享一下索引过程的相关知识。
Lucene索引过程主要分为3个操作阶段:将数据转换成文本,分析文本,并将分析过的文本保存到索引中。
之前的例子是遍历文件,并把文件内容添加到索引;在这里,我们手动地创建一个索引。
package org.ygy.lucene.index;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a small Lucene index from ten hand-made {@link BookEntity} records.
 *
 * <p>The index is written to {@link #INDEX_BOOK} and is recreated from scratch
 * on every run ({@code OpenMode.CREATE}).
 */
public class BookIndex {

    /** In-memory data source that is turned into Lucene documents. */
    private List<BookEntity> books = new ArrayList<BookEntity>();

    /** File-system directory that holds the index. */
    public static final String INDEX_BOOK = "/home/yuguiyang/Documents/lucene_temp";

    /**
     * Fills {@link #books} with ten sample books (ids 100..109).
     *
     * <p>The list is cleared first so that calling {@link #indexing()} more
     * than once on the same instance does not index duplicate records.
     */
    private void initial() {
        books.clear();

        for (int i = 0; i < 10; i++) {
            BookEntity book = new BookEntity();
            book.setId(100 + i);
            book.setTitle("title_" + i);
            book.setAuthor("author_" + i);
            book.setPrice(20.00 + i);
            book.setPublishDate(new Date());

            books.add(book);
        }
    }

    /**
     * Creates the book index under {@link #INDEX_BOOK}.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and not
     * rethrown. The directory is always closed, even when indexing fails.
     */
    public void indexing() {
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(BookIndex.INDEX_BOOK));
            addDocuments(dir);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Writes one document per book into the given directory.
     *
     * @param dir directory the index is written to
     * @throws IOException if the writer cannot be opened, written to, or closed
     */
    private void addDocuments(Directory dir) throws IOException {
        // Analyzer handed to the writer configuration.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);

        // Writer configuration: always start from an empty index.
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            // Prepare the data source.
            initial();

            // Walk the data source and add every book to the index.
            for (int i = 0; i < books.size(); i++) {
                Document doc = new Document();
                BookEntity book = books.get(i);

                System.out.println(i + "->" + book);

                // id, title and author are stored; price and publishDate are
                // indexed with Store.NO.
                doc.add(new IntField("id", book.getId(), Store.YES));
                doc.add(new StringField("title", book.getTitle(), Store.YES));
                doc.add(new StringField("author", book.getAuthor(), Store.YES));
                doc.add(new DoubleField("price", book.getPrice(), Store.NO));
                doc.add(new StringField("publishDate", book.getPublishDate().toString(), Store.NO));

                writer.addDocument(doc);
            }
        } finally {
            // Close the writer even when adding a document fails, so it is not
            // left open after an error.
            writer.close();
        }
    }

    public static void main(String[] args) {
        BookIndex bookIndex = new BookIndex();
        bookIndex.indexing();
    }
}
package org.ygy.lucene.index;

import java.util.Date;

/**
 * Plain mutable bean describing one book.
 *
 * <p>Identity is defined by {@code id} alone: {@link #equals(Object)} and
 * {@link #hashCode()} ignore every other property.
 */
public class BookEntity {

    private Integer id;         // book id
    private String title;       // book title
    private String author;      // author
    private Date publishDate;   // publication date
    private Double price;       // unit price

    public Integer getId() {
        return this.id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getAuthor() {
        return this.author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public Date getPublishDate() {
        return this.publishDate;
    }

    public void setPublishDate(Date publishDate) {
        this.publishDate = publishDate;
    }

    public Double getPrice() {
        return this.price;
    }

    public void setPrice(Double price) {
        this.price = price;
    }

    /** Hash derived from {@code id} only; a {@code null} id contributes 0. */
    @Override
    public int hashCode() {
        int idHash = (id != null) ? id.hashCode() : 0;
        return 31 + idHash;
    }

    /** Two books are equal when they are of the same class and their ids match. */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }

        BookEntity that = (BookEntity) obj;
        if (id == null) {
            return that.id == null;
        }
        return id.equals(that.id);
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("BookEntity [id=");
        text.append(id);
        text.append(", title=").append(title);
        text.append(", author=").append(author);
        text.append(", publishDate=").append(publishDate);
        text.append(", price=").append(price);
        text.append("]");
        return text.toString();
    }
}
package org.ygy.lucene.index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs simple single-field queries against the index at
 * {@link BookIndex#INDEX_BOOK} and prints the matching books.
 *
 * <p>NOTE(review): the query text is parsed by the classic {@code QueryParser}
 * with a {@code StandardAnalyzer}. The "id" and "price" fields are indexed as
 * {@code IntField}/{@code DoubleField}; such numeric fields presumably need a
 * {@code NumericRangeQuery} rather than a parsed term query, and the analyzer
 * presumably lower-cases the query text while {@code StringField} values are
 * indexed verbatim - confirm against the Lucene documentation.
 */
public class BookSearch {

    /** Maximum number of hits fetched and printed per query. */
    private static final int MAX_HITS = 100;

    /**
     * Parses {@code queryString} against {@code field}, searches the book
     * index and prints every hit to standard output.
     *
     * <p>Failures to open the index, parse the query or read results are
     * reported with {@code printStackTrace()} and not rethrown.
     *
     * @param field       default field the query is parsed against
     * @param queryString query text in classic QueryParser syntax
     */
    public static void searching(String field, String queryString) {
        // Open the index. Without a reader there is nothing to search, so stop
        // here instead of failing later with a NullPointerException.
        IndexReader reader;
        try {
            reader = DirectoryReader.open(FSDirectory.open(new File(BookIndex.INDEX_BOOK)));
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        try {
            // Analyzer and parser used to turn the query text into a Query.
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
            QueryParser parser = new QueryParser(Version.LUCENE_45, field, analyzer);

            Query query = parser.parse(queryString);
            System.out.println("Searching for:" + field + "->" + queryString);

            // Search the index.
            IndexSearcher searcher = new IndexSearcher(reader);
            doSearch(searcher, query);
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the reader on every path, including parse/search failures.
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Executes the query and prints the total hit count followed by the
     * fields of each returned document.
     *
     * @param searcher searcher bound to the open index
     * @param query    query to execute
     * @throws IOException if the index cannot be read
     */
    private static void doSearch(IndexSearcher searcher, Query query) throws IOException {
        TopDocs results = searcher.search(query, MAX_HITS);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println("总条数:" + numTotalHits);

        // Only the returned hits (at most MAX_HITS) can be shown.
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);

            // Show the book detail.
            System.out.println("id->" + doc.get("id"));
            System.out.println("title->" + doc.get("title"));
            System.out.println("author->" + doc.get("author"));
            System.out.println("price->" + doc.get("price"));
            System.out.println("publishDate->" + doc.get("publishDate"));
        }
    }
}
package org.ygy.lucene.index; public class BookClient { public static void main(String[] args) { //1.indexing //2.search BookSearch.searching("id", "100"); BookSearch.searching("title", "title_0"); BookSearch.searching("author", "author_0"); BookSearch.searching("price" , "20.0"); } }
这里就发现了一个问题:使用id、price搜索时,没有找到数据;但是使用title和author搜索时,就可以查询到数据。暂时无法理解原因,有待学习。
在测试时,还发现一个问题,就是有关大小写的问题:
在这里,我们把原来的"author_"改为首字母大写的"Author_",重新运行一遍索引程序,再重新查询。
好吧,悲剧了:不管是查询"author_0",还是"Author_0",都查不到数据了。这里的大小写不知道是什么问题。
这两个问题先记着,在学习中解决。
有的时候,应用程序可以为一个给定的词语产生一系列同义词,我们当然希望在搜索同义词的时候,也能把该条记录查出来。这在Lucene中可以很简单地实现。
在BookIndex中修改:
我们在第一条记录的title字段中追加"op"和"ygy",然后再查询:
我们看一下能否查询到结果:
好了,这样就实现了。