Lucene3.5学习总结:
Lucene主要分为两大块:索引和搜索。相关jar包可以从官网下载。
官网地址为:http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/index.html
索引分为文件索引和内存索引,下面介绍的是文件索引。包括新建、删除、更新、读取索引。索引中文分词可以研究下IKAnalyzer。
import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * 索引 * * @author * @version v 0.1 2012-3-6 上午10:50:25 */ public class Index { /** 索引文件路径 */ private static final String INDEX_PATH = "/workspace2/indexing"; /** 编号 */ public static final String AUCTION_NO = "auctionNo"; /** 名称 */ public static final String AUCTION_NAME = "auctionName"; /** 价格 */ public static final String MAX_PRICE = "maxPrice"; /** 日期 */ public static final String END_DATE = "endDate"; /** * 程序入口 * @param args */ public static void main(String[] args) { createIndex(); // deleteIndex(); // readIndex(); updateIndex(); } /** * 创建索引 */ public static void createIndex(){ try { Directory directory = FSDirectory.open(new File(INDEX_PATH)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer); IndexWriter writer = new IndexWriter(directory, iwConfig); //Document添加索引值 Document doc = new Document(); Field auctionNoField = new Field(AUCTION_NO, "10003",Field.Store.YES, Field.Index.NOT_ANALYZED); Field auctionNameField = new Field(AUCTION_NAME, "汇园果汁", Field.Store.YES, Field.Index.ANALYZED); Field endDateField = new Field(END_DATE, "2012-03-06 18:00:00",Field.Store.YES, Field.Index.NOT_ANALYZED); doc.add(auctionNoField); doc.add(auctionNameField); doc.add(new NumericField(Index.MAX_PRICE, Field.Store.YES, true).setDoubleValue(300)); doc.add(endDateField); writer.addDocument(doc); // 
writer.addDocuments(docs);//添加多个索引 writer.close(); System.out.println("=========创建索引完成"); } catch (IOException e) { e.printStackTrace(); } } /** * IndexWriter方式删除索引 * <ul> * <li>全部删除</li> * <li>单个或多个删除</li> * </ul> */ public static void indexWriterDeleteIndex(){ try { Directory directory = FSDirectory.open(new File(INDEX_PATH)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer); IndexWriter writer = new IndexWriter(directory, iwConfig); // writer.deleteAll();//删除所有索引 Term term = new Term(Index.AUCTION_NO, "10001");//删除单个索引 writer.deleteDocuments(term); writer.close(); System.out.println("=========删除索引成功"); } catch (IOException e) { e.printStackTrace(); } } /** * IndexReader方式删除索引 */ public static void indexReaderDeleteIndex(){ try { Directory directory = FSDirectory.open(new File(INDEX_PATH)); IndexReader reader = IndexReader.open(directory, false);//设为true为只读模式 Term term = new Term(Index.AUCTION_NO, "10000");//删除单个索引 reader.deleteDocuments(term); reader.flush(); reader.close(); } catch (IOException e) { e.printStackTrace(); } } /** * 更新索引(先删除再创建) * <ul> * <li>更新单个索引</li> * <li>更新多个索引</li> * </ul> */ public static void updateIndex(){ try { Directory directory = FSDirectory.open(new File(INDEX_PATH)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer); IndexWriter writer = new IndexWriter(directory, iwConfig); //Document添加索引值 Document doc = new Document(); Field auctionNoField = new Field(AUCTION_NO, "10003",Field.Store.YES, Field.Index.NOT_ANALYZED); Field auctionNameField = new Field(AUCTION_NAME, "商品名称", Field.Store.YES, Field.Index.ANALYZED); Field endDateField = new Field(END_DATE, "2011-03-06 18:00:00",Field.Store.YES, Field.Index.NOT_ANALYZED); doc.add(auctionNoField); doc.add(auctionNameField); doc.add(new NumericField(Index.MAX_PRICE, Field.Store.YES, true).setDoubleValue(200)); 
doc.add(endDateField); //根据唯一商品ID进行更新索引 Term term = new Term(Index.AUCTION_NO,"10003"); writer.updateDocument(term, doc);//更新索引 // writer.updateDocuments(delTerm, docs);//更新多个索引 writer.close(); System.out.println("=========更新索引完成"); } catch (IOException e) { e.printStackTrace(); } } /** * 读索引 */ public static void readIndex(){ try { Directory directory = FSDirectory.open(new File(INDEX_PATH)); IndexReader reader = IndexReader.open(directory, true);//设为true为只读模式 int num = reader.numDocs(); for (int i = 0; i < num; i++) { Document doc = reader.document(i); System.out.println(doc); } reader.close(); } catch (IOException e) { e.printStackTrace(); } } } Lucene查询有很多种,下面介绍了一些常用的查询 Lucene查询语法请参考:http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/queryparsersyntax.html 下面是搜索代码: import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import 
org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.index.Index;

/**
 * Examples of the common Lucene 3.5 query types, run against the auction index.
 *
 * <p>Call {@link #getSearcher()} once, run any number of the query methods, then call
 * {@link #close()} to release the index. The individual query methods do not close the searcher,
 * so several of them can be run one after another.
 *
 * @version v 0.1 2012-3-6 01:33:16 PM
 */
public class Search {
    /** Searcher shared by all query methods; {@code null} until {@link #getSearcher()} succeeds. */
    public IndexSearcher searcher = null;
    /** Primary search text used by the sample queries. */
    String keyword = "北 AND 要";
    /** Secondary search text (second phrase term / upper range bound). */
    String keyword2 = "100";

    /**
     * Program entry point: opens the index, runs one sample query and releases the index.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Search search = new Search();
        search.getSearcher();
        if (search.searcher == null) {
            // Opening the index failed; the cause has already been printed.
            return;
        }
        try {
            // search.termQuery();            // term query
            search.booleanQuery_1();
            // search.booleanQuery();         // boolean (combined) query
            // search.wildcardQuery();        // wildcard query
            // search.phraseQuery();          // phrase query
            // search.prefixQuery();          // prefix query
            // search.multiPhraseQuery();     // multi-phrase query
            // search.fuzzyQuery();           // fuzzy query
            // search.termRangeQuery();       // text range query, e.g. 2011-03-06 18:00:00 TO 2012-03-06 18:00:00
            // search.numericRangeQuery(100.00, 200.00); // numeric range query
            // search.sortQuery();            // sorted query
            // search.heightQuery();          // highlighted query
            // search.pageQuery(2, 5);        // paged query
        } finally {
            search.close();
        }
    }

    /**
     * Opens a read-only reader on the index directory and creates the shared searcher.
     * On failure the exception is printed and {@link #searcher} stays unchanged.
     */
    public void getSearcher() {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File("/workspace2/indexing")), true);
            searcher = new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Releases the searcher and its underlying index reader. Safe to call more than once and
     * when the searcher was never opened.
     */
    public void close() {
        if (searcher == null) {
            return;
        }
        try {
            // A searcher built from a reader does not close that reader itself, so close both.
            IndexReader reader = searcher.getIndexReader();
            searcher.close();
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            searcher = null;
        }
    }

    /**
     * Runs the query and prints the hit count followed by up to 50 hits.
     *
     * @param q query to execute
     */
    public void query(Query q) {
        TopScoreDocCollector collector = TopScoreDocCollector.create(5 * 10, false);
        try {
            searcher.search(q, collector);
            printHits(collector.getTotalHits(), collector.topDocs().scoreDocs);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the query with the given sort order and prints the hit count followed by up to
     * 1000 hits.
     *
     * @param q query to execute
     * @param sort sort order to apply
     */
    public void querySort(Query q, Sort sort) {
        System.out.println("==============排序搜索");
        try {
            // The sorted search returns its own result; it does not feed a collector.
            TopDocs top = searcher.search(q, 1000, sort);
            printHits(top.totalHits, top.scoreDocs);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints the total hit count and one line (number, name, price, end date) per hit. */
    private void printHits(int count, ScoreDoc[] docs) throws IOException {
        System.out.println("------------获得 " + count + " 记录!");
        for (ScoreDoc sd : docs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get(Index.AUCTION_NO) + " , " + doc.get(Index.AUCTION_NAME) + " , "
                    + doc.get(Index.MAX_PRICE) + " , " + doc.get(Index.END_DATE));
        }
    }

    /**
     * Term search (TermQuery): matches the keyword as one exact term of the name field.
     */
    public void termQuery() {
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        System.out.println("=====词条搜索=====");
        query(q);
    }

    /**
     * MultiTermQuery example (not implemented).
     */
    public void multiTermQuery() {
    }

    /**
     * Boolean search whose single clause is the keyword parsed with the query-parser syntax.
     */
    public void booleanQuery_1() {
        BooleanQuery q = new BooleanQuery();
        QueryParser parser = new QueryParser(Version.LUCENE_35, Index.AUCTION_NAME,
                new StandardAnalyzer(Version.LUCENE_35));
        try {
            Query parsed = parser.parse(keyword);
            q.add(parsed, BooleanClause.Occur.SHOULD);
            System.out.println("q : " + q.toString());
            System.out.println("========= 组合搜索");
            query(q);
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Boolean search (BooleanQuery) built from the space-separated tokens of the keyword.
     * <ul>
     * <li>MUST_NOT: the clause must not match (used for tokens written as {@code -word})</li>
     * <li>SHOULD: "or" relation (used for every other token)</li>
     * <li>MUST: "and" relation</li>
     * </ul>
     */
    public void booleanQuery() {
        BooleanQuery q = new BooleanQuery();
        boolean added = false;
        for (String token : keyword.split(" ")) {
            if (token.length() == 0) {
                continue;
            }
            if (token.startsWith("-") && token.length() > 1) {
                // A TermQuery is matched literally, so strip the prefix and exclude the bare word.
                q.add(new TermQuery(new Term(Index.AUCTION_NAME, token.substring(1))),
                        BooleanClause.Occur.MUST_NOT);
            } else {
                q.add(new TermQuery(new Term(Index.AUCTION_NAME, token)), BooleanClause.Occur.SHOULD);
            }
            added = true;
        }
        if (!added) {
            // Keyword contained no usable token; fall back to the raw keyword as a single term.
            q.add(new TermQuery(new Term(Index.AUCTION_NAME, keyword)), BooleanClause.Occur.SHOULD);
        }
        System.out.println("q : " + q.toString());
        System.out.println("========= 组合搜索");
        query(q);
    }

    /**
     * Wildcard search (WildcardQuery); the keyword may contain {@code ?} and {@code *}.
     */
    public void wildcardQuery() {
        Term t = new Term(Index.AUCTION_NAME, keyword);
        WildcardQuery q = new WildcardQuery(t);
        System.out.println(q.toString());
        System.out.println("=======通配符搜索");
        query(q);
    }

    /**
     * Phrase search (PhraseQuery) for {@code keyword} followed by {@code keyword2}.
     */
    public void phraseQuery() {
        PhraseQuery q = new PhraseQuery();
        q.add(new Term(Index.AUCTION_NAME, keyword));
        q.add(new Term(Index.AUCTION_NAME, keyword2));
        // Slop (default 0): how far apart the two terms may be and still match.
        q.setSlop(10);
        System.out.println("=======短语搜索");
        query(q);
    }

    /**
     * Prefix search (PrefixQuery): matches terms of the name field starting with the keyword.
     */
    public void prefixQuery() {
        Term term = new Term(Index.AUCTION_NAME, keyword);
        PrefixQuery q = new PrefixQuery(term);
        System.out.println("==========前缀搜索");
        query(q);
    }

    /**
     * Multi-phrase search (MultiPhraseQuery): either keyword may occupy the phrase position.
     */
    public void multiPhraseQuery() {
        Term[] terms = new Term[] {
            new Term(Index.AUCTION_NAME, keyword),
            new Term(Index.AUCTION_NAME, keyword2)
        };
        MultiPhraseQuery q = new MultiPhraseQuery();
        q.add(terms);
        // Slop (default 0): how far apart the phrase positions may be and still match.
        q.setSlop(0);
        System.out.println("==========多短语搜索");
        query(q);
    }

    /**
     * Fuzzy search (FuzzyQuery) on the name field.
     */
    public void fuzzyQuery() {
        Term term = new Term(Index.AUCTION_NAME, keyword);
        // Default minimum similarity is 0.5; a lower value accepts less similar terms,
        // e.g. new FuzzyQuery(term, 0.1f).
        FuzzyQuery q = new FuzzyQuery(term);
        System.out.println("q:" + q.toString());
        System.out.println("=======模糊搜索");
        query(q);
    }

    /**
     * Text range search (TermRangeQuery) on the end-date field.
     * The last two arguments state whether the lower and the upper bound are inclusive.
     */
    public void termRangeQuery() {
        TermRangeQuery q = new TermRangeQuery(Index.END_DATE, keyword, keyword2, true, false);
        System.out.println("===========范围搜索");
        query(q);
    }

    /**
     * Numeric range search (NumericRangeQuery) on the price field, both bounds inclusive.
     *
     * @param start lower bound
     * @param end upper bound
     */
    public void numericRangeQuery(double start, double end) {
        Query q = NumericRangeQuery.newDoubleRange(Index.MAX_PRICE, start, end, true, true);
        System.out.println("===========数字范围搜索");
        query(q);
    }

    /**
     * Span query example (not implemented).
     */
    public void spanQuery() {
    }

    /**
     * Sorted search: finds items by name and prints name and price ordered by ascending price.
     */
    public void sortQuery() {
        try {
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35,
                    new String[] {Index.AUCTION_NAME}, new StandardAnalyzer(Version.LUCENE_35));
            Query q = parser.parse(keyword);
            Sort sort = new Sort();
            // Third argument is "reverse": true sorts descending, false ascending.
            sort.setSort(new SortField(Index.MAX_PRICE, SortField.DOUBLE, false));
            ScoreDoc[] hits = searcher.search(q, null, Integer.MAX_VALUE, sort).scoreDocs;
            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                System.out.println(doc.get(Index.AUCTION_NAME) + " , " + doc.get(Index.MAX_PRICE));
            }
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Highlighted search: runs a term query on the name field and prints each hit with the
     * matching part of the name wrapped in a red {@code <span>}.
     *
     * <p>With Lucene the hits are fetched first and the highlighting is applied to the stored
     * text afterwards (Solr, in contrast, is configured for highlighting before the query runs).
     */
    public void heightQuery() {
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        TopScoreDocCollector collector = TopScoreDocCollector.create(5 * 10, false);
        try {
            searcher.search(q, collector);
            int count = collector.getTotalHits();
            System.out.println("------------获得 " + count + " 记录!");
            ScoreDoc[] docs = collector.topDocs().scoreDocs;

            // The highlighter setup does not depend on the hit, so build it once.
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
            SimpleHTMLFormatter shf = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
            Highlighter highlighter = new Highlighter(shf, new QueryScorer(q));
            // One fragment large enough to hold the whole name.
            highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

            for (ScoreDoc sd : docs) {
                Document doc = searcher.doc(sd.doc);
                String auctionName = doc.get(Index.AUCTION_NAME);
                String content = null;
                if (auctionName != null) {
                    content = highlighter.getBestFragment(analyzer, Index.AUCTION_NAME, auctionName);
                }
                if (content == null) {
                    // No highlightable fragment: show the plain stored name instead.
                    content = auctionName;
                }
                System.out.println(doc.get(Index.AUCTION_NO) + " , " + content + " , "
                        + doc.get(Index.MAX_PRICE) + " , " + doc.get(Index.END_DATE));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }

    /**
     * Paged search: term query on the name field, printing one page of hits.
     *
     * @param start index of the first hit to print
     * @param howMany number of hits to print
     */
    public void pageQuery(int start, int howMany) {
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        System.out.println("=============分页搜索");
        this.doPageSearch(q, start, howMany);
    }

    /**
     * Runs the query and prints the hits {@code start .. start + howMany - 1}.
     *
     * @param q query to execute
     * @param start index of the first hit to print
     * @param howMany number of hits to print
     */
    public void doPageSearch(Query q, int start, int howMany) {
        // Collect just enough top hits to cover the requested page.
        TopScoreDocCollector collector = TopScoreDocCollector.create(start + howMany, false);
        try {
            searcher.search(q, collector);
            int count = collector.getTotalHits();
            System.out.println("------------获得 " + count + " 记录!");
            TopDocs top = collector.topDocs(start, howMany);
            for (ScoreDoc sd : top.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get(Index.AUCTION_NO) + "," + doc.get(Index.AUCTION_NAME));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}