Lucene学习总结

Lucene3.5学习总结:

Lucene主要分为两大块:索引和搜索。相关包可能官网上下载。

官方网为:http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/index.html

索引分为文件索引和内存索引,下面介绍的是文件索引。包括新建、删除、更新、读取索引。索引中文分词可以研究下IKAnalyzer。

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * 索引
 *
 * @author
 * @version v 0.1 2012-3-6 上午10:50:25
 */
public class Index {
    /** 索引文件路径 */
    private static final String INDEX_PATH = "/workspace2/indexing";
   
    /** 编号 */
    public static final String AUCTION_NO = "auctionNo";
    /** 名称 */
    public static final String AUCTION_NAME = "auctionName";
    /** 价格 */
    public static final String MAX_PRICE = "maxPrice";
    /** 日期 */
    public static final String END_DATE = "endDate";
   
    /**
     * 程序入口
     * @param args
     */
    public static void main(String[] args) {
        createIndex();
//        deleteIndex();
//        readIndex();
        updateIndex();
    }

    /**
     * 创建索引
     */
    public static void createIndex(){
        try {
            Directory directory = FSDirectory.open(new File(INDEX_PATH));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer);
            IndexWriter writer = new IndexWriter(directory, iwConfig);
           
            //Document添加索引值
            Document doc = new Document();
            Field auctionNoField = new Field(AUCTION_NO, "10003",Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field auctionNameField = new Field(AUCTION_NAME, "汇园果汁", Field.Store.YES, Field.Index.ANALYZED);
            Field endDateField = new Field(END_DATE, "2012-03-06 18:00:00",Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.add(auctionNoField);
            doc.add(auctionNameField);
            doc.add(new NumericField(Index.MAX_PRICE, Field.Store.YES, true).setDoubleValue(300));
            doc.add(endDateField);
           
            writer.addDocument(doc);
//            writer.addDocuments(docs);//添加多个索引
            writer.close();
            System.out.println("=========创建索引完成");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * IndexWriter方式删除索引
     * <ul>
     *         <li>全部删除</li>
     *         <li>单个或多个删除</li>
     * </ul>
     */
    public static void indexWriterDeleteIndex(){
        try {
            Directory directory = FSDirectory.open(new File(INDEX_PATH));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer);
            IndexWriter writer = new IndexWriter(directory, iwConfig);
//            writer.deleteAll();//删除所有索引
            Term term = new Term(Index.AUCTION_NO, "10001");//删除单个索引
            writer.deleteDocuments(term);
            writer.close();
            System.out.println("=========删除索引成功");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * IndexReader方式删除索引
     */
    public static void indexReaderDeleteIndex(){
        try {
            Directory directory = FSDirectory.open(new File(INDEX_PATH));
            IndexReader reader = IndexReader.open(directory, false);//设为true为只读模式
            Term term = new Term(Index.AUCTION_NO, "10000");//删除单个索引
            reader.deleteDocuments(term);
            reader.flush();
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * 更新索引(先删除再创建)
     * <ul>
     *         <li>更新单个索引</li>
     *         <li>更新多个索引</li>
     * </ul>
     */
    public static void updateIndex(){
        try {
            Directory directory = FSDirectory.open(new File(INDEX_PATH));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35,analyzer);
            IndexWriter writer = new IndexWriter(directory, iwConfig);
           
            //Document添加索引值
            Document doc = new Document();
            Field auctionNoField = new Field(AUCTION_NO, "10003",Field.Store.YES, Field.Index.NOT_ANALYZED);
            Field auctionNameField = new Field(AUCTION_NAME, "商品名称", Field.Store.YES, Field.Index.ANALYZED);
            Field endDateField = new Field(END_DATE, "2011-03-06 18:00:00",Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.add(auctionNoField);
            doc.add(auctionNameField);
            doc.add(new NumericField(Index.MAX_PRICE, Field.Store.YES, true).setDoubleValue(200));
            doc.add(endDateField);
           
            //根据唯一商品ID进行更新索引
            Term term = new Term(Index.AUCTION_NO,"10003");
            writer.updateDocument(term, doc);//更新索引
//            writer.updateDocuments(delTerm, docs);//更新多个索引
            writer.close();
            System.out.println("=========更新索引完成");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * 读索引
     */
    public static void readIndex(){
        try {
            Directory directory = FSDirectory.open(new File(INDEX_PATH));
            IndexReader reader = IndexReader.open(directory, true);//设为true为只读模式
            int num = reader.numDocs();
            for (int i = 0; i < num; i++) {
                Document doc = reader.document(i);
                System.out.println(doc);
            }
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Lucene查询有很多种,下面介绍了一些常用的查询

Lucene查询语法请参考:http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/queryparsersyntax.html

下面是搜索代码:
import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.index.Index;

/**
 *
 *
 * @author
 * @version v 0.1 2012-3-6 下午01:33:16
 */
public class Search {
    public IndexSearcher searcher = null;
    String keyword = "北 AND 要";
    String keyword2 = "100";
   
    /**
     * 程序入口
     * @param args
     */
    public static void main(String[] args) {
        Search search =  new Search();
        search.getSearcher();
       
//        search.termQuery();//词条查询
        search.booleanQuery_1();
//        search.booleanQuery();//组合查询
//        search.wildcardQuery();//通配符查询
//        search.phraseQuery();//短语查询
//        search.prefixQuery();//前缀查询
//        search.multiPhraseQuery();//多短语查询-
//        search.fuzzyQuery();//模糊查询
//        search.termRangeQuery();//文本范围查询 2011-03-06 18:00:00  TO 2012-03-06 18:00:00
//        search.numericRangeQuery(100.00,200.00);//数字范围查询
//        search.sortQuery();//排序查询
//        search.heightQuery();//高亮查询
//        search.pageQuery(2, 5);//分页查询
    }
   
    /**
     * 获得搜索
     */
    public  void getSearcher(){
        IndexReader reader = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File("/workspace2/indexing")), true);
            searcher = new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * 查询
     * @param q
     */
    public void query(Query q){
        TopScoreDocCollector collector = TopScoreDocCollector.create(5*10, false);
        try {
            searcher.search(q, collector);
            int count = collector.getTotalHits();
            System.out.println("------------获得 "+count+" 记录!");
            TopDocs top = collector.topDocs();
            ScoreDoc[] docs = top.scoreDocs;
            for (ScoreDoc sd : docs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get(Index.AUCTION_NO)+" , "+doc.get(Index.AUCTION_NAME)+" , "+doc.get(Index.MAX_PRICE)+" , "+doc.get(Index.END_DATE));
            }
            searcher.close();//关闭搜索
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * 排序查询
     * @param q
     */
    public void querySort(Query q,Sort sort){
        System.out.println("==============排序搜索");
        TopScoreDocCollector collector = TopScoreDocCollector.create(5*10, false);
        try {
             searcher.search(q,1000, sort);
            int count = collector.getTotalHits();
            System.out.println("------------获得 "+count+" 记录!");
            TopDocs top = collector.topDocs();
            ScoreDoc[] docs = top.scoreDocs;
            for (ScoreDoc sd : docs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get(Index.AUCTION_NO)+" , "+doc.get(Index.AUCTION_NAME)+" , "+doc.get(Index.MAX_PRICE)+" , "+doc.get(Index.END_DATE));
            }
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
       
    }
   
    /**
     * 词条搜索 TermQuery
     */
    public void termQuery(){
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        System.out.println("=====词条搜索=====");
        query(q);
    }
   
    /**
     * MultiTermQuery
     */
    public void multiTermQuery(){
       
    }

    public void booleanQuery_1() {
        BooleanQuery q = new BooleanQuery();
        QueryParser parser = new QueryParser(Version.LUCENE_35, Index.AUCTION_NAME, new StandardAnalyzer(Version.LUCENE_35));
        try {
            Query query = parser.parse(keyword);
            TermQuery termQuery = new TermQuery(new Term(Index.AUCTION_NAME,
                    keyword));
            q.add(query, BooleanClause.Occur.SHOULD);
            System.out.println("q : " + q.toString());
            System.out.println("========= 组合搜索");
            query(q);
        } catch (ParseException e) {
            e.printStackTrace();
        }
       
    }
   
    /**
     * 组合搜索 BooleanQuery
     * MUST_NOT :不包含
     * SHOULD :表或关系
     * MUST :表并关系
     */
    public void booleanQuery(){
        BooleanQuery q = new BooleanQuery();
        String[] s = keyword.split(" ");
        if (s.length > 0) {
            for (int i = 0; i < s.length; i++) {
//                TermQuery termQuery = new TermQuery(new Term(Index.AUCTION_NAME,s[i]));
                if (s[i].indexOf("-") != -1) {
                    String query = s[i].replaceAll("-", " NOT ");
                    TermQuery termQuery = new TermQuery(new Term(Index.AUCTION_NAME,query));
                    q.add(termQuery,BooleanClause.Occur.MUST_NOT);
                }else{
                    TermQuery termQuery = new TermQuery(new Term(Index.AUCTION_NAME,s[i]));
                    q.add(termQuery, BooleanClause.Occur.SHOULD);
                }
            }
        }else{
            TermQuery termQuery = new TermQuery(new Term(Index.AUCTION_NAME,keyword));
            q.add(termQuery,BooleanClause.Occur.SHOULD);
        }
        System.out.println("q : "+q.toString());
        System.out.println("========= 组合搜索");
        query(q);
    }
   
    /**
     *
     * 通配符搜索 WildcardQuery
     * ?*
     */
    public void wildcardQuery(){
        Term t = new Term(Index.AUCTION_NAME, keyword);
        WildcardQuery q = new WildcardQuery(t);
        System.out.println(q.toString());
        System.out.println("=======通配符搜索");
        query(q);
    }
   
    /**
     * 短语搜索 PhraseQuery
     */
    public void phraseQuery(){
        PhraseQuery q = new PhraseQuery();
        q.add(new Term(Index.AUCTION_NAME,keyword));
        q.add(new Term(Index.AUCTION_NAME, keyword2));
        q.setSlop(10);//设置坡度,默认为0。两个关键字之间的字符数量
        System.out.println("=======短语搜索");
        query(q);
    }
   
    /**
     * 前缀搜索 PrefixQuery
     */
    public void prefixQuery(){
        Term term = new Term(Index.AUCTION_NAME, keyword);
        PrefixQuery q = new PrefixQuery(term);
        System.out.println("==========前缀搜索");
        query(q);
    }
   
    /**
     * 多短语搜索 MultiPhraseQuery
     */
    public void multiPhraseQuery(){
        Term[] terms = new Term[]{new Term(Index.AUCTION_NAME, keyword),new Term(Index.AUCTION_NAME,keyword2)};
        MultiPhraseQuery q = new MultiPhraseQuery();
        q.add(terms);
        q.setSlop(0);//设置坡度,默认为0。两个关键字之间的字符数量
        System.out.println("==========多短语搜索");
        query(q);
    }
   
    /**
     * 模糊搜索 FuzzyQuery
     */
    public void fuzzyQuery(){
        Term term = new Term(Index.AUCTION_NAME, keyword);
        FuzzyQuery q = new FuzzyQuery(term);
        //默认匹配度为0.5,当该值越小,模糊匹配度越低
//        FuzzyQuery q = new FuzzyQuery(term, 0.1f);
        System.out.println("q:"+q.toString());
        System.out.println("=======模糊搜索");
        query(q);
    }
   
    /**
     * 文本范围搜索 TermRangeQuery
     * 后面两个参数分别为是否包含前边界和后边界
     */
    public void termRangeQuery(){
        TermRangeQuery q    = new TermRangeQuery(Index.END_DATE, keyword, keyword2, true, false);
        System.out.println("===========范围搜索");
        query(q);
    }
   
    /**
     * 数字范围搜索 NumericRangeQuery
     * 后面两个参数分别为是否包含前边界和后边界
     */
    public void numericRangeQuery(double start,double end){
        Query q = NumericRangeQuery.newDoubleRange(Index.MAX_PRICE, start, end, true, true);
        System.out.println("===========数字范围搜索");
        query(q);
    }
   
    /**
     * 跨度查询 SpanQuery
     */
    public void spanQuery(){
       
    }
   
    /**
     * 排序搜索(根据拍品名称按价格排序)
     */
    public void sortQuery(){
        try {
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, new String[]{Index.AUCTION_NAME}, new StandardAnalyzer(Version.LUCENE_35));
            Query q = parser.parse(keyword);
            Sort sort = new Sort();
            sort.setSort(new SortField(Index.MAX_PRICE, SortField.DOUBLE, false));//true为降序,false为升序
            ScoreDoc[] hits = searcher.search(q, null, Integer.MAX_VALUE, sort).scoreDocs;
            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc  = searcher.doc(hits[i].doc);
                System.out.println(doc.get(Index.AUCTION_NAME)+" , "+doc.get(Index.MAX_PRICE));
            }
            searcher.close();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
       
       
    }
   
    /**
     * 高亮显示搜索
     * Lucene高亮与solr高亮有些不一样,Lucene是先查询出结果再设置高亮,
     * 而solr是先设置高亮再查询,直接得到高亮内容
     */
    public void heightQuery(){
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        TopScoreDocCollector collector = TopScoreDocCollector.create(5*10, false);
        try {
            searcher.search(q, collector);
            int count = collector.getTotalHits();
            System.out.println("------------获得 "+count+" 记录!");
            TopDocs top = collector.topDocs();
            ScoreDoc[] docs = top.scoreDocs;
            for (ScoreDoc sd : docs) {
                Document doc = searcher.doc(sd.doc);
                String auctionName = doc.get(Index.AUCTION_NAME);
                SimpleHTMLFormatter shf = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
                Highlighter highlighter = new Highlighter(shf, new QueryScorer(q));
                highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
                String content = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_35), Index.AUCTION_NAME, auctionName);
                System.out.println(doc.get(Index.AUCTION_NO)+" , "+content+" , "+doc.get(Index.MAX_PRICE)+" , "+doc.get(Index.END_DATE));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
   
    /**
     * 分页查询
     *
     * @param start
     * @param howMany
     */
    public void pageQuery(int start, int howMany){
        Term t = new Term(Index.AUCTION_NAME, keyword);
        TermQuery q = new TermQuery(t);
        System.out.println("=============分页搜索");
        this.doPageSearch(q, start, howMany);
    }
   
    /**
     * 分页
     *
     */
    public void doPageSearch(Query q, int start, int howMany){
        TopScoreDocCollector collector = TopScoreDocCollector.create(start+howMany, false);
        try {
            searcher.search(q, collector);
            int count = collector.getTotalHits();
            System.out.println("------------获得 "+count+" 记录!");
            TopDocs top = collector.topDocs(start, howMany);
            ScoreDoc[] docs = top.scoreDocs;
            for (ScoreDoc sd : docs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get(Index.AUCTION_NO)+","+doc.get(Index.AUCTION_NAME));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
 

 

 

 

你可能感兴趣的:(Lucene)