Lucene示例

车东的博客,里面有更详尽的介绍以及很多资料

添加Lucene依赖, 本文使用7.4.0版本


    org.apache.lucene
    lucene-core
    7.4.0



    org.apache.lucene
    lucene-queryparser
    7.4.0

Lucene示例_第1张图片

构建索引代码如下:

package com.sean.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

/**
 * Created by sean on 2018/8/19.
 */
public class CreateTest {

    public static void main(String[] args) throws Exception {
        Path indexPath = FileSystems.getDefault().getPath("d:\\index\\");
//        FSDirectory有三个主要的子类,open方法会根据系统环境自动挑选最合适的子类创建
//        MMapDirectory:Linux, MacOSX, Solaris
//        NIOFSDirectory:other non-Windows JREs
//        SimpleFSDirectory:other JREs on Windows
        Directory dir = FSDirectory.open(indexPath);

        // 分词器
        Analyzer analyzer = new StandardAnalyzer();
        boolean create = true;
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        if (create){
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        }else {
            // lucene是不支持更新的,这里仅仅是删除旧索引,然后创建新索引
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        }
        IndexWriter indexWriter = new IndexWriter(dir, indexWriterConfig);

        Document doc = new Document();
        // 域值会被索引,但是不会被分词,即被当作一个完整的token处理,一般用在“国家”或者“ID
        // Field.Store表示是否在索引中存储原始的域值
        // 如果想在查询结果里显示域值,则需要对其进行存储
        // 如果内容太大并且不需要显示域值(整篇文章内容),则不适合存储到索引中
        doc.add(new StringField("Title", "sean", Field.Store.YES));
        long time = new Date().getTime();
        // LongPoint并不存储域值
        doc.add(new LongPoint("LastModified", time));
//        doc.add(new NumericDocValuesField("LastModified", time));
        // 会自动被索引和分词的字段,一般被用在文章的正文部分
        doc.add(new TextField("Content", "this is a test of sean", Field.Store.NO));

        List docs = new LinkedList<>();
        docs.add(doc);

        indexWriter.addDocuments(docs);
        // 默认会在关闭前提交
        indexWriter.close();
    }
}

Lucene示例_第2张图片

索引 查询:

package com.sean.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.FileSystems;
import java.nio.file.Path;

/**
 * Created by sean on 2018/8/19.
 */
public class QueryTest {

    public static void main(String[] args) throws Exception {
        Path indexPath = FileSystems.getDefault().getPath("d:\\index\\");
        Directory dir = FSDirectory.open(indexPath);
        // 分词器
        Analyzer analyzer = new StandardAnalyzer();

        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // 同时查询多个域
//        String[] queryFields = {"Title", "Content", "LastModified"};
//        QueryParser parser = new MultiFieldQueryParser(queryFields, analyzer);
//        Query query = parser.parse("sean");

        // 一个域按词查doc
//        Term term = new Term("Title", "test");
//        Query query = new TermQuery(term);

        // 模糊查询
//        Term term = new Term("Title", "se*");
//        WildcardQuery query = new WildcardQuery(term);

        // 范围查询
        Query query1 = LongPoint.newRangeQuery("LastModified", 1L, 1637069693000L);

        // 多关键字查询,必须指定slop(key的存储方式)
        PhraseQuery.Builder phraseQueryBuilder = new PhraseQuery.Builder();
        phraseQueryBuilder.add(new Term("Content", "test"));
        phraseQueryBuilder.add(new Term("Content", "sean"));
        phraseQueryBuilder.setSlop(10);
        PhraseQuery query2 = phraseQueryBuilder.build();

        // 复合查询
        BooleanQuery.Builder booleanQueryBuildr = new BooleanQuery.Builder();
        booleanQueryBuildr.add(query1, BooleanClause.Occur.MUST);
        booleanQueryBuildr.add(query2, BooleanClause.Occur.MUST);
        BooleanQuery query = booleanQueryBuildr.build();

        // 返回doc排序
        // 排序域必须存在,否则会报错
        Sort sort = new Sort();
        SortField sortField = new SortField("Title", SortField.Type.SCORE);
        sort.setSort(sortField);

        TopDocs topDocs = searcher.search(query, 10, sort);
        if(topDocs.totalHits > 0)
            for(ScoreDoc scoreDoc : topDocs.scoreDocs){
                int docNum = scoreDoc.doc;
                Document doc = searcher.doc(docNum);
                System.out.println(doc.toString());
            }
    }
}

查询结果如下:

Document>

从索引中查询包含sean字符串的内容,因此查询结果不包含LastModified域相关内容

由于索引没有存储Content域(Field.Store.NO),因此查询结果中不包含Content域相关内容

使用索引查看工具Luke(luke-javafx-7.4.0-luke-release.zip)观察一下索引内容

Lucene示例_第3张图片

Lucene示例_第4张图片

由于设置了Field.Store.No,所以Content域的Value为空

你可能感兴趣的:(Lucene,/,Solr,/,ES)