Lucene(全文检索)索引库维护

Lucene索引库的维护

    Lucene工具类封装
package com.xushuai.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Static helpers for opening a Lucene index stored on disk and for printing
 * search results to the console.
 *
 * Author: xushuai
 * Date: 2018/5/7
 * Time: 12:36
 */
public class LuceneUtil {

    /**
     * Opens an {@link IndexWriter} on the index stored at {@code pathname}.
     *
     * <p>If the writer cannot be created, the directory opened by this method is
     * closed again before the exception is propagated. On success the directory is
     * not closed by this class.
     *
     * Author: xushuai, 2018/5/7 12:37
     *
     * @param pathname file-system path of the index directory
     * @param analyzer analyzer passed to the writer configuration
     * @return a writer for the index; the caller is responsible for closing it
     * @throws IOException if the directory or the writer cannot be opened
     */
    public static IndexWriter getIndexWriter(String pathname, Analyzer analyzer) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            // Writer configuration carrying the caller-supplied analyzer.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer);
            return new IndexWriter(directory, indexWriterConfig);
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the writer could not be built.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Opens an {@link IndexReader} on the index stored at {@code pathname}.
     *
     * <p>If the reader cannot be created, the directory opened by this method is
     * closed again before the exception is propagated. On success the directory is
     * not closed by this class.
     *
     * Author: xushuai, 2018/5/7 12:45
     *
     * @param pathname file-system path of the index directory
     * @return a reader for the index; the caller is responsible for closing it
     * @throws IOException if the directory or the reader cannot be opened
     */
    public static IndexReader getIndexReader(String pathname) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            return DirectoryReader.open(directory);
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the reader could not be built.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Runs {@code query} and prints the total hit count followed by the
     * {@code filename}, {@code filesize} and {@code filepath} stored fields of
     * each returned document.
     *
     * Author: xushuai, 2018/5/7 13:23
     *
     * @param indexSearcher searcher used to run the query and load documents
     * @param query         the search condition
     * @param count         maximum number of hits to fetch and print
     * @throws IOException if searching or loading a document fails
     */
    public static void printResult(IndexSearcher indexSearcher, Query query, int count) throws IOException {
        // First argument: the query; second argument: maximum number of hits returned.
        TopDocs topDocs = indexSearcher.search(query, count);
        // Total number of matches, which may exceed the number of hits printed below.
        System.out.println("查询结果总条数:" + topDocs.totalHits);

        for (ScoreDoc doc : topDocs.scoreDocs) {
            // ScoreDoc.doc is the document id used to load the stored document.
            Document document = indexSearcher.doc(doc.doc);
            System.out.println("文件名:  " + document.get("filename"));
            System.out.println("文件大小:" + document.get("filesize"));
            System.out.println("文件路径:" + document.get("filepath"));

            // Separator between documents.
            System.out.println("------------------------------------------------------------------------------");
        }
    }

    /**
     * Closes {@code directory} while another failure is being propagated; a close
     * error is attached to {@code failure} as suppressed instead of replacing it.
     */
    private static void closeAfterFailure(Directory directory, Throwable failure) {
        try {
            directory.close();
        } catch (IOException closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }
}
一、索引库的修改和删除
package com.xushuai.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;

/**
 * Index maintenance examples: updating and deleting documents.
 *
 * Index maintenance covers create/read/update/delete. Creating is the initial
 * index build, and querying is large enough to live in its own class.
 *
 * Author: xushuai
 * Date: 2018/5/7
 * Time: 12:47
 */
public class LuceneManager {


    private IndexWriter indexWriter = null;

    /**
     * Opens the index writer before each test.
     *
     * Note: IKAnalyzer is a third-party analyzer and needs its own dependency.
     *
     * Author: xushuai, 2018/5/7 17:21
     *
     * @throws IOException if the writer cannot be opened
     */
    @Before
    public void setUp() throws IOException {
        indexWriter = LuceneUtil.getIndexWriter("D:\\lucene-solr\\lucene\\index", new IKAnalyzer());
    }

    /**
     * Closes the index writer after each test.
     *
     * Author: xushuai, 2018/5/7 17:21
     *
     * @throws IOException if closing the writer fails
     */
    @After
    public void tearDown() throws IOException {
        // JUnit 4 still runs @After methods when setUp() throws; without this guard
        // the real failure would be accompanied by a NullPointerException.
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Updates the index. A Lucene update is a delete followed by an add.
     *
     * Author: xushuai, 2018/5/7 12:56
     *
     * @throws IOException if the update fails
     */
    @Test
    public void luceneUpdateRepository() throws IOException {
        // Replacement document.
        Document document = new Document();
        // NOTE(review): these fields are named "fname"/"fcontent" while the match
        // term below and LuceneUtil.printResult use "filename" - confirm that the
        // differing field names are intended.
        document.add(new TextField("fname","修改后的文件名", Field.Store.YES));
        document.add(new TextField("fcontent","修改后的文件内容",Field.Store.YES));

        // The Term selects the documents to replace; they are deleted and the new
        // document is added in their place.
        indexWriter.updateDocument(new Term("filename","java"),document);
    }

    /**
     * Demonstrates the two ways of deleting from the index.
     *
     * Author: xushuai, 2018/5/7 13:00
     *
     * @throws IOException if a delete fails
     */
    @Test
    public void luceneDeleteRepository() throws IOException{
        // Variant 1: delete everything in the index.
        indexWriter.deleteAll();

        // Variant 2: delete by query (search first, then delete the matches).
        // Example condition: the "filename" field contains the term "java".
        // NOTE(review): this runs after deleteAll() on the same writer, so there
        // is presumably nothing left for it to match - both calls are kept only
        // to show the two APIs side by side.
        Query query = new TermQuery(new Term("filename","java"));

        // deleteDocuments accepts a varargs list of queries, so several conditions
        // can be passed in one call.
        indexWriter.deleteDocuments(query);
    }


}



二、查询索引(Query子类)
package com.xushuai.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

/**
 * Index query examples built on the {@code Query} subclasses.
 *
 * Author: xushuai
 * Date: 2018/5/7
 * Time: 13:19
 */
public class LuceneQuery {


    private IndexSearcher indexSearcher = null;


    /**
     * Opens an index reader and wraps it in a searcher before each test.
     *
     * Author: xushuai, 2018/5/7 17:27
     *
     * @throws IOException if the reader cannot be opened
     */
    @Before
    public void setUp() throws IOException {
        IndexReader indexReader = LuceneUtil.getIndexReader("D:\\lucene-solr\\lucene\\index");
        indexSearcher = new IndexSearcher(indexReader);
    }

    /**
     * Closes the reader behind the searcher after each test.
     *
     * Author: xushuai, 2018/5/7 17:27
     *
     * @throws IOException if closing the reader fails
     */
    @After
    public void tearDown() throws IOException {
        // JUnit 4 still runs @After methods when setUp() throws; without this guard
        // the real failure would be accompanied by a NullPointerException.
        if (indexSearcher != null) {
            indexSearcher.getIndexReader().close();
        }
    }

    /**
     * Queries every document in the index.
     *
     * Author: xushuai, 2018/5/7 13:22
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneMatchAllDocsQuery() throws IOException {
        // MatchAllDocsQuery matches all documents.
        Query query  = new MatchAllDocsQuery();

        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher,query,10);

    }

    /**
     * Exact term query.
     *
     * Author: xushuai, 2018/5/7 13:32
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneTermQuery() throws IOException {
        // TermQuery looks up a single term, here the term "java" in the
        // "filename" field.
        Query query  = new TermQuery(new Term("filename","java"));

        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher,query,10);
    }

    /**
     * Numeric range query on the "filesize" field.
     *
     * Author: xushuai, 2018/5/7 13:34
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneNumericRangeQuery() throws IOException {
        /*
         * newLongRange arguments:
         *              1. field name
         *              2. minimum value
         *              3. maximum value
         *              4. whether the minimum is inclusive (boolean)
         *              5. whether the maximum is inclusive (boolean)
         *
         * NOTE(review): presumably "filesize" was indexed as a numeric long
         * field; verify against the code that builds the index.
         */
        Query query  = NumericRangeQuery.newLongRange("filesize",50L,200L,true,true);

        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher,query,10);
    }

    /**
     * Combined (boolean) query.
     *
     * Author: xushuai, 2018/5/7 13:38
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneBooleanQuery() throws IOException{
        // Container for the combined conditions.
        BooleanQuery booleanQuery  = new BooleanQuery();

        // The individual conditions.
        Query query1 = new TermQuery(new Term("filename","java"));
        Query query2 = new TermQuery(new Term("filename","apache"));
        /*
         * How the clauses relate to each other:
         * BooleanClause.Occur.MUST:     the condition must hold
         * BooleanClause.Occur.MUST_NOT: the condition must not hold
         * BooleanClause.Occur.SHOULD:   the condition may or may not hold, similar to OR
         *
         * The combination below reads: find documents whose file name contains
         * "java" or "apache".
         */
        booleanQuery.add(query1, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query2, BooleanClause.Occur.SHOULD);


        // Print at most the first 10 hits.
        LuceneUtil.printResult(indexSearcher,booleanQuery,10);
    }

}



你可能感兴趣的:(个人成长,技术点,lucene)