springboot 集成 lucene

简介

  1. 数据每分钟产生200条,使用mysql储存。
  2. 目前有数据超过700M。
  3. 按照日期查询时,按月查询每次涉及的数据超过20万条,耗时比较长。
  4. 计划使用lucene优化查询;不使用es是因为项目较小,没有更充裕的资源。

基本步骤

  1. 引入依赖。
  2. 开发工具类。
  3. 开发索引功能,完成索引。
  4. 开发定时任务,完成数据增量更新。
  5. 开发搜索功能,可以搜索数据。

引入依赖

  1. 修改pom文件

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>9.7.0</version>
</dependency>


<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>9.7.0</version>
</dependency>
  • 注:没有使用更多的包是因为这次优化是以long类型区间计算为主,不需要全文索引,所以有基础的包就够了。

工具类

  1. 实现基本的生成、删除和查询。

import com.xxx.common.ResponseCode;
import com.xxx.common.exception.SystemException;
import com.xxx.common.util.ValidUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Helper around a file-system based Lucene index located at {@code lucene.index.path}.
 *
 * <p>Offers three operations: appending documents ({@link #createIndex}), wiping the index
 * ({@link #clean}) and running a caller-supplied query ({@link #search}). Every call opens its
 * own {@link Directory} / writer / reader and closes them again before returning; any failure
 * is rethrown as a {@link SystemException} carrying the original cause.
 */
@Component
@Slf4j
public class LuceneUtil {

    /** Directory that holds the index files. */
    @Value("${lucene.index.path}")
    private String luceneIndexPath;

    /**
     * Appends one document per list element to the index and commits.
     *
     * @param list    source objects; when empty (per {@code ValidUtil.isEmpty}) nothing is written
     * @param handler converts one source object into a Lucene {@link Document}; kept as a raw
     *                type so existing callers continue to compile
     * @throws SystemException if opening, writing or closing the index fails
     */
    public <T> void createIndex(List<T> list, CreateDocumentHandler handler) {
        File indexDir = new File(luceneIndexPath);
        // mkdirs (not mkdir) so that a nested, not-yet-existing path is created as well.
        if (!indexDir.exists() && !indexDir.mkdirs()) {
            log.warn("无法创建索引目录: {}", luceneIndexPath);
        }
        if (ValidUtil.isEmpty(list)) {
            return;
        }
        long startTime = System.currentTimeMillis();
        // try-with-resources closes writer, analyzer and directory in reverse order, and a
        // failure while closing no longer hides the exception that caused the write to fail.
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            for (T t : list) {
                Document doc = handler.createDocument(t);
                writer.addDocument(doc);
            }
            writer.commit();
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
        long endTime = System.currentTimeMillis();
        log.info("建立索引耗时{}毫秒", endTime - startTime);
    }

    /**
     * Deletes every document from the index. Does nothing when the index directory does not
     * exist.
     *
     * @throws SystemException if opening, clearing or closing the index fails
     */
    public void clean() {
        File indexDir = new File(luceneIndexPath);
        if (!indexDir.exists()) {
            return;
        }
        long startTime = System.currentTimeMillis();
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            writer.deleteAll();
            // Commit explicitly, mirroring createIndex, instead of relying on close().
            writer.commit();
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
        long endTime = System.currentTimeMillis();
        log.info("清除索引耗时{}毫秒", endTime - startTime);
    }

    /**
     * Runs a query built by {@code handler} and returns the stored documents of its hits.
     *
     * @param handler receives the {@link IndexSearcher} and returns the {@link TopDocs} to
     *                materialize; it decides the query, the page size and the sort order
     * @return the documents referenced by {@code TopDocs.scoreDocs}, in that order; an empty
     *         list when no {@code write.lock} file exists in the index directory
     * @throws SystemException if the index cannot be opened or the query fails
     */
    public List<Document> search(CreateQueryParamsHandler handler) {
        // The presence of write.lock is used as the marker that an index was written before.
        File lockFile = new File(luceneIndexPath + File.separator + "write.lock");
        if (!lockFile.exists()) {
            return new ArrayList<>();
        }
        // Reader and directory are closed automatically, also when open() itself throws
        // (the previous finally block dereferenced a null reader in that case).
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath));
             IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            long startTime = System.currentTimeMillis();
            TopDocs docs = handler.handler(searcher);
            long endTime = System.currentTimeMillis();
            log.info("索引查询耗时{}毫秒", endTime - startTime);
            // Size by the hits actually returned: totalHits counts all matches, which can be
            // far larger than the requested page and can exceed the int range.
            List<Document> result = new ArrayList<>(docs.scoreDocs.length);
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                result.add(searcher.doc(scoreDoc.doc));
            }
            return result;
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
    }
}

生成索引功能

/**
 * Indexes data in one-day windows, beginning at {@code startDate}.
 *
 * <p>For each window the rows are loaded through {@code currencyDataMapper.queryLuceneList}
 * and handed to {@code luceneUtil.createIndex}. When a window yields no rows, a second lookup
 * without an upper bound decides whether to keep going; the loop ends once that lookup is
 * empty too.
 *
 * @param startDate start of the first one-day window
 */
public void index(Date startDate) {
    log.info("start index! Date : " + DateUtil.format(DateUtil.now()));

    // Converts one data row into the Lucene document that gets written to the index.
    // NOTE(review): TextField values pass through the writer's analyzer — confirm that the
    // exact-match lookups on dId / typeId behave as intended for all id formats.
    CreateDocumentHandler<Data> documentHandler = data -> {
        Document doc = new Document();
        doc.add(new TextField("dId", data.getDId(), Field.Store.YES));
        doc.add(new TextField("typeId", data.getTypeId(), Field.Store.YES));
        // Point field: makes createTime usable in range queries.
        doc.add(new LongPoint("createTime", data.getCreateTime()));
        // Stored field: lets createTime be read back from a search hit.
        doc.add(new StoredField("createTime", data.getCreateTime()));
        // Doc-values field: used for sorting.
        doc.add(new NumericDocValuesField("sortTime", data.getCreateTime()));
        // value / unit fall back to an empty string when ValidUtil reports them as empty.
        String value = ValidUtil.isEmpty(data.getValue()) ? "" : data.getValue();
        doc.add(new TextField("value", value, Field.Store.YES));
        String unit = ValidUtil.isEmpty(data.getUnit()) ? "" : data.getUnit();
        doc.add(new TextField("unit", unit, Field.Store.YES));
        return doc;
    };

    Date windowStart = startDate;
    for (;;) {
        Date windowEnd = DateUtil.datePlusDays(windowStart, 1);
        CurrencyDataForm windowForm = CurrencyDataForm.builder()
                .createTimeBegin(windowStart.getTime())
                .createTimeEnd(windowEnd.getTime())
                .build();
        List<CurrencyData> rows = currencyDataMapper.queryLuceneList(windowForm);
        log.info(String.format("index startDate = %s, endDate = %s, size = %s", DateUtil.format(windowStart), DateUtil.format(windowEnd), rows.size()));

        if (rows.isEmpty()) {
            // Empty window: look for any row from windowStart onwards before giving up.
            CurrencyDataForm openEndedForm = CurrencyDataForm.builder()
                    .createTimeBegin(windowStart.getTime())
                    .build();
            List<CurrencyData> later = currencyDataMapper.getOne(openEndedForm);
            boolean hasMore = !later.isEmpty();
            log.info("has more begin:" + DateUtil.format(windowEnd) + ", result: " + (hasMore ? "yes" : "no"));
            if (!hasMore) {
                break;
            }
        }

        luceneUtil.createIndex(rows, documentHandler);
        windowStart = windowEnd;
    }
    log.info("finish index!");
}
  • 注:每次生成1天的索引;如果本轮没有数据,并且该时间之后也没有任何数据,则结束索引。

定时任务

// Executor that each indexing run is submitted to.
private ThreadPoolTaskExecutor tpe;

// One indexing run: determine the start date, index from there, then schedule the next run.
tpe.execute(() -> {
    try {
        Date startDate;
        try {
            startDate = getLastDate();
        } catch (SystemException s) {
            // Start date could not be determined: wipe the index and rebuild it from the
            // configured initial start time.
            luceneUtil.clean();
            startDate = DateUtil.parse(initStartTime);
        }
        index(startDate);
    } catch (Exception e) {
        // Covers the start-date lookup and the clean() fallback as well, so that no failure
        // can escape before the finally block has re-armed the next run.
        log.error("生成索引异常。", e);
    } finally {
        // Re-arm: run init again in 60 seconds.
        // NOTE(review): shutdown() right after schedule() relies on the pool's default policy
        // of still executing already-scheduled delayed tasks after shutdown.
        ScheduledExecutorService executor = Executors.newScheduledThreadPool(1);
        executor.schedule(this::init, 60, TimeUnit.SECONDS);
        executor.shutdown();
    }
});
  • 注:使用线程池+延时任务,实现每60s执行一次功能。

搜索

/**
 * Searches the Lucene index with the filters present on {@code form} and maps the hits back
 * to {@code Data} objects.
 *
 * <p>Each filter is optional and added as a MUST clause: an exact match on {@code dId}, an
 * exact match on {@code typeId}, and a range on {@code createTime} (applied only when both
 * begin and end are set). Hits are sorted ascending by the {@code sortTime} doc-values field
 * and limited to {@code form.getSize()}.
 *
 * @param form search criteria and page size
 * @return the matching entries; empty when nothing matches
 */
public List<Data> queryIndex(Form form) {
    List<Document> documentList = luceneUtil.search((searcher) -> {

        BooleanQuery.Builder builder = new BooleanQuery.Builder();

        if (ValidUtil.isNotEmpty(form.getDId())) {
            TermQuery deviceIdQuery = new TermQuery(new Term("dId", form.getDId()));
            builder.add(deviceIdQuery, BooleanClause.Occur.MUST);
        }

        if (ValidUtil.isNotEmpty(form.getTypeId())) {
            TermQuery typeQuery = new TermQuery(new Term("typeId", form.getTypeId()));
            // Add the type clause itself (the device-id query was added here by mistake).
            builder.add(typeQuery, BooleanClause.Occur.MUST);
        }

        if (ValidUtil.isNotEmpty(form.getBegin()) && ValidUtil.isNotEmpty(form.getEnd())) {
            // The point field is indexed under "createTime"; a range on any other field name
            // matches nothing.
            Query timeQuery = LongPoint.newRangeQuery("createTime", form.getBegin().getTime(), form.getEnd().getTime());
            builder.add(timeQuery, BooleanClause.Occur.MUST);
        }

        // Ascending by time, using the doc-values field written for sorting.
        Sort sort = new Sort(new SortField("sortTime", SortField.Type.LONG, false));
        return searcher.search(builder.build(), form.getSize(), sort);
    });

    List<Data> result = new ArrayList<>(documentList.size());
    for (Document document : documentList) {
        Data data = new Data();
        data.setTypeId(Integer.valueOf(document.get("typeId")));
        data.setDId(Integer.valueOf(document.get("dId")));
        // The timestamp is stored under "createTime", the name used when indexing.
        data.setTime(document.getField("createTime").numericValue().longValue());
        data.setValue(document.get("value"));
        data.setUnit(document.get("unit"));
        result.add(data);
    }
    return result;
}

你可能感兴趣的:(搜索引擎,spring,boot,lucene,mybatis)