Springboot集成elasticsearch 使用IK+拼音分词

Springboot集成elasticsearch 使用IK+拼音分词

docker安装ES

下载

docker pull docker.elastic.co/elasticsearch/elasticsearch:6.3.2

启动

docker run -d --name="es" -p 9200:9200 -p 9300:9300 -e "cluster.name=elasticsearch" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.3.2

springboot集成

maven依赖
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    <version>2.1.4.RELEASE</version>
</dependency>
配置文件
# Spring Data Elasticsearch connection settings.
spring:
  data:
    elasticsearch:
      # Same value as the cluster.name passed to the container in the docker run command of this guide.
      cluster-name: elasticsearch
      # "ip" is a placeholder for the Elasticsearch host; 9300 is one of the two ports published by the docker run command.
      cluster-nodes: ip:9300
实体类
package com.my.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;

/**
 * <一句话功能简述>
* () * * @author M.Y * @date 2019/5/30 * @since 1.0.0 */
@AllArgsConstructor @NoArgsConstructor @Data @Document(indexName = "contents",type = "content") //indexName索引名称 可以理解为数据库名 必须为小写 不然会报org.elasticsearch.indices.InvalidIndexNameException异常 //type类型 可以理解为表名 public class GoodsInfo { private Long id; private String name; private String des; }
DAO
package com.my.dao;

import com.my.entity.GoodsInfo;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
import org.springframework.stereotype.Component;

/**
 * Repository for {@link GoodsInfo} documents identified by a {@code Long} id.
 *
 * <p>Declares no methods of its own; everything comes from {@link ElasticsearchRepository}.
 *
 * @author M.Y
 * @date 2019/5/30
 * @since 1.0.0
 */
@Component
public interface GoodsRepository extends ElasticsearchRepository<GoodsInfo, Long> {
}
Controller
package com.my.controller;

import com.my.dao.GoodsRepository;
import com.my.entity.GoodsInfo;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * <一句话功能简述>
* () * * @author M.Y * @date 2019/5/30 * @since 1.0.0 */
@RestController public class GoodsController { @Autowired private GoodsRepository goodsRepository; //http://localhost:8080/save?des= @GetMapping("save") public String save(String des){ GoodsInfo goodsInfo = new GoodsInfo(System.currentTimeMillis(), "商品"+System.currentTimeMillis(),des); goodsRepository.save(goodsInfo); return "success"; } //http://localhost:8080/delete?id= @GetMapping("delete") public String delete(long id){ goodsRepository.deleteById(id); return "success"; } //http://localhost:8080/update?name=修改&des=修改&id= @GetMapping("update") public String update(long id,String name,String description){ GoodsInfo goodsInfo = new GoodsInfo(id, name,description); goodsRepository.save(goodsInfo); return "success"; } //http://localhost:8080/getOne?id= @GetMapping("getOne") public GoodsInfo getOne(long id){ GoodsInfo goodsInfo = goodsRepository.findById(id).orElse(null); return goodsInfo; } }
测试

Springboot集成elasticsearch 使用IK+拼音分词_第1张图片

谷歌elasticsearch-head插件查看

Springboot集成elasticsearch 使用IK+拼音分词_第2张图片

安装IK分词插件

进入容器

docker exec -it es bash

进入目录

cd /usr/share/elasticsearch/

下载安装插件(注意版本要与es一致)

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.2/elasticsearch-analysis-ik-6.3.2.zip

退出容器

exit

重启容器

docker restart es

验证结果

ik_max_word:尽可能多的分词

Springboot集成elasticsearch 使用IK+拼音分词_第3张图片

ik_smart:尽可能少的分词

Springboot集成elasticsearch 使用IK+拼音分词_第4张图片

安装拼音转换插件

进入容器

docker exec -it es bash

进入目录

cd /usr/share/elasticsearch/

下载安装插件(注意版本要与es一致)

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v6.3.2/elasticsearch-analysis-pinyin-6.3.2.zip

退出容器

exit

重启容器

docker restart es

新建索引

Springboot集成elasticsearch 使用IK+拼音分词_第5张图片

关闭索引(更新配置前要关闭索引,不然会报错)

Springboot集成elasticsearch 使用IK+拼音分词_第6张图片

创建拼音分词

Springboot集成elasticsearch 使用IK+拼音分词_第7张图片

{
  "analysis" : {
     "analyzer" : {
         "pinyin_analyzer" : {
             "tokenizer" : "my_pinyin"
          }
     },
     "tokenizer" : {
         "my_pinyin" : {
             "type" : "pinyin",
             "keep_separate_first_letter" : false,
             "keep_full_pinyin" : true,
             "keep_original" : true,
             "limit_first_letter_length" : 16,
             "lowercase" : true,
             "remove_duplicated_term" : true
         }
     }
  }
}
验证结果

Springboot集成elasticsearch 使用IK+拼音分词_第8张图片

Springboot集成以上分词

新建索引,新增分词配置

Springboot集成elasticsearch 使用IK+拼音分词_第9张图片

{
  "analysis" : {
     "analyzer" : {
         "pinyin_analyzer" : {
             "tokenizer" : "my_pinyin"
          }
     },
     "tokenizer" : {
         "my_pinyin" : {
             "type" : "pinyin",
             "keep_separate_first_letter" : false,
             "keep_full_pinyin" : true,
             "keep_original" : true,
             "limit_first_letter_length" : 16,
             "lowercase" : true,
             "remove_duplicated_term" : true
         }
     }
  }
}

Springboot集成elasticsearch 使用IK+拼音分词_第10张图片

{
        "properties": {
            "字段名": {
                "type": "keyword",
                "fields": {
                    "pinyin": {
                        "type": "text",
                        "store": false,
                        "term_vector": "with_offsets",
                        "analyzer": "pinyin_analyzer",
                        "boost": 10
                    }
                }
            }
        }
    
}
新实体类
package com.my.entity;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;

import java.util.Date;

/**
 * Film document mapped to the {@code new_film} index with type {@code new}.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Document(indexName = "new_film", type = "new")
public class FilmEntity {

    private Long id;
    private String name;
    private String director;
    private Date created;

    /**
     * Renders the id, name and director; {@code created} is not part of the output.
     *
     * @return text of the form {@code FilmEntity [id=..., name=..., director=...]}
     */
    @Override
    public String toString() {
        return String.format("FilmEntity [id=%s, name=%s, director=%s]", id, name, director);
    }
}

新增测试数据
	@Autowired
    FilmRepository filmRepository;
    /**
     * Saves a new {@link FilmEntity} whose id and creation date are both taken from the current time.
     *
     * @param des  text stored as the entity's director
     * @param name text stored as the entity's name
     * @return the literal {@code "success"}
     */
    @GetMapping("save")
    public String save(String des, String name) {
        // A single clock read supplies both the id and the creation date. The date is built
        // straight from epoch millis rather than via LocalDateTime and the system zone, which
        // is ambiguous for local times that repeat at a DST fall-back.
        long now = System.currentTimeMillis();
        FilmEntity filmEntity = new FilmEntity(now, name, des, new Date(now));
        filmRepository.save(filmEntity);
        return "success";
    }

在这里插入图片描述

查询
	/**
     * Searches films with the query produced by {@code structureQuery} for the given text.
     *
     * @param name text to search for
     * @return content of the result returned by the repository search
     */
    @GetMapping("search")
    public List<FilmEntity> search(String name) {
        NativeSearchQueryBuilder builder = new NativeSearchQueryBuilder();
        builder.withQuery(structureQuery(name));
        SearchQuery query = builder.build();
        return filmRepository.search(query).getContent();
    }

    /**
     * Builds the mixed Chinese / pinyin search.
     *
     * @param content text to match
     * @return dis_max query over the {@code name}, {@code name.pinyin} and {@code director} fields
     */
    public DisMaxQueryBuilder structureQuery(String content) {
        // dis_max takes the score of the best-scoring sub-query as the document's score.
        // Only name (plus its pinyin sub-field) and director are searched; the plain
        // name and director matches carry a boost of 2.
        return QueryBuilders.disMaxQuery()
                .add(QueryBuilders.matchQuery("name", content).boost(2f))
                .add(QueryBuilders.matchQuery("name.pinyin", content))
                .add(QueryBuilders.matchQuery("director", content).boost(2f));
    }

http://localhost:8080/film/search?name=中国:

[
    {
        "id": 1559724973532,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-05T08:56:13.531+0000"
    },
    {
        "id": 1559724949646,
        "name": "测试",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-05T08:55:49.645+0000"
    },
    {
        "id": 1559724960792,
        "name": "小明",
        "director": "美国留给伊拉克的是个烂摊子吗",
        "created": "2019-06-05T08:56:00.792+0000"
    }
]

http://localhost:8080/film/search?name=ceshi

http://localhost:8080/film/search?name=测试

[
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    },
    {
        "id": 1559786123724,
        "name": "测试",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    }
]

构建高亮查询
	@Autowired
    TransportClient client;
	/**
     * Runs the query produced by {@code structureQuery} against the {@code new_film} index and
     * returns the hits with highlighted {@code name} and {@code director} text.
     *
     * <p>Matched terms are wrapped in {@code <em>} tags. When a field has a highlight fragment,
     * the first fragment replaces that field's stored value in the returned entity.
     *
     * @param des text to search for
     * @return one entity per search hit, in the order the hits were returned
     */
    @GetMapping("query")
    public List<FilmEntity> query(String des) {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        // Non-empty tags are required: with empty pre/post tags the highlighted
        // fragment is indistinguishable from the plain field value.
        highlightBuilder.preTags("<em>");
        highlightBuilder.postTags("</em>");
        highlightBuilder.field("name");
        highlightBuilder.field("director");
        // NOTE: highlightBuilder.field("name.pinyin") was tried here and did not take effect.

        SearchResponse response = client.prepareSearch("new_film")
                .setTypes("new")
                .setQuery(structureQuery(des))
                .highlighter(highlightBuilder)
                .execute()
                .actionGet();

        List<FilmEntity> result = new ArrayList<>();
        for (SearchHit hit : response.getHits()) {
            FilmEntity filmEntity =
                    com.alibaba.fastjson.JSON.parseObject(hit.getSourceAsString(), FilmEntity.class);

            String highlightedName = firstFragment(hit, "name");
            if (highlightedName != null) {
                filmEntity.setName(highlightedName);
            }
            String highlightedDirector = firstFragment(hit, "director");
            if (highlightedDirector != null) {
                filmEntity.setDirector(highlightedDirector);
            }
            result.add(filmEntity);
        }
        return result;
    }

    /**
     * Returns the first highlight fragment of {@code field} in {@code hit},
     * or {@code null} when the hit has no fragment for that field.
     */
    private static String firstFragment(SearchHit hit, String field) {
        if (hit.getHighlightFields().get(field) == null) {
            return null;
        }
        Text[] fragments = hit.getHighlightFields().get(field).getFragments();
        if (fragments == null || fragments.length == 0) {
            return null;
        }
        return fragments[0].toString();
    }

http://localhost:8080/film/query?des=中国

[
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "韩渔警冲突调查:韩警平均每天扣1艘渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    },
    {
        "id": 1559786123724,
        "name": "测试",
        "director": "驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    },
    {
        "id": 1559786119620,
        "name": "小明",
        "director": "美留给伊拉克的是个烂摊子吗",
        "created": "2019-06-06T01:55:19.620+0000"
    }
]

http://localhost:8080/film/query?des=测试

[
    {
        "id": 1559786123724,
        "name": "测试",
        "director": "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首",
        "created": "2019-06-06T01:55:23.724+0000"
    },
    {
        "id": 1559786111119,
        "name": "ceshi",
        "director": "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船",
        "created": "2019-06-06T01:55:11.119+0000"
    }
]

代码地址

你可能感兴趣的:(学习笔记)