目录
一.IK分词器
1.IK分词器介绍
2.安装
3.使用
4.自定义词库
二.拼音分词器
1.拼音分词器介绍
2.安装
三.自动补全
1.效果演示
2.实战
四.自动纠错
1.场景描述
2.DSL实现
3.java实现
五.仿京东实战
默认的中文分词是将每个字看成一个词,这显然是不符合要求的,所以我们需要安装中
文分词器来解决这个问题。
IK分词是一款国人开发的相对简单的中文分词器。虽然开发者自2012年之后就不再维护
了,但在工程应用中IK算是比较流行的一款!我们今天就介绍一下IK中文分词器的使用。
ik分词器下载地址
https://github.com/medcl/elasticsearch-analysis-ik/releases
链接:https://pan.baidu.com/s/1z49plwtgCzxprTibFviLlw
提取码:c8xg
(1)先将其解压,将解压后的elasticsearch文件夹重命名为ik
(2)将ik文件夹拷贝到elasticsearch/plugins 目录下
(3)重新启动es,即可加载IK分词器
下面看下效果吧~,如果没有IK分词器会将所有的字都拆分当成词语
postman请求测试【最小切分】:http://127.0.0.1:9200/testindex/_analyze
post请求,参数为:{"analyzer": "ik_smart", "text": "北京天安门" }
postman请求测试【最细粒度切分】: http://127.0.0.1:9200/testindex/_analyze
post请求参数为:{"analyzer": "ik_max_word", "text": "北京天安门" }
现在测试ik分词器中没有配置过的特殊词语时,还是会对所有字分词,这时候需要自定义词库
如下:
http://127.0.0.1:9200/testindex/_analyze
{"analyzer": "ik_max_word", "text": "你个老6" }
(1)进入elasticsearch/plugins/ik/config目录
(2)新建一个my.dic文件,编辑内容:你个老6
修改IKAnalyzer.cfg.xml(在ik/config目录下),在扩展字典配置项中填入刚才新建的词库文件名,即:<entry key="ext_dict">my.dic</entry>
重新启动elasticsearch,测试分词效果
pinyin 分词器可以让用户输入拼音,就能查找到相关的关键词。比如在某个商城搜索中,输入 shuihu,就能匹配到水壶。这样的体验还是非常好的。
拼音分词器安装地址:
Releases · medcl/elasticsearch-analysis-pinyin · GitHub:https://github.com/medcl/elasticsearch-analysis-pinyin/releases
从github上下载对应es版本的拼音分词器,解压到plugins目录下,重命名为pinyin,重启es即可生效
测试
创建索引
# Create the "medcl" index with a custom analyzer named "pinyin_analyzer".
# The analyzer uses the "my_pinyin" tokenizer (type "pinyin"), configured to
# keep the full pinyin of each character and the original text, not to emit
# separated first letters, to lowercase output, to drop duplicated terms and
# to cap the first-letter token at 16 characters.
PUT /medcl/
{
"settings" : {
"analysis" : {
"analyzer" : {
"pinyin_analyzer" : {
"tokenizer" : "my_pinyin"
}
},
"tokenizer" : {
"my_pinyin" : {
"type" : "pinyin",
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
}
}
}
}
# Run the "pinyin_analyzer" defined on the "medcl" index against a sample
# name to inspect the tokens it produces.
GET /medcl/_analyze
{
"text": ["汤兵兵"],
"analyzer": "pinyin_analyzer"
}
{
"tokens" : [
{
"token" : "tang",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 0
},
{
"token" : "汤兵兵",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 0
},
{
"token" : "tbb",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 0
},
{
"token" : "bing",
"start_offset" : 0,
"end_offset" : 0,
"type" : "word",
"position" : 1
}
]
}
创建映射
# Add the mapping for the "medcl" index: "name" is stored as a keyword and
# gets a "pinyin" sub-field of type text analyzed with "pinyin_analyzer".
# Searches address the sub-field as "name.pinyin".
POST /medcl/_mapping
{
"properties": {
"name": {
"type": "keyword",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
批量添加数据
#批量添加数据
POST /medcl/_bulk
{"index": {"_index": "medcl","_id": "1"}}
{"name":"汤兵兵"}
{"index": {"_index": "medcl","_id": "2"}}
{"name":"阚阳阳"}
{"index": {"_index": "medcl","_id": "3"}}
{"name":"汤一辰"}
{"index": {"_index": "medcl","_id": "4"}}
{"name":"汤得明"}
{"index": {"_index": "medcl","_id": "5"}}
{"name":"张继琴"}
{"index": {"_index": "medcl","_id": "6"}}
{"name":"阚佳武"}
{"index": {"_index": "medcl","_id": "7"}}
{"name":"施玉芬"}
{"index": {"_index": "medcl","_id": "8"}}
{"name":"陆毅"}
{"index": {"_index": "medcl","_id": "9"}}
{"name":"刘德华"}
测试拼音搜索
# Pinyin search: match the first-letter abbreviation "tbb" against the
# pinyin-analyzed sub-field "name.pinyin".
GET /medcl/_search
{
"query": {
"match": {
"name.pinyin": "tbb"
}
}
}
拼音分词器可选参数配置
** 可选参数 **
keep_first_letter : 启用此选项时,例如:刘德华 > ldh,默认值:true
keep_separate_first_letter : 启用该选项时,将单独保留每个字的首字母,例如:刘德华 > l,d,h,默认值:false。注意:由于这些词项出现频率过高,查询结果可能过于模糊
limit_first_letter_length : 设置 first_letter 结果的最大长度,默认值:16
keep_full_pinyin : 当启用该选项,例如:刘德华 > [liu,de,hua],默认值:true
keep_joined_full_pinyin : 启用此选项时,例如:刘德华 > [liudehua],默认值:false
keep_none_chinese : 在结果中保留非中文字母或数字,默认值:true
keep_none_chinese_together : 将非中文字母保持在一起,默认值:true,如:DJ 音乐家 - > DJ,yin,yue,jia,当设置为 false,例如:DJ 音乐家 - > D,J,yin,yue,jia,注意:必须先启用 keep_none_chinese
keep_none_chinese_in_first_letter : 在首字母结果中保留非中文字母,例如:刘德华 AT2016- > ldhat2016,默认值:true
keep_none_chinese_in_joined_full_pinyin : 保持非中文字母加入完整拼音,例如:刘德华 2016- > liudehua2016,默认:false
none_chinese_pinyin_tokenize : 如果非中文字母本身是拼音,则将其拆分为单独的拼音词项,默认值:true,如:liudehuaalibaba13zhuanghan- > liu,de,hua,a,li,ba,ba,13,zhuang,han,注意:需要先启用 keep_none_chinese 和 keep_none_chinese_together
keep_original : 当启用此选项时,也将保留原始输入,默认值:false
lowercase : 小写非中文字母,默认值:true
trim_whitespace : 默认值:true
remove_duplicated_term : 当启用此选项时,将删除重复的词项以节省索引空间,例如:de的 > de,默认值:false。注意:位置相关的查询可能会受到影响
汉字自动补全
拼音自动补全
(1)数据准备
# Create "product_completion_index" with 1 primary shard and 2 replicas.
# Defines the custom analyzer "ik_pinyin_analyzer": text is tokenized with
# "ik_smart" and the tokens are then passed through "pinyin_filter", a token
# filter of type "pinyin" that keeps first-letter abbreviations, full pinyin
# and the original term, lowercases output and removes duplicated terms.
PUT /product_completion_index/
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 2,
"analysis": {
"analyzer": {
"ik_pinyin_analyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"filter": "pinyin_filter"
}
},
"filter": {
"pinyin_filter": {
"type": "pinyin",
"keep_first_letter": true,
"keep_separate_first_letter": false,
"keep_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"lowercase": true,
"remove_duplicated_term": true
}
}
}
}
}
# Add the mapping: "name" is a keyword, and "searchkey" is a completion
# field analyzed with "ik_pinyin_analyzer". The suggest requests below
# all target "searchkey".
POST /product_completion_index/_mapping
{
"properties": {
"name": {
"type": "keyword"
},
"searchkey": {
"type": "completion",
"analyzer": "ik_pinyin_analyzer"
}
}
}
#批量新增数据
POST /product_completion_index/_bulk
{"index":{"_index":"product_completion_index","_id":"1"}}
{"name":"小米(MI)","searchkey":"小米手机"}
{"index":{"_index":"product_completion_index","_id":"2"}}
{"searchkey":"小米10","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"3"}}
{"searchkey":"小米电视","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"4"}}
{"searchkey":"小米路由器","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"5"}}
{"searchkey":"小米9","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"6"}}
{"searchkey":"小米手机","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"7"}}
{"searchkey":"小米耳环","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"8"}}
{"searchkey":"小米8","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"9"}}
{"searchkey":"小米10Pro","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"10"}}
{"searchkey":"小米笔记本","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"11"}}
{"searchkey":"小米摄像头","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"12"}}
{"searchkey":"小米电饭煲","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"13"}}
{"searchkey":"小米充电宝","name":"小米(MI)"}
{"index":{"_index":"product_completion_index","_id":"14"}}
{"searchkey":"adidas男鞋","name":"adidas男鞋"}
{"index":{"_index":"product_completion_index","_id":"15"}}
{"searchkey":"adidas女鞋","name":"adidas女鞋"}
{"index":{"_index":"product_completion_index","_id":"16"}}
{"searchkey":"adidas外套","name":"adidas外套"}
{"index":{"_index":"product_completion_index","_id":"17"}}
{"searchkey":"adidas裤子","name":"adidas裤子"}
{"index":{"_index":"product_completion_index","_id":"18"}}
{"searchkey":"adidas官方旗舰店","name":"adidas官方旗舰店"}
{"index":{"_index":"product_completion_index","_id":"19"}}
{"searchkey":"阿迪达斯袜子","name":"阿迪达斯袜子"}
{"index":{"_index":"product_completion_index","_id":"20"}}
{"searchkey":"阿迪达斯外套","name":"阿迪达斯外套"}
{"index":{"_index":"product_completion_index","_id":"21"}}
{"searchkey":"阿迪达斯运动鞋","name":"阿迪达斯运动鞋"}
{"index":{"_index":"product_completion_index","_id":"22"}}
{"searchkey":"耐克外套","name":"耐克外套"}
{"index":{"_index":"product_completion_index","_id":"23"}}
{"searchkey":"耐克运动鞋","name":"耐克运动鞋"}
(2)测试
# Completion by Chinese characters: ask the completion suggester on
# "searchkey" for up to 20 de-duplicated suggestions starting with "小米".
# The suggestions come back under the name "czbk-suggest".
GET product_completion_index/_search
{
"from": 0,
"size": 100,
"suggest": {
"czbk-suggest": {
"prefix": "小米",
"completion": {
"field": "searchkey",
"size": 20,
"skip_duplicates": true
}
}
}
}
// 结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"czbk-suggest" : [
{
"text" : "小米",
"offset" : 0,
"length" : 2,
"options" : [
{
"text" : "小米10",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10",
"name" : "小米(MI)"
}
},
{
"text" : "小米10Pro",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10Pro",
"name" : "小米(MI)"
}
},
{
"text" : "小米8",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "8",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米8",
"name" : "小米(MI)"
}
},
{
"text" : "小米9",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米9",
"name" : "小米(MI)"
}
},
{
"text" : "小米充电宝",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "13",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米充电宝",
"name" : "小米(MI)"
}
},
{
"text" : "小米手机",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "小米(MI)",
"searchkey" : "小米手机"
}
},
{
"text" : "小米摄像头",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "11",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米摄像头",
"name" : "小米(MI)"
}
},
{
"text" : "小米电视",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电视",
"name" : "小米(MI)"
}
},
{
"text" : "小米电饭煲",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "12",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电饭煲",
"name" : "小米(MI)"
}
},
{
"text" : "小米笔记本",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "10",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米笔记本",
"name" : "小米(MI)"
}
},
{
"text" : "小米耳环",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "7",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米耳环",
"name" : "小米(MI)"
}
},
{
"text" : "小米路由器",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米路由器",
"name" : "小米(MI)"
}
}
]
}
]
}
}
# Completion by full pinyin: same request as the Chinese-character variant,
# but the prefix is the pinyin spelling "xiaomi".
GET product_completion_index/_search
{
"from": 0,
"size": 100,
"suggest": {
"czbk-suggest": {
"prefix": "xiaomi",
"completion": {
"field": "searchkey",
"size": 20,
"skip_duplicates": true
}
}
}
}
// 结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"czbk-suggest" : [
{
"text" : "xiaomi",
"offset" : 0,
"length" : 6,
"options" : [
{
"text" : "小米10",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10",
"name" : "小米(MI)"
}
},
{
"text" : "小米10Pro",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10Pro",
"name" : "小米(MI)"
}
},
{
"text" : "小米8",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "8",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米8",
"name" : "小米(MI)"
}
},
{
"text" : "小米9",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米9",
"name" : "小米(MI)"
}
},
{
"text" : "小米充电宝",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "13",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米充电宝",
"name" : "小米(MI)"
}
},
{
"text" : "小米手机",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "小米(MI)",
"searchkey" : "小米手机"
}
},
{
"text" : "小米摄像头",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "11",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米摄像头",
"name" : "小米(MI)"
}
},
{
"text" : "小米电视",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电视",
"name" : "小米(MI)"
}
},
{
"text" : "小米电饭煲",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "12",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电饭煲",
"name" : "小米(MI)"
}
},
{
"text" : "小米笔记本",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "10",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米笔记本",
"name" : "小米(MI)"
}
},
{
"text" : "小米耳环",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "7",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米耳环",
"name" : "小米(MI)"
}
},
{
"text" : "小米路由器",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米路由器",
"name" : "小米(MI)"
}
}
]
}
]
}
}
# Completion by first-letter abbreviation: same request again, with the
# prefix "xm" (the initials of the pinyin).
GET product_completion_index/_search
{
"from": 0,
"size": 100,
"suggest": {
"czbk-suggest": {
"prefix": "xm",
"completion": {
"field": "searchkey",
"size": 20,
"skip_duplicates": true
}
}
}
}
//结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"czbk-suggest" : [
{
"text" : "xm",
"offset" : 0,
"length" : 2,
"options" : [
{
"text" : "小米10",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10",
"name" : "小米(MI)"
}
},
{
"text" : "小米10Pro",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米10Pro",
"name" : "小米(MI)"
}
},
{
"text" : "小米8",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "8",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米8",
"name" : "小米(MI)"
}
},
{
"text" : "小米9",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米9",
"name" : "小米(MI)"
}
},
{
"text" : "小米充电宝",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "13",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米充电宝",
"name" : "小米(MI)"
}
},
{
"text" : "小米手机",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "小米(MI)",
"searchkey" : "小米手机"
}
},
{
"text" : "小米摄像头",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "11",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米摄像头",
"name" : "小米(MI)"
}
},
{
"text" : "小米电视",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电视",
"name" : "小米(MI)"
}
},
{
"text" : "小米电饭煲",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "12",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米电饭煲",
"name" : "小米(MI)"
}
},
{
"text" : "小米笔记本",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "10",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米笔记本",
"name" : "小米(MI)"
}
},
{
"text" : "小米耳环",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "7",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米耳环",
"name" : "小米(MI)"
}
},
{
"text" : "小米路由器",
"_index" : "product_completion_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"searchkey" : "小米路由器",
"name" : "小米(MI)"
}
}
]
}
]
}
}
(3)代码实现
返回数据封装
package com.tangbb.elasticsearch.pojo;
//import com.fasterxml.jackson.annotation.JsonInclude;
import com.alibaba.fastjson.annotation.JSONField;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.tangbb.elasticsearch.pojo.ResultEnum;
import java.io.Serializable;
/**
 * Generic response envelope returned by the service endpoints.
 *
 * <p>Holds a status code, a description, an optional payload and an optional
 * item count. The class-level {@code @JsonInclude(NON_NULL)} annotation keeps
 * {@code null} fields out of the serialized output.
 *
 * <p>The class is declared with a type parameter because the payload field and
 * several methods are typed as {@code T}; existing callers that use the raw
 * type {@code ResponseData} keep compiling.
 *
 * @param <T> type of the payload carried in {@code data}
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class ResponseData<T> implements Serializable {

    /** Status code. */
    private String code;

    /** Human-readable description of the status. */
    private String desc;

    /** Payload. */
    private T data;

    /** Number of items in the payload, when the caller supplies one. */
    private Integer count;

    /** Creates an empty response; populate it with one of the {@code setResultEnum} overloads. */
    public ResponseData() {
    }

    /**
     * Creates a response from an explicit code and description.
     *
     * @param code status code
     * @param desc status description
     */
    public ResponseData(String code, String desc) {
        this.code = code;
        this.desc = desc;
    }

    /**
     * Creates a payload-less response whose code and description come from {@code resultEnum}.
     *
     * @param resultEnum source of the code and description
     */
    public ResponseData(ResultEnum resultEnum) {
        this.code = resultEnum.getCode();
        this.desc = resultEnum.getDecs();
    }

    /**
     * Creates a response carrying {@code data}, with code and description taken from {@code resultEnum}.
     *
     * @param data       payload
     * @param resultEnum source of the code and description
     */
    public ResponseData(T data, ResultEnum resultEnum) {
        this.code = resultEnum.getCode();
        this.desc = resultEnum.getDecs();
        this.data = data;
    }

    /**
     * Creates a response carrying {@code data} and an item count.
     *
     * @param data       payload
     * @param resultEnum source of the code and description
     * @param count      number of items, may be {@code null}
     */
    public ResponseData(T data, ResultEnum resultEnum, Integer count) {
        this.code = resultEnum.getCode();
        this.desc = resultEnum.getDecs();
        this.data = data;
        this.count = count;
    }

    public Integer getCount() {
        return count;
    }

    public void setCount(Integer count) {
        this.count = count;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public String getDesc() {
        return desc;
    }

    public T getData() {
        return data;
    }

    /**
     * Overwrites code and description from {@code result}.
     *
     * @param result source of the code and description
     * @return this instance, for chaining
     */
    public ResponseData<T> setResultEnum(ResultEnum result) {
        this.code = result.getCode();
        this.desc = result.getDecs();
        return this;
    }

    /**
     * Overwrites code, description, payload and count in one call.
     *
     * @param data       payload
     * @param resultEnum source of the code and description
     * @param count      number of items, may be {@code null}
     * @return this instance, for chaining
     */
    public ResponseData<T> setResultEnum(T data, ResultEnum resultEnum, Integer count) {
        this.code = resultEnum.getCode();
        this.desc = resultEnum.getDecs();
        this.data = data;
        this.count = count;
        return this;
    }

    /**
     * Overwrites code and description with explicit values.
     *
     * @param code status code
     * @param desc status description
     * @return this instance, for chaining
     */
    public ResponseData<T> setResultEnum(String code, String desc) {
        this.code = code;
        this.desc = desc;
        return this;
    }
}
枚举类
package com.tangbb.elasticsearch.pojo;
/**
 * Result codes and their descriptions, used to populate response envelopes.
 *
 * <p>Each constant pairs a string code with a description. Both values are
 * fixed at construction time, so the fields are declared {@code final}.
 */
public enum ResultEnum {
    success("200", "操作成功!"),
    param_isnull("-400", "参数为空"),
    error("-402", "操作失败!"),
    server_error("-500", "服务异常"),
    data_existent("-504", "数据不存在"),
    result_empty("-000", "查询内容为空"),
    NOT_SYSTEM_API("404", "不是系统指定api"),
    REPEAT("666", "数据已存在"),
    HTTP_ERROR("-405", "请求异常");

    /** Status code. */
    private final String code;

    /** Description of the status. */
    private final String decs;

    ResultEnum(String code, String decs) {
        this.code = code;
        this.decs = decs;
    }

    /** @return the status code of this constant */
    public String getCode() {
        return code;
    }

    /**
     * Returns the description. The misspelled name is kept because existing
     * callers depend on it.
     *
     * @return the description of this constant
     */
    public String getDecs() {
        return decs;
    }

    /**
     * Correctly spelled alias of {@link #getDecs()}.
     *
     * @return the description of this constant
     */
    public String getDesc() {
        return decs;
    }
}
日志类
package com.tangbb.elasticsearch.pojo;
/**
 * Application-level tip messages, used by the controllers as log text.
 *
 * <p>Each constant carries one message. The message is fixed at construction
 * time, so the field is declared {@code final}.
 */
public enum TipsEnum {
    create_index_success("创建索引成功!"),
    create_index_fail("创建索引失败!"),
    delete_index_success("删除索引成功!"),
    delete_index_fail("删除索引失败!"),
    open_index_success("打开索引成功!"),
    open_index_fail("打开索引失败!"),
    close_index_success("关闭索引成功!"),
    close_index_fail("关闭索引失败!"),
    alias_index_success("索引别名设置成功!"),
    alias_index_fail("索引别名设置失败!"),
    exists_index_success("索引是否存在查询成功!"),
    // Fixed: the message was missing its leading character and read "引是否存在查询失败!".
    exists_index_fail("索引是否存在查询失败!"),
    create_doc_success("创建文档成功!"),
    create_doc_fail("创建文档失败!"),
    batch_create_doc_success("批量创建文档成功!"),
    batch_create_doc_fail("批量创建文档失败!"),
    update_doc_success("修改文档成功!"),
    update_doc_fail("修改文档失败!"),
    get_doc_success("查询文档成功!"),
    batch_get_doc_fail("批量查询文档失败!"),
    batch_get_doc_success("批量查询文档成功!"),
    get_doc_fail("查询文档失败!"),
    delete_doc_success("删除文档成功!"),
    delete_doc_fail("删除文档失败!"),
    csuggest_get_doc_fail("自动补全获取失败!"),
    csuggest_get_doc_success("自动补全获取成功!"),
    psuggest_get_doc_fail("拼写纠错获取失败!"),
    psuggest_get_doc_success("拼写纠错获取成功!"),
    tsuggest_get_doc_fail("搜索推荐获取失败!"),
    tsuggest_get_doc_success("搜索推荐获取成功!"),
    hotwords_get_doc_fail("搜索热词获取失败!"),
    hotwords_get_doc_success("搜索热词获取成功!"),
    metricagg_get_doc_fail("指标聚合处理失败!"),
    metricagg_get_doc_success("指标聚合处理成功!"),
    bucketagg_get_doc_fail("桶聚合处理失败!"),
    bucketagg_get_doc_success("桶聚合处理成功!"),
    index_default("索引创建失败!");

    /** Message text of this tip. */
    private final String message;

    TipsEnum(String message) {
        this.message = message;
    }

    /** @return the message text of this constant */
    public String getMessage() {
        return message;
    }
}
实体类
package com.tangbb.elasticsearch.pojo;
import com.fasterxml.jackson.annotation.JsonInclude;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
* @Class: CommonEntity
* @Package com.itheima.commons.pojo
* @Description: 公共实体类
* @Company: http://www.itheima.com/
*/
//如果加该注解的字段为null,那么就不序列化
@JsonInclude(JsonInclude.Include.NON_NULL)
public class CommonEntity implements Serializable {
//页码
private int pageNumber;
//每页数据条数
private int pageSize;
//索引名称
private String indexName;
//高亮列
private String highlight;
//排序 DESC ASC
private String sortOrder;
//排序列
private String sortField;
//自动补全建议列
private String suggestFileld;
//自动补全建议值
private String suggestValue;
//自动补全返回个数
private Integer suggestCount;
//动态查询参数封装
Map map;
//批量增加list
private List
控制层
/**
 * Auto-completion endpoint.
 *
 * <p>Responds with {@code ResultEnum.param_isnull} when the index name, the
 * suggest field or the suggest value is empty. Otherwise delegates to
 * {@code contentService.cSuggest} and wraps the returned suggestions together
 * with their count. An exception raised by the service is logged and reported
 * as {@code ResultEnum.error}.
 *
 * <p>NOTE(review): this is a GET mapping that reads a {@code @RequestBody};
 * some HTTP clients and proxies do not forward a body on GET - confirm the
 * callers actually send one.
 *
 * @param commonEntity request parameters (index name, suggest field, value and count)
 * @return the response envelope described above
 */
@GetMapping(value = "/csuggest")
public ResponseData cSuggest(@RequestBody CommonEntity commonEntity) {
    // Response envelope that is filled in on every path below.
    ResponseData rData = new ResponseData();
    if (StringUtils.isEmpty(commonEntity.getIndexName())
            || StringUtils.isEmpty(commonEntity.getSuggestFileld())
            || StringUtils.isEmpty(commonEntity.getSuggestValue())) {
        rData.setResultEnum(ResultEnum.param_isnull);
        return rData;
    }
    try {
        // Ask the service layer for the completion suggestions.
        List<String> result = contentService.cSuggest(commonEntity);
        rData.setResultEnum(result, ResultEnum.success, result.size());
        // Log with the completion-specific message rather than the generic document-query one.
        logger.info(TipsEnum.csuggest_get_doc_success.getMessage());
    } catch (Exception e) {
        logger.error(TipsEnum.csuggest_get_doc_fail.getMessage(), e);
        rData.setResultEnum(ResultEnum.error);
    }
    return rData;
}
服务层
/**
 * Auto-completion: runs a completion suggester for the prefix typed by the
 * user and returns the suggested texts.
 *
 * <p>The request targets the index named by {@code commonEntity}, uses its
 * suggest field as the completion field and its suggest value as the prefix,
 * and skips duplicate suggestions. The suggestion is registered under the
 * fixed name {@code "czbk-suggest"} and read back under the same name.
 *
 * @param commonEntity supplies the index name, suggest field, suggest value and
 *                     an optional suggestion count; when the count is
 *                     {@code null} the builder's own default size is left in place
 * @return the suggested texts in the order returned by the search; empty when
 *         the response carries no suggestion, no entry or no option
 * @throws Exception if the search request fails
 */
public List<String> cSuggest(CommonEntity commonEntity) throws Exception {
    List<String> suggestList = new ArrayList<>();

    // Build the search request, sorted by score in descending order.
    SearchRequest searchRequest = new SearchRequest(commonEntity.getIndexName());
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));

    // Completion suggester on the requested field, using the user input as prefix.
    CompletionSuggestionBuilder completionSuggestionBuilder =
            new CompletionSuggestionBuilder(commonEntity.getSuggestFileld());
    completionSuggestionBuilder.prefix(commonEntity.getSuggestValue());
    completionSuggestionBuilder.skipDuplicates(true);
    // The count is optional: passing a null Integer to size(int) would fail on unboxing.
    Integer suggestCount = commonEntity.getSuggestCount();
    if (suggestCount != null) {
        completionSuggestionBuilder.size(suggestCount);
    }
    searchSourceBuilder.suggest(
            new SuggestBuilder().addSuggestion("czbk-suggest", completionSuggestionBuilder));
    searchRequest.source(searchSourceBuilder);

    SearchResponse suggestResponse =
            restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Guard each level so a response without suggestions yields an empty list
    // instead of a NullPointerException or IndexOutOfBoundsException.
    if (suggestResponse.getSuggest() == null) {
        return suggestList;
    }
    CompletionSuggestion completionSuggestion =
            suggestResponse.getSuggest().getSuggestion("czbk-suggest");
    if (completionSuggestion == null || CollectionUtils.isEmpty(completionSuggestion.getEntries())) {
        return suggestList;
    }
    List<CompletionSuggestion.Entry.Option> optionsList =
            completionSuggestion.getEntries().get(0).getOptions();
    if (!CollectionUtils.isEmpty(optionsList)) {
        for (CompletionSuggestion.Entry.Option option : optionsList) {
            suggestList.add(option.getText().toString());
        }
    }
    return suggestList;
}
测试结果
http://localhost:9090/csuggest
例如:错误输入【adidaas官方旗舰店】,能够纠错为【adidas官方旗舰店】
# Spelling correction: run a phrase suggester on the "name" field for the
# misspelled text "adidaas官方旗舰店", asking for up to 13 candidates.
# The candidates come back under the name "czbk-suggestion".
GET product_completion_index/_search
{
"suggest": {
"czbk-suggestion": {
"text": "adidaas官方旗舰店",
"phrase": {
"field": "name",
"size": 13
}
}
}
}
控制层
/**
 * Spelling-correction endpoint.
 *
 * <p>Responds with {@code ResultEnum.param_isnull} when the index name, the
 * suggest field or the suggest value is empty. Otherwise delegates to
 * {@code contentService.pSuggest} and wraps the corrected text; no count is
 * set. An exception raised by the service is logged and reported as
 * {@code ResultEnum.error}.
 *
 * <p>NOTE(review): this is a GET mapping that reads a {@code @RequestBody};
 * some HTTP clients and proxies do not forward a body on GET - confirm the
 * callers actually send one.
 *
 * @param commonEntity request parameters (index name, suggest field and value)
 * @return the response envelope described above
 */
@GetMapping(value = "/psuggest")
public ResponseData pSuggest(@RequestBody CommonEntity commonEntity) {
    // Response envelope that is filled in on every path below.
    ResponseData rData = new ResponseData();
    if (StringUtils.isEmpty(commonEntity.getIndexName())
            || StringUtils.isEmpty(commonEntity.getSuggestFileld())
            || StringUtils.isEmpty(commonEntity.getSuggestValue())) {
        rData.setResultEnum(ResultEnum.param_isnull);
        return rData;
    }
    try {
        // Ask the service layer for the corrected text.
        String result = contentService.pSuggest(commonEntity);
        rData.setResultEnum(result, ResultEnum.success, null);
        // Log with the spelling-correction messages rather than the document-query ones.
        logger.info(TipsEnum.psuggest_get_doc_success.getMessage());
    } catch (Exception e) {
        logger.error(TipsEnum.psuggest_get_doc_fail.getMessage(), e);
        rData.setResultEnum(ResultEnum.error);
    }
    return rData;
}
服务层
/**
 * Spelling correction: runs a phrase suggester for the given text and returns
 * the first suggestion.
 *
 * <p>The request targets the index named by {@code commonEntity}, uses its
 * suggest field as the phrase-suggester field and its suggest value as the
 * text to correct, and asks for a single suggestion. The suggestion is
 * registered under the fixed name {@code "czbk-suggest"} and read back under
 * the same name. All space characters are removed from the returned text.
 *
 * @param commonEntity supplies the index name, suggest field and suggest value
 * @return the first suggested text with spaces removed, or an empty string
 *         when the response carries no suggestion, no entry or no option
 * @throws Exception if the search request fails
 */
public String pSuggest(CommonEntity commonEntity) throws Exception {
    String pSuggestString = "";

    // Build the search request, sorted by score in descending order.
    SearchRequest searchRequest = new SearchRequest(commonEntity.getIndexName());
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));

    // Phrase suggester on the requested field; only the best candidate is needed.
    PhraseSuggestionBuilder pSuggestionBuilder =
            new PhraseSuggestionBuilder(commonEntity.getSuggestFileld());
    pSuggestionBuilder.text(commonEntity.getSuggestValue());
    pSuggestionBuilder.size(1);
    searchSourceBuilder.suggest(
            new SuggestBuilder().addSuggestion("czbk-suggest", pSuggestionBuilder));
    searchRequest.source(searchSourceBuilder);

    SearchResponse suggestResponse =
            restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

    // Guard each level so a response without suggestions yields an empty string
    // instead of a NullPointerException or IndexOutOfBoundsException.
    if (suggestResponse.getSuggest() == null) {
        return pSuggestString;
    }
    PhraseSuggestion phraseSuggestion =
            suggestResponse.getSuggest().getSuggestion("czbk-suggest");
    if (phraseSuggestion == null || CollectionUtils.isEmpty(phraseSuggestion.getEntries())) {
        return pSuggestString;
    }
    List<PhraseSuggestion.Entry.Option> optionsList =
            phraseSuggestion.getEntries().get(0).getOptions();
    if (!CollectionUtils.isEmpty(optionsList) && optionsList.get(0).getText() != null) {
        // Literal replacement; equivalent to the earlier regex-based replaceAll(" ", "").
        pSuggestString = optionsList.get(0).getText().string().replace(" ", "");
    }
    return pSuggestString;
}
测试
http://localhost:9090/psuggest
仿京东实战完整代码详见我的gitee,代码地址如下
https://gitee.com/EkkoBoy/elasticsearch.git