安装IK中文分词和拼音插件
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.2/elasticsearch-analysis-ik-6.3.2.zip
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v6.3.2/elasticsearch-analysis-pinyin-6.3.2.zip
我的ES版本是6.3.2,所以安装的是6.3.2版本的插件;插件版本必须与自己的ES版本保持一致,安装完成后需要重启ES才能生效。
可以验证插件是否安装成功:
POST /_analyze
{
"analyzer":"pinyin",
"text":"北京东"
}
POST /_analyze
{
"analyzer":"ik_max_word",
"text":"北京东"
}
结果如下
拼音的分析结果
{
"tokens": [
{
"token": "bei",
"start_offset": 0,
"end_offset": 0,
"type": "word",
"position": 0
},
{
"token": "jing",
"start_offset": 0,
"end_offset": 0,
"type": "word",
"position": 1
},
{
"token": "dong",
"start_offset": 0,
"end_offset": 0,
"type": "word",
"position": 2
},
{
"token": "bjd",
"start_offset": 0,
"end_offset": 0,
"type": "word",
"position": 2
}
]
}
--------
IK分词分析结果
{
"tokens": [
{
"token": "北京",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 0
},
{
"token": "京东",
"start_offset": 1,
"end_offset": 3,
"type": "CN_WORD",
"position": 1
}
]
}
新建index,并定义settings(自定义分析器)和mapping(相当于表结构)
PUT /station_test/
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"pinyin_analyzer": {
"tokenizer": "my_pinyin"
}
},
"tokenizer": {
"my_pinyin": {
"type": "pinyin",
"keep_first_letter":true,
"keep_separate_first_letter": true,
"keep_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"lowercase": true
}
}
}
}
},
"mappings": {
"route": {
"properties": {
"station_name": {
"type": "text",
"analyzer": "ik_max_word",
"fields": {
"s-pinyin": {
"type": "completion",
"analyzer": "pinyin_analyzer"
}
}
},
"station_code": {
"type": "completion"
}
}
}
}
}
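这里我自定义了一个名为pinyin_analyzer的analyzer,它使用上面定义的my_pinyin tokenizer。
mapping中有两个字段:station_name(text类型,使用ik_max_word分词,并带有一个completion类型的子字段s-pinyin,该子字段使用pinyin_analyzer)和station_code(completion类型)。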
插入数据
PUT /station_test/route/1
{
"station_code": "VAP",
"station_name": "北京北"
}
PUT /station_test/route/2
{
"station_code": "BOP",
"station_name": "北京东"
}
PUT /station_test/route/3
{
"station_code": "GGQ",
"station_name": "广州南"
}
PUT /station_test/route/4
{
"station_code": "SHH",
"station_name": "上海"
}
SUGGEST查询
拼音提示
POST /station_test/_search
{
"suggest":{
"text":"bj",
"code-suggest" : {
"completion" : {
"field" : "station_code"
}
},
"pinyin-suggest" : {
"completion" : {
"field" : "station_name.s-pinyin"
}
}
}
}
结果如下:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": 0,
"hits": []
},
"suggest": {
"code-suggest": [
{
"text": "bj",
"offset": 0,
"length": 2,
"options": []
}
],
"pinyin-suggest": [
{
"text": "bj",
"offset": 0,
"length": 2,
"options": [
{
"text": "北京东",
"_index": "station_test",
"_type": "route",
"_id": "2",
"_score": 1,
"_source": {
"station_code": "BOP",
"station_name": "北京东"
}
},
{
"text": "北京北",
"_index": "station_test",
"_type": "route",
"_id": "1",
"_score": 1,
"_source": {
"station_code": "VAP",
"station_name": "北京北"
}
}
]
}
]
}
}
中文提示:
POST /station_test/_search
{
"suggest":{
"text":"广",
"code-suggest" : {
"completion" : {
"field" : "station_code"
}
},
"pinyin-suggest" : {
"completion" : {
"field" : "station_name.s-pinyin"
}
}
}
}
结果如下:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": 0,
"hits": []
},
"suggest": {
"code-suggest": [
{
"text": "广",
"offset": 0,
"length": 1,
"options": []
}
],
"pinyin-suggest": [
{
"text": "广",
"offset": 0,
"length": 1,
"options": [
{
"text": "广州南",
"_index": "station_test",
"_type": "route",
"_id": "3",
"_score": 1,
"_source": {
"station_code": "GGQ",
"station_name": "广州南"
}
}
]
}
]
}
}
Java代码API调用
package com.es;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* @author allen
* @date 2019-03-19.
*/
public class EsSuggestDemo {
public static RestHighLevelClient getRestHighLevelClient() {
String[] ips = "localhost".split(",");
Integer port = 9200;
List list = new ArrayList();
for (String ip : ips) {
list.add(new HttpHost(ip, port, "http"));
}
RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(list.toArray(new HttpHost[list.size()])));
return client;
}
public static void main(String[] args) throws IOException {
RestHighLevelClient client = getRestHighLevelClient();
String indexName = "station_test";
String keyword = "bj";
CompletionSuggestionBuilder stationName = SuggestBuilders.completionSuggestion("station_name.s-pinyin").prefix(keyword);
CompletionSuggestionBuilder stationCode = SuggestBuilders.completionSuggestion("station_code").prefix(keyword);
SearchRequest searchRequest = new SearchRequest().indices(indexName).types("route").source(new SearchSourceBuilder().suggest(
new SuggestBuilder().addSuggestion("pinyin-suggest", stationName)
.addSuggestion("code-suggest", stationCode)
));
SearchResponse searchResponse = client.search(searchRequest);
System.out.println(searchResponse);
Suggest suggestions = searchResponse.getSuggest();
List extends Suggest.Suggestion.Entry extends Suggest.Suggestion.Entry.Option>> results = suggestions.getSuggestion("pinyin-suggest").getEntries();
for (Suggest.Suggestion.Entry extends Suggest.Suggestion.Entry.Option> op : results) {
List extends Suggest.Suggestion.Entry.Option> options = op.getOptions();
for (Suggest.Suggestion.Entry.Option pp : options) {
System.out.println( pp.getText());
}
}
client.close();
}
}
结果如下:
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":0.0,"hits":[]},"suggest":{"code-suggest":[{"text":"bj","offset":0,"length":2,"options":[]}],"pinyin-suggest":[{"text":"bj","offset":0,"length":2,"options":[{"text":"北京东","_index":"station_test","_type":"route","_id":"2","_score":1.0,"_source":{"station_code":"BOP","station_name":"北京东"}},{"text":"北京北","_index":"station_test","_type":"route","_id":"1","_score":1.0,"_source":{"station_code":"VAP","station_name":"北京北"}}]}]}}
北京东
北京北
参考文档:
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/medcl/elasticsearch-analysis-pinyin
https://blog.csdn.net/qq_30581017/article/details/79985483
https://discuss.elastic.co/t/completion-suggester-using-java-api-6-1-1/116463/6
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html