ES拼音中文智能提示suggest

安装IK中文分词和拼音插件

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.3.2/elasticsearch-analysis-ik-6.3.2.zip

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v6.3.2/elasticsearch-analysis-pinyin-6.3.2.zip

我的ES版本是6.3.2。插件版本必须与ES版本完全一致,请根据自己的ES版本替换上面下载地址中的版本号;插件安装完成后需要重启ES才能生效。


可以验证插件是否安装成功:

POST /_analyze
{
		"analyzer":"pinyin",
		"text":"北京东"
}

POST /_analyze
{
		"analyzer":"ik_max_word",
		"text":"北京东"
}

结果如下

拼音的分析结果
{
  "tokens": [
    {
      "token": "bei",
      "start_offset": 0,
      "end_offset": 0,
      "type": "word",
      "position": 0
    },
    {
      "token": "jing",
      "start_offset": 0,
      "end_offset": 0,
      "type": "word",
      "position": 1
    },
    {
      "token": "dong",
      "start_offset": 0,
      "end_offset": 0,
      "type": "word",
      "position": 2
    },
    {
      "token": "bjd",
      "start_offset": 0,
      "end_offset": 0,
      "type": "word",
      "position": 2
    }
  ]
}
--------
IK分词分析结果
{
  "tokens": [
    {
      "token": "北京",
      "start_offset": 0,
      "end_offset": 2,
      "type": "CN_WORD",
      "position": 0
    },
    {
      "token": "京东",
      "start_offset": 1,
      "end_offset": 3,
      "type": "CN_WORD",
      "position": 1
    }
  ]
}

新建index的表结构

PUT /station_test/
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "pinyin_analyzer": {
            "tokenizer": "my_pinyin"
          }
        },
        "tokenizer": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_first_letter":true,
            "keep_separate_first_letter": true,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true
          }
        }
      }
    }
  },
  "mappings": {
    "route": {
      "properties": {
        "station_name": {
          "type": "text",
          "analyzer": "ik_max_word",
          "fields": {
            "s-pinyin": {
              "type": "completion",
              "analyzer": "pinyin_analyzer"
            }
          }
        },
        "station_code": {
          "type": "completion"
        }
      }
    }
  }
}

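上面的settings中自定义了一个名为pinyin_analyzer的analyzer,它使用自定义的拼音tokenizer(my_pinyin)。
mappings中有两个字段:station_name(text类型,使用ik_max_word分词,并带有一个completion类型的子字段s-pinyin,该子字段使用pinyin_analyzer)和station_code(completion类型)。

插入数据: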

PUT /station_test/route/1
{
  "station_code": "VAP",
  "station_name": "北京北"
}

PUT /station_test/route/2
{
  "station_code": "BOP",
  "station_name": "北京东"
}

PUT /station_test/route/3
{
  "station_code": "GGQ",
  "station_name": "广州南"
}

PUT /station_test/route/4
{
  "station_code": "SHH",
  "station_name": "上海"
}


SUGGEST查询
拼音提示

POST /station_test/_search
{
  "suggest":{
    "text":"bj",
    "code-suggest" : {
      "completion" : {
        "field" : "station_code"
      }
    },
    "pinyin-suggest" : {
      "completion" : {
        "field" : "station_name.s-pinyin"
      }
    }
  }
}

结果如下:

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 0,
    "max_score": 0,
    "hits": []
  },
  "suggest": {
    "code-suggest": [
      {
        "text": "bj",
        "offset": 0,
        "length": 2,
        "options": []
      }
    ],
    "pinyin-suggest": [
      {
        "text": "bj",
        "offset": 0,
        "length": 2,
        "options": [
          {
            "text": "北京东",
            "_index": "station_test",
            "_type": "route",
            "_id": "2",
            "_score": 1,
            "_source": {
              "station_code": "BOP",
              "station_name": "北京东"
            }
          },
          {
            "text": "北京北",
            "_index": "station_test",
            "_type": "route",
            "_id": "1",
            "_score": 1,
            "_source": {
              "station_code": "VAP",
              "station_name": "北京北"
            }
          }
        ]
      }
    ]
  }
}

中文提示:

POST /station_test/_search
{
  "suggest":{
    "text":"广",
    "code-suggest" : {
      "completion" : {
        "field" : "station_code"
      }
    },
    "pinyin-suggest" : {
      "completion" : {
        "field" : "station_name.s-pinyin"
      }
    }
  }
}

结果如下:

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 0,
    "max_score": 0,
    "hits": []
  },
  "suggest": {
    "code-suggest": [
      {
        "text": "广",
        "offset": 0,
        "length": 1,
        "options": []
      }
    ],
    "pinyin-suggest": [
      {
        "text": "广",
        "offset": 0,
        "length": 1,
        "options": [
          {
            "text": "广州南",
            "_index": "station_test",
            "_type": "route",
            "_id": "3",
            "_score": 1,
            "_source": {
              "station_code": "GGQ",
              "station_name": "广州南"
            }
          }
        ]
      }
    ]
  }
}

Java代码API调用

package com.es;

import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * @author allen
 * @date 2019-03-19.
 */
public class EsSuggestDemo {


	public static RestHighLevelClient getRestHighLevelClient() {
		String[] ips = "localhost".split(",");
		Integer port = 9200;
		List list = new ArrayList();
		for (String ip : ips) {
			list.add(new HttpHost(ip, port, "http"));
		}
		RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(list.toArray(new HttpHost[list.size()])));
		return client;
	}

	public static void main(String[] args) throws IOException {

		RestHighLevelClient client = getRestHighLevelClient();
		String indexName = "station_test";
		String keyword = "bj";

		CompletionSuggestionBuilder stationName = SuggestBuilders.completionSuggestion("station_name.s-pinyin").prefix(keyword);
		CompletionSuggestionBuilder stationCode = SuggestBuilders.completionSuggestion("station_code").prefix(keyword);
		SearchRequest searchRequest = new SearchRequest().indices(indexName).types("route").source(new SearchSourceBuilder().suggest(
				new SuggestBuilder().addSuggestion("pinyin-suggest", stationName)
						.addSuggestion("code-suggest", stationCode)
		));
		SearchResponse searchResponse = client.search(searchRequest);
		System.out.println(searchResponse);
		Suggest suggestions = searchResponse.getSuggest();
		List> results = suggestions.getSuggestion("pinyin-suggest").getEntries();
		for (Suggest.Suggestion.Entry op : results) {
			List options = op.getOptions();
			for (Suggest.Suggestion.Entry.Option pp : options) {
				System.out.println( pp.getText());
			}
		}

		client.close();
	}
}

结果如下:

{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":0.0,"hits":[]},"suggest":{"code-suggest":[{"text":"bj","offset":0,"length":2,"options":[]}],"pinyin-suggest":[{"text":"bj","offset":0,"length":2,"options":[{"text":"北京东","_index":"station_test","_type":"route","_id":"2","_score":1.0,"_source":{"station_code":"BOP","station_name":"北京东"}},{"text":"北京北","_index":"station_test","_type":"route","_id":"1","_score":1.0,"_source":{"station_code":"VAP","station_name":"北京北"}}]}]}}
北京东
北京北

参考文档:
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/medcl/elasticsearch-analysis-pinyin
https://blog.csdn.net/qq_30581017/article/details/79985483
https://discuss.elastic.co/t/completion-suggester-using-java-api-6-1-1/116463/6
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html

你可能感兴趣的:(ElasticSearch,Java)