ES 拼音插件:https://github.com/medcl/elasticsearch-analysis-pinyin/tree/v1.6.0 ,具体用法此处不做介绍,请参阅该项目的 README。
现在要实现一个搜索场馆的功能:除了按名称搜索外,还要能按场馆名称的拼音首字母缩写搜索出场馆,并且必须高亮显示首字母缩写所对应的汉字部分。
analysis可定义如下:
"analysis": {
"tokenizer": {
"my_pinyin" : {
"type" : "pinyin",
"first_letter" : "only"
}
},
"filter": {
"pinyin_filter" : {
"type" : "pinyin",
"first_letter" : "only",
"padding_char" : ""
}
},
"analyzer": {
"pinyin_analyzer" : {
"tokenizer" : "ansj_query_token",
"filter" : "pinyin_filter"
},
"pinyin_all_analyzer" : {
"tokenizer" : "my_pinyin",
"filter" : "word_delimiter"
},
"index_ansj": {
"type": "custom",
"tokenizer": "ansj_index_token"
},
"query_ansj": {
"type": "custom",
"tokenizer": "ansj_query_token"
}
}
}
分词部分使用的是ansj分词。
场馆mapping如下:
{
"properties": {
"name": {
"type": "string",
"term_vector" : "with_positions_offsets",
"index_analyzer": "index_ansj",
"search_analyzer": "query_ansj",
"fields": {
"pinyin": {
"type": "string",
"index_analyzer": "pinyin_analyzer",
"search_analyzer": "query_ansj"
}
}
},
"address": {
"type": "string",
"term_vector" : "with_positions_offsets",
"index_analyzer": "index_ansj",
"search_analyzer": "query_ansj"
},
"createTime" : {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
name字段采用的是multi_fields(多字段)方式,在索引name字段的同时,也会索引其子字段name.pinyin。
搜索query可以这么写:
{
"_source": ["createTime"],
"query": {
"multi_match": {
"type": "most_fields",
"query": "cs",
"fields": ["name", "name.pinyin","address"],
"minimum_should_match": "-20%"
}
},
"sort": [
{"createTime":{"order":"desc"}}
],
"highlight": {
"boundary_chars":".,!? \t\n,。!?",
"pre_tags" : ["<em>"],
"post_tags" : ["</em>"],
"fields": {
"name" : {
"number_of_fragments" : 0
},
"name.pinyin" : {
"number_of_fragments" : 0
},
"address" : {
"number_of_fragments" : 0
}
}
}
}
以"cs"为关键词搜索,返回结果如下:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "huiti_app_v1",
"_type": "stadium",
"_id": "id1",
"_score": null,
"_source": {
"createTime": "2015-01-01 00:00:00"
},
"highlight": {
"name.pinyin": [
"<em>测试</em>场馆1"
]
},
"sort": [
1420070400000
]
},
{
"_index": "huiti_app_v1",
"_type": "stadium",
"_id": "id2",
"_score": null,
"_source": {
"createTime": "2015-01-01 00:00:00"
},
"highlight": {
"name.pinyin": [
"<em>测试</em>场馆2"
]
},
"sort": [
1420070400000
]
},
{
"_index": "huiti_app_v1",
"_type": "stadium",
"_id": "id3",
"_score": null,
"_source": {
"createTime": "2015-01-01 00:00:00"
},
"highlight": {
"name.pinyin": [
"<em>测试</em>场馆3"
]
},
"sort": [
1420070400000
]
}
]
}
}