如果你想在elasticsearch中使用中文+拼音的混合分词搜索:中文分词、拼音分词单独使用的时候,大家一般都能合理运用,但是一旦混合搜索就会出现一些问题。如果你有图中这样的需求,那么本文就能帮助到你。
我的做法是修改拼音分词插件的源码,然后重新打包上传,安装修改后的插件之后就可以进行图中的搜索了。
es的版本:
elasticsearch 6.8.4
如果你正好是6.8.4,你就可以通过百度云盘下载下来直接使用了.
链接: https://pan.baidu.com/s/1GVtK6LeFuYWfbL5B6BotVQ 提取码: k4i8 复制这段内容后打开百度网盘手机App,操作更方便哦
如果不是这个版本,那么就需要自己修改源码并编译了。我是参考下面这位大佬的博客修改的,你们也可以按照这位大佬的代码提示进行修改,我觉得已经很详细了:
[es 修改拼音分词器源码实现汉字/拼音/简拼混合搜索时同音字不匹配]
插件安装好之后就可以使用了,接下来贴代码:
/**
 * Runs a mixed Chinese + pinyin phrase search against the "address" index
 * and prints the source of every returned hit.
 *
 * <p>The same keyword is matched as a phrase against each address field;
 * the per-field queries are combined as {@code should} clauses of one bool
 * query, and the same fields are registered with the highlighter.
 */
@Test
public void setPyService() {
    String keyword = "武houq";
    // Fields that are both searched and highlighted, in registration order.
    String[] searchFields = {"region", "town", "villag", "road", "plotName"};

    HighlightBuilder highlighter = new HighlightBuilder();
    BoolQueryBuilder anyFieldMatches = QueryBuilders.boolQuery();
    for (String fieldName : searchFields) {
        highlighter.field(fieldName);
        anyFieldMatches.should(QueryBuilders.matchPhraseQuery(fieldName, keyword));
    }

    SearchResponse response = client.prepareSearch("address")
        .setTypes("address")
        .setQuery(anyFieldMatches)
        .highlighter(highlighter)
        .execute()
        .actionGet();

    SearchHit[] hits = response.getHits().getHits();
    for (SearchHit hit : hits) {
        System.out.println(hit.getSourceAsString());
    }
}
执行的结果:
{"id":"d06456165718469fbab524d513b1358c","region":"武侯区","town":"玉林街道办事处","villag":"黉门街社区","road":"黉门街29号","plotName":"武侯区教育局","dn":""}
{"id":"0c040fb2b5c94f89a5bda30995b26874","region":"武侯区","town":"浆洗街街道办事处","villag":"蜀汉社区","road":"蜀汉东街8号","plotName":"武侯区政府宿舍","dn":""}
{"id":"ed63768257234cca8c7fc5b1d35dd3ea","region":"武侯区","town":"双楠街道办事处","villag":"双楠街社区","road":"双楠街165号","plotName":"武侯区教师公寓","dn":"1幢"}
{"id":"f281711eaff140178167f810bc0de143","region":"武侯区","town":"双楠街道办事处","villag":"双楠街社区","road":"双楠街165号","plotName":"武侯区教师公寓","dn":"3幢"}
{"id":"ad18f51264fe4ef0ac01b5e918021834","region":"武侯区","town":"浆洗街街道办事处","villag":"蜀汉社区","road":"蜀汉东街8号","plotName":"武侯区政府宿舍","dn":"门卫室"}
{"id":"99b022cb06684dd9a247f8333fc189c1","region":"武侯区","town":"簇桥街道办事处","villag":"凉水井村","road":"","plotName":"武侯区鸿鑫皮鞋","dn":""}
{"id":"f05c75c071444e2b97476ef8e5564f57","region":"武侯区","town":"金花街道办事处","villag":"双凤村","road":"凤翔路","plotName":"武侯区兴宏医疗","dn":""}
{"id":"eb279f23eb63454e8a73691366f85570","region":"武侯区","town":"金花街道办事处","villag":"双凤村","road":"金兴中路197号","plotName":"武侯区伊人门诊","dn":""}
{"id":"8ea46ba4a5a940dfb7da6986f96a6a84","region":"武侯区","town":"浆洗街街道办事处","villag":"蜀汉社区","road":"蜀汉东街8号","plotName":"武侯区政府宿舍","dn":"2幢"}
{"id":"c6f3fa089e5c4c0e984dd07f4add77fc","region":"武侯区","town":"双楠街道办事处","villag":"双楠街社区","road":"双楠街165号","plotName":"武侯区教师公寓","dn":"2幢"}
这里可能有人配置了还是没有成功,我贴一下我的es的mapping和setting设置吧
这个setting可以直接使用,前提是你得有以下的插件:
elasticsearch-analysis-hanlp-6.8.4 中文分词
elasticsearch-analysis-ik-6.8.4 ik分词
elasticsearch-analysis-stconvert-6.8.4 繁体字转换
elasticsearch-analysis-pinyin-6.8.4 拼音分词
如果以上几个插件都已经安装,就可以直接使用
{
"index": {
"analysis": {
"filter": {
"edge_ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 50
},
"pinyin_simple_filter": {
"type": "pinyin",
"keep_separate_first_letter": false,
"keep_full_pinyin": false,
"keep_original": true,
"keep_joined_full_pinyin": true,
"limit_first_letter_length": 16,
"lowercase": true,
"remove_duplicated_term": true
}
},
"char_filter": {
"tsconvert": {
"type": "stconvert",
"convert_type": "t2s"
}
},
"analyzer": {
"pinyin_chinese_analyzer": {
"tokenizer": "pinyin_tokenizer"
},
"pinyin_analyzer": {
"tokenizer": "pinyin_chinese_tokenizer"
},
"ikSearchAnalyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"char_filter": [
"tsconvert"
]
},
"my_analyzer": {
"type": "hanlp",
"algorithm": "viterbi",
"enableIndexMode": "true",
"enableCustomDictionary": "true",
"customDictionaryPath": "",
"enableCustomDictionaryForcing": "true",
"enableStopWord": "true",
"stopWordDictionaryPath": "",
"enableNumberQuantifierRecognize": "true",
"enableNameRecognize": "true",
"enableTranslatedNameRecognize": "true",
"enableJapaneseNameRecognize": "true",
"enableOrganizationRecognize": "true",
"enablePlaceRecognize": "true",
"enableTraditionalChineseMode": "false"
},
"pinyinSimpleIndexAnalyzer": {
"tokenizer": "keyword",
"filter": [
"pinyin_simple_filter",
"edge_ngram_filter",
"lowercase",
"edgeNGram",
"standard"
]
}
},
//这里按照我这样设置的话,最后返回的就是上面的执行结果;当然如果你想自己定义,可以参考拼音插件github上的参数说明自行添加
"tokenizer": {
"pinyin_chinese_tokenizer": {
"type": "pinyin",
"keep_first_letter": true,
"keep_separate_first_letter": true,
"keep_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 50,
"keep_separate_chinese": true,
"lowercase": true,
"keep_none_chinese_together": true
},
"pinyin_tokenizer": {
"type": "pinyin",
"keep_first_letter": true,
"keep_separate_first_letter": true,
"keep_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 50,
"keep_separate_chinese": true,
"lowercase": true,
"keep_none_chinese_together": true
}
}
}
}
}
{
"address": {
"_all": {
"enabled": false
},
"properties": {
"id": {
"type": "keyword"
},
"region": {
"type": "text",
//其他字段按照这个方式配置即可,可以直接复制过去,改一下字段名就行了
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
},
"town": {
"type": "text",
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
},
"villag": {
"type": "text",
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
},
"road": {
"type": "text",
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
},
"plotName": {
"type": "text",
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
},
"dn": {
"type": "text",
"search_analyzer": "pinyin_chinese_analyzer",
"analyzer": "pinyin_analyzer"
}
}
}
}
完毕