本文基于 Elasticsearch 7.3.0 版本。
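edge_ngram 和 ngram 是 Elasticsearch 内置的两种切分方式，二者都同时提供了 tokenizer 和 token filter 两种形式；本文使用的是 token filter 形式，并搭配 keyword tokenizer（即先把整段文本当作一个词元，再由 filter 进行切分）。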
步骤
创建测试索引（ngram 的 max_gram 与 min_gram 之差默认不能超过 1，而下面 ngram_filter 的差值为 3，因此需要通过 index.max_ngram_diff 调大该限制）
PUT analyzer_test
{
"settings": {
"refresh_interval": "1s",
"index": {
"max_ngram_diff": 10
},
"analysis": {
"analyzer": {
"edge_ngram_analyzer": {
"type": "custom",
"char_filter": [],
"tokenizer": "keyword",
"filter": [
"edge_ngram_filter"
]
},
"ngram_analyzer": {
"type": "custom",
"char_filter": [],
"tokenizer": "keyword",
"filter": [
"ngram_filter"
]
}
},
"filter": {
"edge_ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 11
},
"ngram_filter": {
"type": "ngram",
"min_gram": 2,
"max_gram": 5
}
}
}
}
}
测试 edge_ngram_analyzer 分析器（edge_ngram 只从词元开头起，按 min_gram 到 max_gram 的长度依次截取前缀）
POST /analyzer_test/_analyze
{
"text": "虹桥机场",
"analyzer": "edge_ngram_analyzer"
}
返回结果：
{
"tokens" : [
{
"token" : "虹",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "虹桥",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "虹桥机",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "虹桥机场",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
}
]
}
测试 ngram_analyzer 分析器（ngram 会从每个字符位置起，截取长度在 min_gram 到 max_gram 之间的所有子串；这里 min_gram 为 2，所以结果中没有单字词元）
POST /analyzer_test/_analyze
{
"text": "虹桥机场",
"analyzer": "ngram_analyzer"
}
返回结果：
{
"tokens" : [
{
"token" : "虹桥",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "虹桥机",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "虹桥机场",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "桥机",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "桥机场",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "机场",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
}
]
}