实际开发中,使用elasticsearch做搜索时,难免会遇到以下需求:
(假设,搜索"吴京",同时去搜索contentName、actor、director三个字段)
(1)场景1:三个字段中包含"吴京"的文档的排序:contentName>actor>director(即contentName包含吴京的文档在前,actor次之,director最后)
(2)场景2:包含“吴京”的字段多的文档排序靠前,少的靠后
POST /_bulk
{"index":{"_index":"my-test2","_id":"1"}}
{"contentName":"战啥狼","actor":["吴啥京","张啥翰"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"2"}}
{"contentName":"战神啥传奇","actor":["周杰啥伦","吴啥非"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"3"}}
{"contentName":"狼图啥腾","actor":["吴啥京"],"director":[]}
{"index":{"_index":"my-test2","_id":"4"}}
{"contentName":"烽火啥狼烟","actor":["小啥明","王啥华"],"director":["这是战啥狼"]}
{"index":{"_index":"my-test2","_id":"5"}}
{"contentName":"吴啥京做客什么什么","actor":["吴啥京"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"6"}}
{"contentName":"吴啥京","actor":["吴啥京"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"7"}}
{"contentName":"又是一部啥电影","actor":["吴啥京"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"8"}}
{"contentName":"又是一部啥电影2","actor":["哈哈喽"],"director":["吴啥京"]}
{"index":{"_index":"my-test2","_id":"9"}}
{"contentName":"战神杀杀杀狼无敌啊","actor":["吴啥京"],"director":["这是战啥狼"]}
{"index":{"_index":"my-test2","_id":"10"}}
{"contentName":"不知道起什么名字","actor":["这是战啥狼"],"director":["这也是战啥狼"]}
GET /_search
{
"query": {
"function_score": {
"query": { "match_all": {} },
"boost": "5",
"functions": [
{
"filter": { "match": { "test": "bar" } },
"random_score": {},
"weight": 23
},
{
"filter": { "match": { "test": "cat" } },
"weight": 42
}
],
"max_boost": 42,
"score_mode": "max",
"boost_mode": "multiply",
"min_score": 42
}
}
}
score_mode(控制functions中各个function评分之间的组合方式)的可选值:
(1) multiply:默认。相乘
(2) sum:相加
(3) avg:平均值
(4) first:应用第一个function
(5) max: 应用最大的
(6) min:应用最小的
boost_mode(控制function score与query score之间的组合方式)的可选值:
(1) multiply:默认。query score和function score相乘
(2) replace:仅使用function score,忽略query score
(3) sum:query score和function score相加
(4) avg:query score和function score的平均值
(5) max: query score和function score中最大的
(6) min: query score和function score中最小的
使用multi_match最初始的查询评分排序
GET /my-test2/_search
{
"size": 3,
"query": {
"multi_match": {
"query": "吴啥京",
"fields": [
"actor",
"contentName",
"director"
],
"type": "phrase"
}
}
}
{
...
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 4.0664473,
"hits" : [
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "6",
"_score" : 4.0664473,
"_source" : {
"contentName" : "吴啥京",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "5",
"_score" : 2.6965895,
"_source" : {
"contentName" : "吴啥京做客什么什么",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.1954124,
"_source" : {
"contentName" : "狼图啥腾",
"actor" : [
"吴啥京"
],
"director" : [ ]
}
}
]
}
}
使用function_score(score_mode=sum boost_mode=multiply)
GET /my-test2/_search
{
"size": 3,
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "吴啥京",
"fields": [
"actor",
"contentName",
"director"
],
"type": "phrase"
}
},
"functions": [
{
"filter": {
"match_phrase": {
"contentName": "吴啥京"
}
},
"weight": 2
},
{
"filter": {
"match_phrase": {
"actor": "吴啥京"
}
},
"weight": 3
},
{
"filter": {
"match_phrase": {
"director": "吴啥京"
}
},
"weight": 4
}
],
"score_mode": "sum",
"boost_mode": "multiply"
}
}
}
{
...
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 36.598026,
"hits" : [
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "6",
"_score" : 36.598026,
"_source" : {
"contentName" : "吴啥京",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "5",
"_score" : 24.269306,
"_source" : {
"contentName" : "吴啥京做客什么什么",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "7",
"_score" : 8.367887,
"_source" : {
"contentName" : "又是一部啥电影",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
}
]
}
}
从返回的_score字段可以看出,最新的评分 = 将计算评分相加后,又乘以原始的搜索评分(eg: 36.598026 ≈ (2+3+4)*4.0664473 )
使用function_score(score_mode=multiply boost_mode=multiply)
{
...
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 97.594734,
"hits" : [
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "6",
"_score" : 97.594734,
"_source" : {
"contentName" : "吴啥京",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "5",
"_score" : 64.71815,
"_source" : {
"contentName" : "吴啥京做客什么什么",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
},
{
"_index" : "my-test2",
"_type" : "_doc",
"_id" : "7",
"_score" : 14.344949,
"_source" : {
"contentName" : "又是一部啥电影",
"actor" : [
"吴啥京"
],
"director" : [
"吴啥京"
]
}
}
]
}
}
从返回的_score字段可以看出,最新的评分 = 将计算评分相乘后,又乘以原始的搜索评分(eg: 97.594734 ≈ (2*3*4)*4.0664473 )
经测试,score_mode用来控制functions中各个function评分之间的组合方式(是“相乘”还是“相加”等);boost_mode用来控制functions组合后的评分与原始搜索评分之间的组合方式