本文基于Elasticsearch7.x
Elasticsearch可以控制相关度分数的计算, 让满足某些条件的文档得分更高.
在学习本篇博客前先了解下Elasticsearch全文搜索之基础语法API
Elasticsearch有三种控制相关度分数的方法: boost、boosting和function_score.
添加搜索实例数据
POST /blogs/_bulk
{"index": {}}
{"post_date": "2020-01-01", "title": "Quick brown rabbits", "content": "Brown rabbits are commonly seen.", "author_id": 11401}
{"index": {}}
{"post_date": "2020-01-02", "title": "Keeping pets healthy", "content": "My quick brown fox eats rabbits on a regular basis.", "author_id": 11402}
{"index": {}}
{"post_date": "2020-01-03", "title": "My dog barks", "content": "I see a lot of barking dogs on the road.", "author_id": 11403}
boost
(1) 语法
(2) 实例
GET /blogs/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"content": {
"query": "rabbits",
"boost": 2
}
}
},
{
"match": {
"title": {
"query": "brown",
"boost": 0.1
}
}
}
]
}
}
}
boosting
(1) 语法
(2) 实例
GET /blogs/_search
{
"query": {
"boosting": {
"positive": {
"match": {
"title": "rabbits"
}
},
"negative": {
"term": {
"post_date": {
"value": "2020-01-02"
}
}
},
"negative_boost": 0.2
}
}
}
function_score
function_score可以在搜索结束后, 对每一个匹配的文档进行一系列的重新算分, 根据新生成的分数进行排序.
(1) 语法
GET /blogs/_search
{
"query": {
"function_score": {
"query": {
全局匹配条件
},
"functions": [
{
"filter": {
函数生效的匹配条件
},
function
},
{
"filter": {
函数生效的匹配条件
},
function
}
],
"boost": ,
"score_mode": "",
"boost_mode": "",
"min_score": ,
"max_boost":
}
}
}
a. functions
b. score_mode
存在多个函数时, 计算最终函数分数的方式.
c. boost_mode
计算函数分数与查询分数的综合得分的方式.
d. max_boost
最大的函数分数, 函数分数不能超过max_boost.
e. min_score
最小综合分数, 最终综合分数小于min_score的文档将会被过滤掉.
f. boost
设置一个基础分数.
(2) 实例
a. Weight和Random Score
GET /blogs/_search
{
"query": {
"function_score": {
"query": {
"match": {
"content": "rabbits"
}
},
"functions": [
{
"filter": {
"term": {
"post_date": "2020-01-01"
}
},
"random_score": {
"seed": 10,
"field": "_seq_no"
},
"weight": 23
},
{
"filter": {
"term": {
"author_id": "11402"
}
},
"weight": 42
}
],
"boost": 5,
"score_mode": "multiply",
"boost_mode": "multiply",
"min_score": 42,
"max_boost": 42
}
}
}
b. Field Value Factor
函数得分 = modifier(factor * field.value).
GET /blogs/_search
{
"query": {
"function_score": {
"query": {
"match": {
"content": "rabbits"
}
},
"functions": [
{
"field_value_factor": {
"field": "author_id",
"factor": 1.2,
"modifier": "sqrt",
"missing": 1
}
}
]
}
}
}
c. Decay Functions
衰减函数有linear, exp和gauss三种(线性、指数和高斯函数), 我们以gauss为例.
具体计算公式看官网: https://www.elastic.co/guide/en/elasticsearch/reference/7.5/query-dsl-function-score-query.html#function-decay
GET /blogs/_search
{
"query": {
"function_score": {
"query": {
"match": {
"content": "rabbits"
}
},
"functions": [
{
"gauss": {
"post_date": {
"origin": "2020-01-03",
"scale": "10d",
"offset": "5d",
"decay": 0.5
}
}
}
]
}
}
}
下面介绍Elasticsearch Java Client 的使用, 我们来将上文的实例转化为 Java Client.
(1) main方法
/**
 * Entry point: indexes the sample blog documents and then runs every search
 * example of this article against the Elasticsearch node at
 * {@code http://localhost:9200}.
 *
 * <p>NOTE: the searches run right after {@code bulkIndex}; documents only
 * become searchable after an index refresh, so the searches may return no hits
 * unless the bulk request waits for (or forces) a refresh.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a request to Elasticsearch fails or the client cannot be closed
 */
public static void main(String[] args) throws IOException {
    // try-with-resources guarantees the client is closed even when one of the
    // examples throws; the previous trailing client.close() was skipped on failure.
    try (RestHighLevelClient client = new RestHighLevelClient(
            RestClient.builder(
                    new HttpHost("localhost", 9200, "http")))) {
        bulkIndex(client);
        boost(client);
        boosting(client);
        functionScore(client);
        fieldValueFactor(client);
        decayFunction(client);
    }
}
新增文档后立即查询可能会查不到文档: 新写入的文档要等到下一次refresh(默认refresh_interval为1s)之后才能被搜索到, 这也是ES被称为近实时的原因. 因此新增文档和查询文档请求不要紧接着一起执行, 除非写入请求设置了refresh策略(例如wait_for).
(2) 添加搜索数据
/**
 * Indexes the three sample blog posts into the {@code blogs} index with a
 * single bulk request, using the fixed document ids {@code 1}, {@code 2} and
 * {@code 3}.
 *
 * <p>The request uses the {@code wait_for} refresh policy, so the call returns
 * only after the documents have been made visible to search. Without it, a
 * search issued right after this method could miss the new documents.
 *
 * @param client the high-level REST client used to send the bulk request
 * @throws IOException if the bulk request cannot be sent or its response cannot be read
 */
private static void bulkIndex(RestHighLevelClient client) throws IOException {
    BulkRequest bulkRequest = new BulkRequest();
    // Block until a refresh has made these writes searchable.
    bulkRequest.setRefreshPolicy("wait_for");
    bulkRequest.add(new IndexRequest("blogs").id("1")
            .source(XContentType.JSON, "post_date", "2020-01-01", "title", "Quick brown rabbits", "content", "Brown rabbits are commonly seen.", "author_id", 11401));
    bulkRequest.add(new IndexRequest("blogs").id("2")
            .source(XContentType.JSON, "post_date", "2020-01-02", "title", "Keeping pets healthy", "content", "My quick brown fox eats rabbits on a regular basis.", "author_id", 11402));
    bulkRequest.add(new IndexRequest("blogs").id("3")
            .source(XContentType.JSON, "post_date", "2020-01-03", "title", "My dog barks", "content", "I see a lot of barking dogs on the road.", "author_id", 11403));
    client.bulk(bulkRequest, RequestOptions.DEFAULT);
}
(3) boost
/**
 * Runs the {@code boost} example: a bool query with two {@code should}
 * clauses, a match on {@code content} for "rabbits" boosted by 2 and a match
 * on {@code title} for "brown" boosted by 0.1. Prints the source of every hit.
 *
 * @param client the high-level REST client used to execute the search
 * @throws IOException if the search request fails
 */
private static void boost(RestHighLevelClient client) throws IOException {
    // Two optional clauses, each carrying its own boost.
    BoolQueryBuilder weightedClauses = new BoolQueryBuilder()
            .should(new MatchQueryBuilder("content", "rabbits").boost(2f))
            .should(new MatchQueryBuilder("title", "brown").boost(0.1f));

    SearchRequest request = new SearchRequest("blogs")
            .source(new SearchSourceBuilder().query(weightedClauses));

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    for (SearchHit match : response.getHits().getHits()) {
        System.out.println(match.getSourceAsString());
    }
}
(4) boosting
/**
 * Runs the {@code boosting} example: the positive query matches "rabbits" in
 * {@code title}, the negative query is a term query on {@code post_date} for
 * "2020-01-02", and {@code negative_boost} is 0.2. Prints the source of every hit.
 *
 * @param client the high-level REST client used to execute the search
 * @throws IOException if the search request fails
 */
private static void boosting(RestHighLevelClient client) throws IOException {
    // The first argument is the positive query, the second the negative query.
    BoostingQueryBuilder demotingQuery = new BoostingQueryBuilder(
            new MatchQueryBuilder("title", "rabbits"),
            new TermQueryBuilder("post_date", "2020-01-02"))
            .negativeBoost(0.2f);

    SearchRequest request = new SearchRequest("blogs")
            .source(new SearchSourceBuilder().query(demotingQuery));

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    for (SearchHit match : response.getHits().getHits()) {
        System.out.println(match.getSourceAsString());
    }
}
(5) Weight和Random Score
/**
 * Runs the Weight / Random Score example: a {@code function_score} query over
 * a match on {@code content} for "rabbits" with two filtered functions.
 *
 * <ul>
 *   <li>filter {@code post_date = 2020-01-01}: random score (seed 10, field
 *       {@code _seq_no}) with weight 23</li>
 *   <li>filter {@code author_id = 11402}: weight 42</li>
 * </ul>
 *
 * <p>The query uses boost 5, score mode and boost mode {@code multiply},
 * {@code min_score} 42 and {@code max_boost} 42. Prints the source of every hit.
 *
 * @param client the high-level REST client used to execute the search
 * @throws IOException if the search request fails
 */
private static void functionScore(RestHighLevelClient client) throws IOException {
    // Function 1: weighted random score, applied only to posts dated 2020-01-01.
    RandomScoreFunctionBuilder seededRandom = ScoreFunctionBuilders.randomFunction()
            .seed(10)
            .setField("_seq_no")
            .setWeight(23);
    FunctionScoreQueryBuilder.FilterFunctionBuilder randomForDate =
            new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    new TermQueryBuilder("post_date", "2020-01-01"), seededRandom);

    // Function 2: plain weight, applied only to author 11402.
    WeightBuilder authorWeight = ScoreFunctionBuilders.weightFactorFunction(42);
    FunctionScoreQueryBuilder.FilterFunctionBuilder weightForAuthor =
            new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    new TermQueryBuilder("author_id", 11402), authorWeight);

    // Base query plus both functions, combined with the scoring options.
    FunctionScoreQueryBuilder.FilterFunctionBuilder[] scoringFunctions = {randomForDate, weightForAuthor};
    FunctionScoreQueryBuilder scoredQuery =
            new FunctionScoreQueryBuilder(new MatchQueryBuilder("content", "rabbits"), scoringFunctions);
    scoredQuery.boost(5);
    scoredQuery.scoreMode(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery.ScoreMode.MULTIPLY);
    scoredQuery.boostMode(CombineFunction.MULTIPLY);
    scoredQuery.setMinScore(42);
    scoredQuery.maxBoost(42);

    SearchRequest request = new SearchRequest("blogs")
            .source(new SearchSourceBuilder().query(scoredQuery));

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    for (SearchHit match : response.getHits().getHits()) {
        System.out.println(match.getSourceAsString());
    }
}
(6) Field Value Factor
/**
 * Runs the Field Value Factor example: a {@code function_score} query over a
 * match on {@code content} for "rabbits", scored by a
 * {@code field_value_factor} function on {@code author_id} with factor 1.2,
 * modifier {@code sqrt} and missing value 1. Prints the source of every hit.
 *
 * @param client the high-level REST client used to execute the search
 * @throws IOException if the search request fails
 */
private static void fieldValueFactor(RestHighLevelClient client) throws IOException {
    FieldValueFactorFunctionBuilder authorIdFactor = ScoreFunctionBuilders
            .fieldValueFactorFunction("author_id")
            .factor(1.2f)
            .modifier(FieldValueFactorFunction.Modifier.SQRT)
            .missing(1);

    FunctionScoreQueryBuilder scoredQuery =
            new FunctionScoreQueryBuilder(new MatchQueryBuilder("content", "rabbits"), authorIdFactor);

    SearchRequest request = new SearchRequest("blogs")
            .source(new SearchSourceBuilder().query(scoredQuery));

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    for (SearchHit match : response.getHits().getHits()) {
        System.out.println(match.getSourceAsString());
    }
}
(7) Decay Functions
/**
 * Runs the Decay Functions example: a {@code function_score} query over a
 * match on {@code content} for "rabbits", scored by a {@code gauss} decay
 * function on {@code post_date} with origin "2020-01-03", scale "10d",
 * offset "5d" and decay 0.5. Prints the source of every hit.
 *
 * @param client the high-level REST client used to execute the search
 * @throws IOException if the search request fails
 */
private static void decayFunction(RestHighLevelClient client) throws IOException {
    // Arguments after the field name: origin, scale, offset, decay.
    GaussDecayFunctionBuilder dateDecay =
            ScoreFunctionBuilders.gaussDecayFunction("post_date", "2020-01-03", "10d", "5d", 0.5);

    FunctionScoreQueryBuilder scoredQuery =
            new FunctionScoreQueryBuilder(new MatchQueryBuilder("content", "rabbits"), dateDecay);

    SearchRequest request = new SearchRequest("blogs")
            .source(new SearchSourceBuilder().query(scoredQuery));

    SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    for (SearchHit match : response.getHits().getHits()) {
        System.out.println(match.getSourceAsString());
    }
}