Elasticsearch 提供了nested字段类型。该类型的出现,主要是由于对对象数组类型的操作往往不能如我们所预期;这是因为在Lucene内部没有对象的概念,所以ES会将层级的JSON数据转化成扁平的键值对列表形式。
例如文档:
PUT my_index/my_type/5
{
"owner" : "小李",
"family" : [
{
"call" : "dad",
"name" : "李俊杰"
},
{
"call" : "mom",
"name" : "李翠莲"
}
]
}
上面的文档在ES内部会被扁平化,相当于被存储成如下形式(call与name之间的对应关系因此丢失):
PUT my_index/my_type/5
{
"owner": "小李",
"family.call": [
"dad",
"mom"
],
"family.name": [
"李俊杰",
"李翠莲"
]
}
而ES中的nested类型字段,则允许对象数组中的每一个对象被独立地索引和查询。
我们先做一个通常的处理方式,看看能够得到什么结果:
使用动态mapping,直接插入数据,对象数组mapping的数据类型是对象类型
PUT my_index/my_type/1
{
"owner": "张三",
"family": [
{
"call": "dad",
"name": "张三爸"
},
{
"call": "mom",
"name": "张三妈"
}
]
}
PUT my_index/my_type/2
{
"owner" : "李四",
"family" : [
{
"call" : "dad",
"name" : "李四爸"
},
{
"call" : "mom",
"name" : "李四妈"
}
]
}
PUT my_index/my_type/3
{
"owner" : "王五",
"family" : [
{
"call" : "dad",
"name" : "王五爸"
},
{
"call" : "mom",
"name" : "王五妈"
}
]
}
PUT my_index/my_type/4
{
"owner" : "赵六",
"family" : [
{
"call" : "dad",
"name" : "赵六爸"
},
{
"call" : "mom",
"name" : "赵六妈"
}
]
}
PUT my_index/my_type/5
{
"owner" : "我",
"family" : [
{
"call" : "dad",
"name" : "我老爸"
},
{
"call" : "mom",
"name" : "我老妈"
}
]
}
执行上面的请求,向索引my_index的类型my_type中添加五条数据。然后查看自动生成的mapping(GET my_index/_mapping),结果如下:
{
"my_index": {
"mappings": {
"my_type": {
"properties": {
"family": {
"properties": {
"call": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"owner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
可以看到family是个对象类型包含两个属性。
must+term查询
GET my_index/my_type/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"family.name.keyword": "王五妈"
}
}, {
"term": {
"family.call.keyword": "dad"
}
}
]
}
}
}
预期没有返回结果,因为没有一个爸爸的名字叫做“王五妈”。
实际返回的结果如下:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.5753642,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "3",
"_score": 0.5753642,
"_source": {
"owner": "王五",
"family": [
{
"call": "dad",
"name": "王五爸"
},
{
"call": "mom",
"name": "王五妈"
}
]
}
}
]
}
}
对象类型下的 filter+term+terms 聚合方式
GET my_index/my_type/_search?size=0
{
"aggs": {
"call": {
"filter": {
"term": {
"family.call.keyword": "dad"
}
},
"aggs": {
"name": {
"terms": {
"field": "family.name.keyword",
"size": 10
}
}
}
}
}
}
期望只得到5个爸爸的统计结果,实际却把妈妈的名称也一并返回了:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"call": {
"doc_count": 5,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "张三妈",
"doc_count": 1
},
{
"key": "张三爸",
"doc_count": 1
},
{
"key": "我老妈",
"doc_count": 1
},
{
"key": "我老爸",
"doc_count": 1
},
{
"key": "李四妈",
"doc_count": 1
},
{
"key": "李四爸",
"doc_count": 1
},
{
"key": "王五妈",
"doc_count": 1
},
{
"key": "王五爸",
"doc_count": 1
},
{
"key": "赵六妈",
"doc_count": 1
},
{
"key": "赵六爸",
"doc_count": 1
}
]
}
}
}
}
可见对于对象数组如果不做特殊处理的话,其结果是不符合预期的。
可以建一个动态映射的模板:
在建模板之前,为了测试方便,先把上面例子中的测试数据删除,重新开始:
删除执行操作:DELETE my_index
PUT my_index
{
"mappings": {
"my_type": {
"dynamic_templates": [
{
"object_as_nest": {
"match_mapping_type": "object",
"mapping": {
"type": "nested"
}
}
}
]
}
}
}
这样的话,对象类型就映射为nested类型,其他的字段依旧按照默认的动态映射。
插入数据的请求与前面完全相同,这里就省略。
使用上面例子中第一步的插入操作将数据添加进来,
得到五条数据。此时再查看mapping(GET my_index/_mapping),结果如下:
{
"my_index": {
"mappings": {
"my_type": {
"dynamic_templates": [
{
"object_as_nest": {
"match_mapping_type": "object",
"mapping": {
"type": "nested"
}
}
}
],
"properties": {
"family": {
"type": "nested",
"properties": {
"call": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"owner": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
nested+must+term
GET my_index/my_type/_search
{
"query": {
"nested": {
"path": "family",
"score_mode": "sum",
"query": {
"bool": {
"must": [
{
"term": {
"family.call.keyword": "dad"
}
},
{
"term": {
"family.name.keyword": "王五妈"
}
}
]
}
}
}
}
}
返回结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
GET my_index/my_type/_search
{
"query": {
"nested": {
"path": "family",
"score_mode": "sum",
"query": {
"bool": {
"must": [
{
"term": {
"family.call.keyword": "dad"
}
},
{
"term": {
"family.name.keyword": "张三爸"
}
}
]
}
}
}
}
}
查询结果为:一条结果,和预期结果一致。
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.3862944,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_score": 1.3862944,
"_source": {
"owner": "张三",
"family": [
{
"call": "dad",
"name": "张三爸"
},
{
"call": "mom",
"name": "张三妈"
}
]
}
}
]
}
}
nested+filter+term+terms 聚合方式
GET my_index/my_type/_search?size=0
{
"aggs": {
"家庭": {
"nested": {
"path": "family"
},
"aggs": {
"爸爸": {
"filter": {
"term": {
"family.call.keyword": "dad"
}
},
"aggs": {
"爸爸集合": {
"terms": {
"field": "family.name.keyword",
"size": 10
}
}
}
}
}
}
}
}
聚合结果如下:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"家庭": {
"doc_count": 10,
"爸爸": {
"doc_count": 5,
"爸爸集合": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "张三爸",
"doc_count": 1
},
{
"key": "我老爸",
"doc_count": 1
},
{
"key": "李四爸",
"doc_count": 1
},
{
"key": "王五爸",
"doc_count": 1
},
{
"key": "赵六爸",
"doc_count": 1
}
]
}
}
}
}
}
GET my_index/my_type/_search?size=0
{
"aggs": {
"家庭": {
"nested": {
"path": "family"
},
"aggs": {
"妈妈": {
"filter": {
"term": {
"family.call.keyword": "mom"
}
},
"aggs": {
"妈妈集合": {
"terms": {
"field": "family.name.keyword",
"size": 10
}
}
}
}
}
}
}
}
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"家庭": {
"doc_count": 10,
"妈妈": {
"doc_count": 5,
"妈妈集合": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "张三妈",
"doc_count": 1
},
{
"key": "我老妈",
"doc_count": 1
},
{
"key": "李四妈",
"doc_count": 1
},
{
"key": "王五妈",
"doc_count": 1
},
{
"key": "赵六妈",
"doc_count": 1
}
]
}
}
}
}
}
可见使用nested类型之后,查询和聚合的结果都符合预期,而且使用起来也相对简单,尤其是动态mapping模板的建立。