本篇文章主要讲解Elasticsearch在业务中经常用到的字段类型,并通过大量的范例来学习和理解不同字段类型的应用场景。范例使用的Elasticsearch版本为7.17.5。
在Elasticsearch的映射关系中,每个字段都对应一个数据类型或者字段类型,这些类型规范了字段存储的值和用途。
#创建名为userinfo的索引库并为其创建映射关系 PUT userinfo { "mappings": { "properties": { "age": { "type": "long" }, "aliasage": { "type": "alias", "path": "age" }, "transit_mode": { "type": "keyword" } } } }
#以上语句创建了userinfo索引库,而且为age字段创建了名为aliasage的别名。 #在索引库userinfo中插入一条文档数据 PUT userinfo/_doc/1 { "age": 39, "transit_mode": "transit_mode" } #通过年龄查询大于30的用户信息 GET userinfo/_doc/_search { "query": { "range": { "age": { "gte": 30 } } } } #通过别名查询年龄大于30的用户信息 GET userinfo/_doc/_search { "query": { "range": { "aliasage": { "gte": 30 } } } } #通过年龄、别名查询返回的结果信息如下 { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 1, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "userinfo", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "age" : 39, "transit_mode" : "transit_mode" } } ] } }
#创建索引映射并指定blob字段的类型为二进制类型 PUT myindex-2_02 { "mappings": { "properties": { "name": { "type": "text" }, "blob": { "type": "binary" } } } } #在索引库中插入数据,blob的内容是Base64编码的字符串 PUT myindex-2_02/_doc/1 { "name":"Some binary blob", "blob":"c2FkZw==" }
#创建索引映射并指定user字段为一个嵌套类型 PUT myindex-2_07 { "mappings": { "properties": { "user":{ "type": "nested" } } } }
#在索引库中插入文档数据,user字段中嵌套了键值对 PUT myindex-2_07/_doc/1 { "group": "fans", "user": [ { "first": "John", "last": "Smith" }, { "first": "Alice", "last": "White" } ] }
#查询索引库的user字段中user.first的值是Alice并且user.last的值是Smith的文档 GET myindex-2_07/_search { "query": { "nested": { "path": "user", "query": { "bool": { "must": [ { "match": { "user.first": "Alice" } }, { "match": { "user.last": "Smith" } } ] } } } } }
范围类型 |
说明 |
integer_range |
表示有符号的32位整数 |
float_range |
表示单精度浮点数 |
long_range |
表示有符号的64位整数 |
double_range |
表示双精度浮点数 |
date_range |
表示日期范围,可以通过format映射参数支持各种日期格式。无论使用哪种格式,日期值都会被解析为一个无符号的64位整数,该整数为纪元以来的毫秒数。 |
ip_range |
表示IPv4或IPv6地址的一系列IP值 |
#创建索引映射,并指定expected_attendees字段类型为整数范围类型,time_frame字段类型为日期范围类型 PUT myindex-2_08 { "mappings": { "properties": { "expected_attendees": { "type": "integer_range" }, "time_frame":{ "type": "date_range", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" } } } } #插入文档数据 PUT myindex-2_08/_doc/1?refresh { "expected_attendees":{ "gte":10, "lt":20 }, "time_frame":{ "gte":"2021-10-31 12:00:00", "lte":"2021-11-01" } }
GET myindex-2_08/_search { "query": { "term": { "expected_attendees": { "value": "12" } } } }
#根据日期范围查询符合条件的文档数据 GET myindex-2_08/_search { "query": { "range": { "time_frame": { "gte": "2021-10-31", "lte": "2021-11-01", "relation": "within" } } } }
#创建索引映射并将"pagerank"和"topics"这两个字段类型分别指定为"rank_feature"类型和"rank_features"类型 PUT myindex-2_10 { "mappings": { "properties": { "pagerank":{ "type": "rank_feature" }, "url_length":{ "type": "rank_feature", "positive_score_impact":false }, "topics":{ "type": "rank_features" } } } } #在索引库中插入数据 PUT myindex-2_10/_doc/1 { "url": "http://en.wikipedia.org/wiki/2016_Summer_Olympics", "content": "Rio 2016", "pagerank": 50.3, "url_length": 42, "topics": { "sports": 50, "brazil": 30 } } #在索引库中插入数据 PUT myindex-2_10/_doc/2 { "url": "http://en.wikipedia.org/wiki/2016_Brazilian_Grand_Prix", "content": "Formula One motor race held on 13 November 2016 at the Autodromo Jose Carlos Pace in Sao Paulo,Brazil", "pagerank": 50.3, "url_length": 47, "topics": { "sports": 50, "brazil": 20, "formula one":65 } } #在索引库中插入数据 PUT myindex-2_10/_doc/3 { "url": "http://en.wikipedia.org/wiki/Deadpool_(film)", "content": "Deadpool is a 2016 American superhero film", "pagerank": 50.3, "url_length": 37, "topics": { "movies":60, "super hero":65 } } #查询索引库的content字段值中包含"2016"的文档,并根据评分(score字段的值)排序输出 GET myindex-2_10/_search { "query": { "bool": { "must": [ { "match": { "content": "2016" } } ], "should": [ { "rank_feature": { "field": "pagerank" } }, { "rank_feature": { "field": "url_length", "boost": 0.1 } }, { "rank_feature": { "field": "topics.sports", "boost": 0.4 } } ] } } }
{ "mappings": { "properties": { "ip_field": { "type": "ip" } } } }
PUT my-index/_doc/1 { "ip_field": "192.168.1.1" } PUT my-index/_doc/2 { "ip_field": "2001:0db8:85a3:0000:0000:8a2e:0370:7334" }
GET my-index/_search { "query": { "range": { "ip_field": { "gte": "192.168.1.1", "lte": "192.168.1.10" } } } }
GET my-index/_search { "query": { "term": { "ip_field": "192.168.1.0/24" } } }
GET my-index/_search { "sort": [ { "ip_field": { "order": "asc" } } ] }
{ "mappings": { "properties": { "title": { "type": "search_as_you_type" } } } }
创建的字段 |
说明 |
my_field |
按照映射中的配置进行分析,如果未配置分词器,则使用索引的默认分词器 |
my_field._2gram |
用大小为2的shingle token filter 分词器对 my_field进行分词 |
my_field._3gram |
用大小为3的shingle token filter 分词器对 my_field进行分词 |
my_field._index_prefix |
用edge ngram token filter 对 my_field._3gram的分词器进行包装 |
{ "query": { "multi_match": { "query": "search text", "type": "bool_prefix", "fields": [ "title", "title._2gram", "title._3gram" ] } } }
#创建索引映射,给name字段添加子字段,其名称是length,类型是token_count,使用standard分词器进行分词 PUT myindex-tokencount { "mappings": { "properties": { "name":{ "type": "text", "fields": { "length":{ "type":"token_count", "analyzer":"standard" } } } } } } #添加文档数据 PUT myindex-tokencount/_doc/1 { "name":"John Smith" } #添加文档数据 PUT myindex-tokencount/_doc/2?pretty { "name":"Rachel Alice Williams" } PUT myindex-tokencount/_doc/3 { "name":"长大" } #查询索引库中name字段被分词后,分词的数量等于2的文档 GET myindex-tokencount/_search { "query": { "term": { "name.length": { "value": "2" } } } }
#使用standard分词器对内容"John Smith"进行分词处理,返回分词后的结果 GET myindex-tokencount/_analyze { "analyzer": "standard", "text":["John Smith"] }