Es基础语法整理

Es基础语法整理

java api 文档
使用kibana工具整理

索引相关

创建索引

# 创建索引
PUT testindex

# 响应结果  注意:创建索引库的分片数默认 1 片,在 7.0.0 之前的 Elasticsearch 版本中,默认 5 片
{
 # 响应结果true 操作成功
 "acknowledged": true, 
 # 分片结果 分片操作成功
 "shards_acknowledged": true, 
 # 索引名称
 "index": "testindex"
}

查看所有索引

# 查看所有索引 `_cat` 表示查看的意思
GET /_cat/indices?v

# 结果
health status index                           uuid                   pri rep docs.count docs.deleted store.size pri.store.size
green  open   .kibana_task_manager            pr5CLJH9T02vHvKRmui1CQ   1   0          2            0     53.8kb         53.8kb
yellow open   book                            IZPhfTm_TxOUhseXinYRmQ   1   1          4            2     26.3kb         26.3kb
green  open   .monitoring-kibana-7-2021.12.08 ty6fkh_4QE-qC3RCAaJ1zw   1   0       2118            0    686.9kb        686.9kb
green  open   .monitoring-es-7-2021.12.21     f9qSRCdrSImyrqGJ355t1Q   1   0       1334         1135      1.6mb          1.6mb
green  open   .monitoring-es-7-2021.12.15     EwTAxZXcSEyd9xHjtVVtzQ   1   0       7266            0        3mb            3mb
green  open   .monitoring-es-7-2021.12.05     EOV6x_W7Rt-eMmeMkNepzw   1   0       2822            0      1.2mb          1.2mb
green  open   .kibana_1                       seOZsItZTbWSyiJM13x2wQ   1   0          7            2     61.3kb         61.3kb
yellow open   test_sd_data                    e-B5PLZ2Rja70xmQVXFG3A   1   1         11            0    101.2kb        101.2kb
green  open   .monitoring-es-7-2021.12.08     J8S_3dY0QS2ZSCiYOciBOA   1   0      27711            0      8.9mb          8.9mb
yellow open   testindex                       qNOOFYeOSGWHyQlxWAnpiA   1   1          0            0       283b           283b
green  open   .monitoring-es-7-2021.12.06     EpGROCORRJWu6wQaN8Kdwg   1   0      38197            0     10.6mb         10.6mb
green  open   .monitoring-kibana-7-2021.12.15 Za04EFHGQT-bwX8aIPBjmg   1   0        456            0    163.9kb        163.9kb
green  open   .monitoring-kibana-7-2021.12.05 PNh-6JVeQFepnFK-eBEzdg   1   0        288            0     95.2kb         95.2kb
yellow open   shopping                        zrshQEXaTd2maNh88J0WGA   1   1          0            0       283b           283b
green  open   .monitoring-kibana-7-2021.12.21 8eL43D9MREumvuOh8vJJWw   1   0         66            0      129kb          129kb

表头 含义
health 索引健康状态:green(所有主分片和副本分片均已分配) yellow(主分片均已分配,但存在未分配的副本分片) red(存在未分配的主分片)
status 索引打开、关闭状态
index 索引名
uuid 索引统一编号
pri 主分片数量
rep 副本数量
docs.count 可用文档数量
docs.deleted 已删除文档数量(逻辑删除,尚未物理清除)
store.size 主分片和副分片整体占空间大小
pri.store.size 主分片占空间大小

查看单个索引

# 查询单个索引
GET testindex

# 结果
{
  "testindex" : {
    "aliases" : { },
    "mappings" : { },
    "settings" : {
      "index" : {
        "creation_date" : "1640052649065",
        "number_of_shards" : "1",
        "number_of_replicas" : "1",
        "uuid" : "qNOOFYeOSGWHyQlxWAnpiA",
        "version" : {
          "created" : "7030099"
        },
        "provided_name" : "testindex"
      }
    }
  }
}

{
 "testindex"【索引名】: { 
 "aliases"【别名】: {},
 "mappings"【映射】: {},
 "settings"【设置】: {
 "index"【设置 - 索引】: {
 "creation_date"【设置 - 索引 - 创建时间】: "1614265373911",
 "number_of_shards"【设置 - 索引 - 主分片数量】: "1",
 "number_of_replicas"【设置 - 索引 - 副分片数量】: "1",
 "uuid"【设置 - 索引 - 唯一标识】: "eI5wemRERTumxGCc1bAk2A",
 "version"【设置 - 索引 - 版本】: {
 "created": "7080099"
 },
 "provided_name"【设置 - 索引 - 名称】: "shopping"
 }
 }
 } }

删除索引

# 删除索引
DELETE testindex

{
  "acknowledged" : true
}

文档操作

创建文档( 自动生成_id)

# 创建文档
POST testindex/_doc
{
 "title":"手机hhhh",
 "content":"啦啦啦啦啦啦啦啦啦啦啦啦啦啦",
 "name":"地界"
}

# 结果
{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "glbS2n0BgOyOsl0n-wMM",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 0,
  "_primary_term" : 1
}

# 结果解释
{
 "_index"【索引】: "testindex",
 "_type"【类型-文档】: "_doc",
 "_id"【唯一标识】: "Xhsa2ncBlvF_7lxyCE9G", #可以类比为 MySQL 中的主键,随机生成
 "_version"【版本】: 1,
 "result"【结果】: "created", #这里的 created 表示创建成功
 "_shards"【分片】: {
 "total"【分片 - 总数】: 2,
 "successful"【分片 - 成功】: 1,
 "failed"【分片 - 失败】: 0
 },
 "_seq_no": 0,
 "_primary_term": 1
}

创建文档( 指定_id)

# 使用自定义的ID
PUT testindex/_doc/1
{
 "title":"手机hhhh",
 "content":"啦啦啦啦啦啦啦啦啦啦啦啦啦啦",
 "name":"地界"
}

# 结果 
{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "2",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 2,
  "_primary_term" : 2
}
# 结果 解释同上

查看文档(_id查询)

GET testindex/_doc/1

# 结果
{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 1,
  "_seq_no" : 1,
  "_primary_term" : 2,
  "found" : true,
  "_source" : {
    "title" : "手机hhhh",
    "content" : "啦啦啦啦啦啦啦啦啦啦啦啦啦啦",
    "name" : "地界"
  }
}

# 结果解释 
{
 "_index"【索引】: "testindex",
 "_type"【文档类型】: "_doc",
 "_id": "1",
 "_version": 2,
 "_seq_no": 2,
 "_primary_term": 2,
 "found"【查询结果】: true, # true 表示查找到,false 表示未查找到
 "_source"【文档源信息】: {
 "title": "华为手机",
 "category": "华为",
 "images": "http://www.gulixueyuan.com/hw.jpg",
 "price": 4999.00
 } }

查看文档(全部文档)

GET  testindex/_search

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "testindex",
        "_type" : "_doc",
        "_id" : "glbS2n0BgOyOsl0n-wMM",
        "_score" : 1.0,
        "_source" : {
          "title" : "手机hhhh",
          "content" : "啦啦啦啦啦啦啦啦啦啦啦啦啦啦",
          "name" : "地界"
        }
      }
      ...
    ]
  }
}

修改文档(全量字段修改)

# 全量修改
POST testindex/_doc/1
{
 "title":"手机hhhh",
 "content":"1111111",
 "name":"地界"
}


{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 2,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 3,
  "_primary_term" : 2
}
# 结果解释
{
 "_index": "testindex",
 "_type": "_doc",
 "_id": "1",
 "_version"【版本】: 2,
 "result"【结果】: "updated", # updated 表示数据被更新
 "_shards": {
 "total": 2,
 "successful": 1,
 "failed": 0
 },
 "_seq_no": 2,
 "_primary_term": 2
}

修改文档(部分字段修改)

# 修改部分字段
POST testindex/_update/1
{
"doc":{
   "content":"22222"
	}
}

# 结果
{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 5,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 6,
  "_primary_term" : 2
}

更新时 retry_on_conflict 参数重试次数

retry_on_conflict 指定重试次数

POST /test_index/_doc/5/_update?retry_on_conflict=3
{
  "doc": {
    "test_field": "itcast1"
  }
}

与 _version结合使用(注意:update API 不支持 version_type=external 等外部版本类型;7.x 中推荐使用 if_seq_no 和 if_primary_term 做乐观并发控制)

POST /test_index/_doc/5/_update?retry_on_conflict=3&version=22&version_type=external
{
  "doc": {
    "test_field": "itcast1"
  }
}

删除文档(根据_id删除)

DELETE testindex/_doc/1

# 结果
{
  "_index" : "testindex",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 6,
  "result" : "deleted",
  "_shards" : {
    "total" : 2,
    "successful" : 1,
    "failed" : 0
  },
  "_seq_no" : 7,
  "_primary_term" : 2
}



# 结果解释
{
 "_index": "testindex",
 "_type": "_doc",
 "_id": "1",
 "_version"【版本】: 4, #对数据的操作,都会更新版本
 "result"【结果】: "deleted", # deleted 表示数据被标记为删除
 "_shards": {
 "total": 2,
 "successful": 1,
 "failed": 0
 },
 "_seq_no": 4,
 "_primary_term": 2
}

条件更新

POST student/_update_by_query
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "name.keyword": "zhangsan"
          }
        },
        {
          "term": {
            "age": "30"
          }
        }
      ]
    }
  },
  "script": {
    "source": "ctx._source.age=\"18\";ctx._source.nickname=\"张三丰\""
  }
}

脚本更新

更新单个文档 多个语句之间用";"隔开就可以

POST index/doc/1/_update
{

"script":"ctx._source.status = 10;ctx._source.open_time = '2020-02-24 15:48:20'"

}

根据条件更新文档


POST 12818_person/_update_by_query
{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "person_id": {
              "value": "2496800"
            }
          }
        }
      ]
    }
  },
  "script": {
    "inline": """
    def appId = ctx._source.wechat_appid;
    def nId = params.appid;
    if(appId == null) {
      ctx._source.wechat_appid = nId;
    } else if (!(appId instanceof List) && appId != nId) {
      List arr = new ArrayList();
      arr.add(appId);
      arr.add(nId);
      ctx._source.wechat_appid = arr;
    }
    """
    ,
    "params":{
      "appid":"12311"
    }
  }
}

删除文档(条件删除)

POST testindex/_delete_by_query
{
 "query":{
  "match":{
    "name.keyword":"地界1"
    }
 } 
}

映射操作

查询映射

GET testindex/_mapping

# 结果
{
  "testindex" : {
    "mappings" : {
      "properties" : {
        "content" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "title" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}

创建映射

PUT testindex/_mapping
{
  "properties": {
    "title": {
      "type": "text",
      "index": true
    },
    "content": {
      "type": "text",
      "index": true
    },
    "name": {
      "type": "text",
      "index": true
    }
  }
}


# 映射数据说明:
#字段名:任意填写,下面指定许多属性,例如:title、subtitle、images、price
#type:类型,Elasticsearch 中支持的数据类型非常丰富,说几个关键的:
# String 类型,又分两种:
# text:可分词
# keyword:不可分词,数据会作为完整字段进行匹配
# Numerical:数值类型,分两类
# 基本数据类型:long、integer、short、byte、double、float、half_float
# 浮点数的高精度类型:scaled_float
# Date:日期类型
# Array:数组类型
# Object:对象
# index:是否索引,默认为 true,也就是说你不进行任何配置,所有字段都会被索引。
#    true:字段会被索引,则可以用来进行搜索
#    false:字段不会被索引,不能用来搜索
# store:是否将数据进行独立存储,默认为 false
# 原始的文本会存储在_source 里面,默认情况下其他提取出来的字段都不是独立存储
# 的,是从_source 里面提取出来的。当然你也可以独立的存储某个字段,只要设置
# "store": true 即可,获取独立存储的字段要比从_source 中解析快得多,但是也会占用
# 更多的空间,所以要根据实际业务需求来设置。
# analyzer:分词器,这里的 ik_max_word 即使用 ik 分词器

高级查询

基础数据

PUT student



POST /student/_doc/1001
{
"name":"zhangsan",
"nickname":"zhangsan",
 "sex":"男",
 "age":30
}
POST /student/_doc/1002
{
"name":"lisi",
"nickname":"lisi",
 "sex":"男",
 "age":20 }
 POST /student/_doc/1003
{
"name":"wangwu",
 "nickname":"wangwu",
 "sex":"女",
 "age":40 }
 POST /student/_doc/1004
{
"name":"zhangsan1",
"nickname":"zhangsan1",
 "sex":"女",
 "age":50 }
 POST /student/_doc/1005
{
"name":"zhangsan2",
"nickname":"zhangsan2",
 "sex":"女",
 "age":30 }

查询所有文档

GET  student/_search
{
 "query": {
 "match_all": {}
 	} 
 }

# "query":这里的 query 代表一个查询对象,里面可以有不同的查询属性
# "match_all":查询类型,例如:match_all(代表查询所有), match,term , range 等等
# {查询条件}:查询条件会根据类型的不同,写法也有差异

# 结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "testindex",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "title" : "手机hhhh",
          "content" : "啦啦啦啦啦啦啦啦啦啦啦啦啦啦",
          "name" : "地界"
        }
      }
    ]
  }
}


# 结果解释: 
{
 "took【查询花费时间,单位毫秒】" : 1116,
 "timed_out【是否超时】" : false,
 "_shards【分片信息】" : {
 "total【总数】" : 1,
 "successful【成功】" : 1,
 "skipped【忽略】" : 0,
 "failed【失败】" : 0
 },
 "hits【搜索命中结果】" : {
 "total"【搜索条件匹配的文档总数】: {
 "value"【总命中计数的值】: 3,
 "relation"【计数规则】: "eq" # eq 表示计数准确, gte 表示计数不准确
 },
 "max_score【匹配度分值】" : 1.0,
 "hits【命中结果集合】" : [
 。。。
 }
 ]
 } 
 }

简单条件查询

GET /blog_new/_search?q=name:libai+AND+age:10&from=0&size=1&sort=age:desc,name.keyword:asc
q: 查询条件 字段名:字段值
from: 分页查询起始位置,from = (pageNum - 1) * size
size: 展示条数
sort: 排序

匹配查询

GET student/_search
{
  "query": {
    "match": {
      "name": "zhangsan"
    }
  }
}
# match 匹配类型查询,会把查询条件进行分词,然后进行查询,多个词条之间是 or 的关系

# 结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.3862944,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.3862944,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      }
    ]
  }
}

多字段匹配查询

GET student/_search
{
 "query": {
 "multi_match": {
 "query": "zhangsan",
 "fields": ["name","nickname"]
 		}
 	} 
 }
 # multi_match 与 match 类似,不同的是它可以在多个字段中查询
 
 {
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.3862944,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.3862944,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      }
    ]
  }
} 

关键字精确查询

 GET student/_search
 {
   "query": {
     "term": {
       "name": {
         "value": "zhangsan"
       }
     }
   }
 }
 # term 查询,精确的关键词匹配查询,不对查询条件进行分词
 
 # 结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.3862944,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.3862944,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      }
    ]
  }
}

多关键字精确查询

 GET student/_search
 {
   "query": {
     "terms": {
       "name": ["zhangsan","lisi"]
     }
   }
 }
 
# terms 查询和 term 查询一样,但它允许你指定多值进行匹配。
# 如果这个字段包含了指定值中的任何一个值,那么这个文档满足条件,类似于 mysql 的 in
 
 
# 查询结果
 {
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      },
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1002",
        "_score" : 1.0,
        "_source" : {
          "name" : "lisi",
          "nickname" : "lisi",
          "sex" : "男",
          "age" : 20
        }
      }
    ]
  }
}

指定查询字段

 GET student/_search
{
 "_source": ["name","nickname"], 
 "query": {
 "terms": {
 "nickname": ["zhangsan"]
 }
 } }
 
 
 
# 默认情况下,Elasticsearch 在搜索的结果中,会把文档中保存在_source 的所有字段都返回。
# 如果我们只想获取其中的部分字段,我们可以添加_source 的过滤

# 查询结果
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan"
        }
      }
    ]
  }
}

过滤字段

# includes:来指定想要显示的字段
# excludes:来指定不想要显示的字段

 GET student/_search
{
 "_source": {
 "includes": ["name","nickname"]
 }, 
 "query": {
 "terms": {
 "nickname": ["zhangsan"]
 }
 } }
 
 # 结果
 {
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan"
        }
      }
    ]
  }
}

查询存在字段

# 查询包含 name 字段的文档
GET /boot/_search
{
"query": {
  "exists": {
    "field": "name"
  }
}
}

IDS ( sql in 查询)

GET /book/_search
{
    "query": {
        "ids" : {
            "values" : ["1", "4", "100"]
        }
    }
}

prefix 前缀查询

GET /book/_search
{
    "query": {
        "prefix": {
            "description": {
                "value": "spring"
            }
        }
    }
}

regexp query 正则查询

GET /book/_search
{
    "query": {
        "regexp": {
            "description": {
                "value": "j.*a",
                "flags" : "ALL",
                "max_determinized_states": 10000,
                "rewrite": "constant_score"
            }
        }
    }
}

filter条件查询

filter,仅仅只是按照搜索条件过滤出需要的数据而已,不计算任何相关度分数,对相关度没有任何影响。
query,会去计算每个document相对于搜索条件的相关度,并按照相关度进行排序。

应用场景:
一般来说,如果你是在进行搜索,需要将最匹配搜索条件的数据先返回,那么用query;如果你只是要根据一些条件筛选出一部分数据,不关注其排序,那么用filter。

filter与query性能
filter,不需要计算相关度分数,不需要按照相关度分数进行排序,同时还有内置的自动cache最常使用filter的数据
query,相反,要计算相关度分数,按照分数进行排序,而且无法cache结果

GET /book/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "description": "java程序员"
          }
        }
      ],
      "filter": {
        "range": {
          "price": {
            "gte": 80,
		     "lte": 90
          }
        }
      }
    }
  }
}

单独使用 filter

GET /book/_search 
{
  "query": {
    "constant_score": {
      "filter" : {
            "term" : {
                "studymodel" : "201001"
            }
        }
    }
  },
  "sort": [
    {
      "price": {
        "order": "asc"
      }
    }
  ]
}

组合查询

 GET student/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "zhangsan"
          }
        }
      ],
      "must_not": [
        {
          "match": {
            "age": "40"
          }
        }
      ],
      "should": [
        {
          "match": {
            "sex": "男"
          }
        },
        {
          "match": {
            "sex": "女"
          }
        }
      ]
    }
  }
}

# `bool`把各种其它查询通过
# `must`(必须 )
# `must_not`(必须不)
# `should`(应该)(多个满足一个即可)的方式进行组合

# 结果
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 2.261763,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 2.261763,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      }
    ]
  }
}

范围查询

操作符 说明
gt 大于 >
gte 大于等于 >=
lt 小于 <
lte 小于等于 <=
range 查询找出那些落在指定区间内的数字或者时间。range 查询支持以上操作符
 GET student/_search
{
 "query": {
 "range": {
 "age": {
 "gte": 30,
 "lte": 35
 }
 }
 } }
 
 
 # 结果
 {
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      },
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1005",
        "_score" : 1.0,
        "_source" : {
          "name" : "zhangsan2",
          "nickname" : "zhangsan2",
          "sex" : "女",
          "age" : 30
        }
      }
    ]
  }
}

模糊查询

# 返回包含与搜索字词相似的字词的文档。
# 编辑距离是将一个术语转换为另一个术语所需的一个字符更改的次数。这些更改可以包括:
# 更改字符(box → fox)
# 删除字符(black → lack)
# 插入字符(sic → sick)
# 转置两个相邻字符(act → cat)
# 为了找到相似的术语,fuzzy 查询会在指定的编辑距离内创建一组搜索词的所有可能的变体或扩展。然后查询返回每个扩展的完全匹配。
# 通过 fuzziness 修改编辑距离。一般使用默认值 AUTO,根据术语的长度生成编辑距离。

  GET student/_search
 {
 "query": {
 "fuzzy": {
 "name": {
 "value": "zhangsa1n",
"fuzziness": 2
 }
 }
 } }
 
 # 结果
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.2322617,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1004",
        "_score" : 1.2322617,
        "_source" : {
          "name" : "zhangsan1",
          "nickname" : "zhangsan1",
          "sex" : "女",
          "age" : 50
        }
      },
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.2130076,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        }
      },
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1005",
        "_score" : 1.078229,
        "_source" : {
          "name" : "zhangsan2",
          "nickname" : "zhangsan2",
          "sex" : "女",
          "age" : 30
        }
      }
    ]
  }
}

排序

# sort 可以让我们按照不同的字段进行排序,并且通过 order 指定排序的方式。desc 降序,asc升序。
# 单字段
GET student/_search
{
 "query": {
 "match": {
 "name":"zhangsan"
 }
 },
 "sort": [{
 "age": {
 "order":"desc"
 }
 }]
}

# 多字段
GET student/_search
{
 "query": {
 "match_all": {}
 },
 "sort": [
 {
 "age": {
 "order": "desc"
 }
 },
 {
 "_id":{
 "order": "desc"
 }
 }
 ] }

# 结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : null,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        },
        "sort" : [
          30
        ]
      }
    ]
  }
}

高亮查询

# 在进行关键字搜索时,搜索出的内容中的关键字会显示不同的颜色,称之为高亮
# Elasticsearch 可以对查询内容中的关键字部分,进行标签和样式(高亮)的设置。
# 在使用 match 查询的同时,加上一个 highlight 属性:
#   pre_tags:前置标签
#   post_tags:后置标签
#   fields:需要高亮的字段
#   title:这里声明 title 字段需要高亮,后面可以为这个字段设置特有配置,也可以空

GET student/_search
{
 "query": {
 "match": {
 "name": "zhangsan"
 }
 },
 "highlight": {
 "pre_tags": "<font color='red'>",
 "post_tags": "</font>",
 "fields": {
 "name": {}
 }
 } }
 
 # 结果
 {
  "took" : 62,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.3862944,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1001",
        "_score" : 1.3862944,
        "_source" : {
          "name" : "zhangsan",
          "nickname" : "zhangsan",
          "sex" : "男",
          "age" : 30
        },
        "highlight" : {
          "name" : [
            "<font color='red'>zhangsan</font>"
          ]
        }
      }
    ]
  }
}

分页查询

# from:当前页的起始索引,默认从 0 开始。 from = (pageNum - 1) * size
# size:每页显示多少条



GET student/_search
{
 "query": {
 "match_all": {}
 },
 "sort": [
 {
 "age": {
 "order": "desc"
 }
 }
 ],
 "from": 0,
 "size": 2
}

# 结果
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 5,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1004",
        "_score" : null,
        "_source" : {
          "name" : "zhangsan1",
          "nickname" : "zhangsan1",
          "sex" : "女",
          "age" : 50
        },
        "sort" : [
          50
        ]
      },
      {
        "_index" : "student",
        "_type" : "_doc",
        "_id" : "1003",
        "_score" : null,
        "_source" : {
          "name" : "wangwu",
          "nickname" : "wangwu",
          "sex" : "女",
          "age" : 40
        },
        "sort" : [
          40
        ]
      }
    ]
  }
}

聚合查询

# 聚合允许使用者对 es 文档进行统计分析,类似于关系型数据库中的 group by,当然还有很多其他的聚合,例如取最大值、平均值等等。
# 最大值
GET student/_search
{
 "aggs":{
 "max_age":{
 "max":{"field":"age"}
 }
 },
 "size":0
}

# 最小值
GET student/_search
{
 "aggs":{
 "min_age":{
 "min":{"field":"age"}
 }
 },
 "size":0
}

# 求和
GET student/_search
{
 "aggs":{
 "sum_age":{
 "sum":{"field":"age"}
 }
 },
 "size":0
}

# 平均值
GET student/_search
{
 "aggs":{
 "avg_age":{
 "avg":{"field":"age"}
 }
 },
 "size":0
}

# 对某个字段的值进行去重之后再取总数
GET student/_search
{
 "aggs":{
 "distinct_age":{
 "cardinality":{"field":"age"}
 }
 },
 "size":0
}

# stats 聚合:对某个字段一次性返回 count、max、min、avg 和 sum 五个指标
GET student/_search
{
 "aggs":{
 "stats_age":{
 "stats":{"field":"age"}
 }
 },
 "size":0
}

桶聚合查询

# 桶聚合相当于 sql 中的 group by 语句
# terms 聚合,分组统计
GET student/_search
{
 "aggs":{
 "age_groupby":{
 "terms":{"field":"age"}
 }
 },
 "size":0
}	

# 在 terms 分组下再进行聚合
GET student/_search
{
    "aggs":{
        "age_groupby":{
            "terms":{
                "field":"age"
            },
            "aggs":{
                "sum_age":{
                    "sum":{
                        "field":"age"
                    }
                }
            }
        }
    },
    "size":0
}

查询计划

一般用在那种特别复杂庞大的搜索下,比如你一下子写了上百行的搜索,这个时候可以先用validate api去验证一下,搜索是否合法。

合法以后,explain就像mysql的执行计划,可以看到搜索的目标等信息。

GET /book/_validate/query?explain
{
  "query": {
    "match": {
      "description": "java程序员"
    }
  }
}
# 结果
{
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "valid" : true,
  "explanations" : [
    {
      "index" : "book",
      "valid" : true,
      "explanation" : "description:java description:程序员"
    }
  ]
}

Scroll分批查询

场景:下载某一个索引中1亿条数据,到文件或是数据库。

不能一下全查出来,否则系统内存会溢出。所以使用 scroll 滚动搜索技术,一批一批查询。

scroll 搜索会在第一次搜索的时候,保存一个当时的视图快照,之后只会基于该旧的视图快照提供数据搜索,如果这个期间数据变更,是不会让用户看到的

每次发送 scroll 请求,我们还需要指定一个 scroll 参数,指定一个时间窗口,每次搜索请求只要在这个时间窗口内能完成就可以了。

搜索

GET /book/_search?scroll=1m
{
  "query": {
    "match_all": {}
  },
  "size": 3
}

返回

{
  "_scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAMOkWTURBNDUtcjZTVUdKMFp5cXloVElOQQ==",
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
     
    ]
  }
}

获得的结果会有一个 _scroll_id,下一次再发送 scroll 请求的时候,必须在请求体的 scroll_id 中带上这个值

GET /_search/scroll
{
    "scroll": "1m", 
    "scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAMOkWTURBNDUtcjZTVUdKMFp5cXloVElOQQ=="
}

与分页区别:

分页给用户看的 deep paging

scroll是用于系统内部操作,如下载批量数据,数据转移。零停机改变索引映射。

批量查询 mget

单条查询 GET /test_index/_doc/1,如果查询多个id的文档一条一条查询,网络开销太大。

mget 批量查询:

GET /_mget
{
   "docs" : [
      {
         "_index" : "test_index",
         "_type" :  "_doc",
         "_id" :    1
      },
      {
         "_index" : "test_index",
         "_type" :  "_doc",
         "_id" :    7
      }
   ]
}

返回:

{
  "docs" : [
    {
      "_index" : "test_index",
      "_type" : "_doc",
      "_id" : "2",
      "_version" : 6,
      "_seq_no" : 12,
      "_primary_term" : 1,
      "found" : true,
      "_source" : {
        "test_field" : "test12333123321321"
      }
    },
    {
      "_index" : "test_index",
      "_type" : "_doc",
      "_id" : "3",
      "_version" : 6,
      "_seq_no" : 18,
      "_primary_term" : 1,
      "found" : true,
      "_source" : {
        "test_field" : "test3213"
      }
    }
  ]
}

提示去掉type

GET /_mget
{
   "docs" : [
      {
         "_index" : "test_index",
         "_id" :    2
      },
      {
         "_index" : "test_index",
         "_id" :    3
      }
   ]
}

同一索引下批量查询:

GET /test_index/_mget
{
   "docs" : [
      {
         "_id" :    2
      },
      {
         "_id" :    3
      }
   ]
}

第三种写法:搜索写法

post /test_index/_doc/_search
{
    "query": {
        "ids" : {
            "values" : ["1", "7"]
        }
    }
}

批量增删改 bulk

Bulk 操作是将文档的增删改等一系列操作,通过一次请求全都做完。减少网络传输次数。

语法:

POST /_bulk
{"action": {"metadata"}}
{"data"}

如下操作,删除5,新增14,修改2。

POST /_bulk
{ "delete": { "_index": "test_index",  "_id": "5" }} 
{ "create": { "_index": "test_index",  "_id": "14" }}
{ "test_field": "test14" }
{ "update": { "_index": "test_index",  "_id": "2"} }
{ "doc" : {"test_field" : "bulk test"} }

总结:

1功能:

  • delete:删除一个文档,只要1个json串就可以了
  • create:相当于强制创建 PUT /index/type/id/_create
  • index:普通的put操作,可以是创建文档,也可以是全量替换文档
  • update:执行的是局部更新partial update操作

2格式:每个json不能换行。相邻json必须换行。

3隔离:每个操作互不影响。操作失败的行会返回其失败信息。

4实际用法:bulk请求一次不要太大,否则一下积压到内存中,性能会下降。所以,一次请求几千个操作、大小在几M正好。

脚本

Painless是内置支持的。脚本内容可以通过多种途径传给 es,包括 rest 接口,或者放到 config/scripts目录等,默认开启。

官方文档

你可能感兴趣的:(ELK,elasticsearch,搜索引擎,big,data)