ElasticSearch8.x操作记录

ElasticSearch8.x操作记录

文档内容来自于尚硅谷海波老师的ElasticSearch教程课,在Kibana中的一些操作演示

以下为在文档中的相关操作记录

1.索引操作

#创建索引
#PUT 索引名称
PUT test_index

#PUT 索引
#增加配置:JSON格式的主体内容(请求体)
PUT test_index_1
{
  "aliases": {
    "test1": {}
  }
}

#删除索引
#delete 索引名称
DELETE test_index_1

#修改索引配置
#ES不允许修改索引信息
POST test_index_1
{
  "aliases": {
    "test1": {}
  }
}

#HEAD索引 (判断索引是否存在)HTTP状态码:200(存在)/ 404(不存在)
HEAD test_index

#查询索引
GET test_index
GET test_index_1
GET test1

#查询所有索引
GET _cat/indices

#创建文档(索引数据)--需要唯一性标识(手动:使用PUT,需要在路径后面自己指定ID;自动:使用POST,由ES自动生成ID,不需要在后面添加)
#首先需要先创建索引
PUT index_doc

PUT index_doc/_doc/1001
{
  "id": 1001,
  "name": "zhangsan",
  "age": 30
}

POST index_doc/_doc
{
  "id": 1002,
  "name": "lisi",
  "age": 14
}

2.文档操作

#查询文档
GET index_doc/_doc/1001

#查询当前索引中所有的文档数据
GET index_doc/_search

#修改文档数据
PUT index_doc/_doc/1001
{
  "id": 100111,
  "name": "zhangsan",
  "age": 30,
  "tel": "15123392594"
}
#POST修改数据
POST index_doc/_doc/okBdhIQB7PHEeADHmDqa
{
  "id": 1003,
  "name": "wangwu",
  "age": 22
}

#删除数据
DELETE index_doc/_doc/okBdhIQB7PHEeADHmDqa

#以下操作是不被允许的
DELETE index_doc/_doc

3.文档搜索

#增加索引
PUT test_query

DELETE test_query
#添加数据
PUT test_query/_bulk
{"index":{"_index": "test_query", "_id":"1001"}}
{"id":"1001", "name": "zhang san", "age": 30}
{"index":{"_index": "test_query", "_id":"1002"}}
{"id":"1002", "name": "li si", "age": 40}
{"index":{"_index": "test_query", "_id":"1003"}}
{"id":"1003", "name": "wang wu", "age": 50}
{"index":{"_index": "test_query", "_id":"1004"}}
{"id":"1004", "name": "zhangsan", "age": 30}
{"index":{"_index": "test_query", "_id":"1005"}}
{"id":"1005", "name": "lisi", "age": 40}
{"index":{"_index": "test_query", "_id":"1006"}}
{"id":"1006", "name": "wangwu", "age": 50}

#Match是分词查询,ES会将数据分词(关键词)保存
#zhang san
GET test_query/_search
{
  "query": {
    "match": {
      "name": "zhang san"
    }
  }
}

GET test_query/_search
{
  "query": {
    "term": {
      "name": {
        "value": "zhang san"
      }
    }
  }
}

#对查询结果字段进行限制
GET test_query/_search
{
  "_source": ["name", "age"], 
  "query": {
    "match": {
      "name": "zhang san"
    }
  }
}

#组合多个条件 or
GET test_query/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "name": "zhang"
          }
        },
        {
          "match": {
            "age": "40"
          }
        }
      ]
    }
  }
}

# 排序后查询
GET test_query/_search
{
  "query": {
    "match": {
      "name": "zhang li"
    }
  },
  "sort": [
    {
      "age": {
        "order": "desc"
      }
    }
  ]
}

#分页查询
GET test_query/_search
{
  "query": {
    "match_all": {}
  },
  "from": 4,
  "size": 2
}

4.聚合搜索

# 分组查询
GET test_query/_search
{
  "aggs": {
    "ageGroup": {
      "terms": {
        "field": "age"
      }
    }
  },
  "size": 0
}

# 分组后聚合(求和)
GET test_query/_search
{
  "aggs": {
    "ageGroup": {
      "terms": {
        "field": "age"
      },
      "aggs": {
        "ageSum": {
          "sum": {
            "field": "age"
          }
        }
      }
    }
  },
  "size": 0
}

# 求年龄平均值
GET test_query/_search
{
  "aggs": {
    "avgAge": {
      "avg": {
        "field": "age"
      }
    }
  },
  "size": 0
}

# 获取前几名操作
GET test_query/_search
{
  "aggs": {
    "top3": {
      "top_hits": {
        "sort": [
          {
            "age": {
            "order": "desc"
          }
          }
        ], 
        "size": 3
      }
    }
  },
  "size": 0
}

5.索引模板

PUT test_temp

GET test_temp

PUT test_temp_1
{
  "settings": {
    "number_of_shards": 2
  }
}

GET test_temp_1

#创建模板
PUT _template/mytemplate
{
  "index_patterns": [
    "my*"  
  ],
  "settings": {
    "index": {
      "number_of_shards" : "2"
    }
  },
  "mappings": {
    "properties": {
      "now": {
        "type": "date",
        "format": "yyyy/MM/dd"
      }
    }
  }
}

#查看模板
GET _template/mytemplate

PUT test_temp_2
GET test_temp_2

# 匹配模板规则,以my开头
PUT my_test_temp
GET my_test_temp

#删除模板
DELETE _template/mytemplate

6.中文分词

#分词操作
GET _analyze
{
  "analyzer": "standard", 
  "text": ["zhang san"]
}

# 分词操作(不带插件情况下,中文拆分逻辑不太适合)
GET _analyze
{
  "analyzer": "chinese", 
  "text": ["我是一个三好学生"]
}

# 集成了IK插件后提供的分词
GET _analyze
{
  "analyzer": "ik_smart", 
  "text": ["我是一个三好学生"]
}

# 集成了IK插件后提供的分词,相较于上者,分得更加精细
GET _analyze
{
  "analyzer": "ik_max_word", 
  "text": ["我是一个三好学生"]
}

7.文档评分机制

PUT test_score

PUT test_score/_doc/1001
{
  "text": "zhang kai shou bi, yin jie tai yang"
}

PUT test_score/_doc/1002
{
  "text": "zhang san"
}

GET test_score/_search?explain=true
{
  "query": {
    "match": {
      "text": "zhang"
    }
  }
}
# 公式如下
# score = boost * idf * tf = 2.2 * 0.18232156 * 0.6024096 ≈ 0.2416

PUT itwluo

PUT itwluo/_doc/1001
{
  "text": "java"
}

GET itwluo/_search
{
  "query": {
    "match": {
      "text": "java"
    }
  }
}

#result
{
  "took": 992,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "itwluo",
        "_id": "1001",
        "_score": 0.2876821,
        "_source": {
          "text": "java"
        }
      }
    ]
  }
}

#详细结果
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 0.2876821,
    "hits": [
      {
        "_shard": "[itwluo][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "itwluo",
        "_id": "1001",
        "_score": 0.2876821,
        "_source": {
          "text": "java"
        },
        "_explanation": {
          "value": 0.2876821,
          "description": "weight(text:java in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.2876821,
              "description": "score(freq=1.0), computed as boost * idf * tf from:",
              "details": [
                {
                  "value": 2.2,
                  "description": "boost",
                  "details": []
                },
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "n, number of documents containing term",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "N, total number of documents with field",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 0.45454544,
                  "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "freq, occurrences of term within document",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "k1, term saturation parameter",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "b, length normalization parameter",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "dl, length of field",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "avgdl, average length of field",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}

#新增数据后,观察分值变化
PUT itwluo/_doc/1002
{
  "text": "java bigdata"
}

#查询文档数据
GET itwluo/_search?explain=true
{
  "query": {
    "match": {
      "text": "java"
    }
  }
}

#详细结果
{
  "took": 609,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.21110919,
    "hits": [
      {
        "_shard": "[itwluo][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "itwluo",
        "_id": "1001",
        "_score": 0.21110919,
        "_source": {
          "text": "java"
        },
        "_explanation": {
          "value": 0.21110919,
          "description": "weight(text:java in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.21110919,
              "description": "score(freq=1.0), computed as boost * idf * tf from:",
              "details": [
                {
                  "value": 2.2,
                  "description": "boost",
                  "details": []
                },
                {
                  "value": 0.18232156,
                  "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "n, number of documents containing term",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "N, total number of documents with field",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 0.5263158,
                  "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "freq, occurrences of term within document",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "k1, term saturation parameter",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "b, length normalization parameter",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "dl, length of field",
                      "details": []
                    },
                    {
                      "value": 1.5,
                      "description": "avgdl, average length of field",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[itwluo][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "itwluo",
        "_id": "1002",
        "_score": 0.160443,
        "_source": {
          "text": "java bigdata"
        },
        "_explanation": {
          "value": 0.160443,
          "description": "weight(text:java in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.160443,
              "description": "score(freq=1.0), computed as boost * idf * tf from:",
              "details": [
                {
                  "value": 2.2,
                  "description": "boost",
                  "details": []
                },
                {
                  "value": 0.18232156,
                  "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "n, number of documents containing term",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "N, total number of documents with field",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 0.40000004,
                  "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "freq, occurrences of term within document",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "k1, term saturation parameter",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "b, length normalization parameter",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "dl, length of field",
                      "details": []
                    },
                    {
                      "value": 1.5,
                      "description": "avgdl, average length of field",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}

# 在上述数据基础上继续添加数据,分析结果
PUT itwluo/_doc/1003
{
  "text": "bigdata",
  "content": "java bigdata"
}

# 详细计算结果
{
  "took": 599,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.52354836,
    "hits": [
      {
        "_shard": "[itwluo][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "itwluo",
        "_id": "1001",
        "_score": 0.52354836,
        "_source": {
          "text": "java"
        },
        "_explanation": {
          "value": 0.52354836,
          "description": "weight(text:java in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.52354836,
              "description": "score(freq=1.0), computed as boost * idf * tf from:",
              "details": [
                {
                  "value": 2.2,
                  "description": "boost",
                  "details": []
                },
                {
                  "value": 0.47000363,
                  "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "n, number of documents containing term",
                      "details": []
                    },
                    {
                      "value": 3,
                      "description": "N, total number of documents with field",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 0.50632906,
                  "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "freq, occurrences of term within document",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "k1, term saturation parameter",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "b, length normalization parameter",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "dl, length of field",
                      "details": []
                    },
                    {
                      "value": 1.3333334,
                      "description": "avgdl, average length of field",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[itwluo][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "itwluo",
        "_id": "1002",
        "_score": 0.39019167,
        "_source": {
          "text": "java bigdata"
        },
        "_explanation": {
          "value": 0.39019167,
          "description": "weight(text:java in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.39019167,
              "description": "score(freq=1.0), computed as boost * idf * tf from:",
              "details": [
                {
                  "value": 2.2,
                  "description": "boost",
                  "details": []
                },
                {
                  "value": 0.47000363,
                  "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "n, number of documents containing term",
                      "details": []
                    },
                    {
                      "value": 3,
                      "description": "N, total number of documents with field",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 0.37735844,
                  "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "freq, occurrences of term within document",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "k1, term saturation parameter",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "b, length normalization parameter",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "dl, length of field",
                      "details": []
                    },
                    {
                      "value": 1.3333334,
                      "description": "avgdl, average length of field",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}
# 通过提高权重,从而提高分数,使排名靠前
DELETE test_score

PUT test_score

PUT /test_score/_doc/1001
{
  "title": "Hadoop is a FrameWork",
  "content": "Hadoop 是一个大数据基础框架"
}

PUT /test_score/_doc/1002
{
  "title": "Hive is a SQL Tools",
  "content": "Hive是一个SQL工具"
}

PUT /test_score/_doc/1003
{
  "title": "Spark is a FrameWork",
  "content": "Spark 是一个分布式计算引擎"
}

GET test_score/_search?explain=true
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "Hadoop", "boost": 1
            }
          }
        },
        {
          "match": {
            "title": {
              "query": "Hive", "boost": 2
            }
          }
        },
        {
          "match": {
            "title": {
              "query": "Spark", "boost": 1
            }
          }
        }
      ]
    }
  }
}

# 详细结果分析
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 2.2458146,
    "hits": [
      {
        "_shard": "[test_score][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "test_score",
        "_id": "1002",
        "_score": 2.2458146,
        "_source": {
          "title": "Hive is a SQL Tools",
          "content": "Hive是一个SQL工具"
        },
        "_explanation": {
          "value": 2.2458146,
          "description": "sum of:",
          "details": [
            {
              "value": 2.2458146,
              "description": "weight(title:hive in 0) [PerFieldSimilarity], result of:",
              "details": [
                {
                  "value": 2.2458146,
                  "description": "score(freq=1.0), computed as boost * idf * tf from:",
                  "details": [
                    {
                      "value": 4.4,
                      "description": "boost",
                      "details": []
                    },
                    {
                      "value": 1.2039728,
                      "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                      "details": [
                        {
                          "value": 1,
                          "description": "n, number of documents containing term",
                          "details": []
                        },
                        {
                          "value": 4,
                          "description": "N, total number of documents with field",
                          "details": []
                        }
                      ]
                    },
                    {
                      "value": 0.42394012,
                      "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                      "details": [
                        {
                          "value": 1,
                          "description": "freq, occurrences of term within document",
                          "details": []
                        },
                        {
                          "value": 1.2,
                          "description": "k1, term saturation parameter",
                          "details": []
                        },
                        {
                          "value": 0.75,
                          "description": "b, length normalization parameter",
                          "details": []
                        },
                        {
                          "value": 5,
                          "description": "dl, length of field",
                          "details": []
                        },
                        {
                          "value": 4.25,
                          "description": "avgdl, average length of field",
                          "details": []
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[test_score][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "test_score",
        "_id": "1003",
        "_score": 1.2336599,
        "_source": {
          "title": "Spark is a FrameWork",
          "content": "Spark 是一个分布式计算引擎"
        },
        "_explanation": {
          "value": 1.2336599,
          "description": "sum of:",
          "details": [
            {
              "value": 1.2336599,
              "description": "weight(title:spark in 2) [PerFieldSimilarity], result of:",
              "details": [
                {
                  "value": 1.2336599,
                  "description": "score(freq=1.0), computed as boost * idf * tf from:",
                  "details": [
                    {
                      "value": 2.2,
                      "description": "boost",
                      "details": []
                    },
                    {
                      "value": 1.2039728,
                      "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                      "details": [
                        {
                          "value": 1,
                          "description": "n, number of documents containing term",
                          "details": []
                        },
                        {
                          "value": 4,
                          "description": "N, total number of documents with field",
                          "details": []
                        }
                      ]
                    },
                    {
                      "value": 0.46575344,
                      "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                      "details": [
                        {
                          "value": 1,
                          "description": "freq, occurrences of term within document",
                          "details": []
                        },
                        {
                          "value": 1.2,
                          "description": "k1, term saturation parameter",
                          "details": []
                        },
                        {
                          "value": 0.75,
                          "description": "b, length normalization parameter",
                          "details": []
                        },
                        {
                          "value": 4,
                          "description": "dl, length of field",
                          "details": []
                        },
                        {
                          "value": 4.25,
                          "description": "avgdl, average length of field",
                          "details": []
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[test_score][0]",
        "_node": "EX7ZCQpSRLu-OWEZjQazog",
        "_index": "test_score",
        "_id": "1001",
        "_score": 0.7102385,
        "_source": {
          "title": "Hadoop is a FrameWork",
          "content": "Hadoop 是一个大数据基础框架"
        },
        "_explanation": {
          "value": 0.7102385,
          "description": "sum of:",
          "details": [
            {
              "value": 0.7102385,
              "description": "weight(title:hadoop in 1) [PerFieldSimilarity], result of:",
              "details": [
                {
                  "value": 0.7102385,
                  "description": "score(freq=1.0), computed as boost * idf * tf from:",
                  "details": [
                    {
                      "value": 2.2,
                      "description": "boost",
                      "details": []
                    },
                    {
                      "value": 0.6931472,
                      "description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
                      "details": [
                        {
                          "value": 2,
                          "description": "n, number of documents containing term",
                          "details": []
                        },
                        {
                          "value": 4,
                          "description": "N, total number of documents with field",
                          "details": []
                        }
                      ]
                    },
                    {
                      "value": 0.46575344,
                      "description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
                      "details": [
                        {
                          "value": 1,
                          "description": "freq, occurrences of term within document",
                          "details": []
                        },
                        {
                          "value": 1.2,
                          "description": "k1, term saturation parameter",
                          "details": []
                        },
                        {
                          "value": 0.75,
                          "description": "b, length normalization parameter",
                          "details": []
                        },
                        {
                          "value": 4,
                          "description": "dl, length of field",
                          "details": []
                        },
                        {
                          "value": 4.25,
                          "description": "avgdl, average length of field",
                          "details": []
                        }
                      ]
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}
# 当boost指定为2时, 权重值翻倍

: 1,
“description”: “freq, occurrences of term within document”,
“details”: []
},
{
“value”: 1.2,
“description”: “k1, term saturation parameter”,
“details”: []
},
{
“value”: 0.75,
“description”: “b, length normalization parameter”,
“details”: []
},
{
“value”: 4,
“description”: “dl, length of field”,
“details”: []
},
{
“value”: 4.25,
“description”: “avgdl, average length of field”,
“details”: []
}
]
}
]
}
]
}
]
}
}
]
}
}

当boost指定为2时, 权重值翻倍

公式

你可能感兴趣的:(ElasticSearch,elasticsearch,java,搜索引擎)