4.2-结构化搜索

结构化数据

  • 结构化搜索(Structured search) 是指对结构化数据的搜索

    • 日期,布尔类型和数字都是结构化的
  • 文本也可以是结构化的。

    • 如彩色笔可以有离散的颜色集合:红(red)、绿(green)、蓝(blue)

    • 一个博客可能被标记了标签,例如,分布式(distributed) 和 搜索(search)

    • 电商网站上的商品都有 UPCs(通用产品码 Universal Product Codes)或其他的唯一标识,它们都需要遵从严格规定的、结构化的格式。

ES 中的结构化搜索

  • 布尔,时间,日期和数字这类结构化数据:有精确的格式,我们可以对这些格式进行逻辑操作。包括比较数字或时间的范围,或判定两个值的大小。

  • 结构化的文本可以做精确匹配或者部分匹配

    • Term 查询 / Prefix 前缀查询
  • 结构化结果只有"是"或"否"两个值

    • 根据场景需要,可以决定结构化搜索是否需要打分

布尔值

#对布尔值 term 查询,有算分
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "avaliable": true
    }
  }
}



#对布尔值,通过constant score 转成 filtering,没有算分
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "avaliable": true
        }
      }
    }
  }
}

数字 Range

  • gt 大于

  • lt 小于

  • gte 大于等于

  • lte 小于等于

#数字类型 Term
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "price": 30
    }
  }
}

#数字类型 terms
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "price": [
            "20",
            "30"
          ]
        }
      }
    }
  }
}

#数字 Range 查询
GET products/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "range" : {
                    "price" : {
                        "gte" : 20,
                        "lte"  : 30
                    }
                }
            }
        }
    }
}

日期 range

  • Date Math Expressions

    • 2014-01-01 00:00:00||+1M
字段 含义
y 年
M 月
w 周
d 天
H/h 小时
m 分钟
s 秒
# 日期 range
POST products/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "range" : {
                    "date" : {
                      "gte" : "now-1y"
                    }
                }
            }
        }
    }
}

处理空值

#exists查询
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "exists": {
          "field": "date"
        }
      }
    }
  }
}


# Inverse of the exists query: bool/must_not + exists keeps only the documents that have no value for the "date" field
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must_not" : {
            "exists" : {
              "field" : "date"
            }
          }
        }
      }
    }
  }
}

查找多个精确值

POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "price": [
            "20",
            "30"
          ]
        }
      }
    }
  }
}


POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "productID.keyword": [
            "JODL-X-1937-#pV7",
            "QQPX-R-3956-#aD8"
          ]
        }
      }
    }
  }
}

包含而不是相等

#处理多值字段,term 查询是包含,而不是等于
POST movies/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "genre.keyword": "Comedy"
        }
      }
    }
  }
}

//res
{
    "_index" : "movies",
    "_type" : "_doc",
    "_id" : "4399",
    "_score" : 1.0,
    "_source" : {
      "genre" : [
        "Comedy",
        "Drama"
      ],
      "title" : "Diary of a Chambermaid",
      "year" : 0,
      "@version" : "1",
      "id" : "4399"
    }
},
{
    "_index" : "movies",
    "_type" : "_doc",
    "_id" : "4402",
    "_score" : 1.0,
    "_source" : {
      "genre" : [
        "Comedy"
      ],
      "title" : "Dr. Goldfoot and the Bikini Machine",
      "year" : 1965,
      "@version" : "1",
      "id" : "4402"
    }
},
  • 解决方案:增加个 genre_count 字段进行计数;

  • 会在组合 Query & Filtering 给出解决方法;

  • 会在组合 bool query 给出解决方法

本节知识点回顾

  • 结构化数据 & 结构化搜索

    • 如果不需要算分,可以通过 Constant Score,将查询转为 Filtering
  • 范围查询和 Date Math

  • 使用 Exists 查询处理非空值与空(Null)值

  • 精确值 & 多值字段的精确值查找

    • Term 查询是包含,不是完全相等。针对多值字段查询要尤其注意

课程demo

#结构化搜索,精确匹配
DELETE products
POST /products/_bulk
{ "index": { "_id": 1 }}
{ "price" : 10,"avaliable":true,"date":"2018-01-01", "productID" : "XHDK-A-1293-#fJ3" }
{ "index": { "_id": 2 }}
{ "price" : 20,"avaliable":true,"date":"2019-01-01", "productID" : "KDKE-B-9947-#kL5" }
{ "index": { "_id": 3 }}
{ "price" : 30,"avaliable":true, "productID" : "JODL-X-1937-#pV7" }
{ "index": { "_id": 4 }}
{ "price" : 30,"avaliable":false, "productID" : "QQPX-R-3956-#aD8" }

GET products/_mapping



#对布尔值 term 查询,有算分
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "avaliable": true
    }
  }
}



#对布尔值,通过constant score 转成 filtering,没有算分
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "avaliable": true
        }
      }
    }
  }
}


#数字类型 Term
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "price": 30
    }
  }
}

#数字类型 terms
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "price": [
            "20",
            "30"
          ]
        }
      }
    }
  }
}

#数字 Range 查询
GET products/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "range" : {
                    "price" : {
                        "gte" : 20,
                        "lte"  : 30
                    }
                }
            }
        }
    }
}


# 日期 range
POST products/_search
{
    "query" : {
        "constant_score" : {
            "filter" : {
                "range" : {
                    "date" : {
                      "gte" : "now-1y"
                    }
                }
            }
        }
    }
}



#exists查询
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "exists": {
          "field": "date"
        }
      }
    }
  }
}

#处理多值字段
POST /movies/_bulk
{ "index": { "_id": 1 }}
{ "title" : "Father of the Bridge Part II","year":1995, "genre":"Comedy"}
{ "index": { "_id": 2 }}
{ "title" : "Dave","year":1993,"genre":["Comedy","Romance"] }


#处理多值字段,term 查询是包含,而不是等于
POST movies/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "genre.keyword": "Comedy"
        }
      }
    }
  }
}


#字符类型 terms
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "terms": {
          "productID.keyword": [
            "QQPX-R-3956-#aD8",
            "JODL-X-1937-#pV7"
          ]
        }
      }
    }
  }
}



POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "match": {
      "price": 30
    }
  }
}


POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "date": "2019-01-01"
    }
  }
}

POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "match": {
      "date": "2019-01-01"
    }
  }
}




POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "productID.keyword": "XHDK-A-1293-#fJ3"
        }
      }
    }
  }
}

POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "productID.keyword": "XHDK-A-1293-#fJ3"
    }
  }
}

#对布尔数值
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": {
          "avaliable": "false"
        }
      }
    }
  }
}

POST products/_search
{
  "query": {
    "term": {
      "avaliable": {
        "value": "false"
      }
    }
  }
}

POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "term": {
      "price": {
        "value": "20"
      }
    }
  }
}

# match query on the numeric "price" field using the string value "20";
# profile and explain are enabled to inspect how the query is executed and scored
POST products/_search
{
  "profile": "true",
  "explain": true,
  "query": {
    "match": {
      "price": "20"
    }
  }
}


# Inverse of the exists query: bool/must_not + exists keeps only the documents that have no value for the "date" field
POST products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must_not": {
            "exists": {
              "field": "date"
            }
          }
        }
      }
    }
  }
}

相关阅读

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html
https://www.elastic.co/guide/en/elasticsearch/reference/7.1/term-level-queries.html

你可能感兴趣的:(4.2-结构化搜索)