ES实现三表关联查询+条件过滤

需求背景

        很多时候mysql的表之间是一对多的关系,比如库信息表(元数据信息),表信息表(元数据信息),字段信息表(元数据信息)。一个库可以包含多个表,一个表可以包含多个字段。他们的关系:库—(1:n)->表—(1:n)->字段。

        ElasticsSearch(以下简称ES)处理这种关系虽然不是特别擅长(相对于关系型数据库),因为ES和大多数 NoSQL 数据库类似,是扁平化的存储结构。索引是独立文档的集合体。不同的索引之间一般是没有关系的。

不过ES目前毕竟发展到8.x版本了, 已经有几种可选的方式能够高效的支持这种一对多关系的映射。

        比较常用的方案是嵌套对象,嵌套文档和父子文档。后两种是我们本文要讲的内容。

表结构

        为了便于描述下面的demo内容,现在先介绍一下表结构demo内容(表名称:字段1,字段2,字段3......)

database: database_id, name, desc

table:table_id,name,desc,address

column:column_id,name,desc,address

嵌套文档查询实例

#建立索引元数据:两层嵌套 database->table->column
put http://localhost:9200/test_nested
{
	"mappings": {
		"properties": {
			"table": {
				"type": "nested",
				"properties": {
					"column": {
						"type": "nested" 
					}
				}
		 
			}
		}
	}
}
#创建1个库数据database1
PUT http://localhost:9200/test_nested/_doc/database1
{
	"database_id": 1,
	"name" : "database1",
	"des" : "This is a database!",
	"table" : [
		{
		  "table_id":1,
		  "name" : "John",
		  "des" :  "This is a table!",
		  "address":"hangzhou",
		  "column":[
				{
					"column_id":1,
					"name" :"zhangsan",
					"des" :  "This is a column!",
					"address":"wuhan"
				},
				{
					"column_id":2,
					"name" :"Alice",
					"des" :  "This is a column!",	
					"address":"changchun"
				}
			]
		},
		{
		  "table_id":2,
		  "name" : "Alice",
		  "des" :  "This is a table!",
		  "address":"changchun",
		  "column":[
				{
					"column_id":3,
					"name" :"zhangsan",
					"des" :  "This is a column!",	
					"address":"hangzhou"
				},
				{
					"column_id":4,
					"name" :"John",
					"des" :  "This is a column!",	
					"address":"zhengzhou"
				}
			]
		}
	]
}

#创建1个库数据database2
PUT http://localhost:9200/test_nested/_doc/database2
{
	"database_id": 2,
	"name" : "database2",
	"des" : "This is a database!",
	"table" : [
		{
		  "table_id":3,
		  "name" : "zhangsan",
		  "des" :  "This is a table!",
		  "address":"wuhan",
		  "column":[
				{
					"column_id":5,
					"name" :"John",
					"des" :  "This is a column!",
					"address":"hangzhou"
				},
				{
					"column_id":6,
					"name" :"Alice",
					"des" :  "This is a column!",	
					"address":"changchun"
				}
			]
		},
		{
		  "table_id":4,
		  "name" : "Alice",
		  "des" :  "This is a table!",
		  "address":"changchun",
		  "column":[
				{
					"column_id":7,
					"name" :"zhangsan",
					"des" :  "This is a column!",	
					"address":"hangzhou"
				},
				{
					"column_id":8,
					"name" :"John",
					"des" :  "This is a column!",	
					"address":"zhengzhou"
				}
			]
		}
	]
}

#嵌套查询例子,查询column匹配指定内容,且table匹配指定内容的文档
POST http://localhost:9200/test_nested/_search
{
	"query" : {
		"bool": {
			"must": [ 
				{
					"nested": {
						"path": "table",
						"query": {
							"bool": {
								"must": [
									{ 
										"match": { 
											"table.address": "hangzhou" 
										}
									},
									{ 
										"match": { 
											"table.name": "John" 
										}
									}
								]
							}
						}
					}
				},
				{
					"nested": {
						"path": "table.column",
						"query" : {
							"bool": {
								"must": [
									{ 
										"match": { 
											"table.column.address": "wuhan" 
										}
									},
									{ 
										"match": { 
											"table.column.name": "zhangsan" 
										}
									}
								 ]
							}

						}
					}
				}
			]
		}
	}
}

#实现类似"三表关联查询+条件过滤",查询cloumn匹配指定内容,或table匹配指定内容,或database匹配指定内容的文档
POST http://localhost:9200/test_nested/_search
{
	"query" : {
		"bool": {
			"should": [ 
				{
					"nested": {
						"path": "table",
						"query": {
							"bool": {
								"must": [
									{ 
										"match": { 
											"table.address": "hangzhou" 
										}
									},
									{ 
										"match": { 
											"table.name": "John" 
										}
									}
								]
							}
						}
					}
				},
				{
					"nested": {
						"path": "table.column",
						"query" : {
							"bool": {
								"must": [
									{ 
										"match": { 
											"table.column.address": "hangzhou" 
										}
									},
									{ 
										"match": { 
											"table.column.name": "John" 
										}
									}
								 ]
							}

						}
					}
				},
				{
					"match" :{
						"name":"hangzhou"
					}
				}
			]
		}
	}
}

父子文档查询实例

#创建索引元数据
put http://localhost:9200/metadata1
{
  "mappings": {
    "properties": {
      "my_join_field": {
        "type": "join",
        "relations": {
          "database": ["table"],
          "table": ["column"]
        }
      }
    }
  }
}

#创建1个父文档
put http://localhost:9200/metadata1/_doc/1
{
  "database_id": "1",
  "des": "This is a database!",
  "name":"zhangsan",
  "address":"hangzhou",
  "my_join_field": {
    "name": "database" 
  }
}

#创建1个子文档
put http://localhost:9200/metadata1/_doc/2?routing=1
{
  "table_id": "1",
  "des": "This is a table!",
  "name":"lisi",
  "address":"hangzhou",
  "my_join_field": {
    "name": "table",
	"parent":1
  }
}

#创建1个孙子文档
put http://localhost:9200/metadata1/_doc/3?routing=2
{
  "column_id": "1",
  "des": "This is a column!",
  "name":"wangwu",
  "address":"hangzhou",
  "my_join_field": {
    "name": "column",
	"parent":2
  }
}

#创建1个孙子文档
put http://localhost:9200/metadata1/_doc/4?routing=2
{
  "column_id": "2",
  "des": "This is a column!",
  "name":"hangzhou",
  "address":"zhengzhou",
  "my_join_field": {
    "name": "column",
	"parent":2
  }
}

#创建1个孙子文档,用于验证查询内容默认分词了
put http://localhost:9200/metadata1/_doc/5?routing=2
{
  "column_id": "3",
  "des": "This is a column!",
  "name":"hangzhouren",
  "address":"hangzhou city",
  "my_join_field": {
    "name": "column",
	"parent":2
  }
}

#分页查询某个字段(查询范围包括父,子,孙子文档)
post http://localhost:9200/metadata1/_search
{
  "query" : {
		"match": {
			"address" : "hangzhou"
		}
	},
	"from" : 1,
	"size" : 1
}

#term 批量查询
post http://localhost:9200/metadata1/_search
{
	"query": {
		"terms" : {
			"address":["hangzhou pro","zhengzhou"]
		}
	}
}

#查询具备满足匹配内容的孙子文档的子文档
post http://localhost:9200/metadata1/_search
{
  "query": {
    "has_child": {
      "type": "column",
      "query" : {
			"match": {
				"address" : "hangzhou"
			}
		}
    }
  }
}

#查询具备满足匹配内容的子文档的父文档
post http://localhost:9200/metadata1/_search
{
  "query": {
    "has_child": {
      "type": "table",
      "query" : {
			"match": {
				"address" : "hangzhou"
			}
		}
    }
  }
}

#查询具备满足匹配内容的孙子文档的父文档
post http://localhost:9200/metadata1/_search
{
  "query": {
    "has_child": {
      "type": "table",
      "query" : {
			"has_child": {
				"type": "column",
				"query" : {
					"multi_match": {
						"query" : "hangzhou",
						"fields":["address","name"]
					}
				}
			}
		}
    }
  }
}

#bool查询满足条件孙子文档的父文档,和满足条件子文档的父文档
post http://localhost:9200/metadata1/_search
{
	"query": {
		"bool": {
			"should": [
				{
					"has_child": {
					  "type": "table",
					  "query" : {
							"has_child": {
								"type": "column",
								"query" : {
									"multi_match": {
										"query" : "hangzhou",
										"fields":["address","name"]
									}
								}
							}
						}
					}
				},
				{
					"has_child": {
						"type": "table",
						"query" : {
							"multi_match": {
								"query" : "hangzhou",
								"fields":["address","name"]
							}
						}
					}
				}
			]
		}
	}	
	 
}

#查询满足条件子文档的父文档的子文档,即子文档本身;如果父,子,孙文档的文档字段名称不同,就不用这么麻烦的查询
post http://localhost:9200/metadata1/_search
{
	"query": {
		"has_parent": {
			"parent_type": "database",
			"query" : {
				"has_child": {
					"type": "table",
					"query" : {
						"multi_match": {
							"query" : "hangzhou",
							"fields":["address","name"]
						}
					}
				}
			}

		}
	}	
	 
}


#以下两条查询可以类似实现"三表关联查询+条件过滤"的功能
#先查询满足条件匹配的父文档的子文档,满足条件匹配孙子文档的子文档和满足条件匹配的子文档
post http://localhost:9200/metadata1/_search
{
	"query": {
		"bool": {
			"should": [
				{
					"has_parent": {
					  "parent_type": "database",
							"query" : {
								"multi_match": {
									"query" : "hangzhou",
									"fields":["address","name"]
								}
							}
					}
				},
				{
					"has_child": {
						"type": "column",
						"query" : {
							"multi_match": {
								"query" : "hangzhou",
								"fields":["address","name"]
							}
						}
					}
				},
				{
					"has_parent": {
						"parent_type": "database",
						"query" : {
							"has_child": {
								"type": "table",
								"query" : {
									"multi_match": {
										"query" : "hangzhou",
										"fields":["address","name"]
									}
								}
							}
						}

					}
				}
			]
		}
	}	
	 
}

#根据上面的子文档查询关联的父文档和孙子文档,然后再在程序里进行数据关联组装
post http://localhost:9200/metadata1/_search
{
	"query": {
		"bool": {
			"should": [
				{
					"has_parent": {
					  "parent_type": "table",
							"query" : {
								"ids": {
									"values" : [2]
								}
							}
					}
				},
				{
					"has_child": {
						"type": "table",
						"query" : {
							"ids": {
									"values" : [2]
								}
						}
					}
				}
			]
		}
	}	
	 
}


你可能感兴趣的:(elasticsearch,大数据,搜索引擎)