数据血缘可视化这块目前比较优秀的就是SQLFlow(马哈鱼数据血缘分析器)了。我们可以从SQLFlow的json格式中学习数据血缘json格式设计。
其他设计参考:
如何获取SQLFlow的json?在可视化画布上右键–》Download as json
以SQLFlow带的的 Hive sample sql 为例。
SELECT pageid, adid
FROM pageAds LATERAL VIEW explode(adid_list) adTable AS adid;
为了观感我们一段一段的讲,文章末尾有完整json。
json中血缘关系定义在sqlflow这个对象中,有一个字段两个数组
"sqlflow": {
"dbvendor": "dbvhive",
"dbobjs": [],
"relations": []
}
dbobjs数组存着多个节点对象,节点对象结果如下。每个节点对象内有这个表的所有列和坐标。
{
"id": "2",
"name": "pageAds",
"type": "table",
"columns": [
{
"id": "9",
"name": "adid_list",
"coordinates": [
{
"x": 3,
"y": 35,
"hashCode": "0"
},
{
"x": 3,
"y": 44,
"hashCode": "0"
}
]
},
{
"id": "13",
"name": "pageid",
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 14,
"hashCode": "0"
}
]
}
],
"coordinates": [
{
"x": 3,
"y": 6,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
}
json中表和列的二元坐标组都有这样方便前端渲染关系图。表和字段有各自编号方便后面关系定位自己的父节点
relations里的每个关系对象有最重要的 target和sources字段。从relations里我们也可以发现sqlflow的json格式和hive利用hook被动采集的字段级血缘关系的日志格式基本相似。
{
"id": "101",
//sqlflow自己定义的几种关系
"type": "fdd",
"effectType": "select",
"target": {
"id": "14",
"column": "adid",
"parentId": "11",
"parentName": "RS-1",
"coordinates": [
{
"x": 2,
"y": 16,
"hashCode": "0"
},
{
"x": 2,
"y": 20,
"hashCode": "0"
}
]
},
"sources": [
{
"id": "8",
"column": "adid",
"parentId": "7",
"parentName": "adTable",
"coordinates": [
{
"x": 3,
"y": 57,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
}
]
}
我们可以依据上文sqlflow的json格式设计我们自己的血缘json格式,
sqlflow的完整json
{
"code": 200,
"data": {
"mode": "global",
"summary": {
"schema": 0,
"process": 0,
"database": 0,
"view": 0,
"mostRelationTables": [
{
"table": "PAGEADS"
},
{
"table": "ADTABLE"
}
],
"column": 3,
"table": 2,
"relation": 3
},
"sqlflow": {
"dbvendor": "dbvhive",
"dbobjs": [
{
"id": "2",
"name": "pageAds",
"type": "table",
"columns": [
{
"id": "9",
"name": "adid_list",
"coordinates": [
{
"x": 3,
"y": 35,
"hashCode": "0"
},
{
"x": 3,
"y": 44,
"hashCode": "0"
}
]
},
{
"id": "13",
"name": "pageid",
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 14,
"hashCode": "0"
}
]
}
],
"coordinates": [
{
"x": 3,
"y": 6,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
},
{
"id": "7",
"name": "adTable",
"type": "table",
"columns": [
{
"id": "8",
"name": "adid",
"coordinates": [
{
"x": 3,
"y": 57,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
}
],
"coordinates": [
{
"x": 3,
"y": 46,
"hashCode": "0"
},
{
"x": 3,
"y": 53,
"hashCode": "0"
}
]
},
{
"id": "11",
"name": "RS-1",
"type": "select_list",
"columns": [
{
"id": "12",
"name": "pageid",
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 14,
"hashCode": "0"
}
]
},
{
"id": "14",
"name": "adid",
"coordinates": [
{
"x": 2,
"y": 16,
"hashCode": "0"
},
{
"x": 2,
"y": 20,
"hashCode": "0"
}
]
}
],
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 20,
"hashCode": "0"
}
]
}
],
"relations": [
{
"id": "101",
"type": "fdd",
"effectType": "select",
"target": {
"id": "14",
"column": "adid",
"parentId": "11",
"parentName": "RS-1",
"coordinates": [
{
"x": 2,
"y": 16,
"hashCode": "0"
},
{
"x": 2,
"y": 20,
"hashCode": "0"
}
]
},
"sources": [
{
"id": "8",
"column": "adid",
"parentId": "7",
"parentName": "adTable",
"coordinates": [
{
"x": 3,
"y": 57,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
}
]
},
{
"id": "102",
"type": "fdd",
"effectType": "select",
"target": {
"id": "12",
"column": "pageid",
"parentId": "11",
"parentName": "RS-1",
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 14,
"hashCode": "0"
}
]
},
"sources": [
{
"id": "13",
"column": "pageid",
"parentId": "2",
"parentName": "pageAds",
"coordinates": [
{
"x": 2,
"y": 8,
"hashCode": "0"
},
{
"x": 2,
"y": 14,
"hashCode": "0"
}
]
}
]
},
{
"id": "103",
"type": "fdd",
"effectType": "select",
"target": {
"id": "8",
"column": "adid",
"parentId": "7",
"parentName": "adTable",
"coordinates": [
{
"x": 3,
"y": 57,
"hashCode": "0"
},
{
"x": 3,
"y": 61,
"hashCode": "0"
}
]
},
"sources": [
{
"id": "9",
"column": "adid_list",
"parentId": "2",
"parentName": "pageAds",
"coordinates": [
{
"x": 3,
"y": 35,
"hashCode": "0"
},
{
"x": 3,
"y": 44,
"hashCode": "0"
}
]
}
]
}
]
},
"graph": {
"elements": {
"tables": [
{
"columns": [
{
"height": 16,
"id": "n0::n0",
"label": {
"content": "pageid",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 13.96875,
"width": 59,
"x": 0,
"y": 0
},
"width": 160,
"x": 445,
"y": -59.49261
},
{
"height": 16,
"id": "n0::n1",
"label": {
"content": "adid",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 13.96875,
"width": 43,
"x": 0,
"y": 0
},
"width": 160,
"x": 445,
"y": -43.49261
}
],
"height": 57.96875,
"id": "n0",
"label": {
"content": "RS-1",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 17.96875,
"width": 162,
"x": 0,
"y": 0
},
"width": 162,
"x": 444,
"y": -81.46136
},
{
"columns": [
{
"height": 16,
"id": "n1::n0",
"label": {
"content": "adid_list",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 13.96875,
"width": 67,
"x": 0,
"y": 0
},
"width": 160,
"x": 1,
"y": -59.49261
},
{
"height": 16,
"id": "n1::n1",
"label": {
"content": "pageid",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 13.96875,
"width": 59,
"x": 0,
"y": 0
},
"width": 160,
"x": 1,
"y": -43.49261
}
],
"height": 57.96875,
"id": "n1",
"label": {
"content": "pageAds",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 17.96875,
"width": 162,
"x": 0,
"y": 0
},
"width": 162,
"x": 0,
"y": -81.46136
},
{
"columns": [
{
"height": 16,
"id": "n2::n0",
"label": {
"content": "adid",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 13.96875,
"width": 43,
"x": 0,
"y": 0
},
"width": 160,
"x": 223,
"y": -30.000149
}
],
"height": 41.96875,
"id": "n2",
"label": {
"content": "adTable",
"fontFamily": "Segoe UI Symbol",
"fontSize": "12",
"height": 17.96875,
"width": 162,
"x": 0,
"y": 0
},
"width": 162,
"x": 222,
"y": -51.9689
}
],
"edges": [
{
"id": "e0",
"sourceId": "n1::n0",
"targetId": "n2::n0"
},
{
"id": "e1",
"sourceId": "n2::n0",
"targetId": "n0::n1"
},
{
"id": "e2",
"sourceId": "n1::n1",
"targetId": "n0::n0"
}
]
},
"tooltip": {
},
"relationIdMap": {
"e0": "fdd",
"e1": "fdd",
"e2": "fdd"
},
"listIdMap": {
"n0": [
"11"
],
"n0::n0": [
"12"
],
"n0::n1": [
"14"
],
"n1": [
"2"
],
"n1::n0": [
"9"
],
"n1::n1": [
"13"
],
"n2": [
"7"
],
"n2::n0": [
"8"
]
}
}
},
"sessionId": "a91895aadcb9d674e7d3edd5ddd12d4fefcda4ff797881d3b4823fe1ec9ff367_1633745113743"
}