python解析sql字段血缘_SQL血缘关系分析

在大数据场景下,离线集群每天都要运行大量任务,以支持产品、运营的分析查询。任务越来越多,任务之间的依赖关系也随之增多:每一个任务都要等它所需的input表生产出来之后,才能去生产自己的output表。最开始,依赖关系可以由管理员手工维护;随着任务量加大,就需要一个分析工具来解析SQL的血缘关系,并根据解析出的血缘表自动建立任务依赖。

本文介绍如何使用Druid自带的SQL parser来解析SQL的血缘关系。

一、SQL血缘关系含义

本文所说的SQL血缘关系,是指一条SQL中涉及的全部表;解析血缘关系,就是把这些表全部找出来并输出。

例:如下sql,需要解析出该sql包含supindb.student、supindb.college两张表。

// Example query: a left join of two subqueries that read supindb.student and supindb.college.
String sql = "select * from " +

"(select * from supindb.student d where dt='20190202')a " +

"left join " +

"(select * from supindb.college c where dt='20190202')b " +

" on a.uid=b.uid " +

"where a.uid > 0";

二、SQL血缘关系解析实现

SQL血缘关系解析pom依赖

<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>druid</artifactId>
    <version>1.0.14</version>
</dependency>

SQL血缘关系解析代码

下面的代码仅解析select、update、insert、delete四种SQL(create语句的目标表归入insert一类);其他种类的语句,可依据需求自行添加相应逻辑。

public class Parent {

public static Map> getFromTo (String sql) throws ParserException {

Map> result = new HashMap>();

List stmts = SQLUtils.parseStatements(sql, JdbcConstants.HIVE);

TreeSet selectSet = new TreeSet();

TreeSet updateSet = new TreeSet();

TreeSet insertSet = new TreeSet();

TreeSet deleteSet = new TreeSet();

if (stmts == null) {

return null;

}

String database = "DEFAULT";

for (SQLStatement stmt : stmts) {

SchemaStatVisitor statVisitor = SQLUtils.createSchemaStatVisitor(stmts,JdbcConstants.HIVE);

if (stmt instanceof SQLUseStatement) {

database = ((SQLUseStatement) stmt).getDatabase().getSimpleName();

}

stmt.accept(statVisitor);

Map tables = statVisitor.getTables();

if (tables != null) {

final String db = database;

for (Map.Entry table : tables.entrySet()) {

TableStat.Name tableName = table.getKey();

TableStat stat = table.getValue();

if (stat.getCreateCount() > 0 || stat.getInsertCount() > 0) { //create

String insert = tableName.getName();

if (!insert.contains("."))

insert = db + "." + insert;

insertSet.add(insert);

} else if (stat.getSelectCount() > 0) { //select

String select = tableName.getName();

if (!select.contains("."))

select = db + "." + select;

selectSet.add(select);

}else if (stat.getUpdateCount() > 0 ) { //update

String update = tableName.getName();

if (!update.contains("."))

update = db + "." + update;

updateSet.add(update);

}else if (stat.getDeleteCount() > 0) { //delete

String delete = tableName.getName();

if (!delete.contains("."))

delete = db + "." + delete;

deleteSet.add(delete);

}

}

}

}

result.put("select",selectSet);

result.put("insert",insertSet);

result.put("update",updateSet);

result.put("delete",deleteSet);

return result;

}

public static void main(String[] args) {

String sql = "select * from " +

"(select * from supindb.student d where dt='20190202')a " +

"left join " +

"(select * from supindb.college c where dt='20190202')b " +

" on a.uid=b.uid " +

"where a.uid > 0";

//sql = "update supindb.college set uid='22333' where name='小明'";

//sql = "delete from supindb.college where uid= '22223333'";

Map> getfrom = getFromTo(sql);

for (Map.Entry> entry : getfrom.entrySet()){

System.out.println("================");

System.out.println("key=" + entry.getKey());

for (String table : entry.getValue()){

System.out.println(table);

}

}

}

}

运行结果

image.png

你可能感兴趣的:(python解析sql字段血缘)