在大数据场景下,离线集群每天都要运行大量任务,以支持产品、运营的分析查询。随着任务越来越多,任务之间的依赖关系也越来越复杂:每个任务都必须等它所依赖的输入(input)表生产出来之后,才能生产自己的输出(output)表。起初,这些依赖关系可以由管理员人工维护;但随着任务量增大,就需要一个分析工具来解析SQL的血缘关系,并自动为任务配置对血缘表的依赖。
本文介绍一种使用druid parser解析SQL血缘关系的方法。
一、SQL血缘关系含义
这里所说的SQL血缘关系,是指一条SQL语句所涉及(读取或写入)的全部表;解析SQL血缘关系,就是将sql中包含的表全部输出。
例:如下sql,需要解析出该sql包含supindb.student、supindb.college两张表。
// Example query: two sub-selects joined on uid. Lineage parsing of this
// statement should report the tables supindb.student and supindb.college.
String sql = "select * from " +
"(select * from supindb.student d where dt='20190202')a " +
"left join " +
"(select * from supindb.college c where dt='20190202')b " +
" on a.uid=b.uid " +
"where a.uid > 0";
二、SQL血缘关系解析实现
SQL血缘关系解析pom依赖
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>druid</artifactId>
    <version>1.0.14</version>
</dependency>
SQL血缘关系解析代码
SQL血缘关系解析代码仅解析select、update、insert、delete四种sql,其他种类,可依据需求自行添加相应逻辑。
public class Parent {
public static Map> getFromTo (String sql) throws ParserException {
Map> result = new HashMap>();
List stmts = SQLUtils.parseStatements(sql, JdbcConstants.HIVE);
TreeSet selectSet = new TreeSet();
TreeSet updateSet = new TreeSet();
TreeSet insertSet = new TreeSet();
TreeSet deleteSet = new TreeSet();
if (stmts == null) {
return null;
}
String database = "DEFAULT";
for (SQLStatement stmt : stmts) {
SchemaStatVisitor statVisitor = SQLUtils.createSchemaStatVisitor(stmts,JdbcConstants.HIVE);
if (stmt instanceof SQLUseStatement) {
database = ((SQLUseStatement) stmt).getDatabase().getSimpleName();
}
stmt.accept(statVisitor);
Map tables = statVisitor.getTables();
if (tables != null) {
final String db = database;
for (Map.Entry table : tables.entrySet()) {
TableStat.Name tableName = table.getKey();
TableStat stat = table.getValue();
if (stat.getCreateCount() > 0 || stat.getInsertCount() > 0) { //create
String insert = tableName.getName();
if (!insert.contains("."))
insert = db + "." + insert;
insertSet.add(insert);
} else if (stat.getSelectCount() > 0) { //select
String select = tableName.getName();
if (!select.contains("."))
select = db + "." + select;
selectSet.add(select);
}else if (stat.getUpdateCount() > 0 ) { //update
String update = tableName.getName();
if (!update.contains("."))
update = db + "." + update;
updateSet.add(update);
}else if (stat.getDeleteCount() > 0) { //delete
String delete = tableName.getName();
if (!delete.contains("."))
delete = db + "." + delete;
deleteSet.add(delete);
}
}
}
}
result.put("select",selectSet);
result.put("insert",insertSet);
result.put("update",updateSet);
result.put("delete",deleteSet);
return result;
}
public static void main(String[] args) {
String sql = "select * from " +
"(select * from supindb.student d where dt='20190202')a " +
"left join " +
"(select * from supindb.college c where dt='20190202')b " +
" on a.uid=b.uid " +
"where a.uid > 0";
//sql = "update supindb.college set uid='22333' where name='小明'";
//sql = "delete from supindb.college where uid= '22223333'";
Map> getfrom = getFromTo(sql);
for (Map.Entry> entry : getfrom.entrySet()){
System.out.println("================");
System.out.println("key=" + entry.getKey());
for (String table : entry.getValue()){
System.out.println(table);
}
}
}
}
运行结果
image.png