Description: this SQL-parsing utility can extract a statement's syntax, its databases/tables, and its conditions. The example below only demonstrates extracting databases and tables; the other kinds of extraction require further study of the parsing rules.
Usage:
1. Maven dependencies
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-common</artifactId>
</dependency>
<dependency>
    <groupId>org.antlr</groupId>
    <artifactId>antlr-runtime</artifactId>
    <version>3.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.2.0</version>
</dependency>
2. The parser class
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.Stack;
import com.alibaba.fastjson.JSONObject;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.*;
/**
 * Purpose: extract from the AST the tables and columns involved, together with the
 * operation performed on them (e.g. SELECT, INSERT).
 * Focus: the tables and columns touched by SELECT operations; other operations are
 * only resolved down to the table level.
 * Approach: depth-first traversal of the AST (see the sketch below the Oper enum).
 * When an operation token is encountered, the current operation is recorded; when
 * TOK_TAB, TOK_TABREF or TOK_ALTERTABLE is encountered, the table of the current
 * operation is resolved; when a subquery is encountered, the current state is pushed
 * onto a stack, the subquery is processed, and the state is popped afterwards.
 */
public class HiveParseUtil {
private final Log log = LogFactory.getLog(HiveParseUtil.class);
private final String UNKNOWN = "UNKNOWN";
private String defaultDbName;
    private Set<String> tables = new HashSet<>();
    private Stack<String> tableNameStack = new Stack<>();
    private Stack<Oper> operStack = new Stack<>();
    // Fuzzily defined: a set of the tables of the current query/from node would be cleaner,
    // since more than one table may be under query at a time.
    private String nowQueryTable = "";
private Oper oper;
private boolean joinClause = false;
private enum Oper {
SELECT, INSERT, DROP, TRUNCATE, LOAD, CREATE, ALTER, USE
}
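    /*
     * For orientation, an assumed sketch (abridged; the exact shape can vary across Hive
     * versions) of the AST HiveParser produces for "select * from zpc1":
     *
     *   TOK_QUERY
     *   ├── TOK_FROM
     *   │     └── TOK_TABREF
     *   │           └── TOK_TABNAME
     *   │                 └── zpc1
     *   └── TOK_INSERT
     *         ├── TOK_DESTINATION -> TOK_DIR -> TOK_TMP_FILE
     *         └── TOK_SELECT -> TOK_SELEXPR -> TOK_ALLCOLREF
     *
     * The traversal below keys off exactly these token types.
     */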
    public Set<String> parseIteral(ASTNode ast) {
        Set<String> set = new HashSet<>(); // the tables referenced by the current query
prepareToParseCurrentNodeAndChilds(ast);
set.addAll(parseChildNodes(ast));
set.addAll(parseCurrentNode(ast, set));
endParseCurrentNode(ast);
return set;
}
private void endParseCurrentNode(ASTNode ast) {
if (ast.getToken() != null) {
            switch (ast.getToken().getType()) { // end of a join clause: leave the join state
case HiveParser.TOK_RIGHTOUTERJOIN:
case HiveParser.TOK_LEFTOUTERJOIN:
case HiveParser.TOK_JOIN:
joinClause = false;
break;
case HiveParser.TOK_QUERY:
case HiveParser.TOK_INSERT:
case HiveParser.TOK_SELECT:
nowQueryTable = tableNameStack.pop();
oper = operStack.pop();
break;
}
}
}
    private Set<String> parseCurrentNode(ASTNode ast, Set<String> set) {
if (ast.getToken() != null) {
switch (ast.getToken().getType()) {
case HiveParser.TOK_TABLE_PARTITION:
if (ast.getChildCount() != 2) {
String table = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = table;
}
tables.add(table + "\t" + oper);
}
break;
case HiveParser.TOK_TAB:// outputTable
String tableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = tableTab;
}
tables.add(tableTab + "\t" + oper);
break;
case HiveParser.TOK_TABREF:// inputTable
                    ASTNode tabTree = (ASTNode) ast.getChild(0);
                    String tableName = (tabTree.getChildCount() == 1)
                            ? BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0))
                            : BaseSemanticAnalyzer.getUnescapedName((ASTNode) tabTree.getChild(0))
                                    + "." + tabTree.getChild(1);
if (oper == Oper.SELECT) {
if (joinClause && !"".equals(nowQueryTable)) {
                            nowQueryTable += "&" + tableName; // concatenate the tables joined in this query
} else {
nowQueryTable = tableName;
}
set.add(tableName);
}
tables.add(tableName + "\t" + oper);
break;
// case HiveParser.TOK_ALTERTABLE_ADDCOLS:
// ASTNode alterTableName = (ASTNode) ast.getChild(0);
// tables.add(alterTableName.getText() + "\t" + oper);
// break;
case HiveParser.TOK_SWITCHDATABASE:
ASTNode dbName = (ASTNode) ast.getChild(0);
defaultDbName = dbName.getText();
break;
case HiveParser.TOK_ALTERTABLE:
String alterTableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = alterTableTab;
}
tables.add(alterTableTab + "\t" + oper);
break;
case HiveParser.TOK_DROPTABLE:
String dropTableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = dropTableTab;
}
tables.add(dropTableTab + "\t" + oper);
break;
case HiveParser.TOK_CREATETABLE:
String createTableTab = BaseSemanticAnalyzer
.getUnescapedName((ASTNode) ast.getChild(0));
if (oper == Oper.SELECT) {
nowQueryTable = createTableTab;
}
tables.add(createTableTab + "\t" + oper);
}
}
return set;
}
    private Set<String> parseChildNodes(ASTNode ast) {
        Set<String> set = new HashSet<>();
int numCh = ast.getChildCount();
if (numCh > 0) {
for (int num = 0; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
set.addAll(parseIteral(child));
}
}
return set;
}
private void prepareToParseCurrentNodeAndChilds(ASTNode ast) {
if (ast.getToken() != null) {
            switch (ast.getToken().getType()) { // start of a join clause
case HiveParser.TOK_RIGHTOUTERJOIN:
case HiveParser.TOK_LEFTOUTERJOIN:
case HiveParser.TOK_JOIN:
joinClause = true;
break;
case HiveParser.TOK_QUERY:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
                    nowQueryTable = ""; // reset; see sql22 in main() for the motivating case
oper = Oper.SELECT;
break;
case HiveParser.TOK_INSERT:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
oper = Oper.INSERT;
break;
case HiveParser.TOK_SELECT:
tableNameStack.push(nowQueryTable);
operStack.push(oper);
oper = Oper.SELECT;
break;
case HiveParser.TOK_DROPTABLE:
oper = Oper.DROP;
break;
case HiveParser.TOK_TRUNCATETABLE:
oper = Oper.TRUNCATE;
break;
case HiveParser.TOK_LOAD:
oper = Oper.LOAD;
break;
case HiveParser.TOK_CREATETABLE:
oper = Oper.CREATE;
break;
case HiveParser.TOK_SWITCHDATABASE:
oper = Oper.USE;
break;
                case HiveParser.TOK_SET_AUTOCOMMIT:
                    break; // no-op
            }
if (ast.getToken() != null
&& ast.getToken().getType() >= HiveParser.TOK_ALTERDATABASE_PROPERTIES
&& ast.getToken().getType() <= HiveParser.TOK_ALTERVIEW_RENAME) {
oper = Oper.ALTER;
}
}
}
    // Strips one pair of surrounding backticks: unescapeIdentifier("`tbl`") returns "tbl";
    // unquoted names pass through unchanged.
    public String unescapeIdentifier(String val) {
        if (val == null || val.isEmpty()) {
            return val;
        }
if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
val = val.substring(1, val.length() - 1);
}
return val;
}
    public JSONObject parse(String parseSql) throws ParseException, RecognitionException {
        log.warn("parsesql=====================" + parseSql);
        tables = new HashSet<>();
        // the result object
        JSONObject jsonObject = new JSONObject();
        // the main operation is the first keyword of the statement
        String[] mainOperateArray = parseSql.split("\\s+");
        String mainOperate = "";
        if (mainOperateArray.length > 0) {
            mainOperate = mainOperateArray[0].toUpperCase();
        }
HiveLexer lexer = new HiveLexer(new ANTLRNoCaseStringStream(parseSql));
        // HiveConf: do not treat SQL11 reserved words as keywords; disable quoted-identifier handling
HiveConf conf = new HiveConf();
conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS, false);
conf.setVar(HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT, "none");
lexer.setHiveConf(conf);
TokenRewriteStream tokens = new TokenRewriteStream(lexer);
        tokens.getNumberOfOnChannelTokens(); // forces the stream to tokenize the whole input
HiveParser parser = new HiveParser(tokens);
parser.setTreeAdaptor(ParseDriver.adaptor);
        // parse the SQL into an AST
// ParseDriver pd = new ParseDriver();
ASTNode ast = (ASTNode) parser.statement().getTree();
parseIteral(ast);
        // keep the table whose recorded operation matches the main operation
        if (!tables.isEmpty()) {
for (String table : tables) {
String[] tableArray = table.split("\t");
if (tableArray.length == 2) {
String rTable = tableArray[0];
String ope = tableArray[1];
if (ope.equals(mainOperate)) {
if (rTable.contains(".")) {
String[] rTables = rTable.split("\\.");
if (rTables.length == 2) {
jsonObject.put("dbName", rTables[0]);
jsonObject.put("tableName", rTables[1]);
}
} else {
jsonObject.put("tableName", rTable);
}
}
}
}
}
return jsonObject;
}
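    /*
     * Worked example (a sketch of the expected values, using sql26 from main() below): for
     *   insert into app.lhhtest1 select run_times,fail_times from app.lhy_oozie_partitions_test a
     *     left join bdm.table1 c on a.id=c.id
     * the traversal collects "app.lhy_oozie_partitions_test\tSELECT", "bdm.table1\tSELECT" and
     * "app.lhhtest1\tINSERT", and since the statement's first keyword is INSERT, parse() returns
     *   {"dbName":"app","tableName":"lhhtest1"}
     */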
public String getDefaultDbName(String parseSql) throws ParseException {
ParseDriver pd = new ParseDriver();
ASTNode ast = pd.parse(parseSql);
parseIteral(ast);
return defaultDbName;
}
public static void main(String[] args) throws IOException, ParseException,
SemanticException, RecognitionException {
// HiveConf conf = new HiveConf();
String sql1 = "Select * from zpc1";
String sql2 = "Select name,ip from zpc2 bieming where age > 10 and area in (select area from city)";
String sql3 = "Select d.name,d.ip from (select * from zpc3 where age > 10 and area in (select area from city)) d";
String sql4 = "create table zpc(id string, name string)";
String sql5 = "insert overwrite table tmp.tmp1 PARTITION (partitionkey='2008-08-15') select * from tmp";
String sql6 = "FROM ( SELECT p.datekey datekey, p.userid userid, c.clienttype FROM detail.usersequence_client c JOIN fact.orderpayment p ON p.orderid = c.orderid "
+ " JOIN default.user du ON du.userid = p.userid WHERE p.datekey = 20131118 ) base INSERT OVERWRITE TABLE `test`.`customer_kpi` SELECT base.datekey, "
+ " base.clienttype, count(distinct base.userid) buyer_count GROUP BY base.datekey, base.clienttype";
String sql7 = "SELECT id, value FROM (SELECT id, value FROM p1 UNION ALL SELECT 4 AS id, 5 AS value FROM p1 limit 1) u";
String sql8 = "select dd from(select id+1 dd from zpc) d";
String sql9 = "select dd+1 from(select id+1 dd from zpc) d";
String sql10 = "truncate table zpc";
String sql11 = "drop table zpc";
String sql12 = "select * from tablename where unix_timestamp(cz_time) > unix_timestamp('2050-12-31 15:32:28')";
String sql15 = "alter table old_table_name RENAME TO new_table_name";
String sql16 = "select statis_date,time_interval,gds_cd,gds_nm,sale_cnt,discount_amt,discount_rate,price,etl_time,pay_amt from o2ostore.tdm_gds_monitor_rt where time_interval = from_unixtime(unix_timestamp(concat(regexp_replace(from_unixtime(unix_timestamp('201506181700', 'yyyyMMddHHmm')+ 84600 , 'yyyy-MM-dd HH:mm'),'-| |:',''),'00'),'yyyyMMddHHmmss'),'yyyy-MM-dd HH:mm:ss')";
String sql13 = "INSERT OVERWRITE TABLE u_data_new SELECT TRANSFORM (userid, movieid, rating, unixtime) USING 'python weekday_mapper.py' AS (userid, movieid, rating, weekday) FROM u_data";
String sql14 = "SELECT a.* FROM a JOIN b ON (a.id = b.id AND a.department = b.department)";
String sql17 = "LOAD DATA LOCAL INPATH \"/opt/data/1.txt\" OVERWRITE INTO TABLE table1";
String sql18 = "CREATE TABLE table1 ( column1 STRING COMMENT 'comment1', column2 INT COMMENT 'comment2' )";
String sql19 = "ALTER TABLE events RENAME TO 3koobecaf";
String sql20 = "ALTER TABLE invites ADD COLUMNS (new_col2 INT COMMENT 'a comment')";
String sql21 = "alter table mp add partition (b='1', c='1')";
String sql22 = "select login.uid from login day_login left outer join (select uid from regusers where dt='20130101') day_regusers on day_login.uid=day_regusers.uid where day_login.dt='20130101' and day_regusers.uid is null";
String sql23 = "select name from (select * from zpc left outer join def) d";
String sql24 = "set mapred.reduce.tasks = 15";
String sql25 = "CREATE EXTERNAL TABLE app_bp.app_xtl_sku_profile_sale_base1 ( " +
" item_sku_id bigint COMMENT '商品id', " +
" sale_num bigint COMMENT '销售数量', " +
" sale_amount double COMMENT '销售金额', " +
" order_num bigint COMMENT '订单量', " +
" promotion_sale_num bigint COMMENT '促销销售数量', " +
" promotion_sale_amount double COMMENT '促销销售金额', " +
" promotion_order_num bigint COMMENT '促销订单量') " +
"COMMENT '新通路_商品画像_销售基础数据表' " +
"PARTITIONED BY ( " +
" dt string) " +
"ROW FORMAT DELIMITED " +
" FIELDS TERMINATED BY '\t' " +
"STORED AS ORC " +
"TBLPROPERTIES ( " +
" 'orc.compress'='SNAPPY') ";
String sql26 = "insert into app.lhhtest1 select run_times,fail_times from app.lhy_oozie_partitions_test a left join bdm.table1 c on a.id=c.id";
String sql27 = "use app";
String sql28 = "alter table dev.cqq_employee_ext_par drop partition(month='201805')";
String sql29 = "alter table dev.cqq_employee_ext_par RENAME TO 3koobecaf";
String sql30 = "drop table zpc.test";
String sql31 = "-- d;\n" +
"-- d;\n" +
" drop table dev.cqq_employee_ext_par";
        // strip "--" line comments and control whitespace before parsing
        sql31 = sql31.replaceAll("--.*", "");
        sql31 = sql31.trim().replaceAll("\n|\t|\r", "");
String sql32 = "insert into dev.cqq_employee_ext_par values(${pa1},'345',${pa1},${pa1})";
        // substitute ${...} placeholders with a literal so the statement becomes parseable
        sql32 = sql32.replaceAll("\\$\\{.*?\\}", "123456");
String sql33 = "INSERT overwrite table adm_test.adm_w08_log_bdpsta_offline_userinfo " +
"select user_log_acct, count(1) as PV from adm.adm_w08_log_bdp_log";
String sql34 = "INSERT overwrite table\n" +
" adm_test.adm_w08_log_bdpsta_offline_userinfo \n" +
"select\n" +
"\tuser_log_acct,\n" +
"\tcount(1) as PV\n, \"asdasd\" " +
"from\n" +
"\tadm.adm_w08_log_bdp_log";
String sql51 = "INSERT overwrite\n" +
" table adm_test.adm_w08_log_bdpsta_all_userinfo partition\n" +
" (\n" +
" dt = '${dt}'\n" +
" )\n" +
" select\n" +
" user_log_acct,\n" +
" count(1) as PV\n" +
" from\n" +
" adm.adm_w08_log_bdp_log\n" +
" where\n" +
" dt = '${dt}'\n" +
" group by\n" +
" user_log_acct";
sql51 = sql51.replaceAll("\\$\\{.*?\\}", "123456");
String sql35 = "CREATE TABLE `app.test_data_cal_sql_create_table_partition`" +
"(`shop_id` int," +
"`shopname` string)" +
"PARTITIONED BY" +
"(`dt` string)" +
"ROW FORMAT DELIMITED" +
" FIELDS TERMINATED BY '\t' " +
"STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" +
" OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'";
String sql36 = "CREATE TABLE `app.test_data_cal_sql_create_table_partition11`" +
"(`shop_id` int," +
"`shopname` string)" +
"PARTITIONED BY" +
"(`dt` string)" +
"ROW FORMAT DELIMITED" +
" FIELDS TERMINATED BY '\t' " +
"STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" +
" OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'";
String sql37 = "insert overwrite" +
" table adm_test.adm_w08_log_bdpsta_all_userinfo partition" +
"(" +
"dt = test" +
")" +
"select" +
"user_log_acct," +
"count(1) as PV" +
"from" +
"adm.adm_w08_log_bdp_log" +
"where" +
"dt = 'test'" +
"group by" +
"user_log_acct";
String sql43 = "CREATE TABLE IF NOT EXISTS dev.spark_team_wuguoxiao_spark_sql_success_rate \n" +
"STORED AS ORC\n" +
"AS\n" +
"select b.dt, abs(b.fail_logs / a.sum_logid - 1) as success_rate from (\n" +
" select a.dt,count(distinct a.logid) as sum_logid from fdm.fdm_spark_appinfo_di as a\n" +
" group by a.dt\n" +
") as a\n" +
"inner join (\n" +
" select a.dt,spark_sql_source,count(distinct a.logid) as fail_logs from (\n" +
" select a.dt,spark_sql_source,b.logid from fdm.fdm_spark_environmentinfo_di as a\n" +
" inner join fdm.fdm_spark_appinfo_di as b on a.appid = b.appid and a.appattemptid = b.appattemptid and a.dt = b.dt\n" +
" group by a.dt,spark_sql_source,b.logid\n" +
" ) as a\n" +
" inner join (\n" +
" select a.id from fdm.fdm_dispatch_1_d_task_run_log_new_chain as a\n" +
" inner join fdm.fdm_dispatch_1_d_task_chain as b\n" +
" on a.task_id = b.id\n" +
" where a.dp = 'ACTIVE' and b.dp = 'ACTIVE' and b.type = 'pyscript' and a.status = 'fail'\n" +
" union all\n" +
" select a.id from fdm.fdm_dispatch_1_b_run_log_chain as a\n" +
" where a.dp = 'ACTIVE' and run_status = 'fail' and a.instance_task_type='normal'\n" +
" ) as b\n" +
" on a.logid = b.id\n" +
" group by a.dt,spark_sql_source order by a.dt asc\n" +
") as b\n" +
"on a.dt = b.dt\n" +
"where b.dt >= '2020-01-01' and spark_sql_source = 'HiveTask'\n" +
"order by a.dt asc\n";
String sql44 = "drop table dev.spark_team_temp_sql_cluster_running_report;\n" +
"create table dev.spark_team_temp_sql_cluster_running_report\n" +
"select b.*, a.error_type_001\n" +
"from fdm.fdm_spark_appinfo_di as b\n" +
"left join (select log_id, error_type_001 from wangriyu_test.buffalo_log_analysis where error_type_001=2 group by log_id,error_type_001) as a on a.log_id = b.logId\n" +
"inner join fdm.fdm_spark_environmentinfo_di as c on b.appId = c.appId and b.appattemptId = c.appattemptId\n" +
"where b.dt >= '2019-11-01' and c.dt >= '2019-11-01' and c.spark_sql_source='HiveTask' and c.spark_submit_deployMode='cluster'\n";
        String parsesql = sql44; // same statement as sql44 above
System.out.println(parsesql);
HiveParseUtil hiveParseUtil = new HiveParseUtil();
JSONObject jsonObject = hiveParseUtil.parse(parsesql);
System.out.println(jsonObject.toString());
}
    /**
     * Case-insensitive character stream: Hive's lexer matches keywords in upper case,
     * so every character read is upper-cased on the fly.
     */
public class ANTLRNoCaseStringStream extends ANTLRStringStream {
public ANTLRNoCaseStringStream(String input) {
super(input);
}
@Override
public int LA(int i) {
int returnChar = super.LA(i);
if (returnChar == CharStream.EOF) {
return returnChar;
} else if (returnChar == 0) {
return returnChar;
}
return Character.toUpperCase((char) returnChar);
}
}
}
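3. Usage example

A minimal sketch of calling the utility. The expected JSON is inferred from how parse() matches each recorded operation against the statement's first keyword; it is not output captured from a real run:

HiveParseUtil hiveParseUtil = new HiveParseUtil();
JSONObject result = hiveParseUtil.parse("drop table dev.cqq_employee_ext_par");
// expected: {"dbName":"dev","tableName":"cqq_employee_ext_par"}
System.out.println(result.toString());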