SQL statement parsing utility

Note: the parsing utility can extract the syntax, the databases/tables, and the conditions from a SQL statement. The example below only covers database/table extraction; the other kinds of extraction require further study of the parsing rules.
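For example (a sketch of the expected result, assuming the class below is used as-is): parsing the statement "drop table dev.cqq_employee_ext_par" should produce a JSON object roughly like {"dbName":"dev","tableName":"cqq_employee_ext_par"}, because parse() keeps the table whose recorded operation matches the statement's leading keyword (DROP in this case).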

Usage:

1. Maven dependencies



<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>2.0.0-SNAPSHOT</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.hive</groupId>
                <artifactId>hive-common</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.antlr</groupId>
        <artifactId>antlr-runtime</artifactId>
        <version>3.4</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.2.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.2.0</version>
    </dependency>
</dependencies>

2. The parser class

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.Stack;

import com.alibaba.fastjson.JSONObject;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.*;

/**
 * Purpose: extract from the AST the tables and columns touched by a statement,
 * together with the operation applied to them (SELECT, INSERT, ...).
 * Focus: table/column handling for SELECT operations; other operations are only
 * resolved down to the table level.
 * Approach: depth-first traversal of the AST. When an operation token is met, the
 * current operation is recorded; TOK_TAB, TOK_TABREF or TOK_ALTERTABLE identify the
 * table being operated on; when a sub-clause is met, the current state is pushed
 * onto a stack, the sub-clause is processed, and the state is popped afterwards.
 */
public class HiveParseUtil {
    private final Log log = LogFactory.getLog(HiveParseUtil.class);

    private final String UNKNOWN = "UNKNOWN";
    private String defaultDbName;
    private Set<String> tables = new HashSet<String>();
    private Stack<String> tableNameStack = new Stack<String>();
    private Stack<Oper> operStack = new Stack<Oper>();
    // Definition and handling are a bit unclear; a set of tables per query/from node
    // would be better, since more than one table may be under processing at a time.
    private String nowQueryTable = "";
    private Oper oper;
    private boolean joinClause = false;

    private enum Oper {
        SELECT, INSERT, DROP, TRUNCATE, LOAD, CREATE, ALTER, USE
    }

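    /**
     * Depth-first traversal entry point: prepare state for this node, recurse into
     * its children, handle the node itself, then restore the saved state.
     */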
    public Set<String> parseIteral(ASTNode ast) {
        Set<String> set = new HashSet<String>();// tables referenced by the current query
        prepareToParseCurrentNodeAndChilds(ast);
        set.addAll(parseChildNodes(ast));
        set.addAll(parseCurrentNode(ast, set));
        endParseCurrentNode(ast);
        return set;
    }

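    /**
     * Restores state once a node and its children are done: clears the join flag at
     * the end of a JOIN clause, and pops the saved table/operation when leaving a
     * QUERY, INSERT or SELECT node.
     */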
    private void endParseCurrentNode(ASTNode ast) {
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {// end of a JOIN clause: leave join mode
                case HiveParser.TOK_RIGHTOUTERJOIN:
                case HiveParser.TOK_LEFTOUTERJOIN:
                case HiveParser.TOK_JOIN:
                    joinClause = false;
                    break;
                case HiveParser.TOK_QUERY:
                case HiveParser.TOK_INSERT:
                case HiveParser.TOK_SELECT:
                    nowQueryTable = tableNameStack.pop();
                    oper = operStack.pop();
                    break;
            }
        }
    }

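    /**
     * Handles the node itself: table tokens (TOK_TABLE_PARTITION, TOK_TAB, TOK_TABREF)
     * and DDL tokens (USE, ALTER, DROP, CREATE) are resolved to table names and
     * recorded together with the current operation.
     */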
    private Set<String> parseCurrentNode(ASTNode ast, Set<String> set) {
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {
                case HiveParser.TOK_TABLE_PARTITION:
                    if (ast.getChildCount() != 2) {
                        String table = BaseSemanticAnalyzer
                                .getUnescapedName((ASTNode) ast.getChild(0));
                        if (oper == Oper.SELECT) {
                            nowQueryTable = table;
                        }
                        tables.add(table + "\t" + oper);
                    }
                    break;

                case HiveParser.TOK_TAB:// outputTable
                    String tableTab = BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) ast.getChild(0));
                    if (oper == Oper.SELECT) {
                        nowQueryTable = tableTab;
                    }
                    tables.add(tableTab + "\t" + oper);
                    break;
                case HiveParser.TOK_TABREF:// inputTable
                    ASTNode tabTree = (ASTNode) ast.getChild(0);
                    String tableName = (tabTree.getChildCount() == 1) ? BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) tabTree.getChild(0))
                            : BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) tabTree.getChild(0))
                            + "." + tabTree.getChild(1);
                    if (oper == Oper.SELECT) {
                        if (joinClause && !"".equals(nowQueryTable)) {
                            nowQueryTable += "&" + tableName;// several source tables joined in one query
                        } else {
                            nowQueryTable = tableName;
                        }
                        set.add(tableName);
                    }
                    tables.add(tableName + "\t" + oper);
                    break;
//                case HiveParser.TOK_ALTERTABLE_ADDCOLS:
//                    ASTNode alterTableName = (ASTNode) ast.getChild(0);
//                    tables.add(alterTableName.getText() + "\t" + oper);
//                    break;
                case HiveParser.TOK_SWITCHDATABASE:
                    ASTNode dbName = (ASTNode) ast.getChild(0);
                    defaultDbName = dbName.getText();
                    break;
                case HiveParser.TOK_ALTERTABLE:
                    String alterTableTab = BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) ast.getChild(0));
                    if (oper == Oper.SELECT) {
                        nowQueryTable = alterTableTab;
                    }
                    tables.add(alterTableTab + "\t" + oper);
                    break;
                case HiveParser.TOK_DROPTABLE:
                    String dropTableTab = BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) ast.getChild(0));
                    if (oper == Oper.SELECT) {
                        nowQueryTable = dropTableTab;
                    }
                    tables.add(dropTableTab + "\t" + oper);
                    break;
                case HiveParser.TOK_CREATETABLE:
                    String createTableTab = BaseSemanticAnalyzer
                            .getUnescapedName((ASTNode) ast.getChild(0));
                    if (oper == Oper.SELECT) {
                        nowQueryTable = createTableTab;
                    }
                    tables.add(createTableTab + "\t" + oper);
            }
        }
        return set;
    }

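    /**
     * Recursively parses every child of the node and unions their table sets.
     */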
    private Set<String> parseChildNodes(ASTNode ast) {
        Set<String> set = new HashSet<String>();
        int numCh = ast.getChildCount();
        if (numCh > 0) {
            for (int num = 0; num < numCh; num++) {
                ASTNode child = (ASTNode) ast.getChild(num);
                set.addAll(parseIteral(child));
            }
        }
        return set;
    }

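    /**
     * Runs before a node's children are visited: marks the start of a JOIN clause,
     * pushes the enclosing table/operation at QUERY/INSERT/SELECT boundaries, and sets
     * the operation for DROP/TRUNCATE/LOAD/CREATE/USE and the ALTER token range.
     */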
    private void prepareToParseCurrentNodeAndChilds(ASTNode ast) {
        if (ast.getToken() != null) {
            switch (ast.getToken().getType()) {// start of a JOIN clause
                case HiveParser.TOK_RIGHTOUTERJOIN:
                case HiveParser.TOK_LEFTOUTERJOIN:
                case HiveParser.TOK_JOIN:
                    joinClause = true;
                    break;
                case HiveParser.TOK_QUERY:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
                    nowQueryTable = "";// reset for the new query scope (see sql22 in main)
                    oper = Oper.SELECT;
                    break;
                case HiveParser.TOK_INSERT:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
                    oper = Oper.INSERT;
                    break;
                case HiveParser.TOK_SELECT:
                    tableNameStack.push(nowQueryTable);
                    operStack.push(oper);
                    oper = Oper.SELECT;
                    break;
                case HiveParser.TOK_DROPTABLE:
                    oper = Oper.DROP;
                    break;
                case HiveParser.TOK_TRUNCATETABLE:
                    oper = Oper.TRUNCATE;
                    break;
                case HiveParser.TOK_LOAD:
                    oper = Oper.LOAD;
                    break;
                case HiveParser.TOK_CREATETABLE:
                    oper = Oper.CREATE;
                    break;
                case HiveParser.TOK_SWITCHDATABASE:
                    oper = Oper.USE;
                    break;
                case HiveParser.TOK_SET_AUTOCOMMIT:
            }
            if (ast.getToken() != null
                    && ast.getToken().getType() >= HiveParser.TOK_ALTERDATABASE_PROPERTIES
                    && ast.getToken().getType() <= HiveParser.TOK_ALTERVIEW_RENAME) {
                oper = Oper.ALTER;
            }
        }
    }

    public  String unescapeIdentifier(String val) {
        if (val == null) {
            return null;
        }
        if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
            val = val.substring(1, val.length() - 1);
        }
        return val;
    }

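    /**
     * Parses one SQL statement and returns a JSONObject whose "dbName"/"tableName"
     * describe the table whose recorded operation matches the statement's leading
     * keyword (e.g. the target of an INSERT, or the table of a DROP).
     */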
    public JSONObject parse(String parseSql) throws ParseException, RecognitionException {
        log.warn("parsesql=====================" + parseSql);
        tables = new HashSet<String>();
        // result object
        JSONObject jsonObject = new JSONObject();
        // the main operation is the statement's leading keyword
        String[] mainOperateArray = parseSql.split("\\s+");
        String mainOperate = "";
        if (mainOperateArray != null && mainOperateArray.length > 0) {
            mainOperate = mainOperateArray[0].toUpperCase();
        }

        HiveLexer lexer = new HiveLexer(new ANTLRNoCaseStringStream(parseSql));
        // HiveConf settings: do not treat SQL2011 reserved words as keywords, disable quoted identifiers
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS, false);
        conf.setVar(HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT, "none");
        lexer.setHiveConf(conf);
        TokenRewriteStream tokens = new TokenRewriteStream(lexer);
        tokens.getNumberOfOnChannelTokens();
        HiveParser parser = new HiveParser(tokens);
        parser.setTreeAdaptor(ParseDriver.adaptor);
        // parse the sql into an AST
//        ParseDriver pd = new ParseDriver();
        ASTNode ast = (ASTNode) parser.statement().getTree();
        parseIteral(ast);
        // extract table information

        if (tables != null && tables.size() > 0) {
            for (String table : tables) {
                String[] tableArray = table.split("\t");
                if (tableArray.length == 2) {
                    String rTable = tableArray[0];
                    String ope = tableArray[1];
                    if (ope.equals(mainOperate)) {
                        if (rTable.contains(".")) {
                            String[] rTables = rTable.split("\\.");
                            if (rTables.length == 2) {
                                jsonObject.put("dbName", rTables[0]);
                                jsonObject.put("tableName", rTables[1]);
                            }
                        } else {
                            jsonObject.put("tableName", rTable);
                        }
                    }
                }
            }
        }
        return jsonObject;
    }

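    /**
     * Parses a statement and returns the database name set by a USE
     * (TOK_SWITCHDATABASE) node, if any.
     */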
    public  String getDefaultDbName(String parseSql) throws ParseException {
        ParseDriver pd = new ParseDriver();
        ASTNode ast = pd.parse(parseSql);
        parseIteral(ast);
        return defaultDbName;
    }

    public static void main(String[] args) throws IOException, ParseException,
            SemanticException, RecognitionException {
        // HiveConf conf = new HiveConf();
        String sql1 = "Select * from zpc1";
        String sql2 = "Select name,ip from zpc2 bieming where age > 10 and area in (select area from city)";
        String sql3 = "Select d.name,d.ip from (select * from zpc3 where age > 10 and area in (select area from city)) d";
        String sql4 = "create table zpc(id string, name string)";
        String sql5 = "insert overwrite table tmp.tmp1 PARTITION (partitionkey='2008-08-15') select * from tmp";


        String sql6 = "FROM (  SELECT p.datekey datekey, p.userid userid, c.clienttype  FROM detail.usersequence_client c JOIN fact.orderpayment p ON p.orderid = c.orderid "
                + " JOIN default.user du ON du.userid = p.userid WHERE p.datekey = 20131118 ) base  INSERT OVERWRITE TABLE `test`.`customer_kpi` SELECT base.datekey, "
                + "  base.clienttype, count(distinct base.userid) buyer_count GROUP BY base.datekey, base.clienttype";
        String sql7 = "SELECT id, value FROM (SELECT id, value FROM p1 UNION ALL  SELECT 4 AS id, 5 AS value FROM p1 limit 1) u";
        String sql8 = "select dd from(select id+1 dd from zpc) d";
        String sql9 = "select dd+1 from(select id+1 dd from zpc) d";
        String sql10 = "truncate table zpc";
        String sql11 = "drop table zpc";
        String sql12 = "select * from tablename where unix_timestamp(cz_time) > unix_timestamp('2050-12-31 15:32:28')";
        String sql15 = "alter table old_table_name RENAME TO new_table_name";
        String sql16 = "select statis_date,time_interval,gds_cd,gds_nm,sale_cnt,discount_amt,discount_rate,price,etl_time,pay_amt from o2ostore.tdm_gds_monitor_rt where time_interval = from_unixtime(unix_timestamp(concat(regexp_replace(from_unixtime(unix_timestamp('201506181700', 'yyyyMMddHHmm')+ 84600 ,  'yyyy-MM-dd HH:mm'),'-| |:',''),'00'),'yyyyMMddHHmmss'),'yyyy-MM-dd HH:mm:ss')";
        String sql13 = "INSERT OVERWRITE TABLE u_data_new SELECT TRANSFORM (userid, movieid, rating, unixtime) USING 'python weekday_mapper.py' AS (userid, movieid, rating, weekday) FROM u_data";
        String sql14 = "SELECT a.* FROM a JOIN b ON (a.id = b.id AND a.department = b.department)";
        String sql17 = "LOAD DATA LOCAL INPATH \"/opt/data/1.txt\" OVERWRITE INTO TABLE table1";
        String sql18 = "CREATE TABLE  table1     (    column1 STRING COMMENT 'comment1',    column2 INT COMMENT 'comment2'        )";
        String sql19 = "ALTER TABLE events RENAME TO 3koobecaf";
        String sql20 = "ALTER TABLE invites ADD COLUMNS (new_col2 INT COMMENT 'a comment')";
        String sql21 = "alter table mp add partition (b='1', c='1')";
        String sql22 = "select login.uid from login day_login left outer join (select uid from regusers where dt='20130101') day_regusers on day_login.uid=day_regusers.uid where day_login.dt='20130101' and day_regusers.uid is null";
        String sql23 = "select name from (select * from zpc left outer join def) d";
        String sql24 = "set mapred.reduce.tasks = 15";
        String sql25 = "CREATE EXTERNAL TABLE  app_bp.app_xtl_sku_profile_sale_base1 ( " +
                "   item_sku_id  bigint COMMENT '商品id',  " +
                "   sale_num  bigint COMMENT '销售数量',  " +
                "   sale_amount  double COMMENT '销售金额',  " +
                "   order_num  bigint COMMENT '订单量',  " +
                "   promotion_sale_num  bigint COMMENT '促销销售数量',  " +
                "   promotion_sale_amount  double COMMENT '促销销售金额', " +
                "   promotion_order_num  bigint COMMENT '促销订单量') " +
                "COMMENT '新通路_商品画像_销售基础数据表' " +
                "PARTITIONED BY (  " +
                "   dt  string) " +
                "ROW FORMAT DELIMITED  " +
                "  FIELDS TERMINATED BY '\t'   " +
                "STORED AS ORC " +
                "TBLPROPERTIES ( " +
                "  'orc.compress'='SNAPPY') ";

        String sql26 = "insert into app.lhhtest1 select run_times,fail_times from app.lhy_oozie_partitions_test a left join bdm.table1 c on a.id=c.id";


        String sql27 = "use app";

        String sql28 = "alter table dev.cqq_employee_ext_par drop partition(month='201805')";
        String sql29 = "alter table dev.cqq_employee_ext_par RENAME TO 3koobecaf";
        String sql30 = "drop table zpc.test";
        String sql31 = "-- d;\n" +
                "-- d;\n" +
                " drop table dev.cqq_employee_ext_par";

        sql31 = sql31.replaceAll("--.*", "");
        sql31 = sql31.trim().replaceAll("\n|\t|\r", "");
        String sql32 = "insert into dev.cqq_employee_ext_par values(${pa1},'345',${pa1},${pa1})";
        sql32 = sql32.replaceAll("\\$\\{.*?\\}", "123456");
        String sql33 = "INSERT overwrite table adm_test.adm_w08_log_bdpsta_offline_userinfo " +
                "select user_log_acct, count(1) as PV from adm.adm_w08_log_bdp_log";
        String sql34 = "INSERT overwrite table\n" +
                "    adm_test.adm_w08_log_bdpsta_offline_userinfo \n" +
                "select\n" +
                "\tuser_log_acct,\n" +
                "\tcount(1) as PV\n, \"asdasd\" " +
                "from\n" +
                "\tadm.adm_w08_log_bdp_log";
        String sql51 = "INSERT overwrite\n" +
                "        table adm_test.adm_w08_log_bdpsta_all_userinfo partition\n" +
                "        (\n" +
                "                dt = '${dt}'\n" +
                "        )\n" +
                "        select\n" +
                "                user_log_acct,\n" +
                "        count(1) as PV\n" +
                "        from\n" +
                "        adm.adm_w08_log_bdp_log\n" +
                "                where\n" +
                "        dt = '${dt}'\n" +
                "        group by\n" +
                "        user_log_acct";
        sql51 = sql51.replaceAll("\\$\\{.*?\\}", "123456");


        String sql35 = "CREATE TABLE `app.test_data_cal_sql_create_table_partition`" +
                "(`shop_id` int," +
                "`shopname` string)" +
                "PARTITIONED BY" +
                "(`dt` string)" +
                "ROW FORMAT DELIMITED" +
                " FIELDS TERMINATED BY '\t' " +
                "STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" +
                " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'";
        String sql36 = "CREATE TABLE `app.test_data_cal_sql_create_table_partition11`" +
                "(`shop_id` int," +
                "`shopname` string)" +
                "PARTITIONED BY" +
                "(`dt` string)" +
                "ROW FORMAT DELIMITED" +
                " FIELDS TERMINATED BY '\t' " +
                "STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'" +
                " OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'";

        String sql37 = "insert    overwrite" +
                " table adm_test.adm_w08_log_bdpsta_all_userinfo partition" +
                "(" +
                "dt = test" +
                ")" +
                "select" +
                "user_log_acct," +
                "count(1) as PV" +
                "from" +
                "adm.adm_w08_log_bdp_log" +
                "where" +
                "dt = 'test'" +
                "group by" +
                "user_log_acct";
        String sql43 = "CREATE TABLE IF NOT EXISTS dev.spark_team_wuguoxiao_spark_sql_success_rate \n" +
                "STORED AS ORC\n" +
                "AS\n" +
                "select b.dt, abs(b.fail_logs / a.sum_logid - 1) as success_rate from (\n" +
                "    select a.dt,count(distinct a.logid) as sum_logid from fdm.fdm_spark_appinfo_di as a\n" +
                "    group by a.dt\n" +
                ") as a\n" +
                "inner join (\n" +
                "    select a.dt,spark_sql_source,count(distinct a.logid) as fail_logs from (\n" +
                "        select a.dt,spark_sql_source,b.logid from fdm.fdm_spark_environmentinfo_di as a\n" +
                "        inner join fdm.fdm_spark_appinfo_di as b on a.appid = b.appid and a.appattemptid = b.appattemptid and a.dt = b.dt\n" +
                "        group by a.dt,spark_sql_source,b.logid\n" +
                "    ) as a\n" +
                "    inner join (\n" +
                "        select a.id from fdm.fdm_dispatch_1_d_task_run_log_new_chain as a\n" +
                "        inner join fdm.fdm_dispatch_1_d_task_chain as b\n" +
                "        on a.task_id = b.id\n" +
                "        where a.dp = 'ACTIVE' and b.dp = 'ACTIVE' and b.type = 'pyscript' and a.status = 'fail'\n" +
                "        union all\n" +
                "        select a.id from fdm.fdm_dispatch_1_b_run_log_chain as a\n" +
                "        where a.dp = 'ACTIVE' and run_status = 'fail' and a.instance_task_type='normal'\n" +
                "    ) as b\n" +
                "    on a.logid  = b.id\n" +
                "    group by a.dt,spark_sql_source order by a.dt asc\n" +
                ") as b\n" +
                "on a.dt = b.dt\n" +
                "where b.dt >= '2020-01-01' and spark_sql_source = 'HiveTask'\n" +
                "order by a.dt asc\n";


        String sql44 = "drop table dev.spark_team_temp_sql_cluster_running_report;\n" +
                "create table dev.spark_team_temp_sql_cluster_running_report\n" +
                "select b.*, a.error_type_001\n" +
                "from fdm.fdm_spark_appinfo_di as b\n" +
                "left join (select log_id, error_type_001 from wangriyu_test.buffalo_log_analysis where error_type_001=2 group by log_id,error_type_001) as a on a.log_id = b.logId\n" +
                "inner join fdm.fdm_spark_environmentinfo_di as c on b.appId = c.appId and b.appattemptId = c.appattemptId\n" +
                "where b.dt >= '2019-11-01' and c.dt >= '2019-11-01' and c.spark_sql_source='HiveTask' and c.spark_submit_deployMode='cluster'\n";


        String parsesql = "drop table dev.spark_team_temp_sql_cluster_running_report;\n" +
                "create table dev.spark_team_temp_sql_cluster_running_report\n" +
                "select b.*, a.error_type_001\n" +
                "from fdm.fdm_spark_appinfo_di as b\n" +
                "left join (select log_id, error_type_001 from wangriyu_test.buffalo_log_analysis where error_type_001=2 group by log_id,error_type_001) as a on a.log_id = b.logId\n" +
                "inner join fdm.fdm_spark_environmentinfo_di as c on b.appId = c.appId and b.appattemptId = c.appattemptId\n" +
                "where b.dt >= '2019-11-01' and c.dt >= '2019-11-01' and c.spark_sql_source='HiveTask' and c.spark_submit_deployMode='cluster'";
        System.out.println(parsesql);

        HiveParseUtil hiveParseUtil = new HiveParseUtil();
        JSONObject jsonObject = hiveParseUtil.parse(parsesql);

        System.out.println(jsonObject.toString());

    }


    /**
     * Char stream that upper-cases the input so that lexing is case-insensitive
     * (the same approach Hive's own ParseDriver uses internally).
     */
    public class ANTLRNoCaseStringStream extends ANTLRStringStream {

        public ANTLRNoCaseStringStream(String input) {
            super(input);
        }

        @Override
        public int LA(int i) {

            int returnChar = super.LA(i);
            if (returnChar == CharStream.EOF) {
                return returnChar;
            } else if (returnChar == 0) {
                return returnChar;
            }

            return Character.toUpperCase((char) returnChar);
        }
    }

}
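
A minimal usage sketch, assuming the HiveParseUtil class above is on the classpath (the demo class name is arbitrary). The statement is sql26 from the test cases; the JSON in the comment is the value parse() is expected to return and is shown for illustration only:

import com.alibaba.fastjson.JSONObject;
import org.antlr.runtime.RecognitionException;
import org.apache.hadoop.hive.ql.parse.ParseException;

public class HiveParseDemo {
    public static void main(String[] args) throws ParseException, RecognitionException {
        HiveParseUtil util = new HiveParseUtil();
        // sql26 from the test cases above: an INSERT ... SELECT with a join
        JSONObject result = util.parse(
                "insert into app.lhhtest1 select run_times,fail_times from app.lhy_oozie_partitions_test a left join bdm.table1 c on a.id=c.id");
        // The leading keyword is INSERT, so the INSERT target table is kept;
        // expected output (illustrative): {"dbName":"app","tableName":"lhhtest1"}
        System.out.println(result.toString());
    }
}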
