HIVE BUG: NoViableAltException

  • 运行代码
#!/bin/bash
# (The line above is the shebang; it must be the first line of the shell file and carry no trailing text.)
#target_table:tag_model.mid_ope_indicators_per_day_xxt
#source_table: chiq.standard_actions_ch_app;
# format_date DATE
# Re-emits DATE as "YYYY-MM-DD" purely by character position: characters 1-4
# are taken as the year, 6-7 as the month and 9-10 as the day; whatever sits
# at positions 5 and 8 (the separators) is discarded.
# Prints the result on stdout. The input is not validated as a calendar date.
format_date()
{
        # Bash substring expansion uses 0-based offsets; it replaces the three
        # `expr substr` subprocesses (1-based positions) and, unlike the
        # unquoted `$1` passed to expr, is safe for arguments containing spaces.
        local Y=${1:0:4}
        local M=${1:5:2}
        local D=${1:8:2}
        printf '%s-%s-%s\n' "$Y" "$M" "$D"
}
# Pick the reference date: the sole command-line argument when exactly one is
# given, otherwise yesterday's date as reported by `date`.
if [ $# -ne 1 ]
then
    n_date=$(date -d yesterday +"%Y-%m-%d")
else
    n_date=$1
fi
# Reference date as re-emitted by format_date (yesterday unless an argument was given).
v_date=`format_date $n_date`
# 161 days before the reference date.
p_date=$(date -d "$v_date -161 day " +%Y-%m-%d)

# Runs the Hive job once per value of i from 1 to 10.
# The heredoc delimiter is unquoted, so the shell substitutes $p_date and $i
# into the text before the hive CLI reads it.
# Each run:
#   1. drops, recreates and overwrites the staging table
#      tag_model.dim_ope_details_qjtj_single_user_popup with the matching rows
#      of chiq.standard_actions_ch_app whose p_log_date equals
#      date_add('$p_date', $i);
#   2. creates tag_model.mid_ope_indicators_per_day_test if it is missing;
#   3. tries to append one summary row (row count and distinct-mac count of the
#      staging table) into the partition p_log_date=date_add('$p_date', $i+1).
# Step 3 is the statement this script exists to demonstrate: the PARTITION
# clause contains a date_add(...) call, which is what the ParseException
# reported after this script ("cannot recognize input near 'date_add' ... in
# constant") points at.
# NOTE(review): the column definition `indicators map` carries no key/value
# types; presumably `map<string,string>` was lost when the page was extracted
# -- confirm against the real table definition.
for i in $(seq 1 10)
do

hive<<start
use tag_model;
DROP TABLE IF EXISTS tag_model.dim_ope_details_qjtj_single_user_popup;
CREATE TABLE IF NOT EXISTS tag_model.dim_ope_details_qjtj_single_user_popup(ip string, mac string, p_log_date string, rid string, tv_time_popup string);

INSERT OVERWRITE TABLE tag_model.dim_ope_details_qjtj_single_user_popup
SELECT DISTINCT ip, mac, p_log_date, reportinfo['rid'] AS rid, time AS tv_time_popup 
FROM chiq.standard_actions_ch_app
WHERE reporttype = 'action' AND sort = 'CH_APP' AND subclass = 'ChGlobalSearch' AND reportinfo['action'] = 'EnterApp'
AND p_log_date = date_add('$p_date', $i)
AND mac REGEXP '([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}' AND mac <> '00:00:00:00:00:00';

CREATE  TABLE IF NOT EXISTS tag_model.mid_ope_indicators_per_day_test(scene string, T int, indicators map)
COMMENT 'stardard indicators for measuring performance of recommend system' PARTITIONED BY (p_log_date string) ROW FORMAT DELIMITED  FIELDS TERMINATED BY'\t' STORED AS rcfile;


INSERT INTO TABLE tag_model.mid_ope_indicators_per_day_test PARTITION( p_log_date=date_add('$p_date', $i+1) )
SELECT 'qjtj', 1,
MAP('tj', nvl(CAST(Tj_num.N AS string), '0'), 'sy', NVL(Sy_num.N, '0'), 'dj', '0', 'djzh', '0', 'rjcs', NVL((Tj_num.N/Sy_num.N), '0'))
FROM
(SELECT COUNT(*) AS N FROM tag_model.dim_ope_details_qjtj_single_user_popup) Tj_num
JOIN
(SELECT COUNT(DISTINCT mac) AS N FROM tag_model.dim_ope_details_qjtj_single_user_popup) Sy_num;

start

done
  • 得到错误信息
NoViableAltException(26@[221:1: constant : ( Number | dateLiteral | StringLiteral | stringLiteralSequence | BigintLiteral | SmallintLiteral | TinyintLiteral | DecimalLiteral | charSetStringLiteral | booleanValue );])
at org.antlr.runtime.DFA.noViableAlt(DFA.java:158)
at org.antlr.runtime.DFA.predict(DFA.java:116)
at org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.constant(HiveParser_IdentifiersParser.java:6128)
at org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.partitionVal(HiveParser_IdentifiersParser.java:10542)
at org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.partitionSpec(HiveParser_IdentifiersParser.java:10376)
at org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.tableOrPartition(HiveParser_IdentifiersParser.java:10254)
at org.apache.hadoop.hive.ql.parse.HiveParser.tableOrPartition(HiveParser.java:40210)
at org.apache.hadoop.hive.ql.parse.HiveParser.insertClause(HiveParser.java:39685)
at org.apache.hadoop.hive.ql.parse.HiveParser.regularBody(HiveParser.java:37647)
at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpressionBody(HiveParser.java:36898)
at org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpression(HiveParser.java:36774)
at org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1338)
at org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1036)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:199)
at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:404)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:322)
at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:975)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1040)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:911)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:901)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:268)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:220)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:423)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:792)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:686)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:625)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
FAILED: ParseException line 1:82 cannot recognize input near 'date_add' '(' ''2016-06-21'' in constant
  • 原因分析
    由于INSERT语句的PARTITION子句中采用了built-in function: date_add(start_date, num_days),而hive-0.13.0的语法解析器在该位置只接受常量(见上面堆栈中的 partitionVal → constant),不支持函数调用,因此在解析阶段就抛出了ParseException。这是HIVE-0.13.0存在的一个BUG(限制)。

  • 解决办法
    不要在PARTITION子句中直接调用built-in function,而是提前计算好分区值(例如在shell中用date命令算出日期),再以字符串常量的形式写到分区处。
    例如可以采用以下代码:

#!/bin/bash
# (The line above is the shebang; it must be the first line of the shell file and carry no trailing text.)
#target_table:tag_model.mid_ope_indicators_per_day_xxt
#source_table: chiq.standard_actions_ch_app;
# format_date DATE
# Re-emits DATE as "YYYY-MM-DD" purely by character position: characters 1-4
# are taken as the year, 6-7 as the month and 9-10 as the day; whatever sits
# at positions 5 and 8 (the separators) is discarded.
# Prints the result on stdout. The input is not validated as a calendar date.
format_date()
{
        # Bash substring expansion uses 0-based offsets; it replaces the three
        # `expr substr` subprocesses (1-based positions) and, unlike the
        # unquoted `$1` passed to expr, is safe for arguments containing spaces.
        local Y=${1:0:4}
        local M=${1:5:2}
        local D=${1:8:2}
        printf '%s-%s-%s\n' "$Y" "$M" "$D"
}

# Reference date: the sole command-line argument when exactly one is given,
# otherwise yesterday's date as reported by `date`.
case $# in
    1) n_date=$1 ;;
    *) n_date=$(date -d yesterday +"%Y-%m-%d") ;;
esac
# Reference date as re-emitted by format_date (yesterday unless an argument was given).
v_date=$(format_date $n_date)
# Loop anchors: 161 and 160 days before the reference date.
p_date=$(date -d "$v_date -161 day " +%Y-%m-%d)
p_date1=$(date -d "$v_date -160 day " +%Y-%m-%d)

# Runs the Hive job once per value of i from 1 to 10. Each run rebuilds the
# staging table from one day of popup events and appends one summary row
# (row count and distinct-mac count) to the indicators table.
for i in $(seq 1 10)
do

# The partition value is computed here in the shell and passed to Hive as a
# plain string constant, because the Hive 0.13.0 parser rejects a function
# call such as date_add(...) inside the PARTITION clause.
c_date=$(date -d "$p_date +$i day " +%Y-%m-%d)    # source day: (161-i) days before the reference date
c_date1=$(date -d "$p_date1 +$i day " +%Y-%m-%d)  # target partition: (160-i) days before the reference date

# The heredoc delimiter is unquoted on purpose: the shell substitutes $c_date
# and $c_date1 into the text before the hive CLI reads it.
# `indicators map<string,string>`: a bare `map` is not valid Hive DDL; the
# string/string parameters are assumed from the string keys and '0' defaults
# of the MAP(...) call below -- confirm against the real table definition.
# A failing hive run is reported on stderr; the loop still moves on to the
# next day, as before.
hive<<start || echo "hive failed for source day $c_date (target partition $c_date1)" >&2
use tag_model;
DROP TABLE IF EXISTS tag_model.dim_ope_details_qjtj_single_user_popup;
CREATE TABLE IF NOT EXISTS tag_model.dim_ope_details_qjtj_single_user_popup(ip string, mac string, p_log_date string, rid string, tv_time_popup string);

INSERT OVERWRITE TABLE tag_model.dim_ope_details_qjtj_single_user_popup
SELECT DISTINCT ip, mac, p_log_date, reportinfo['rid'] AS rid, time AS tv_time_popup
FROM chiq.standard_actions_ch_app
WHERE reporttype = 'action' AND sort = 'CH_APP' AND subclass = 'ChGlobalSearch' AND reportinfo['action'] = 'EnterApp'
AND p_log_date = '$c_date'
AND mac REGEXP '([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}' AND mac <> '00:00:00:00:00:00';



CREATE  TABLE IF NOT EXISTS tag_model.mid_ope_indicators_per_day_test(scene string, T int, indicators map<string,string>)
COMMENT 'stardard indicators for measuring performance of recommend system' PARTITIONED BY (p_log_date string) ROW FORMAT DELIMITED  FIELDS TERMINATED BY'\t' STORED AS rcfile;


INSERT INTO TABLE tag_model.mid_ope_indicators_per_day_test PARTITION(p_log_date ='$c_date1')
SELECT 'qjtj', 1,
MAP('tj', NVL(CAST(Tj_num.N AS string), '0'), 'sy', NVL(Sy_num.N, '0'), 'dj', '0', 'djzh', '0', 'rjcs', NVL((Tj_num.N/Sy_num.N), '0'))
FROM
(SELECT COUNT(*) AS N FROM tag_model.dim_ope_details_qjtj_single_user_popup) Tj_num
JOIN
(SELECT COUNT(DISTINCT mac) AS N FROM tag_model.dim_ope_details_qjtj_single_user_popup) Sy_num;

start

done
  • 类似的BUG
    https://issues.apache.org/jira/browse/HIVE-7630?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel

你可能感兴趣的:(HIVE)