#!/bin/bash
# Create the temporary external table that the raw log files are attached to.
#
# Variables set here and used by the rest of the script:
#   dt        - yesterday's date as YYYYMMDD (uses the GNU `date -d` syntax)
#   tableName - name of the temporary table; carries ${dt} as its suffix

dt=$(date -d'-1 day' +'%Y%m%d')
tableName="tmp_ods_shmm_app_action_di_${dt}"

# DDL: a single string column `content`, partitioned by dt and ht,
# stored as a text file. `if not exists` makes the statement safe to repeat.
sql3="
create external table if not exists ${tableName}(
content string
)
comment 'action_raw_log'
partitioned by (dt string, ht string)
stored as textfile;"

# NOTE(review): the `[email protected]` part of the principal looks like residue
# of an e-mail obfuscation filter, not a real Kerberos principal - confirm and
# restore the real value.
# The exit status of this call is not checked; the script continues either way.
beeline \
  -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' \
  --hiveconf mapreduce.job.queuename=media \
  --delimiterForDSV=DELIMITER \
  --outputformat=tsv2 \
  --showHeader=false \
  -e "${sql3}"
# Attach one partition per hour of ${dt} to the temporary table. Each partition
# points at that hour's directory under /user/mediaai/rawlog/logCollector/asa.
for ht in {0..23}
do
  # Hours are used zero-padded to two digits ("00" .. "23").
  printf -v ht '%02d' "${ht}"

  sql="alter table ${tableName} add partition (dt=${dt},ht=${ht}) location '/user/mediaai/rawlog/logCollector/asa/${dt}/${ht}'"
  # Disabled alternative kept from the original script:
  #sql="alter table ${tableName} DROP IF EXISTS PARTITION (dt=${dt},ht=${ht}) location '/user/mediaai/rawlog/logCollector/asv/${dt}/${ht}';"
  echo "${sql}"

  # The exit status is intentionally not checked: a failing hour does not stop
  # the loop, it only shows up in the beeline output.
  # NOTE(review): the `[email protected]` part of the principal looks like
  # residue of an e-mail obfuscation filter - confirm the real value.
  beeline \
    -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' \
    --hiveconf mapreduce.job.queuename=media \
    --delimiterForDSV=DELIMITER \
    --outputformat=tsv2 \
    --showHeader=false \
    -e "${sql}"
done
# Build the statements that load the parsed logs into the target table.
#
# sql2 contains, in order:
#   1. settings that enable dynamic partitioning in non-strict mode;
#   2. an insert-overwrite into ${tableName2}, partition (dt='${dt}', app_id),
#      of the JSON fields extracted from the rows produced by
#      explode(Parse2OdsAction(content)) whose '$.ext' is not 'error', limited
#      to app_id='com.sohu.mobile';
#   3. an insert-overwrite of '$.msg' for the rows whose '$.ext' is 'error'
#      into ods_shmm_app_error_log_di, partition (type='action', dt='${dt}');
#   4. a drop of the temporary table ${tableName}.
#
# The JSON paths ('$.xxx') stay literal inside the double-quoted string because
# `$` followed by `.` is not a shell expansion; only ${tableName},
# ${tableName2} and ${dt} are substituted.
#
# NOTE(review): in step 2 a row whose '$.ext' is missing yields NULL for the
# `!= 'error'` comparison and is filtered out as well - confirm that
# Parse2OdsAction always emits an `ext` field.
tableName2="ods_shmm_app_action_di"
sql2="
set hive.exec.dynamic.partition = true; --开启动态分区功能
set hive.exec.dynamic.partition.mode = nonstrict; --允许所有分区都是动态的
from
(
select
get_json_object(content, '$.ser_log_time') as ser_log_time
,get_json_object(content, '$.ser_host_ip') as ser_host_ip
,get_json_object(content, '$.log_version') as log_version
,get_json_object(content, '$.report_time') as report_time
,get_json_object(content, '$.sdk_version') as sdk_version
,get_json_object(content, '$.vst_user_id') as vst_user_id
,get_json_object(content, '$.app_name') as app_name
,get_json_object(content, '$.app_version') as app_version
,get_json_object(content, '$.app_distri_id') as app_distri_id
,get_json_object(content, '$.os_type') as os_type
,get_json_object(content, '$.os_version') as os_version
,get_json_object(content, '$.device_type') as device_type
,get_json_object(content, '$.device_brand') as device_brand
,get_json_object(content, '$.device_model') as device_model
,get_json_object(content, '$.device_res') as device_res
,get_json_object(content, '$.mac') as mac
,get_json_object(content, '$.imei') as imei
,get_json_object(content, '$.imsi') as imsi
,get_json_object(content, '$.idfa') as idfa
,get_json_object(content, '$.uUID') as uuid --全大写的字段需要首字母小写
,get_json_object(content, '$.sUV') as suv --全大写的字段需要首字母小写
,get_json_object(content, '$.vst_ip') as vst_ip
,get_json_object(content, '$.net') as net
,get_json_object(content, '$.carrier') as carrier
,get_json_object(content, '$.timestamp') as timestamp
,get_json_object(content, '$.log_time') as log_time
,get_json_object(content, '$.page_info') as page_info
,get_json_object(content, '$.refer_page_info') as refer_page_info
,get_json_object(content, '$.spm_cnt') as spm_cnt
,get_json_object(content, '$.spm_pre') as spm_pre
,get_json_object(content, '$.lng') as lng
,get_json_object(content, '$.lat') as lat
,get_json_object(content, '$.acode') as acode
,get_json_object(content, '$.aext') as aext
,get_json_object(content, '$.ser_host_rmtip') as ser_host_rmtip
,get_json_object(content, '$.session_id') as session_id
,'' as other_1
,'' as other_2
,get_json_object(content, '$.app_id') as app_id --分区字段需放在最后一列
from
(
select
explode(Parse2OdsAction(content)) as content
from ${tableName} where dt='${dt}'
)t
where get_json_object(content,'$.ext')!='error'
)t1
insert overwrite table ${tableName2}
partition (dt='${dt}',app_id)
select * where app_id='com.sohu.mobile';
--插入错误日志信息到错误日志表
insert overwrite table ods_shmm_app_error_log_di
partition (type='action',dt='${dt}')
select
get_json_object(content, '$.msg')
from
(
select
explode(Parse2OdsAction(content)) as content
from ${tableName} where dt='${dt}'
)t
where get_json_object(content,'$.ext')='error';
--删除临时表
drop table ${tableName};
"
# Execute the load statements held in ${sql2}. If beeline exits non-zero,
# print 'failed' and stop the script with status 1.
# NOTE(review): the `[email protected]` part of the principal looks like residue
# of an e-mail obfuscation filter - confirm the real value.
beeline \
  -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' \
  --hiveconf mapreduce.job.queuename=media \
  --delimiterForDSV=DELIMITER \
  --outputformat=tsv2 \
  --showHeader=false \
  -e "${sql2}" || {
  echo 'failed'
  exit 1
}

# After a successful load, issue `invalidate metadata` for the target table
# through impala-shell.
impala-shell -i dmeta2.heracles.sohuno.com:25003 -k -d mediaai \
  --query="invalidate metadata mediaai.${tableName2};"