HDFS日志抽取转化并加载到目标表的shell脚本

建存储日志文件的临时表

#!/bin/bash
# Step 1: create the per-day staging table that exposes the raw log files to Hive.
#
# The table is EXTERNAL with a single string column (one raw log line per row)
# and is partitioned by day (dt) and hour (ht). Partitions are attached to it
# by a later step of this script.
#
# NOTE(review): the Kerberos principal inside the JDBC URL below looks like it
# was altered by e-mail obfuscation when this script was published -- confirm
# the real principal before running.

# Yesterday's date as YYYYMMDD (GNU date syntax).
dt=$(date -d'-1 day' +'%Y%m%d')

# One staging table per processed day.
tableName="tmp_ods_shmm_app_action_di_${dt}"

sql3="
create external table if not exists ${tableName}(
content string
)
comment 'action_raw_log'
partitioned by (dt string, ht string)
stored as textfile;"

# Stop right away if the staging table cannot be created: every later step
# of the script reads from or alters this table.
if ! beeline -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' --hiveconf mapreduce.job.queuename=media --delimiterForDSV=DELIMITER --outputformat=tsv2 --showHeader=false -e "${sql3}"
then
    echo "failed to create staging table ${tableName}" >&2
    exit 1
fi

为临时表按小时添加分区(每个分区指向 HDFS 上对应日期、小时的原始日志目录)

# Step 2: register one partition per hour (00..23) of day ${dt} on the staging
# table, each pointing at that hour's raw-log directory on HDFS.
for hour in {0..23}
do
    # Zero-pad the hour to two digits (0 -> 00, 9 -> 09) to match the directory names.
    ht=$(printf '%02d' "${hour}")

    # - Partition values are quoted because dt and ht are declared as string
    #   columns; this keeps the value exactly as written (e.g. '05').
    # - "if not exists" makes the step safe to re-run after a failed run that
    #   left the staging table and some of its partitions behind.
    sql="alter table ${tableName} add if not exists partition (dt='${dt}',ht='${ht}') location '/user/mediaai/rawlog/logCollector/asa/${dt}/${ht}'"
    echo "${sql}"

    # A failing hour is reported but does not abort the remaining hours.
    beeline -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' --hiveconf mapreduce.job.queuename=media --delimiterForDSV=DELIMITER --outputformat=tsv2 --showHeader=false -e "${sql}" \
        || echo "warning: could not add partition dt=${dt}, ht=${ht}" >&2
done

将查询结果插入目标表

# Step 3: parse the staged raw logs and load them into the ODS tables, then
# drop the staging table. All statements run in a single beeline invocation.
#
# What the SQL does:
#   * Every staged line for day ${dt} is passed through Parse2OdsAction (a
#     function not defined in this script) and exploded into one row per
#     element; each element is read as JSON with get_json_object.
#   * Rows whose '$.ext' is not 'error' and whose app_id is 'com.sohu.mobile'
#     overwrite partition (dt='${dt}', app_id) of ${tableName2}. dt is a static
#     partition value and app_id is dynamic, which is why app_id must be the
#     last column of the select list ("select *" maps columns by position).
#   * Rows whose '$.ext' equals 'error' have their '$.msg' written to
#     ods_shmm_app_error_log_di, partition (type='action', dt='${dt}').
#
# Fix: the dynamic-partition mode was misspelled as "nonstric"; the valid
# values are "strict" and "nonstrict".
#
# NOTE(review): get_json_object returns NULL when the path is missing, and
# NULL != 'error' is not true, so rows without an '$.ext' field would be
# dropped by the main insert -- confirm Parse2OdsAction always emits 'ext'.
# NOTE(review): the alias "timestamp" is a reserved word in newer Hive
# releases -- confirm it parses unquoted on the target cluster.
tableName2="ods_shmm_app_action_di"
sql2="
set hive.exec.dynamic.partition = true;                   --开启动态分区功能
set hive.exec.dynamic.partition.mode = nonstrict;         --允许所有分区都是动态的
from
    (
        select
            get_json_object(content, '$.ser_log_time') as ser_log_time
            ,get_json_object(content, '$.ser_host_ip') as ser_host_ip
            ,get_json_object(content, '$.log_version') as log_version
            ,get_json_object(content, '$.report_time') as report_time
            ,get_json_object(content, '$.sdk_version') as sdk_version
            ,get_json_object(content, '$.vst_user_id') as vst_user_id
            ,get_json_object(content, '$.app_name') as app_name
            ,get_json_object(content, '$.app_version') as app_version
            ,get_json_object(content, '$.app_distri_id') as app_distri_id
            ,get_json_object(content, '$.os_type') as os_type
            ,get_json_object(content, '$.os_version') as os_version
            ,get_json_object(content, '$.device_type') as device_type
            ,get_json_object(content, '$.device_brand') as device_brand
            ,get_json_object(content, '$.device_model') as device_model
            ,get_json_object(content, '$.device_res') as device_res
            ,get_json_object(content, '$.mac') as mac
            ,get_json_object(content, '$.imei') as imei
            ,get_json_object(content, '$.imsi') as imsi
            ,get_json_object(content, '$.idfa') as idfa
            ,get_json_object(content, '$.uUID') as uuid --全大写的字段需要首字母小写
            ,get_json_object(content, '$.sUV') as suv --全大写的字段需要首字母小写
            ,get_json_object(content, '$.vst_ip') as vst_ip
            ,get_json_object(content, '$.net') as net
            ,get_json_object(content, '$.carrier') as carrier
            ,get_json_object(content, '$.timestamp') as timestamp
            ,get_json_object(content, '$.log_time') as log_time
            ,get_json_object(content, '$.page_info') as page_info
            ,get_json_object(content, '$.refer_page_info') as refer_page_info
            ,get_json_object(content, '$.spm_cnt') as spm_cnt
            ,get_json_object(content, '$.spm_pre') as spm_pre
            ,get_json_object(content, '$.lng') as lng
            ,get_json_object(content, '$.lat') as lat
            ,get_json_object(content, '$.acode') as acode
            ,get_json_object(content, '$.aext') as aext
            ,get_json_object(content, '$.ser_host_rmtip') as ser_host_rmtip
            ,get_json_object(content, '$.session_id') as session_id
            ,'' as other_1
            ,'' as other_2
            ,get_json_object(content, '$.app_id') as app_id --分区字段需放在最后一列
        from 
            (
            select
                explode(Parse2OdsAction(content)) as content
            from ${tableName} where dt='${dt}'
            )t
        where get_json_object(content,'$.ext')!='error'
    )t1
insert overwrite table ${tableName2}
    partition (dt='${dt}',app_id)
    select * where app_id='com.sohu.mobile';
--插入错误日志信息到错误日志表
insert overwrite table ods_shmm_app_error_log_di
partition (type='action',dt='${dt}')
select 
    get_json_object(content, '$.msg') 
from
    (
    select
        explode(Parse2OdsAction(content)) as content
    from ${tableName} where dt='${dt}'
    )t
where get_json_object(content,'$.ext')='error';
--删除临时表
drop table ${tableName}; 
"

# Abort with a non-zero status if the load fails, so that the steps after this
# block do not run against a target table that was not loaded.
if ! beeline -u 'jdbc:hive2://dsrv1.heracles.sohuno.com:10000/mediaai;principal=hive/[email protected];' --hiveconf mapreduce.job.queuename=media --delimiterForDSV=DELIMITER --outputformat=tsv2 --showHeader=false -e "${sql2}"
then
    echo 'failed'
    exit 1
fi

# Step 4: have Impala discard its cached metadata for the target table so the
# data just written through Hive becomes visible to Impala queries.
impala_stmt="invalidate metadata mediaai.${tableName2};"
impala-shell -i dmeta2.heracles.sohuno.com:25003 -k -d mediaai --query="${impala_stmt}"

你可能感兴趣的:(HDFS日志抽取转化并加载到目标表的shell脚本)