【1】先根据表中数据计算出需要的值,存储在临时表里面,比如计算日活跃度的语句:
with tbl_dau as ( select 'dau' as name, count(distinct deviceid) as cnt from ( select deviceid from src_huidu_zl.event3 where hdfs_par = '20170108' union all select deviceid from src_huidu_zl.page3 where hdfs_par = '20170108' ) as tmp )
另外,计算历史峰值的逻辑比较复杂,例子如下:
historymaxnewdev as( select if(regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' )
该行语句先从zl_daily_kv 表里面读取出历史峰值,通过正则表达式提取出新设备的历史峰值(提取前的格式为:677777||20161026,前面的数值可能带小数点),然后将历史峰值与今天的新数据对比,找出新的最大值,并更新相应的峰值。
【2】拼接JSON字符串
【3】以‘daily_static’为键值存储第二步的结果
下面是完整的程序:
insert overwrite ana_fx_goal.zl_daily_kv partition(hdfs_par='20170108') select * from ( select cast( concat(substr('20170108', 1, 4), '-', substr('20170108', 5, 2), '-', substr('20170108', 7, 2)) as timestamp) as day_begin, NOW() as generate_time, 'daily_static' as json_key, json_output as json_value from ( select concat( '{', group_concat( concat(wrapped_column_key, ':', wrapped_column_value), ','), '}' ) as json_output from ( select concat('"', name, '"') as wrapped_column_key, case when cnt is null then 'null' when 'double' = 'string' then concat('"', cast(cnt as string), '"') else cast(cnt as string) end as wrapped_column_value from ( with tbl_dau as ( select 'dau' as name, count(distinct deviceid) as cnt from ( select deviceid from src_huidu_zl.event3 where hdfs_par = '20170108' union all select deviceid from src_huidu_zl.page3 where hdfs_par = '20170108' ) as tmp ), tbl_launchrecords as ( select 'launchrecords' as name, count(*) as cnt from src_huidu_zl.launch2 where hdfs_par = '20170108' ), tbl_newdev as ( select 'newdev' as name, count(distinct deviceid) as cnt from ana_fx_middle.devappear2 where first_appear_hdfspar = '20170108' and hdfs_par = '20170108' ), tbl_avgusetime as ( select 'avgusetime' as name, avg(use_time) as cnt from src_huidu_zl.launch2 where hdfs_par = '20170108' ), tbl_avgdevtime as ( select 'avgdevtime' as name, sum(use_time)/count(distinct deviceid) as cnt from src_huidu_zl.launch2 where hdfs_par = '20170108' ), tbl_alldev as ( select 'alldev' as name, count(distinct deviceid) as cnt from ana_fx_middle.devappear2 where hdfs_par = '20170108' ), tbl_mau as ( select 'mau' as name, count(distinct deviceid) as cnt from ( select deviceid from src_huidu_zl.event3 where hdfs_par between '20161211' and '20170108' union all select deviceid from src_huidu_zl.page3 where hdfs_par between '20161211' and '20170108' ) as tmp ), tbl_avglaunchrecords as ( select 'avglaunchrecords' as name, count(*)/count(distinct deviceid) as cnt from src_huidu_zl.launch2 where hdfs_par = '20170108' ), tbl_launchdays as ( select sum(cnt) as cnt from ( select hdfs_par, count(distinct deviceid) as cnt from src_huidu_zl.launch2 where hdfs_par between '20161211' and '20170108' group by hdfs_par ) as tmp ), tbl_launchdevs as ( select count(distinct deviceid) as cnt from src_huidu_zl.launch2 where hdfs_par between '20161211' and '20170108' ), tbl_28new7retain as ( select '28new7retain' as name, count(distinct tbl_7day.deviceid) as cnt from ( select deviceid from src_huidu_zl.event3 where hdfs_par between '20170101' and '20170108' union all select deviceid from src_huidu_zl.page3 where hdfs_par between '20170101' and '20170108' ) as tbl_7day left join ( select distinct deviceid as deviceid from ana_fx_middle.devappear2 where first_appear_hdfspar = '20161211' ) as tbl_28day on tbl_7day.deviceid = tbl_28day.deviceid where tbl_28day.deviceid != 'NULL' ), tbl_28new as ( select '28new' as name, count(distinct deviceid) as cnt from ana_fx_middle.devappear2 where first_appear_hdfspar = '20161211' ), historymaxdauvalue as( select if(regexp_extract(json_value, '\"maxdau\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxdau\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxdau\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxdau\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' ), historymaxlaunchrecords as( select if(regexp_extract(json_value, '\"maxlaunchrecords\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxlaunchrecords\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxlaunchrecords\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxlaunchrecords\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' ), historymaxnewdev as( select if(regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxnewdev\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxnewdev\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' ), historymaxavgusetime as ( select if(regexp_extract(json_value, '\"maxavgusetime\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxavgusetime\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxavgusetime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxavgusetime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' ), historymaxavgdevtime as ( select if(regexp_extract(json_value, '\"maxavgdevtime\"\:(\\d+(\\.\\d+)*)',1)='','1',regexp_extract(json_value, '\"maxavgdevtime\"\:(\\d+(\\.\\d+)*)',1)) as historymaxvalue,if(regexp_extract(json_value, '\"maxavgdevtime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)='','19710101',regexp_extract(json_value, '\"maxavgdevtime\":(\"\\d+(\\.\\d+)*)\\|\\|(\\d{8})\"',3)) as historydate from ana_fx_goal.zl_daily_kv where json_key = 'daily_static' and hdfs_par='20170108' ) select name,cast(cnt as string) as cnt from tbl_dau union all select name,cast(cnt as string) as cnt from tbl_launchrecords union all select name,cast(cnt as string) as cnt from tbl_newdev union all select name,cast(cnt as string) as cnt from tbl_avgusetime union all select name,cast(cnt as string) as cnt from tbl_avgdevtime union all select name,cast(cnt as string) as cnt from tbl_alldev union all select name,cast(cnt as string) as cnt from tbl_mau union all select name,cast(cnt as string) as cnt from tbl_avglaunchrecords union all select 'avglaunchdays' as name, cast((tbl_launchdays.cnt/tbl_launchdevs.cnt) as string) as cnt from tbl_launchdays, tbl_launchdevs union all select 'retainratio' as name, cast((tbl_28new7retain.cnt/tbl_28new.cnt) as string) as cnt from tbl_28new, tbl_28new7retain union all select 'maxdau' as name, if(tbl_dau.cnt>cast(historymaxdauvalue.historymaxvalue as bigint),concat(cast(tbl_dau.cnt as string),'||','20170108'),concat(historymaxdauvalue.historymaxvalue,'||',historydate)) as cnt from tbl_dau,historymaxdauvalue union all select 'maxlaunchrecords' as name, if(tbl_launchrecords.cnt>cast(historymaxlaunchrecords.historymaxvalue as bigint),concat(cast(tbl_launchrecords.cnt as string),'||','20170108'),concat(historymaxlaunchrecords.historymaxvalue,'||',historydate)) as cnt from tbl_launchrecords,historymaxlaunchrecords union all select 'maxnewdev' as name, if(tbl_newdev.cnt>cast(historymaxnewdev.historymaxvalue as bigint),concat(cast(tbl_newdev.cnt as string),'||','20170108'),concat(historymaxnewdev.historymaxvalue,'||',historydate)) as cnt from tbl_newdev,historymaxnewdev union all select 'maxavgusetime' as name, if(tbl_avgusetime.cnt>cast(historymaxavgusetime.historymaxvalue as bigint),concat(cast(tbl_avgusetime.cnt as string),'||','20170108'),concat(historymaxavgusetime.historymaxvalue,'||',historydate)) as cnt from tbl_avgusetime,historymaxavgusetime union all select 'maxavgusetime' as name, if(tbl_avgdevtime.cnt>cast(historymaxavgdevtime.historymaxvalue as bigint),concat(cast(tbl_avgdevtime.cnt as string),'||','20170108'),concat(historymaxavgdevtime.historymaxvalue,'||',historydate)) as cnt from tbl_avgdevtime,historymaxavgdevtime ) as tmp1_B ) as tmp2_B ) as tmp limit 1 ) as t1 union all select * from ( select day_begin, generate_time, json_key, json_value from ana_fx_goal.zl_daily_kv where hdfs_par = '20170108' and json_key != 'daily_static' ) as t2 ;