-- Job details: http://dp.pt.xiaomi.com/job/job_details/339836
-- Monthly device-inactivity report for devices whose finalCountry is "中国",
-- bucketed by brand and model, written to miui_data.sales_newinfo_table.
-- Scheduled at the start of each month, so ${date-1} is expected to be the last
-- day of the month being reported.
-- NOTE(review): nothing in the query enforces that schedule; the "days in month"
-- value below is simply the day-of-month of ${date-1}.
with device_active_day as (
    -- One row per (device, calendar day on which it was last seen active).
    -- The device table is a daily full snapshot: a device that is inactive on a
    -- given day is re-reported with its previous last-active time, so the same
    -- day shows up in many partitions. DISTINCT collapses those repeats.
    select distinct
        id,
        brand,
        modelname,
        from_unixtime(cast(lastactivetime / 1000 as int), 'yyyy-MM-dd') as active_day
    from profile.device_state_accumulator_all
    -- 30 daily partitions in total.
    -- NOTE(review): for a 31-day month the partition of the 1st falls outside
    -- this window -- confirm whether that is acceptable.
    where date <= ${date-1}
      and date >= ${date-30}
      -- keep only activity that happened in the month of ${date-1}
      and from_unixtime(cast(lastactivetime / 1000 as int), 'yyyyMM') = substr(${date-1}, 1, 6)
      and finalCountry = "中国"
),
intern_t as (
    -- Rank each device's active days, most recent first.
    -- Important: this must be dense_rank, and it must order by the calendar day
    -- (not the raw millisecond timestamp) so that rank 1 and rank 2 are always
    -- two different days.
    select
        id,
        brand,
        modelname,
        active_day,
        dense_rank() over (partition by id order by active_day desc) as active_rank
    from device_active_day
),
not_active_continuous_table as (
    -- Gap (in whole inactive days) between a device's latest and second-latest
    -- active day in the month; only gaps of 15 days or more are kept.
    select
        latest.brand,
        latest.modelname,
        datediff(latest.active_day, previous.active_day) - 1 as notactive_continuous_days,
        count(distinct latest.id) as not_active_continuous_days_counts
    from intern_t latest
    inner join intern_t previous
        on latest.id = previous.id
    where latest.active_rank = 1
      and previous.active_rank = 2
      and datediff(latest.active_day, previous.active_day) - 1 >= 15
    group by
        latest.brand,
        latest.modelname,
        datediff(latest.active_day, previous.active_day) - 1
),
active_days_permonth_table as (
    -- Number of devices per (brand, model, inactive-day count for the month).
    -- Any value below the number of days in the month means the device was
    -- active at some point during that month.
    select
        brand,
        modelname,
        not_active_days_permonth,
        count(distinct id) as not_active_counts
    from (
        -- days in month - active days in month = inactive days in month
        select
            id,
            brand,
            modelname,
            from_unixtime(unix_timestamp(cast(${date-1} as string), 'yyyyMMdd'), 'dd')
                - count(distinct active_day) as not_active_days_permonth
        from intern_t
        group by id, brand, modelname
    ) per_device
    group by brand, modelname, not_active_days_permonth
)
insert overwrite table miui_data.sales_newinfo_table partition(date=${date-1})
-- NOTE(review): the join key is only (brand, modelname), so every gap bucket is
-- paired with every monthly-inactivity bucket of the same model -- confirm that
-- downstream consumers expect this shape.
select
    -- With a full outer join either side may be missing; take the key from
    -- whichever side is present so no row is written with a NULL brand/model.
    coalesce(c.brand, m.brand) as brand,
    coalesce(c.modelname, m.modelname) as modelname,
    c.notactive_continuous_days,
    m.not_active_days_permonth,
    c.not_active_continuous_days_counts,
    m.not_active_counts
from not_active_continuous_table c
full outer join active_days_permonth_table m
    on c.brand = m.brand
   and c.modelname = m.modelname;
-- Devices (finalCountry "中国") that were active on three consecutive calendar
-- days within the month of ${date-1}.
-- NOTE(review): the intent is inferred from the "rank difference = 2 and day
-- difference = 2" predicates of the original query -- confirm.
-- Scheduled at the start of each month.
with t as (
    -- Rank each device's distinct active days, most recent first.
    -- Important: this must be dense_rank over the calendar day. The device table
    -- is a daily full snapshot, and a device that is inactive on a given day is
    -- re-reported with its previous last-active time.
    select
        id,
        active_day,
        dense_rank() over (partition by id order by active_day desc) as active_rank
    from (
        select distinct
            id,
            from_unixtime(cast(lastactivetime / 1000 as int), 'yyyy-MM-dd') as active_day
        from profile.device_state_accumulator_all
        -- 30 daily partitions in total
        where date <= ${date-1}
          and date >= ${date-30}
          and from_unixtime(cast(lastactivetime / 1000 as int), 'yyyyMM') = substr(${date-1}, 1, 6)
          and finalCountry = "中国"
    ) device_active_day
)
select distinct
    newer.id as id
from t newer
inner join t older
    on newer.id = older.id
-- Ranks are descending by day, so the row with the larger rank is the older day.
-- Two rank steps apart and exactly two calendar days apart means the day in
-- between was active as well.
where older.active_rank - newer.active_rank = 2
  and datediff(newer.active_day, older.active_day) = 2;
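-- Problem: a table has two columns, userid and text. The text column holds ball sports (basketball, football, etc.), i.e. the table records which ball sports each user likes; the total number of distinct sports is not known in advance. For each sport, compute the share of its fans who also like each of the other sports.
-- Example: among the users who like basketball, what fraction also like football, what fraction also like badminton, and so on (a cross ratio).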
-- Cross-preference between sports: for every sport (text1), the number of its
-- fans who also like another sport (text2), and the share of text1's fans that
-- this represents.
-- `table` is the placeholder table name; it is backticked because TABLE is a
-- reserved word.
-- Assumes one (id, text) row per user/sport pair -- TODO confirm; a multi-valued
-- text column would have to be exploded into one row per sport first.
with user_sport as (
    -- de-duplicated (user, sport) pairs
    select distinct
        id,
        text
    from `table`
),
sport_fans as (
    -- denominator: number of distinct users who like each sport
    select
        text,
        count(distinct id) as fans
    from user_sport
    group by text
)
select
    pairs.text1,
    pairs.text2,
    pairs.counts,
    pairs.counts / totals.fans as ratio
from (
    -- numerator: users who like both text1 and text2; self-pairs are skipped
    -- because only the *other* sports are asked for
    select
        a.text as text1,
        b.text as text2,
        count(distinct a.id) as counts
    from user_sport a
    inner join user_sport b
        on a.id = b.id
    where a.text <> b.text
    group by a.text, b.text
) pairs
inner join sport_fans totals
    on pairs.text1 = totals.text;