一次hive笔试题

一次hive笔试题_第1张图片题目一:
-- Question 1: for every diary author, the number of diaries posted and the
-- number of likes those diaries received.
-- NOTE(review): assumes T1 has one row per diary (log_id, uid) and T2 one row
-- per like (log_id, like_uid); the schema image is not visible here -- confirm.
select
    d.uid,
    d.`发过多少日记`,
    -- Authors without any like have no row in the likes subquery; report 0, not NULL.
    coalesce(l.`获得多少点赞`, 0) as `获得多少点赞`
from
(
    select
        uid,
        count(*) as `发过多少日记`
    from T1
    group by uid
) d
left join
(
    select
        T1.uid,
        -- Count like rows; summing like_uid would add up user ids.
        count(T2.like_uid) as `获得多少点赞`
    from T1
    join T2
        on T1.log_id = T2.log_id
    group by T1.uid
) l
    on d.uid = l.uid;
一次hive笔试题_第2张图片题目二:
1.
-- Question 2.1: the row(s) carrying the highest version number.
-- NOTE(review): assumes v_id is a purely numeric dotted string such as '9.11.2';
-- a prefix like 'v' would make the casts return NULL -- confirm against the data.
select
    id,
    v_id
from
(
    select
        id,
        v_id,
        -- dense_rank keeps every row that ties for the highest version.
        dense_rank() over (order by major_no desc, minor_no desc, patch_no desc) as seq
    from
    (
        select
            id,
            v_id,
            -- split() takes a regex; '\\.' matches a literal dot, a bare '.' matches any character.
            cast(split(v_id, '\\.')[0] as int) as major_no,
            cast(split(v_id, '\\.')[1] as int) as minor_no,
            cast(split(v_id, '\\.')[2] as int) as patch_no
        from T1
    ) parts
) ranked
where seq = 1;
2.
-- Question 2.2: rank all version numbers from newest to oldest.
-- NOTE(review): assumes v_id is a purely numeric dotted string such as '9.11.2' -- confirm.
select
    v_id,
    -- Compare the parts numerically so that 9.11 sorts above 9.2.
    rank() over (order by major_no desc, minor_no desc, patch_no desc) as seq
from
(
    select
        id,
        v_id,
        -- split() takes a regex; '\\.' matches a literal dot, a bare '.' matches any character.
        cast(split(v_id, '\\.')[0] as int) as major_no,
        cast(split(v_id, '\\.')[1] as int) as minor_no,
        cast(split(v_id, '\\.')[2] as int) as patch_no
    from T1
) parts;
一次hive笔试题_第3张图片一次hive笔试题_第4张图片一次hive笔试题_第5张图片题目三:
1.
-- Question 3.1: per sign-up date, the number of new users and how many of them
-- came back exactly N days after signing up.
-- NOTE(review): assumes dim_tb_user.create_time is the sign-up time and
-- fact_access_log.create_time the access time; schemas are not visible -- confirm.
select
    -- distinct: the join yields one row per (user, access day), which would inflate a plain count.
    count(distinct t1.user_id) as `新用户数`,
    -- datediff(end, start) = end - start, so the access date goes first.
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 1 then t1.user_id end) as `次日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 2 then t1.user_id end) as `2日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 3 then t1.user_id end) as `3日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 4 then t1.user_id end) as `4日留存用户数`,
    -- The sign-up date each output row describes.
    to_date(t1.create_time) as dt
from dim_tb_user t1
left join
(
    -- One row per user per access day, so each retained user is counted once per offset.
    select distinct
        user_id,
        to_date(create_time) as access_date
    from fact_access_log
) t2
    on t1.user_id = t2.user_id
group by to_date(t1.create_time);
2.
-- Question 3.2: write the 1- to 7-day retention of the users who signed up on
-- 2019-01-09 into the reporting table.
-- NOTE(review): the target table's column list is not visible; confirm the column
-- order and whether a dt column/partition must also be written.
-- NOTE(review): if fact_access_log is partitioned (e.g. by dt), add the matching
-- partition predicate to the access-log subquery so only 7 partitions are scanned.
insert into table reporting_new_user_retain_day
select
    -- distinct: the join yields one row per (user, access day), which would inflate a plain count.
    count(distinct t1.user_id) as `新用户数`,
    -- datediff(end, start) = end - start, so the access date goes first.
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 1 then t1.user_id end) as `次日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 2 then t1.user_id end) as `2日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 3 then t1.user_id end) as `3日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 4 then t1.user_id end) as `4日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 5 then t1.user_id end) as `5日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 6 then t1.user_id end) as `6日留存用户数`,
    count(case when datediff(t2.access_date, to_date(t1.create_time)) = 7 then t1.user_id end) as `7日留存用户数`
from dim_tb_user t1
left join
(
    -- One row per user per access day, limited to the 7 days after the sign-up date.
    select distinct
        user_id,
        to_date(create_time) as access_date
    from fact_access_log
    where to_date(create_time) >= '2019-01-10'
      and to_date(create_time) < '2019-01-17'
) t2
    on t1.user_id = t2.user_id
-- Filter the cohort before aggregating instead of computing every day and discarding the rest.
where to_date(t1.create_time) = '2019-01-09'
group by to_date(t1.create_time);

未合理使用分区;大数据领域做全量计算是不可取的
-- Number of newly registered users per sign-up date.
-- NOTE(review): assumes dim_tb_user has one row per user and create_time is the
-- sign-up time; to_date() drops any time-of-day part -- confirm.
select
    count(user_id) as `新增用户数`,
    to_date(create_time) as `用户新增日期`
from dim_tb_user
group by to_date(create_time);
-- Length of every run of consecutive access days per user (gaps-and-islands).
-- NOTE(review): the question text is not visible; this assumes the goal is
-- consecutive-day streaks, and that create_time is the access time -- confirm.
select
    user_id,
    dt,
    count(*) as day_count
from
(
    select
        user_id,
        access_date,
        -- Consecutive dates minus their 1-based position share the same anchor date.
        date_sub(access_date, rn) as dt
    from
    (
        select
            user_id,
            access_date,
            -- row_number, not rank: each distinct day must advance the position by exactly one.
            row_number() over (partition by user_id order by access_date) as rn
        from
        (
            -- Collapse several accesses on the same day into one row.
            select distinct
                user_id,
                to_date(create_time) as access_date
            from fact_access_log
        ) daily
    ) t1
) t2
group by user_id, dt;

第三题思路有问题 大家可以指出来,交流一下 ,一起进步

你可能感兴趣的:(数仓)