================================新的实例 更好理解
数据:
四列的表,第一列id,第二列渠道,第三列系统,第四类访问日期
0: jdbc:hive2://hadoop009.dx.momo.com:2181,ha> select * from dc_dev.tmp_row_num;
tmp_row_num.muid tmp_row_num.channeltmp_row_num.os tmp_row_num.time
242443391 Y1 android 20150319
242443391 Y2 ios 20150320
2443391 Y1 android 20150319
2443391 Y3 ios 20150321
2423391 Y1 android 20150319
2423391 Y4 ios 20150311
24244391 Y1 android 20150319
24244391 Y2 ios 20150324
2424430391 Y4 android 20150323
2424430391 Y2 ios 20150322
2424430391 Y1 ios 20150312
330091 Y5 android 20150321
330091 Y1 android 20150311
339001 Y2 ios 20150320
14 rows selected (0.313 seconds)
实例一:
相当于按muid分组,然后同muid组内按muid,time排序
select channel,muid,time, row_number() over (partition by muid order by muid,time desc ) rank
from dc_dev.tmp_row_num
group by channel,muid,time
结果:
Y1 2423391 20150319 1
Y4 2423391 20150311 2
Y4 2424430391 20150323 1
Y2 2424430391 20150322 2
Y1 2424430391 20150312 3
Y2 242443391 20150320 1
Y1 242443391 20150319 2
Y2 24244391 20150324 1
Y1 24244391 20150319 2
Y3 2443391 20150321 1
Y1 2443391 20150319 2
Y5 330091 20150321 1
Y1 330091 20150311 2
Y2 339001 20150320 1
实例二:
相当于按channel分组,然后同channel组内按time降序排序
select channel,muid,time, row_number() over (partition by channel order by time desc ) rank
from dc_dev.tmp_row_num
group by channel,muid,time
结果:
channel muid time rank
Y1 2423391 20150319 1
Y1 242443391 20150319 2
Y1 24244391 20150319 3
Y1 2443391 20150319 4
Y1 2424430391 20150312 5
Y1 330091 20150311 6
Y2 24244391 20150324 1
Y2 2424430391 20150322 2
Y2 242443391 20150320 3
Y2 339001 20150320 4
Y3 2443391 20150321 1
Y4 2424430391 20150323 1
Y4 2423391 20150311 2
Y5 330091 20150321 1
实例三:
相当于按channel,muid分组,然后同channel,muid组内按time排序
select channel,muid,time, row_number() over (partition by channel,muid order by time desc ) rank
from dc_dev.tmp_row_num
group by channel,muid,time
结果:
channel muid time rank
Y1 2423391 20150319 1
Y1 2424430391 20150312 1
Y1 242443391 20150319 1
Y1 24244391 20150319 1
Y1 2443391 20150319 1
Y1 330091 20150311 1
Y2 2424430391 20150322 1
Y2 242443391 20150320 1
Y2 24244391 20150324 1
Y2 339001 20150320 1
Y3 2443391 20150321 1
Y4 2423391 20150311 1
Y4 2424430391 20150323 1
Y5 330091 20150321 1
14 rows selected (106.867 seconds)
================================往下旧的
分组排序求某用户日期最大的3天,row_number() over (partition by 字段a order by 计算项b desc ) rank rank是行数
select
a.*
from
(
select p_day,muid, row_number() over (partition by muid order by p_day desc ) rank
from test_row_num
group by p_day,muid
) a
where
a.rank<=3;
select muid,count(1) from pay_log_days where p_day=20151021 group by muid;
获取每天充值次数的前3名
select *
from
(
select p_day,muid,c,row_number() over (partition by p_day order by p_day,c desc) ord
from
(
select p_day,muid,count(1) c from pay_log_days where p_day>=20151015 group by p_day,muid
) t1
) t2
where ord<=3;