窗口函数练习

练习1

1.数据准备

name cdate   money
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,46
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94

2.建表,上传数据


-- Orders fact table; rows loaded from a comma-delimited text file.
create table if not exists t_orders
(
    name  string,  -- customer name
    cdate string,  -- order date 'yyyy-MM-dd'; kept as string for substr-based month bucketing
    money double   -- order amount; double is fine for exercises, prefer decimal for real money
) row format delimited fields terminated by ',';

-- Load the sample rows from the local filesystem (re-running appends duplicates).
load data local inpath '/root/orders.txt' into table t_orders;

-- Sanity check: confirm the load worked.
select * from t_orders;

3.

(1)查询每个用户总订单金额
-- (1) Per-user order total, repeated on every row of that user (window, no row collapse).
select *, sum(money) over (partition by name) as total_money
from t_orders;
(2)查询每个月的订单总数  
-- (2) Per-month order count attached to each order row.
-- `month` is avoided as an alias because it is a keyword/function name in some Hive versions.
select *,
       substr(cdate, 1, 7)                              as order_month,
       count(1) over (partition by substr(cdate, 1, 7)) as month_order_cnt
from t_orders;
(3)查询所有用户的总订单金额
-- (3) Grand total over all orders, attached to every row (empty OVER() = whole table).
select *, sum(money) over () as total_money
from t_orders;
(4)查询每个用户的订单总金额
-- (4) Per-user order total (same query as exercise (1)).
select *, sum(money) over (partition by name) as total_money
from t_orders;
(5)查询每个用户的订单总金额 按天数排序 累加
-- (5) Per-user running total by order date; the explicit ROWS frame makes the
-- accumulation row-by-row even when two orders share the same date.
select *,
       sum(money) over (partition by name
                        order by cdate
                        rows between unbounded preceding and current row) as running_total
from t_orders;
(6)查询每个月的订单总金额 按照天数累加
-- (6) Per-month running total by day. An explicit ROWS frame is added:
-- the default RANGE frame would lump together ties on the same date
-- instead of accumulating one row at a time.
select *,
       sum(money) over (partition by substr(cdate, 1, 7)
                        order by cdate
                        rows between unbounded preceding and current row) as month_running_total
from t_orders;

练习2

1.数据准备

uid login_date
001,2017-02-05 12:00:00
001,2017-02-05 14:00:00
001,2017-02-06 13:00:00
001,2017-02-07 12:00:00
001,2017-02-08 12:00:00
001,2017-02-10 14:00:00
002,2017-02-05 13:00:00
002,2017-02-06 12:00:00
002,2017-02-06 14:00:00
002,2017-02-08 12:00:00
002,2017-02-09 16:00:00
002,2017-02-10 12:00:00
003,2017-01-31 13:00:00
003,2017-01-31 12:00:00
003,2017-02-01 12:00:00
004,2017-02-02 12:00:00
004,2017-02-03 12:00:00
004,2017-02-10 12:00:00
004,2017-03-01 12:00:00

2.建表,数据导入

-- Raw login events; one row per login.
create table if not exists t_login_user
(
    uid        string,  -- user id
    login_date string   -- full login timestamp, 'yyyy-MM-dd HH:mm:ss'
) row format delimited fields terminated by ",";

-- OVERWRITE keeps the load idempotent across re-runs.
load data local inpath "/hive/login_user.txt" overwrite into table t_login_user;

-- Sanity check: confirm the load worked.
select * from t_login_user;

3.

计算连续登陆3天的用户
1.去重
-- Dedup to one row per user per calendar day. login_date holds a full
-- timestamp, so grouping on the raw value keeps multiple logins on the
-- same day (e.g. uid 001 on 2017-02-05) and breaks the consecutive-day
-- numbering in the later steps; group on the date part instead.
select uid, to_date(login_date) as login_date
from t_login_user
group by uid, to_date(login_date);
2.排号
-- Number each user's distinct login DAYS in order (1, 2, 3, ...).
-- The inner query dedups on the date part, not the raw timestamp,
-- so two same-day logins count once.
select uid, login_date,
       row_number() over (partition by uid order by login_date) as n
from (select uid, to_date(login_date) as login_date
      from t_login_user
      group by uid, to_date(login_date)) t;
3.获得新的日期  旧日期-行号 相等的说明是连续登录
-- date - row_number is constant within a run of consecutive days, so
-- rows sharing (uid, new_date) belong to the same streak.
-- Per-day dedup (to_date) in the innermost query keeps the row numbers honest.
select uid, login_date, n,
       date_sub(login_date, n) as new_date
from (select uid, login_date,
             row_number() over (partition by uid order by login_date) as n
      from (select uid, to_date(login_date) as login_date
            from t_login_user
            group by uid, to_date(login_date)) t) t2;
4.count+过滤
-- Users with >= 3 consecutive login days: rows in the same (uid, new_date)
-- group form one streak, so the group size is the streak length.
-- Per-day dedup (to_date) in the innermost query prevents same-day logins
-- from inflating the streak length.
select uid, count(1) as consecutive_days
from (select uid, login_date, n,
             date_sub(login_date, n) as new_date
      from (select uid, login_date,
                   row_number() over (partition by uid order by login_date) as n
            from (select uid, to_date(login_date) as login_date
                  from t_login_user
                  group by uid, to_date(login_date)) t) t2) t3
group by uid, new_date
having count(1) >= 3;

练习3

1.数据准备

uid,hit,m
1,1,0
1,2,1
1,3,1
1,4,1
1,5,0
1,6,0
1,7,1
2,1,1
2,2,1
2,3,1
2,4,1
2,5,1
3,1,1
3,2,1
3,3,1
3,4,0
3,5,0
3,6,1
3,7,0
3,8,1

2.建表,数据导入

-- Whack-a-mole results: one row per attempt.
create table if not exists tb_ds
(
    uid int,  -- player id
    hit int,  -- attempt number (per player, in play order)
    m   int   -- 1 = hit, 0 = miss
)
    row format delimited fields terminated by ',';

-- Load the sample rows (re-running appends duplicates).
load data local inpath '/hive/ds.txt' into table tb_ds;

-- Sanity check: confirm the load worked.
select  * from tb_ds;

3.

-- 查询用户最大连续命中次数
-- 过滤只剩m=1
-- Keep only the successful attempts; misses will later show up as gaps
-- in the hit numbers, which is how streaks are reconstructed.
select uid,
       hit,
       m
from tb_ds
where m = 1;
--打行号
-- Rank each user's successful attempts in play order. WHERE is evaluated
-- before window functions, so the filter can live in the same query level
-- without changing the numbering.
select uid,
       hit,
       m,
       row_number() over (partition by uid order by hit) as flag
from tb_ds
where m = 1;
-- 同一id   hit-flag结果相同说明是连续的
-- Within a run of consecutive hits, hit - flag stays constant, so it
-- serves as the streak group key per user.
select uid, hit, m, flag, (hit - flag) n
from (
    select uid,
           hit,
           m,
           row_number() over (partition by uid order by hit) as flag
    from tb_ds
    where m = 1
) ranked;
-- count
-- Length of every hit streak per user (one output row per streak).
with hits as (
    -- successful attempts numbered in play order
    select uid,
           hit,
           m,
           row_number() over (partition by uid order by hit) as flag
    from tb_ds
    where m = 1
),
streaks as (
    -- hit - flag is constant within one streak
    select uid, hit, m, flag, (hit - flag) as n
    from hits
)
select uid, count(1) count
from streaks
group by uid, n;
-- uid 进行分组, 获得最大的值
-- Longest hit streak per user: size each streak, then take the max.
with hits as (
    -- successful attempts numbered in play order
    select uid,
           hit,
           row_number() over (partition by uid order by hit) as flag
    from tb_ds
    where m = 1
),
streak_len as (
    -- hit - flag is constant within one streak, so grouping on it
    -- yields one row per streak with its length
    select uid, count(1) as count
    from hits
    group by uid, (hit - flag)
)
select uid, max(count)
from streak_len
group by uid;

你可能感兴趣的:(database,hive,数据仓库,大数据,mysql,sql)