hive-sql练习题

数据准备(每行字段依次为:name,orderdate,cost)

jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94

创建本地文件business.txt,并将上述数据写入该文件
创建hive表并导入数据

-- Order table used by every exercise below: one row per purchase.
-- Fields are comma-delimited so the table can read business.txt as-is.
CREATE TABLE business (
    name      STRING,  -- customer name
    orderdate STRING,  -- purchase date kept as 'yyyy-MM-dd' text
    cost      INT      -- amount of the purchase
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local sample file into business (no OVERWRITE, so rows are appended).
LOAD DATA LOCAL INPATH "/export/data/business.txt"
INTO TABLE business;

按需求查询数据
(1)查询在2017年4月份购买过的顾客及总人数

-- Customers who purchased in April 2017, together with the total number of
-- such customers (the same total is repeated on every row).
-- GROUP BY first collapses the April orders to one row per customer; the
-- empty OVER () window is evaluated after grouping, so COUNT(*) counts the
-- grouped rows (distinct customers) rather than each customer's order count.
SELECT
    name,
    COUNT(*) OVER () AS customer_cnt
FROM business
WHERE substring(orderdate, 1, 7) = '2017-04'
GROUP BY name;

(2)查询顾客的购买明细及月购买总额

-- Every purchase row plus the total cost of the calendar month it falls in.
-- The window partitions on the 'yyyy-MM' prefix of orderdate, so the same
-- month in different years is never merged. There is no ORDER BY inside the
-- window, which makes the frame the whole partition (a true monthly total,
-- not a running sum).
-- NOTE(review): the total is across all customers in the month; add name to
-- PARTITION BY if a per-customer monthly total is wanted instead.
SELECT
    name,
    orderdate,
    cost,
    SUM(cost) OVER (PARTITION BY substring(orderdate, 1, 7)) AS month_total
FROM business;

(3)在上述场景的基础上,将cost按照日期进行累加

-- Running total of cost across all customers, accumulated in orderdate order.
-- With ORDER BY and no explicit frame, Hive applies its default frame
-- (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), so each row shows the
-- sum of every order up to and including its own date.
SELECT
    name,
    orderdate,
    cost,
    SUM(cost) OVER (ORDER BY orderdate)
FROM business;

(4)查看顾客上次的购买时间

-- Each purchase together with the same customer's previous purchase date.
-- LAG steps one row back within the customer's orders sorted by date; a
-- customer's first order has no earlier row and receives the '9999-99-99'
-- placeholder instead of NULL.
SELECT
    name,
    orderdate,
    cost,
    LAG(orderdate, 1, '9999-99-99') OVER (PARTITION BY name ORDER BY orderdate)
FROM business;

(5)查询前20%时间的订单信息

-- Preview step: tag every order with one of five date-ordered buckets.
-- NTILE(5) splits the rows, sorted by orderdate, into 5 groups numbered 1..5.
SELECT
    name,
    orderdate,
    cost,
    NTILE(5) OVER (ORDER BY orderdate) AS ntile_5
FROM business;

-- Orders in the earliest 20% of the timeline.
-- The CTE assigns each order to one of five date-ordered buckets; the outer
-- query keeps only bucket 1 and drops the helper column from the output.
WITH bucketed AS (
    SELECT
        name,
        orderdate,
        cost,
        NTILE(5) OVER (ORDER BY orderdate) AS ntile_5
    FROM business
)
SELECT
    name,
    orderdate,
    cost
FROM bucketed
WHERE ntile_5 = 1;

你可能感兴趣的:(Hive)