测试数据
-- 数据准备:
姓名,购买日期,购买数量
saml 2018-01-01 10
saml 2018-01-08 55
tony 2018-01-07 50
saml 2018-01-05 46
tony 2018-01-04 29
tony 2018-01-02 15
saml 2018-02-03 23
mart 2018-04-13 94
saml 2018-04-06 42
mart 2018-04-11 75
mart 2018-04-09 68
mart 2018-04-08 62
neil 2018-05-10 12
neil 2018-06-12 80
-- 创建order表:
create table if not exists t_order
(
name string,
orderdate string,
cost int
) row format delimited fields terminated by '\t';
--加载数据:
load data local inpath "./data/order.txt" into table t_order;
lag函数.三个参数,第一个列名,第二个往前多少行,第三个默认值
– 列出每个人详细信息和上一次购买的数量
select *,lag(cost,1,0) over (distribute by name sort by orderdate) `pre cost` from t_order;
+----+----------+----+--------+
|name|orderdate |cost|pre cost|
+----+----------+----+--------+
| |NULL |NULL|0 |
|mart|2018-04-08|62 |0 |
|mart|2018-04-09|68 |62 |
|mart|2018-04-11|75 |68 |
|mart|2018-04-13|94 |75 |
|neil|2018-05-10|12 |0 |
|neil|2018-06-12|80 |12 |
|saml|2018-01-01|10 |0 |
|saml|2018-01-05|46 |10 |
|saml|2018-01-08|55 |46 |
|saml|2018-02-03|23 |55 |
|saml|2018-04-06|42 |23 |
|tony|2018-01-02|15 |0 |
|tony|2018-01-04|29 |15 |
|tony|2018-01-07|50 |29 |
+----+----------+----+--------+
– 列出每个人详细信息和上上次购买的数量
select *,lag(cost,2,0) over (distribute by name sort by orderdate) `pre cost` from t_order;
列出每个人详细信息和大上次购买的数量,以及当前日期的下一次购买的数量
select *,lag(cost,2,0) over (distribute by name sort by orderdate) `pre cost`,
lead(cost,1,0) over (distribute by name sort by orderdate) `next cost`
from t_order;
+----+----------+----+--------+---------+
|name|orderdate |cost|pre cost|next cost|
+----+----------+----+--------+---------+
| |NULL |NULL|0 |0 |
|mart|2018-04-08|62 |0 |68 |
|mart|2018-04-09|68 |0 |75 |
|mart|2018-04-11|75 |62 |94 |
|mart|2018-04-13|94 |68 |0 |
|neil|2018-05-10|12 |0 |80 |
|neil|2018-06-12|80 |0 |0 |
|saml|2018-01-01|10 |0 |46 |
|saml|2018-01-05|46 |0 |55 |
|saml|2018-01-08|55 |10 |23 |
|saml|2018-02-03|23 |46 |42 |
|saml|2018-04-06|42 |55 |0 |
|tony|2018-01-02|15 |0 |29 |
|tony|2018-01-04|29 |0 |50 |
|tony|2018-01-07|50 |15 |0 |
+----+----------+----+--------+---------+
–列出每个人的详细信息和上一次购买数据,以及与上一次购买数据的差值
select * ,lag(cost,1,0) over(distribute by name sort by orderdate) `pre_cost`,
cost-lag(cost,1,0) over(distribute by name sort by orderdate) `diff`
from t_order;
+----+----------+----+--------+----+
|name|orderdate |cost|pre_cost|diff|
+----+----------+----+--------+----+
| |NULL |NULL|0 |NULL|
|mart|2018-04-08|62 |0 |62 |
|mart|2018-04-09|68 |62 |6 |
|mart|2018-04-11|75 |68 |7 |
|mart|2018-04-13|94 |75 |19 |
|neil|2018-05-10|12 |0 |12 |
|neil|2018-06-12|80 |12 |68 |
|saml|2018-01-01|10 |0 |10 |
|saml|2018-01-05|46 |10 |36 |
|saml|2018-01-08|55 |46 |9 |
|saml|2018-02-03|23 |55 |-32 |
|saml|2018-04-06|42 |23 |19 |
|tony|2018-01-02|15 |0 |15 |
|tony|2018-01-04|29 |15 |14 |
|tony|2018-01-07|50 |29 |21 |
+----+----------+----+--------+----+
求分组排序后的第一个值
select *,first_value(cost) over(distribute by name sort by orderdate)
from t_order;
+----+----------+----+----+
|name|orderdate |cost|c1 |
+----+----------+----+----+
| |NULL |NULL|NULL|
|mart|2018-04-08|62 |62 |
|mart|2018-04-09|68 |62 |
|mart|2018-04-11|75 |62 |
|mart|2018-04-13|94 |62 |
|neil|2018-05-10|12 |12 |
|neil|2018-06-12|80 |12 |
|saml|2018-01-01|10 |10 |
|saml|2018-01-05|46 |10 |
|saml|2018-01-08|55 |10 |
|saml|2018-02-03|23 |10 |
|saml|2018-04-06|42 |10 |
|tony|2018-01-02|15 |15 |
|tony|2018-01-04|29 |15 |
|tony|2018-01-07|50 |15 |
+----+----------+----+----+
求分组排序后的最后一个值,但是相当当前行来说,当前行就是最后一行
如下结果并不是我们想要的,换种思路
select *,last_value(cost) over(distribute by name sort by orderdate)
from t_order;
+----+----------+----+----+
|name|orderdate |cost|c1 |
+----+----------+----+----+
| |NULL |NULL|NULL|
|mart|2018-04-08|62 |62 |
|mart|2018-04-09|68 |68 |
|mart|2018-04-11|75 |75 |
|mart|2018-04-13|94 |94 |
|neil|2018-05-10|12 |12 |
|neil|2018-06-12|80 |80 |
|saml|2018-01-01|10 |10 |
|saml|2018-01-05|46 |46 |
|saml|2018-01-08|55 |55 |
|saml|2018-02-03|23 |23 |
|saml|2018-04-06|42 |42 |
|tony|2018-01-02|15 |15 |
|tony|2018-01-04|29 |29 |
|tony|2018-01-07|50 |50 |
+----+----------+----+----+
换种思路求每组的最后一个值
select *,first_value(cost) over(distribute by name sort by orderdate desc)
from t_order;
+----+----------+----+----+
|name|orderdate |cost|c1 |
+----+----------+----+----+
| |NULL |NULL|NULL|
|mart|2018-04-13|94 |94 |
|mart|2018-04-11|75 |94 |
|mart|2018-04-09|68 |94 |
|mart|2018-04-08|62 |94 |
|neil|2018-06-12|80 |80 |
|neil|2018-05-10|12 |80 |
|saml|2018-04-06|42 |42 |
|saml|2018-02-03|23 |42 |
|saml|2018-01-08|55 |42 |
|saml|2018-01-05|46 |42 |
|saml|2018-01-01|10 |42 |
|tony|2018-01-07|50 |50 |
|tony|2018-01-04|29 |50 |
|tony|2018-01-02|15 |50 |
+----+----------+----+----+