Hive|级联之自join累加求和问题、sum() over累加求和问题

-- 建表
-- Payment detail table: one row per individual payment.
-- Columns: username (user id such as 'A'), month (text in yyyy-MM form, as in the
-- sample data), salary (integer amount of that single payment).
-- Stored as delimited text with ',' as the field separator.
-- The delimiter must be wrapped in plain ASCII single quotes; typographic quotes
-- (‘ ’) are not string delimiters in HiveQL and make the statement fail to parse.
create table t_salary_detail(
    username string,
    month    string,
    salary   int
)
row format delimited fields terminated by ',';

-- 从 Linux 服务器加载数据
-- Load the sample file into t_salary_detail. LOCAL makes Hive read the path from
-- the local filesystem rather than HDFS; without OVERWRITE the rows are appended.
-- The path must be quoted with plain ASCII single quotes (typographic ‘ ’ quotes
-- are not valid string delimiters).
load data local inpath '/root/hivedata/t_salary_detail.dat' into table t_salary_detail;

-- 表数据
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
A,2015-03,7
A,2015-03,9
B,2015-03,11
B,2015-03,6

-- 需求:统计每个用户每个月获得多少小费?

-- Total amount received by each user in each month,
-- listed by user and then chronologically by month.
select
    username,
    month,
    sum(salary) as month_salary
from t_salary_detail
group by
    username,
    month
order by
    username,
    month;

+-----------+----------+---------------+
| username  | month    | month_salary  |
+-----------+----------+---------------+
| A         | 2015-01  | 33            |
| A         | 2015-02  | 10            |
| A         | 2015-03  | 16            |
| B         | 2015-01  | 30            |
| B         | 2015-02  | 15            |
| B         | 2015-03  | 17            |
+-----------+----------+---------------+

-- 需求:求逐月累加金额
-- way1

-- Running (month-by-month cumulative) total per user, using a window function.
-- The derived table collapses the detail rows to one row per (username, month).
-- The window then adds up month_salary from the user's first month through the
-- current month; the frame is written out explicitly instead of relying on the
-- default frame that applies when only ORDER BY is given.
-- Sorting happens once, on the final result: an ORDER BY inside the derived table
-- does not determine the row order of the outer query, so it was removed there.
select
    tmp.username,
    tmp.month,
    month_salary,
    sum(month_salary) over (
        partition by tmp.username
        order by tmp.month
        rows between unbounded preceding and current row
    ) as total_salary
from (
    select
        username,
        month,
        sum(salary) as month_salary
    from t_salary_detail
    group by username, month
) as tmp
order by tmp.username, tmp.month;

-- way2

1、第一步,先求每个用户的月总金额
-- Step 1: collapse the detail rows into one total per user per month.
select
    username,
    month,
    sum(salary) as salary
from t_salary_detail
group by
    username,
    month;

+-----------+----------+---------+
| username  | month    | salary  | 累计金额
+-----------+----------+---------+
| A         | 2015-01  | 33      | 33
| A         | 2015-02  | 10      | 43
| A         | 2015-03  | 16      | 59
| B         | 2015-01  | 30      | 30
| B         | 2015-02  | 15      | 45
| B         | 2015-03  | 17      | 62
+-----------+----------+---------+

2、第二步,将月总金额表与自己连接(自连接)

-- Step 2: pair every monthly total of a user (A) with each of that user's monthly
-- totals from the same or an earlier month (B). The month comparison is kept in
-- WHERE and only the equality on username is used as the join condition.
with monthly as (
    select
        username,
        month,
        sum(salary) as salary
    from t_salary_detail
    group by username, month
)
select
    A.*,
    B.*
from monthly A
inner join monthly B
    on A.username = B.username
where B.month <= A.month;

+-------------+----------+-----------+-------------+----------+-----------+
| a.username  | a.month  | a.salary  | b.username  | b.month  | b.salary  |
+-------------+----------+-----------+-------------+----------+-----------+
| A           | 2015-01  | 33        | A           | 2015-01  | 33        |
| A           | 2015-02  | 10        | A           | 2015-01  | 33        |
| A           | 2015-02  | 10        | A           | 2015-02  | 10        |
| A           | 2015-03  | 16        | A           | 2015-01  | 33        |
| A           | 2015-03  | 16        | A           | 2015-02  | 10        |
| A           | 2015-03  | 16        | A           | 2015-03  | 16        |
| B           | 2015-01  | 30        | B           | 2015-01  | 30        |
| B           | 2015-02  | 15        | B           | 2015-01  | 30        |
| B           | 2015-02  | 15        | B           | 2015-02  | 15        |
| B           | 2015-03  | 17        | B           | 2015-01  | 30        |
| B           | 2015-03  | 17        | B           | 2015-02  | 15        |
| B           | 2015-03  | 17        | B           | 2015-03  | 17        |
+-------------+----------+-----------+-------------+----------+-----------+

3、第三步,从上一步的结果中
进行分组查询,分组的字段是a.username a.month
求月累计值: 将b.month <= a.month的所有b.salary求和即可

-- Step 3: running total per user via a self-join on the monthly totals.
-- Each (A.username, A.month) group holds one joined row per month B.month <= A.month,
-- so sum(B.salary) is the cumulative amount up to and including A.month.
-- A.salary is the same on every row of a group; max() just picks that single value.
with monthly as (
    select
        username,
        month,
        sum(salary) as salary
    from t_salary_detail
    group by username, month
)
select
    A.username,
    A.month,
    max(A.salary) as salary,
    sum(B.salary) as accumulate
from monthly A
inner join monthly B
    on A.username = B.username
where B.month <= A.month
group by
    A.username,
    A.month
order by
    A.username,
    A.month;

+-------------+----------+---------+-------------+
| a.username  | a.month  | salary  | accumulate  |
+-------------+----------+---------+-------------+
| A           | 2015-01  | 33      | 33          |
| A           | 2015-02  | 10      | 43          |
| A           | 2015-03  | 16      | 59          |
| B           | 2015-01  | 30      | 30          |
| B           | 2015-02  | 15      | 45          |
| B           | 2015-03  | 17      | 62          |
+-------------+----------+---------+-------------+

你可能感兴趣的:(Hive)