假如,有如下数据(文件名:accumulate.dat):
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
C,2015-01,10
C,2015-01,20
A,2015-02,4
A,2015-02,6
C,2015-02,30
C,2015-02,10
B,2015-02,10
B,2015-02,5
A,2015-03,14
A,2015-03,6
B,2015-03,20
B,2015-03,25
C,2015-03,10
C,2015-03,20
-----统计结果如下累计报表
用户 |
月份 |
月总额 |
累计到当月的总额 |
A |
2015-01 |
33 |
33 |
A |
2015-02 |
10 |
43 |
A |
2015-03 |
30 |
73 |
B |
2015-01 |
30 |
30 |
B |
2015-02 |
15 |
45 |
... |
... |
... |
... |
建表映射:
create table t_access_times(username string,month string,counts int)
row format delimited fields terminated by ',';
导入数据(accumulate.dat):
load data local inpath '/root/accumulate.dat' into table t_access_times;
select username,month,sum(salary) as salary from t_access_times group by username,month
输出结果
+-----------+----------+---------+--+
| username | month | salary |
+-----------+----------+---------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
+-----------+----------+---------+--+
select A.*,B.* FROM
(select username,month,sum(salary) as salary from t_access_times group by username,month) A
inner join
(select username,month,sum(salary) as salary from t_access_times group by username,month) B
on
A.username=B.username
where B.month <= A.month
输出结果
+-------------+----------+-----------+-------------+----------+-----------+--+
| a.username | a.month | a.salary | b.username | b.month | b.salary |
+-------------+----------+-----------+-------------+----------+-----------+--+
| A | 2015-01 | 33 | A | 2015-01 | 33 |
| A | 2015-01 | 33 | A | 2015-02 | 10 |
| A | 2015-02 | 10 | A | 2015-01 | 33 |
| A | 2015-02 | 10 | A | 2015-02 | 10 |
| B | 2015-01 | 30 | B | 2015-01 | 30 |
| B | 2015-01 | 30 | B | 2015-02 | 15 |
| B | 2015-02 | 15 | B | 2015-01 | 30 |
| B | 2015-02 | 15 | B | 2015-02 | 15 |
+-------------+----------+-----------+-------------+----------+-----------+--+
select auname,amonth,acnts,sum(bcnts)
from t_tmp2
group by auname,amonth,acnts;
当然,也可以把整个逻辑过程写成一个SQL语句:
select A.username,A.month,max(A.salary) as salary,sum(B.salary) as accumulate
from
(select username,month,sum(salary) as salary from t_access_times group by username,month) A
inner join
(select username,month,sum(salary) as salary from t_access_times group by username,month) B
on
A.username=B.username
where B.month <= A.month
group by A.username,A.month
order by A.username,A.month;
-- 窗口分析函数 sum() over() :可以实现在窗口中进行逐行累加
0: jdbc:hive2://localhost:10000> select * from t_access_amount;
+----------------------+------------------------+-------------------------+--+
| t_access_amount.uid | t_access_amount.month | t_access_amount.amount |
+----------------------+------------------------+-------------------------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| A | 2015-03 | 20 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
| B | 2015-03 | 45 |
| C | 2015-01 | 30 |
| C | 2015-02 | 40 |
| C | 2015-03 | 30 |
+----------------------+------------------------+-------------------------+--+
求出每个人截止到每个月的总额
select uid,month,amount,
sum(amount) over(partition by uid order by month rows between unbounded preceding and current row) as accumulate
from t_access_amount;