Hive over() 窗口函数应用示例

over:窗口函数子句,用于在按 partition by 分组、按 order by 排序后的行集上计算聚合或排名,且不会像 group by 那样合并行。


建表及插入数据:

-- Sample table used by the window-function examples in this file.
--   name   : text label for each row
--   part   : grouping key; the examples use it in "partition by"
--   salary : decimal measure that the examples aggregate, rank and order by
--   age    : kept as a string; none of the example queries in this file read it
-- The table is stored as rcfile with the field delimiter declared as '\036'.
create table temp.over_test (
    name   string,
    part   string,
    salary decimal(30,8),
    age    string
)
row format delimited
    fields terminated by '\036'
stored as rcfile;

-- Seed rows for the window-function examples.
-- One multi-row insert replaces fifteen single-row statements, so the data is
-- loaded by a single statement instead of fifteen separate ones.
-- salary is written as a numeric literal rather than a quoted string, so the
-- decimal(30,8) column no longer depends on an implicit string-to-decimal cast.
-- Column order: name, part, salary, age.
-- The data contains a tied salary (422 twice in part 'a') and a repeated
-- name ('11' three times in part 'a'); both matter for the ranking examples.
-- NOTE(review): "insert ... values" needs Hive 0.14 or later - confirm the target version.
insert into table temp.over_test values
    ('11',      'a', 100, '12'),
    ('21',      'a', 211, '12'),
    ('11',      'a', 323, '34'),
    ('241',     'b', 432, '12'),
    ('231',     'b', 123, '12'),
    ('2121',    'a', 422, '12'),
    ('2012',    'a', 422, '12'),
    ('13',      'c', 131, '12'),
    ('123',     'b', 34,  '12'),
    ('342',     'e', 789, '12'),
    ('1222231', 's', 555, '12'),
    ('675',     'e', 666, '12'),
    ('51',      'b', 33,  '12'),
    ('41',      'b', 222, '12'),
    ('11',      'a', 36,  '12');

应用:

-- Show every row of the sample table. Columns are listed explicitly, in table
-- order, instead of "*". The statement is now terminated so it no longer runs
-- into the next one.
select
    name,
    part,
    salary,
    age
from temp.over_test;

-- Highest salary within each part, repeated on every row of that part.
select
    name,
    salary,
    max(salary) over (partition by part) as max_salary_in_part
from temp.over_test;

-- Range frame: for each row, add up the salary of every row whose salary lies
-- within 20 below to 20 above the current row's salary.
select
    name,
    part,
    salary,
    sum(salary) over (
        order by salary
        range between 20 preceding and 20 following
    ) as mm
from temp.over_test;

-- Rows frame: in salary order, add up the previous row, the current row and
-- the next two rows.
-- name is added to the ordering as a tie-breaker: a rows frame depends on the
-- physical row order, and ordering by salary alone leaves rows with the same
-- salary in an arbitrary order.
select
    name,
    part,
    salary,
    sum(salary) over (
        order by salary, name
        rows between 1 preceding and 2 following
    ) as mm
from temp.over_test;

-- rank(): position within each part by descending salary. Ties share a rank
-- and the following rank is skipped (1, 1, 3).
select
    name,
    part,
    salary,
    rank() over (partition by part order by salary desc) as salary_rank
from temp.over_test;

-- row_number(): unique sequential number within each part by descending salary.
-- name is added as a tie-breaker so rows with the same salary always receive
-- the same numbers; with salary alone their numbering is arbitrary.
select
    name,
    part,
    salary,
    row_number() over (partition by part order by salary desc, name) as row_num
from temp.over_test;

-- dense_rank(): position within each part by descending salary. Ties share a
-- rank and no rank is skipped afterwards (1, 1, 2).
select
    name,
    part,
    salary,
    dense_rank() over (partition by part order by salary desc) as salary_dense_rank
from temp.over_test;

-- Cumulative salary within each part, in ascending salary order.
-- The frame is spelled out; it is the default for an ordered window, so the
-- results are the same as without it. Because it is a range frame, the total
-- includes the current row, and rows with the same salary are peers that all
-- receive the same total covering every one of them.
-- For a strict row-by-row running total, use a rows frame with a unique ordering.
select
    name,
    part,
    salary,
    sum(salary) over (
        partition by part
        order by salary
        range between unbounded preceding and current row
    ) as running_salary
from temp.over_test;

-- lag(): salary of the previous row in ascending salary order; '' when there is
-- no previous row. salary is cast to string so that '' can serve as the default.
-- name is added as a tie-breaker so rows with the same salary have a fixed
-- order and each row always sees the same predecessor.
select
    name,
    part,
    salary,
    lag(cast(salary as string), 1, '') over (order by salary, name) as qian
from temp.over_test;

-- lead(): salary of the following row within each part, in descending salary
-- order; '' when there is no following row.
-- name is added as a tie-breaker so rows with the same salary have a fixed order.
-- NOTE(review): the alias "qian" also labels the lag() example although lead()
-- returns the following row; it is kept so the output column name is unchanged.
select
    name,
    part,
    salary,
    lead(cast(salary as string), 1, '') over (
        partition by part
        order by salary desc, name
    ) as qian
from temp.over_test;

-- ntile(4): split all rows, in descending salary order, into four buckets and
-- return the bucket number (1-4) each row falls into.
-- name is added as a tie-breaker so rows with the same salary land in the same
-- bucket on every run.
select
    name,
    part,
    salary,
    ntile(4) over (order by salary desc, name) as qian
from temp.over_test;

-- Each row's share of the total salary.
-- The original comment notes that ratio_to_report() is not supported here, so
-- the share is computed as salary divided by the sum over the whole table
-- (an empty over() makes every row part of one window).
select
    salary,
    salary / sum(salary) over () as salary_ratio
from temp.over_test;

-- percent_rank(): (rank of the row - 1) / (rows in the partition - 1),
-- computed within each part with rows ordered by name descending.
select
    name,
    part,
    salary,
    percent_rank() over (partition by part order by name desc) as pct_rank
from temp.over_test;

-- cume_dist(): (number of rows ordered at or before the current row, counting
-- rows tied with it) / (rows in the partition), within each part by descending
-- salary. The first row of a partition therefore gets a value above 0.
select
    name,
    part,
    salary,
    cume_dist() over (partition by part order by salary desc) as cume_dist_in_part
from temp.over_test;
-- 70th percentile of salary within each part, shown next to percent_rank().
-- "percentile_cont(...) within group (...) over (...)" is Oracle syntax, and
-- Hive quotes identifiers with backticks rather than double quotes, so the
-- query is rewritten with Hive's built-in percentile_approx.
-- percentile_approx takes a double, hence the cast.
-- NOTE(review): percentile_approx returns an approximation, not the linearly
-- interpolated value percentile_cont would give - confirm that is acceptable.
select
    salary,
    percentile_approx(cast(salary as double), 0.7) over (partition by part) as `percentile_cont`,
    percent_rank() over (partition by part order by name desc) as pct_rank
from temp.over_test;

以上内容参考自:

http://blog.csdn.net/sherri_du/article/details/53312085



你可能感兴趣的:(hive)