window function

first_value() & last_value()

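取当前窗口范围(frame)内的第一/最后一个值。注意:指定了 ORDER BY 而未指定窗口范围时,默认范围是从分区第一行到当前行,因此 last_value() 返回的是当前行(含并列行)的值,而不是整个分区的最后一个值;若要取整个分区的最后一个值,需显式指定 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING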

-- first_value() over the default window frame: within each id partition,
-- ordered by salary ascending, return the first salary seen from the start
-- of the partition up to the current row.
-- SELECT * is kept on purpose: only the id and salary columns of the table
-- are visible in this example, so the full column list cannot be spelled out.
SELECT
    *,
    first_value(salary) OVER (
        PARTITION BY id
        ORDER BY salary
    ) AS first_salary
FROM salary;
-- first_value() over an explicit sliding frame: partition by id, order by
-- salary ascending, then return the first value among the previous row, the
-- current row and the following row.
-- SELECT * is kept on purpose: only the id and salary columns of the table
-- are visible in this example, so the full column list cannot be spelled out.
SELECT
    *,
    first_value(salary) OVER (
        PARTITION BY id
        ORDER BY salary
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS first_salary_in_frame
FROM salary;

lead(expr [, offset] [, default]) OVER([partition_by_clause] order_by_clause)

此函数在分区排序后,返回当前行之后第 offset 行的值(offset 默认为 1);若该行超出了分区范围,则返回 default(未指定 default 时返回 NULL)

lag()是与lead()相反的函数,返回一个当前行之前n行的元素

-- Difference between the next row's salary and the current row's salary
-- within each id partition (ordered by salary ascending).
-- The current salary is passed as lead()'s default value, so the last row of
-- each partition, which has no following row, yields a difference of 0.
SELECT
    id,
    salary,
    lead(salary, 1, salary) OVER (
        PARTITION BY id
        ORDER BY salary
    ) - salary AS gongzicha  -- "gongzicha" = salary difference
FROM salary;
     id salary  gongzicha
1   1001    100 50
2   1001    150 0
3   1001    150 50
4   1001    200 0
5   1002    50  50
6   1002    100 100
7   1002    200 0
8   1002    200 100
9   1002    300 100
10  1002    400 0
11  1003    50  50
12  1003    100 0
13  1004    60  0

rank()

分区内排序并给出名次;并列者名次相同,其后的名次会跳号(如 1、2、2、4)

-- Rank salaries within each id partition, highest salary first.
--   paixu   ("ranking")    : rank(), equal salaries share the same rank
--   hanghao ("row number") : row_number(), a distinct sequence number per row
SELECT
    id,
    salary,
    rank() OVER (PARTITION BY id ORDER BY salary DESC) AS paixu,
    row_number() OVER (PARTITION BY id ORDER BY salary DESC) AS hanghao
FROM salary;
-----------------------------
    id  salary  paixu   hanghao
1   1001    200 1   1
2   1001    150 2   2
3   1001    150 2   3
4   1001    100 4   4
5   1002    400 1   1
6   1002    300 2   2
7   1002    200 3   3
8   1002    200 3   4
9   1002    100 5   5
10  1002    50  6   6
11  1003    100 1   1
12  1003    50  2   2
13  1004    60  1   1

dense_rank()

分区内排序并给出名次;并列者名次相同,但其后的名次不跳号(如 1、2、2、3)

ntile()

分片函数

-- Split each id's rows, ordered from highest to lowest salary, into three
-- tiles; the rows with rm = 1 form the top-salary third.
SELECT
    id,
    salary,
    ntile(3) OVER (
        PARTITION BY id
        ORDER BY salary DESC
    ) AS rm
FROM salary;

java_method()

java_method(class, method[, arg1[, arg2..]])
Calls a Java method by matching the argument signature, using reflection. (As of Hive 0.9.0.)
可使用java中的方法对数据进行处理,可以大大拓展hive的方法

-- Square root of salary, computed by calling java.lang.Math.sqrt through
-- java_method().
-- This is for illustration only: Hive already provides a built-in sqrt().
SELECT
    java_method("java.lang.Math", "sqrt", CAST(salary AS DOUBLE)) AS salary_sqrt
FROM salary;

你可能感兴趣的:(window function)