hive函数实例

1.and和or的优先级
and的优先级高于or,即 a or b and c 等价于 a or (b and c)

root@ubuntu:~# cat windfunc 
1001    100.0   ABC
1001    150.0   BCD
1001    200.0   CDE
1001    150.0   DEF
1002    200.0   ABC
1002    200.0   ABC
1002    100.0   BCD
1002    300.0   CDE
1002    50.0    DEF
1002    400.0   EFG
1003    100.0   ABC
1003    50.0    BCD
1004    60.0    ABC
hive> create  table windfunc(id string,money float,type string) row format delimited fields terminated by '\t' lines terminated by '\n' stored as textfile;
hive> load data local inpath '/root/windfunc' into table windfunc;
Copying data from file:/root/windfunc
Copying file: file:/root/windfunc
Loading data to table default.windfunc
[Warning] could not update stats.
OK
Time taken: 2.7 seconds
hive> select * from windfunc;
OK
1001    100.0   ABC
1001    150.0   BCD
1001    200.0   CDE
1001    150.0   DEF
1002    200.0   ABC
1002    200.0   ABC
1002    100.0   BCD
1002    300.0   CDE
1002    50.0    DEF
1002    400.0   EFG
1003    100.0   ABC
1003    50.0    BCD
1004    60.0    ABC
Time taken: 0.272 seconds, Fetched: 13 row(s)
hive> select * from windfunc where id = '1001' or id = '1002' and money = '100.0';

OK
1001    100.0   ABC
1001    150.0   BCD
1001    200.0   CDE
1001    150.0   DEF
1002    100.0   BCD
Time taken: 26.489 seconds, Fetched: 5 row(s)

2.cast类型转换

hive> select cast(1.324 as int) from windfunc;

OK
1
1
1
1
1
1
1
1
1
1
1
1
1
Time taken: 23.841 seconds, Fetched: 13 row(s)

3.if(条件表达式, 条件为真时返回的值value1, 条件为假时返回的值value2)
4.case when … then … when … then … else … end
5.get_json_object

hive> select get_json_object('{"name":"jack","age":"20"}','$.name') from windfunc limit 1;

OK
jack
Time taken: 23.766 seconds, Fetched: 1 row(s)

6.parse_url
可提取的部分:HOST,PATH,QUERY,REF,PROTOCOL,AUTHORITY,FILE,USERINFO
具体参考文档

hive> select parse_url('http://www.baidu.com','HOST') FROM WINDFUNC limit 1;
OK
www.baidu.com

7.collect_set(),collect_list()

hive> select collect_set(id) from windfunc;
OK
["1003","1002","1001","1004"]
Time taken: 34.749 seconds, Fetched: 1 row(s)
hive> select collect_list(id) from windfunc;
OK
["1001","1001","1001","1001","1002","1002","1002","1002","1002","1002","1003","1003","1004"]
Time taken: 33.504 seconds, Fetched: 1 row(s)

8.partition

hive> select id,money,type,first_value(money) over (partition by id order by money) from windfunc;

1001    100.0   ABC 100.0
1001    150.0   BCD 100.0
1001    150.0   DEF 100.0
1001    200.0   CDE 100.0
1002    50.0    DEF 50.0
1002    100.0   BCD 50.0
1002    200.0   ABC 50.0
1002    200.0   ABC 50.0
1002    300.0   CDE 50.0
1002    400.0   EFG 50.0
1003    50.0    BCD 50.0
1003    100.0   ABC 50.0
1004    60.0    ABC 60.0
hive> select id,money,type,first_value(money) over (partition by id order by money rows between 1 preceding and 1 following) from windfunc;

first_value取窗口中的第一行;因为按money升序排序,所以相当于在下面三行中选择最小的(窗口不会跨分区):
1 preceding
当前行
1 following

OK
1001    100.0   ABC 100.0
1001    150.0   BCD 100.0
1001    150.0   DEF 150.0
1001    200.0   CDE 150.0
1002    50.0    DEF 50.0
1002    100.0   BCD 50.0
1002    200.0   ABC 100.0
1002    200.0   ABC 200.0
1002    300.0   CDE 200.0
1002    400.0   EFG 300.0
1003    50.0    BCD 50.0
1003    100.0   ABC 50.0
1004    60.0    ABC 60.0
Time taken: 33.191 seconds, Fetched: 13 row(s)

9.rank(),dense_rank()

hive> select id,money,rank() over (partition by id order by money) from windfunc;
OK
1001    100.0   1
1001    150.0   2
1001    150.0   2
1001    200.0   4
1002    50.0    1
1002    100.0   2
1002    200.0   3
1002    200.0   3
1002    300.0   5
1002    400.0   6
1003    50.0    1
1003    100.0   2
1004    60.0    1
hive> select id,money,dense_rank() over (partition by id order by money) from windfunc;

OK
1001    100.0   1
1001    150.0   2
1001    150.0   2
1001    200.0   3
1002    50.0    1
1002    100.0   2
1002    200.0   3
1002    200.0   3
1002    300.0   4
1002    400.0   5
1003    50.0    1
1003    100.0   2
1004    60.0    1

10.cume_dist(),percent_rank()
cume_dist()
(相同值最大行号/行数),即分区内小于等于当前值的行数除以分区总行数;相同的值得到相同的结果

hive> select id,money,cume_dist() over (partition by id order by money) from windfunc; 

OK
1001    100.0   0.25
1001    150.0   0.75
1001    150.0   0.75
1001    200.0   1.0
1002    50.0    0.16666666666666666
1002    100.0   0.3333333333333333
1002    200.0   0.6666666666666666
1002    200.0   0.6666666666666666
1002    300.0   0.8333333333333334
1002    400.0   1.0
1003    50.0    0.5
1003    100.0   1.0
1004    60.0    1.0

percent_rank()
(相同值最小行号-1)/(行数-1)

hive> select id,money,percent_rank() over (partition by id order by money) from windfunc;
OK
1001    100.0   0.0
1001    150.0   0.3333333333333333
1001    150.0   0.3333333333333333
1001    200.0   1.0
1002    50.0    0.0
1002    100.0   0.2
1002    200.0   0.4
1002    200.0   0.4
1002    300.0   0.8
1002    400.0   1.0
1003    50.0    0.0
1003    100.0   1.0
1004    60.0    0.0

11.java_method和reflect

hive> select java_method('java.lang.Math','sqrt',cast(id as double)) from windfunc;
OK
31.63858403911275
31.63858403911275
31.63858403911275
31.63858403911275
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.670175244226233
31.670175244226233
31.68595903550972

12.explode
13.rlike
14.regexp_replace

你可能感兴趣的:(#,hive)