1.and和or的优先级
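and 的优先级高于 or,因此下面的查询 where id = '1001' or id = '1002' and money = '100.0' 等价于 where id = '1001' or (id = '1002' and money = '100.0')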
root@ubuntu:~# cat windfunc
1001 100.0 ABC
1001 150.0 BCD
1001 200.0 CDE
1001 150.0 DEF
1002 200.0 ABC
1002 200.0 ABC
1002 100.0 BCD
1002 300.0 CDE
1002 50.0 DEF
1002 400.0 EFG
1003 100.0 ABC
1003 50.0 BCD
1004 60.0 ABC
hive> create table windfunc(id string,money float,type string) row format delimited fields terminated by '\t' lines terminated by '\n' stored as textfile;
hive> load data local inpath '/root/windfunc' into table windfunc;
Copying data from file:/root/windfunc
Copying file: file:/root/windfunc
Loading data to table default.windfunc
[Warning] could not update stats.
OK
Time taken: 2.7 seconds
hive> select * from windfunc;
OK
1001 100.0 ABC
1001 150.0 BCD
1001 200.0 CDE
1001 150.0 DEF
1002 200.0 ABC
1002 200.0 ABC
1002 100.0 BCD
1002 300.0 CDE
1002 50.0 DEF
1002 400.0 EFG
1003 100.0 ABC
1003 50.0 BCD
1004 60.0 ABC
Time taken: 0.272 seconds, Fetched: 13 row(s)
hive> select * from windfunc where id = '1001' or id = '1002' and money = '100.0';
OK
1001 100.0 ABC
1001 150.0 BCD
1001 200.0 CDE
1001 150.0 DEF
1002 100.0 BCD
Time taken: 26.489 seconds, Fetched: 5 row(s)
2.cast类型转换
hive> select cast(1.324 as int) from windfunc;
OK
1
1
1
1
1
1
1
1
1
1
1
1
1
Time taken: 23.841 seconds, Fetched: 13 row(s)
3.if(condition, value_if_true, value_if_false):condition 为 true 时返回 value_if_true,否则返回 value_if_false
4.case when 条件1 then 值1 when 条件2 then 值2 else 默认值 end
5.get_json_object
hive> select get_json_object('{"name":"jack","age":"20"}','$.name') from windfunc limit 1;
OK
jack
Time taken: 23.766 seconds, Fetched: 1 row(s)
6.parse_url
第二个参数可取:HOST、PATH、QUERY、REF、PROTOCOL、AUTHORITY、FILE、USERINFO
具体参考文档
hive> select parse_url('http://www.baidu.com','HOST') FROM WINDFUNC limit 1;
OK
www.baidu.com
7.collect_set(),collect_list()
hive> select collect_set(id) from windfunc;
OK
["1003","1002","1001","1004"]
Time taken: 34.749 seconds, Fetched: 1 row(s)
hive> select collect_list(id) from windfunc;
OK
["1001","1001","1001","1001","1002","1002","1002","1002","1002","1002","1003","1003","1004"]
Time taken: 33.504 seconds, Fetched: 1 row(s)
8.窗口函数 over (partition by ... order by ...),以 first_value 为例
hive> select id,money,type,first_value(money) over (partition by id order by money) from windfunc;
1001 100.0 ABC 100.0
1001 150.0 BCD 100.0
1001 150.0 DEF 100.0
1001 200.0 CDE 100.0
1002 50.0 DEF 50.0
1002 100.0 BCD 50.0
1002 200.0 ABC 50.0
1002 200.0 ABC 50.0
1002 300.0 CDE 50.0
1002 400.0 EFG 50.0
1003 50.0 BCD 50.0
1003 100.0 ABC 50.0
1004 60.0 ABC 60.0
hive> select id,money,type,first_value(money) over (partition by id order by money rows between 1 preceding and 1 following) from windfunc;
first_value 取窗口内的第一行;窗口按 money 升序排列,所以结果是下面三行中最小的 money(窗口不会跨分区,在分区边界处窗口不足三行)
1 preceding(前一行)
当前行
1 following(后一行)
OK
1001 100.0 ABC 100.0
1001 150.0 BCD 100.0
1001 150.0 DEF 150.0
1001 200.0 CDE 150.0
1002 50.0 DEF 50.0
1002 100.0 BCD 50.0
1002 200.0 ABC 100.0
1002 200.0 ABC 200.0
1002 300.0 CDE 200.0
1002 400.0 EFG 300.0
1003 50.0 BCD 50.0
1003 100.0 ABC 50.0
1004 60.0 ABC 60.0
Time taken: 33.191 seconds, Fetched: 13 row(s)
9.rank(),dense_rank()
hive> select id,money,rank() over (partition by id order by money) from windfunc;
OK
1001 100.0 1
1001 150.0 2
1001 150.0 2
1001 200.0 4
1002 50.0 1
1002 100.0 2
1002 200.0 3
1002 200.0 3
1002 300.0 5
1002 400.0 6
1003 50.0 1
1003 100.0 2
1004 60.0 1
hive> select id,money,dense_rank() over (partition by id order by money) from windfunc;
OK
1001 100.0 1
1001 150.0 2
1001 150.0 2
1001 200.0 3
1002 50.0 1
1002 100.0 2
1002 200.0 3
1002 200.0 3
1002 300.0 4
1002 400.0 5
1003 50.0 1
1003 100.0 2
1004 60.0 1
10.cume_dist(),percent_rank()
cume_dist()
(分区内小于等于当前值的行数)/(分区总行数),即:相同值的最大行号/分区行数
hive> select id,money,cume_dist() over (partition by id order by money) from windfunc;
OK
1001 100.0 0.25
1001 150.0 0.75
1001 150.0 0.75
1001 200.0 1.0
1002 50.0 0.16666666666666666
1002 100.0 0.3333333333333333
1002 200.0 0.6666666666666666
1002 200.0 0.6666666666666666
1002 300.0 0.8333333333333334
1002 400.0 1.0
1003 50.0 0.5
1003 100.0 1.0
1004 60.0 1.0
percent_rank()
(相同值最小行号-1)/(行数-1)
hive> select id,money,percent_rank() over (partition by id order by money) from windfunc;
OK
1001 100.0 0.0
1001 150.0 0.3333333333333333
1001 150.0 0.3333333333333333
1001 200.0 1.0
1002 50.0 0.0
1002 100.0 0.2
1002 200.0 0.4
1002 200.0 0.4
1002 300.0 0.8
1002 400.0 1.0
1003 50.0 0.0
1003 100.0 1.0
1004 60.0 0.0
11.java_method和reflect
hive> select java_method('java.lang.Math','sqrt',cast(id as double)) from windfunc;
OK
31.63858403911275
31.63858403911275
31.63858403911275
31.63858403911275
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.654383582688826
31.670175244226233
31.670175244226233
31.68595903550972
12.explode
13.rlike
14.regexp_replace