Hive Common Functions and User-Defined Functions

Preface:
    Common HQL function syntax is similar to that of an RDBMS.
    Official references:
    https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select
    https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF

I. Hive Built-in Functions
  -- List the built-in functions
  hive> show functions;
  ...
  least
  length
  like
  max
  min
  ...

  -- Show usage help for a specific function
  hive> desc function extended upper;
  Example:
  > SELECT upper('Facebook') FROM src LIMIT 1;
  'FACEBOOK'

II. Common Function Syntax
 Example data:
  hive> select * from emp;
  OK
  7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
  7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
  7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
  7566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 20
  7654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 30
  7698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 30
  7782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 10
  7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
  7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
  7844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 30
  7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
  7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
  7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
  7934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10
  8888 HIVE PROGRAM 7839 1988-1-23 10300.0 NULL NULL

  -- Sort by salary in descending order and take the top 3
  hive> select * from emp order by salary desc limit 3;
  OK
  8888 HIVE PROGRAM 7839 1988-1-23 10300.0 NULL NULL
  7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
  7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20

  -- Max, min, average, and total salary for each department
  select deptno, max(salary), min(salary), avg(salary), sum(salary) from emp group by deptno;
  OK
  NULL    10300.0 10300.0 10300.0 10300.0
  10      5000.0  1300.0  2916.6666666666665      8750.0
  20      3000.0  800.0   2175.0  10875.0
  30      2850.0  950.0   1566.6666666666667      9400.0


  -- Same aggregates, but keep only departments whose average salary is greater than 2000
  -- (filters on aggregates go in HAVING, not WHERE)
  select deptno, max(salary), min(salary), avg(salary), sum(salary) from emp group by deptno
  having avg(salary) > 2000;
  OK
  NULL    10300.0 10300.0 10300.0 10300.0
  10      5000.0  1300.0  2916.6666666666665      8750.0
  20      3000.0  800.0   2175.0  10875.0

  -- Top 2 salaries (name and salary) in each department
  select t.*
  from
    (select
       deptno,
       ename,
       salary,
       row_number() over(partition by deptno order by salary desc) as number
     from emp) t
  where t.number <= 2;
  OK
  NULL HIVE 10300.0 1
  10 KING 5000.0 1
  10 CLARK 2450.0 2
  20 SCOTT 3000.0 1
  20 FORD 3000.0 2
  30 BLAKE 2850.0 1
  30 ALLEN 1600.0 2
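
  Note that SCOTT and FORD both earn 3000.0 yet still receive distinct row numbers; if tied salaries should share the same rank, dense_rank can be swapped in for row_number. A variant of the inner query, sketched with an illustrative alias rnk:
  -- dense_rank assigns the same rank to tied salaries within each department
  select
    deptno,
    ename,
    salary,
    dense_rank() over(partition by deptno order by salary desc) as rnk
  from emp;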

  -- Count the number of employees in each department
  select deptno, count(1) from emp group by deptno;
  OK
  NULL 1
  10 3
  20 5
  30 6

  -- Bucket salaries with CASE WHEN
  select ename, salary,
  case
    when salary > 1 and salary <= 1000 then 'lower'
    when salary > 1000 and salary <= 2000 then 'middle'
    when salary > 2000 and salary <= 4000 then 'high'
    else 'highest'
  end
  from emp;
  OK
  SMITH   800.0   lower
  ALLEN   1600.0  middle
  WARD    1250.0  middle
  JONES   2975.0  high
  MARTIN  1250.0  middle
  BLAKE   2850.0  high
  ...


  -- Type cast
  hive> select cast('2018-06-17' as date) from dual;
  OK
  2018-06-17

  -- Substring
  hive> select substr('abcdef',1,5) from dual;
  OK
  abcde

  -- Concatenate strings
  hive> select concat('123','456') from dual;
  OK
  123456

  -- Concatenate with a specified separator
  hive> select concat_ws('.','www','baidu','com') from dual;
  OK
  www.baidu.com
  -- String length
  hive> select length(concat_ws('.','www','baidu','com')) from dual;
  OK
  13

  -- Split a string
  hive> select split(concat_ws('.','www','baidu','com'),'\\.') from dual;
  OK
  ["www","baidu","com"]
  Time taken: 0.084 seconds, Fetched: 1 row(s)
  -- Explode the split result into rows
  hive> select explode(split(concat_ws('.','www','baidu','com'),'\\.')) from dual;
  OK
  www
  baidu
  com
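
  To keep other columns alongside the exploded values, explode is usually paired with a lateral view. A minimal sketch against the same dual table (the aliases t and w are illustrative):
  -- each exploded value becomes a row of column w, joined back to its source row
  hive> select t.w from dual lateral view explode(split('www.baidu.com','\\.')) t as w;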

  -- Convert uppercase to lowercase
  hive> select lower('ABC') from dual;
  OK
  abc

  -- Convert lowercase to uppercase
  hive> select upper('abc') from dual;
  OK
  ABC

  -- Load and query JSON data
  hive> create table json(json string);
  OK
  Time taken: 0.059 seconds
  -- Load the JSON file
  hive> load data local inpath '/home/hadoop/data/test.json' into table json;
  hive> select * from json;
  OK
  {"name":"zhangsan","age":28,"addr":"hz"}
  {"name":"lisi","age":18,"addr":"bj"}
  {"name":"wangwu","age":10,"addr":"sh"}
  Time taken: 0.151 seconds, Fetched: 3 row(s)
  -- Extract fields with json_tuple
  hive> select json_tuple(json,'name','age','addr') as (name,age,addr) from json;
  OK
  zhangsan        28      hz
  lisi            18      bj
  wangwu          10      sh
  Time taken: 0.077 seconds, Fetched: 3 row(s)
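
  Individual fields can also be pulled out one at a time with get_json_object, which takes a JSONPath-style expression; for example:
  hive> select get_json_object(json,'$.name'), get_json_object(json,'$.age') from json;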


III. Common Date and Time Functions

-- current_date returns the current date (year-month-day)
hive> select current_date from dual;
OK
2018-06-07


-- unix_timestamp returns the current Unix timestamp
hive> select unix_timestamp() from dual;
OK
1528403209
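
A related built-in, from_unixtime, goes the other way and formats a Unix timestamp into a date string (the pattern follows Java's SimpleDateFormat); for example:
hive> select from_unixtime(1528403209,'yyyy-MM-dd HH:mm:ss') from dual;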

-- current_timestamp returns the current date and time (year-month-day hour:minute:second)
hive> select current_timestamp from dual;
OK
2018-06-07 16:35:12.752
Time taken: 0.217 seconds, Fetched: 1 row(s)


-- Get the date part (year-month-day)
hive> select to_date('2018-06-07 16:44:44') from dual;
OK
2018-06-07

-- Get the year
hive> select year('2018-06-07 16:44:44') from dual;
OK
2018

-- Get the month
hive> select month('2018-06-07 16:44:44') from dual;
OK
6

-- Get the day
hive> select day('2018-06-07 16:44:44') from dual;
OK
7

-- Get the hour
hive> select hour('2018-06-07 16:44:44') from dual;
OK
16

-- Get the minute
hive> select minute('2018-06-07 16:44:44') from dual;
OK
44


-- Get the second
hive> select second('2018-06-07 16:44:44') from dual;
OK
44


-- Add days to a date
hive> select date_add('2018-06-07',10) from dual;
OK
2018-06-17

-- Subtract days from a date
hive> select date_sub('2018-06-17',10) from dual;
OK
2018-06-07
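
The number of days between two dates can be taken directly with datediff(enddate, startdate); for the two dates above it returns 10:
hive> select datediff('2018-06-17','2018-06-07') from dual;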

IV. User-Defined Functions (UDF)

.....
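
As a minimal sketch of how a custom function is usually wired up (the jar path, function name, and class name below are hypothetical placeholders): the Java class extends org.apache.hadoop.hive.ql.exec.UDF and implements an evaluate() method; the compiled jar is then added to the session and registered as a temporary function:

-- add the compiled jar to the session (placeholder path)
hive> add jar /home/hadoop/lib/hive-udf-example.jar;
-- register the class as a temporary function (placeholder class name)
hive> create temporary function say_hello as 'com.example.hive.udf.SayHello';
-- use it like any built-in function
hive> select say_hello(ename) from emp;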

    
