[hadoop@node03 ~]$ hive -help
Syntax:
hive [-hiveconf x=y]* [<-i filename>]* [<-f filename>|<-e query-string>] [-S]
Notes:
For the full list of Hive parameters, see the official documentation.
When developing Hive applications, you inevitably need to set Hive parameters. Setting them lets you tune the execution efficiency of your HQL or track down problems. A question that comes up constantly in practice, however, is: why did a parameter I set not take effect? This is usually caused by setting it the wrong way.
For ordinary parameters there are three ways to set them:
1. Configuration file: hive-site.xml
2. Command-line parameters: set when starting the Hive client
3. Parameter declaration: set statements issued after entering the client
Configuration file: properties set in hive-site.xml apply to every session started on that node.
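A minimal sketch of the configuration-file method (hive.exec.parallel is just an example property; any Hive property uses the same element layout inside hive-site.xml):
<property>
    <name>hive.exec.parallel</name>
    <value>true</value>
</property>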
Command-line parameters: when starting Hive (client or server mode), you can add -hiveconf param=value on the command line to set a parameter, for example:
bin/hive --hiveconf hive.root.logger=INFO,console
This setting only affects the session started this way (for server mode, all sessions serving requests).
Parameter declaration: after entering the client, parameters can be set with set, for example:
-- set the number of reducers for MR jobs
set mapreduce.job.reduces=100;
The scope of this setting is also session-level.
Precedence: parameter declaration > command-line parameters > configuration file (hive-site.xml)
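A quick way to see this precedence in action (a sketch, using the same property as above; set with no value prints the current setting):
bin/hive --hiveconf mapreduce.job.reduces=50
-- inside the client, this prints mapreduce.job.reduces=50: the command line overrides hive-site.xml
set mapreduce.job.reduces;
set mapreduce.job.reduces=100;
-- now it prints mapreduce.job.reduces=100: the declaration overrides the command line
set mapreduce.job.reduces;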
In real work, once the HQL is developed we usually put it into a script and run it on a schedule with hive -f; parameters are then passed into the Hive script through variables, so let's look at how to pass parameters to Hive.
Hive 0.9 and earlier do not support passing parameters; Hive 1.0 and later support passing parameters to hive -f.
In Hive we generally pass parameters with hivevar or hiveconf.
hiveconf defines properties (configuration parameters) of the Hive execution context and can override values from hive-site.xml (hive-default.xml), such as the working directory, log level, or execution queue. For example, we can use hiveconf to override Hive configuration properties.
A hiveconf variable must be referenced with the hiveconf prefix, in the following format:
${hiveconf:key}
bin/hive --hiveconf "mapred.job.queue.name=root.default"
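Putting the two halves together (a minimal sketch; the queue property is the one set by the command above, and the query simply echoes it back):
bin/hive --hiveconf "mapred.job.queue.name=root.default" -e "select '${hiveconf:mapred.job.queue.name}';"
-- prints: root.default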
hivevar defines variables substituted at Hive runtime, similar to a PreparedStatement in Java; it is referenced as ${key} or as ${hivevar:key}.
A hivevar value can be referenced without the hivevar prefix, in the following formats:
-- with the prefix:
${hivevar:key}
-- without the prefix:
${key}
hive --hivevar name=zhangsan
-- reference it as ${hivevar:name}, or equivalently as ${name}
hive --hiveconf "mapred.job.queue.name=root.default" -d my="201912" --database myhive
-- -d (--define) is an alias for --hivevar
-- run the SQL
hive > select * from myhive.score2 where concat(year, month) = ${my} limit 5;
-- the query we want to parameterize next:
select * from student left join score on student.s_id = score.s_id where score.month = '201807' and score.s_score > 80 and score.c_id = 03;
hive (myhive)> create external table student
(s_id string, s_name string, s_birth string, s_sex string) row format delimited
fields terminated by '\t';
hive (myhive)> load data local inpath '/opt/install/hivedatas/student.csv' overwrite into table student;
Develop an HQL script and pass parameters into it with hiveconf and hivevar.
Run the following on node03 to create the HQL script:
cd /opt/install/hivedatas
vim hivevariable.hql
use myhive;
select * from student left join score on student.s_id = score.s_id where score.month = ${hiveconf:month} and score.s_score > ${hivevar:s_score} and score.c_id = ${c_id};
hive (myhive)> select * from student;
OK
student.s_id student.s_name student.s_birth student.s_sex
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
Time taken: 0.048 seconds, Fetched: 8 row(s)
hive (myhive)> select * from score;
OK
score.s_id score.c_id score.s_score score.month
01 01 80 201912
01 02 90 201912
01 03 99 201912
02 01 70 201912
02 02 60 201912
02 03 80 201912
03 01 80 201912
03 02 80 201912
03 03 80 201912
04 01 50 201912
04 02 30 201912
04 03 20 201912
05 01 76 201912
05 02 87 201912
06 01 31 201912
06 03 34 201912
07 02 89 201912
07 03 98 201912
hive --hiveconf month=201912 --hivevar s_score=80 --hivevar c_id=03 -f /opt/install/hivedatas/hivevariable.hql
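Given the sample data above, this should return only the rows for students 01 and 07: among the c_id = 03 scores, only (01, 03, 99) and (07, 03, 98) have s_score > 80 for month 201912, so the output is roughly:
01 赵雷 1990-01-01 男 01 03 99 201912
07 郑竹 1989-07-01 女 07 03 98 201912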
1. List the built-in functions
hive> show functions;
2. Show the usage of a built-in function
hive> desc function upper;
3. Show the detailed usage of a built-in function
hive> desc function extended upper;
hive> select round(3.1415926) from tableName;
3
hive> select round(3.5) from tableName;
4
hive> create table round_table as select round(9542.158) from tableName;
hive> select round(3.1415926, 4) from tableName;
3.1416
hive> select floor(3.1415926) from tableName;
3
hive> select floor(25) from tableName;
25
hive> select ceil(3.1415926) from tableName;
4
hive> select ceil(46) from tableName;
46
hive> select ceiling(3.1415926) from tableName;
4
hive> select ceiling(46) from tableName;
46
-- rand() returns a different random number in [0,1) on each call
hive> select rand() from tableName;
0.5577432776034763
hive> select rand() from tableName;
0.6638336467363424
-- rand(seed) is deterministic for a given seed
hive> select rand(100) from tableName;
0.7220096548596434
hive> select rand(100) from tableName;
0.7220096548596434
hive> select from_unixtime(1323308943, 'yyyyMMdd') from tableName;
20111208
hive> select unix_timestamp() from tableName;
1323309615
hive> select unix_timestamp('2011-12-07 13:01:03') from tableName;
1323234063
hive> select unix_timestamp('20111207 13:01:03','yyyyMMdd HH:mm:ss') from tableName;
1323234063
hive> select to_date('2011-12-08 10:03:01') from tableName;
2011-12-08
hive> select year('2011-12-08 10:03:01') from tableName;
2011
hive> select year('2012-12-08') from tableName;
2012
hive> select month('2011-12-08 10:03:01') from tableName;
12
hive> select month('2011-08-08') from tableName;
8
hive> select day('2011-12-08 10:03:01') from tableName;
8
hive> select day('2011-12-24') from tableName;
24
hive> select hour('2011-12-08 10:03:01') from tableName;
10
hive> select minute('2011-12-08 10:03:01') from tableName;
3
-- second: returns the seconds component
hive> select second('2011-12-08 10:03:01') from tableName;
1
hive> select weekofyear('2011-12-08 10:03:01') from tableName;
49
hive> select datediff('2012-12-08','2012-05-09') from tableName;
213
hive> select date_add('2012-12-08',10) from tableName;
2012-12-18
hive> select date_sub('2012-12-08',10) from tableName;
2012-11-28
hive> select if(1=2,100,200) from tableName;
200
hive> select if(1=1,100,200) from tableName;
100
hive> select COALESCE(null,'100','50') from tableName;
100
hive> select case 100 when 50 then 'tom' when 100 then 'mary' else 'tim' end from tableName;
mary
hive> Select case 200 when 50 then 'tom' when 100 then 'mary' else 'tim' end from tableName;
tim
hive> select case when 1=2 then 'tom' when 2=2 then 'mary' else 'tim' end from tableName;
mary
hive> select case when 1=1 then 'tom' when 2=2 then 'mary' else 'tim' end from tableName;
tom
hive> select length('abcedfg') from tableName;
7
hive> select reverse('abcedfg') from tableName;
gfdecba
hive> select concat('abc','def','gh') from tableName;
abcdefgh
hive> select concat_ws(',','abc','def','gh') from tableName;
abc,def,gh
hive> select substr('abcde',3) from tableName;
cde
hive> select substring('abcde',3) from tableName;
cde
hive> select substr('abcde',-1) from tableName; -- a negative start counts from the end (same as Oracle)
e
hive> select substr('abcde',3,2) from tableName;
cd
hive> select substring('abcde',3,2) from tableName;
cd
hive> select substring('abcde',-3,2) from tableName;
cd
hive> select upper('abSEd') from tableName;
ABSED
hive> select ucase('abSEd') from tableName;
ABSED
hive> select lower('abSEd') from tableName;
absed
hive> select lcase('abSEd') from tableName;
absed
hive> select trim(' ab c ') from tableName;
ab c
hive> select parse_url
('https://www.tableName.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST')
from tableName;
www.tableName.com
hive> select parse_url
('https://www.tableName.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1')
from tableName;
v1
hive> select get_json_object('{"store":{"fruit":\[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}], "bicycle":{"price":19.95,"color":"red"} },"email":"amy@only_for_json_udf_test.net","owner":"amy"}','$.owner') from tableName;
amy
hive> select repeat('abc', 5) from tableName;
abcabcabcabcabc
hive> select split('abtcdtef','t') from tableName;
["ab","cd","ef"]
hive> select count(*) from tableName;
20
hive> select count(distinct t) from tableName;
10
hive> select sum(t) from tableName;
100
hive> select sum(distinct t) from tableName;
70
hive> select avg(t) from tableName;
50
hive> select avg (distinct t) from tableName;
30
hive> select min(t) from tableName;
20
hive> select max(t) from tableName;
120
-- create the table
create table score_map(name string, score map<string, int>)
row format delimited fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':';
-- create a data file with the following content, then load it
cd /opt/install/hivedatas/
vim score_map.txt
zhangsan 数学:80,语文:89,英语:95
lisi 语文:60,数学:80,英语:99
-- load the data into the hive table
load data local inpath '/opt/install/hivedatas/score_map.txt' overwrite into table score_map;
-- accessing map-typed data:
-- get all the values:
select name,map_values(score) from score_map;
-- get all the keys:
select name,map_keys(score) from score_map;
-- look up a value by key
select name,score["数学"] from score_map;
-- get the number of entries in the map
select name,size(score) from score_map;
-- build a map literal
select map(1, 'zs', 2, 'lisi');
-- returns {1:"zs",2:"lisi"}
-- create a table with a struct column
hive> create table movie_score(name string, info struct<number:int,score:float>)
row format delimited fields terminated by "\t"
collection items terminated by ":";
-- prepare the data file
cd /opt/install/hivedatas/
vim struct.txt
-- the movie ABC was rated by 1254 people, with a score of 7.4
ABC 1254:7.4
DEF 256:4.9
XYZ 456:5.4
-- load the data
load data local inpath '/opt/install/hivedatas/struct.txt' overwrite into table movie_score;
-- query the data in hive
hive> select * from movie_score;
hive> select info.number, info.score from movie_score;
OK
1254 7.4
256 4.9
456 5.4
-- build a struct literal
select struct(1, 'anzhulababy', 'moon', 1.68);
-- returns {"col1":1,"col2":"anzhulababy","col3":"moon","col4":1.68}
hive> create table person(name string, work_locations array<string>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
-- load data into the person table
cd /opt/install/hivedatas/
vim person.txt
-- the data content looks like this
biansutao beijing,shanghai,tianjin,hangzhou
linan changchun,chengdu,wuhan
-- load the data
hive > load data local inpath '/opt/install/hivedatas/person.txt' overwrite into table person;
-- query all rows
hive > select * from person;
-- access elements by array index
hive > select work_locations[0] from person;
-- query the whole array column
hive > select work_locations from person;
-- get the number of elements
hive > select size(work_locations) from person;
-- build array literals
select array(1, 2, 1);
select array(1, 'a', 1.0);
select array(1, 2, 1.0);
hive> select size(map(1, 'zs', 2, 'anzhulababy')) from tableName;
2
hive> select size(t) from arr_table2;
4
hive> select cast('1' as bigint) from tableName;
1
CONCAT(string A/col, string B/col, ...): returns the concatenation of the input strings; any number of inputs is supported.
CONCAT_WS(separator, str1, str2, ...): a special form of CONCAT() that joins the strings with the given separator.
COLLECT_SET(col): accepts only primitive types; it deduplicates and aggregates the values of a column into an array-typed field.
name | constellation | blood_type |
---|---|---|
孙悟空 | 白羊座 | A |
老王 | 射手座 | A |
宋宋 | 白羊座 | B |
猪八戒 | 白羊座 | A |
按住啦baby | 射手座 | A |
Requirement: group people with the same constellation and blood type together, producing:
射手座,A 老王|按住啦baby
白羊座,A 孙悟空|猪八戒
白羊座,B 宋宋
cd /opt/install/hivedatas
vim constellation.txt
孙悟空 白羊座 A
老王 射手座 A
宋宋 白羊座 B
猪八戒 白羊座 A
按住啦baby 射手座 A
hive (hive_explode)> create table person_info(name string, constellation string, blood_type string) row format delimited fields terminated by "\t";
hive (hive_explode)> load data local inpath '/opt/install/hivedatas/constellation.txt' into table person_info;
hive (hive_explode)> select t1.base, concat_ws('|', collect_set(t1.name)) name
from
(select name, concat(constellation, "," , blood_type) base from person_info) t1
group by t1.base;
EXPLODE(col): splits a complex array or map column of a Hive row into multiple rows.
LATERAL VIEW: used together with UDTFs such as explode() to keep other columns alongside the exploded rows; examples follow below.
cd /opt/install/hivedatas
vim movie.txt
《疑犯追踪》 悬疑,动作,科幻,剧情
《Lie to me》 悬疑,警匪,动作,心理,剧情
《战狼2》 战争,动作,灾难
Requirement: expand each movie's category array so that every movie/category pair is its own row:
《疑犯追踪》 悬疑
《疑犯追踪》 动作
《疑犯追踪》 科幻
《疑犯追踪》 剧情
《Lie to me》 悬疑
《Lie to me》 警匪
《Lie to me》 动作
《Lie to me》 心理
《Lie to me》 剧情
《战狼2》 战争
《战狼2》 动作
《战狼2》 灾难
hive (hive_explode)> create table movie_info(movie string, category array<string>)
row format delimited fields terminated by "\t"
collection items terminated by ",";
load data local inpath "/opt/install/hivedatas/movie.txt" into table movie_info;
hive (hive_explode)> select movie, category_name from movie_info
lateral view explode(category) table_tmp as category_name;
-- Requirement: we have data in the following format
zhangsan child1,child2,child3,child4 k1:v1,k2:v2
lisi child5,child6,child7,child8 k3:v3,k4:v4
-- fields are separated by \t; the requirement is to split all the children out into a single column, as follows
+----------+--+
| mychild |
+----------+--+
| child1 |
| child2 |
| child3 |
| child4 |
| child5 |
| child6 |
| child7 |
| child8 |
+----------+--+
-- also split the map keys and values apart, producing the following result
+-----------+-------------+--+
| mymapkey | mymapvalue |
+-----------+-------------+--+
| k1 | v1 |
| k2 | v2 |
| k3 | v3 |
| k4 | v4 |
+-----------+-------------+--+
hive (default)> create database hive_explode;
hive (default)> use hive_explode;
hive (hive_explode)> create table hive_explode.t3(name string, children array<string>, address Map<string, string>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':' stored as textFile;
cd /opt/install/hivedatas/
vim maparray
-- the data content looks like this
zhangsan child1,child2,child3,child4 k1:v1,k2:v2
lisi child5,child6,child7,child8 k3:v3,k4:v4
hive (hive_explode)> load data local inpath '/opt/install/hivedatas/maparray' into table hive_explode.t3;
hive (hive_explode)> SELECT explode(children) AS myChild FROM hive_explode.t3;
hive (hive_explode)> SELECT explode(address) AS (myMapKey, myMapValue) FROM hive_explode.t3;
a:shandong,b:beijing,c:hebei|1,2,3,4,5,6,7,8,9|[{"source":"7fresh","monthSales":4900,"userCount":1900,"score":"9.9"},{"source":"jd","monthSales":2090,"userCount":78981,"score":"9.8"},{"source":"jdmart","monthSales":6987,"userCount":1600,"score":"9.0"}]
The fields are separated by |.
We want to extract every monthSales value as the following single column:
4900
2090
6987
hive (hive_explode)> create table hive_explode.explode_lateral_view (area string, goods_id string, sale_info string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS textfile;
cd /opt/install/hivedatas
vim explode_json
a:shandong,b:beijing,c:hebei|1,2,3,4,5,6,7,8,9|[{"source":"7fresh","monthSales":4900,"userCount":1900,"score":"9.9"},{"source":"jd","monthSales":2090,"userCount":78981,"score":"9.8"},{"source":"jdmart","monthSales":6987,"userCount":1600,"score":"9.0"}]
hive (hive_explode)> load data local inpath '/opt/install/hivedatas/explode_json' overwrite into table hive_explode.explode_lateral_view;
hive (hive_explode)> select explode(split(goods_id, ',')) as goods_id from hive_explode.explode_lateral_view;
hive (hive_explode)> select explode(split(area, ',')) as area from hive_explode.explode_lateral_view;
hive (hive_explode)> select explode(split(regexp_replace(regexp_replace(sale_info,'\\[\\{',''),'}]',''),'},\\{')) as sale_info from hive_explode.explode_lateral_view;
hive (hive_explode)> select get_json_object(explode(split(regexp_replace(regexp_replace(sale_info,'\\[\\{',''),'}]',''),'},\\{')),'$.monthSales') as sale_info from hive_explode.explode_lateral_view;
-- this fails with: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
-- a UDTF such as explode cannot be nested inside another function
-- likewise, if you try to select two fields: select explode(split(area,',')) as area, good_id from explode_lateral_view;
-- it fails with: FAILED: SemanticException 1:40 Only a single expression in the SELECT clause is supported with UDTF's. Error encountered near token 'good_id'
-- a UDTF supports only a single expression in the SELECT clause, which is where LATERAL VIEW comes in
lateral view is used together with UDTFs such as split and explode; it splits one row of data into multiple rows.
On top of that, the split rows can be aggregated.
lateral view first calls the UDTF for each row of the base table; the UDTF splits that row into one or more rows; lateral view then joins the results back together, producing a virtual table that supports column aliases.
Querying multiple fields together with lateral view:
hive (hive_explode)> select goods_id2, sale_info from explode_lateral_view
LATERAL VIEW explode(split(goods_id, ','))goods as goods_id2;
Here LATERAL VIEW explode(split(goods_id, ','))goods acts as a virtual table that is joined with the base table explode_lateral_view as a Cartesian product.
It can also be applied multiple times; the following is likewise the Cartesian product of three tables:
hive (hive_explode)> select goods_id2, sale_info, area2 from explode_lateral_view
LATERAL VIEW explode(split(goods_id, ','))goods as goods_id2
LATERAL VIEW explode(split(area,','))area as area2;
hive (hive_explode)> select
get_json_object(concat('{',sale_info_1,'}'),'$.source') as source, get_json_object(concat('{',sale_info_1,'}'),'$.monthSales') as monthSales, get_json_object(concat('{',sale_info_1,'}'),'$.userCount') as userCount, get_json_object(concat('{',sale_info_1,'}'),'$.score') as score
from explode_lateral_view
LATERAL VIEW explode(split(regexp_replace(regexp_replace(sale_info,'\\[\\{',''),'}]',''),'},\\{'))sale_info as sale_info_1;
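Given the single sample row loaded above, this should produce roughly:
7fresh 4900 1900 9.9
jd 2090 78981 9.8
jdmart 6987 1600 9.0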
hive (hive_explode)> create table test_udf(col1 int,col2 int)
row format delimited fields terminated by ',';
cd /opt/install/hivedatas
vim test_udf
1,2
4,3
6,4
7,5
5,6
hive (hive_explode)> load data local inpath '/opt/install/hivedatas/test_udf' overwrite into table test_udf;
hive (hive_explode)> select reflect("java.lang.Math","max", col1, col2) from test_udf;
hive (hive_explode)> create table test_udf2(class_name string, method_name string, col1 int, col2 int) row format delimited fields terminated by ',';
cd /opt/install/hivedatas
vim test_udf2
java.lang.Math,min,1,2
java.lang.Math,max,2,3
hive (hive_explode)> load data local inpath '/opt/install/hivedatas/test_udf2' overwrite into table test_udf2;
hive (hive_explode)> select reflect(class_name, method_name, col1, col2) from test_udf2;
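With the two rows above, this calls Math.min(1, 2) and Math.max(2, 3) respectively, returning:
1
3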
reflect can also call functions from Apache Commons; the commons jars are already on Hadoop's classpath, so they can be used directly.
Usage:
hive (hive_explode)> select reflect("org.apache.commons.lang.math.NumberUtils", "isNumber", "123");
true
Analysis (window) functions let us solve problems such as computing top-N per group, percentiles, and data slices:
1. ROW_NUMBER(): assigns a unique, consecutive sequence number to each row within its partition.
2. RANK(): ties receive the same rank, and gaps are left in the sequence after ties.
3. DENSE_RANK(): ties receive the same rank, with no gaps.
4. CUME_DIST: the fraction of rows in the partition whose value is less than or equal to the current row's.
5. PERCENT_RANK: (rank - 1) / (number of rows in the partition - 1).
6. NTILE(n): distributes the ordered rows of the partition into n roughly equal buckets and returns the bucket number.
Sample data (cookieid, createtime, pv):
cookie1,2015-04-10,1
cookie1,2015-04-11,5
cookie1,2015-04-12,7
cookie1,2015-04-13,3
cookie1,2015-04-14,2
cookie1,2015-04-15,4
cookie1,2015-04-16,4
cookie2,2015-04-10,2
cookie2,2015-04-11,3
cookie2,2015-04-12,5
cookie2,2015-04-13,6
cookie2,2015-04-14,3
cookie2,2015-04-15,9
cookie2,2015-04-16,7
CREATE EXTERNAL TABLE cookie_pv (
cookieid string,
createtime string,
pv INT
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
cd /opt/install/hivedatas
vim cookiepv.txt
cookie1,2015-04-10,1
cookie1,2015-04-11,5
cookie1,2015-04-12,7
cookie1,2015-04-13,3
cookie1,2015-04-14,2
cookie1,2015-04-15,4
cookie1,2015-04-16,4
cookie2,2015-04-10,2
cookie2,2015-04-11,3
cookie2,2015-04-12,5
cookie2,2015-04-13,6
cookie2,2015-04-14,3
cookie2,2015-04-15,9
cookie2,2015-04-16,7
load data local inpath '/opt/install/hivedatas/cookiepv.txt' overwrite into table cookie_pv;
select * from (
SELECT
cookieid,
createtime,
pv,
RANK() OVER(PARTITION BY cookieid ORDER BY pv desc) AS rn1,
DENSE_RANK() OVER(PARTITION BY cookieid ORDER BY pv desc) AS rn2,
ROW_NUMBER() OVER(PARTITION BY cookieid ORDER BY pv DESC) AS rn3
FROM cookie_pv
) temp where temp.rn1 <= 3;
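A hedged sketch of the remaining window functions against the same cookie_pv table (NTILE(4) buckets each cookie's days into quartiles by pv; rows with quartile = 1 are that cookie's top ~25% of days):
SELECT
    cookieid,
    createtime,
    pv,
    NTILE(4) OVER (PARTITION BY cookieid ORDER BY pv DESC) AS quartile,
    CUME_DIST() OVER (PARTITION BY cookieid ORDER BY pv) AS cd,
    PERCENT_RANK() OVER (PARTITION BY cookieid ORDER BY pv) AS pr
FROM cookie_pv;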
Hive ships with some built-in functions, such as max/min, but their number is limited; you can conveniently extend Hive with user-defined functions.
When the built-in functions cannot satisfy your business logic, consider a user-defined function (UDF: user-defined function).
User-defined functions fall into three categories:
UDF (User-Defined Function): one row in, one row out
UDAF (User-Defined Aggregation Function): aggregate function, many rows in, one row out, similar to count/max/min
UDTF (User-Defined Table-Generating Functions): one row in, many rows out, e.g. lateral view explode()
Official documentation:
https://cwiki.apache.org/confluence/display/Hive/HivePlugins
Programming steps:
(1) extend org.apache.hadoop.hive.ql.exec.UDF
(2) implement an evaluate method; evaluate supports overloading
Notes:
(1) a UDF must have a return type; it may return null, but the return type cannot be void;
(2) UDFs usually use Hadoop writable types such as Text/LongWritable; plain Java types are not recommended.
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0-mr1-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.6.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>1.1.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>1.1.0-cdh5.14.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-cli</artifactId>
        <version>1.1.0-cdh5.14.2</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>
    </plugins>
</build>
package com.opt.udf;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class MyUDF extends UDF {
    public Text evaluate(final Text s) {
        if (null == s) {
            return null;
        }
        // return the upper-cased string
        return new Text(s.toString().toUpperCase());
    }
}
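Package the project into a jar first (a sketch; the artifact name assumes the project's artifactId is day_hive_udf, which matches the jar renamed in the next step):
mvn clean package
cp target/original-day_hive_udf-1.0-SNAPSHOT.jar /opt/install/hive-1.1.0-cdh5.14.2/lib/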
cd /opt/install/hive-1.1.0-cdh5.14.2/lib
mv original-day_hive_udf-1.0-SNAPSHOT.jar udf.jar
0: jdbc:hive2://node03:10000> add jar /opt/install/hive-1.1.0-cdh5.14.2/lib/udf.jar;
0: jdbc:hive2://node03:10000> create temporary function touppercase as 'com.opt.udf.MyUDF';
0: jdbc:hive2://node03:10000> select touppercase('abc');
ABC
How to create a permanent function in Hive
A temporary function has to be re-added every time we enter the Hive client and disappears when the client exits, so we can create a permanent function instead, which does not expire.
Creating a permanent function
-- 1. Choose a database; the function will be created under that database
0: jdbc:hive2://node03:10000> use myhive;
-- 2. Use add jar to add our jar to hive
0: jdbc:hive2://node03:10000> add jar /opt/install/hive-1.1.0-cdh5.14.2/lib/udf.jar;
-- 3. List all the jars we have added
0: jdbc:hive2://node03:10000> list jars;
-- 4. Create the permanent function and associate it with our class
0: jdbc:hive2://node03:10000> create function myuppercase as 'com.opt.udf.MyUDF';
-- 5. Show our permanent function
0: jdbc:hive2://node03:10000> show functions like 'my*';
-- 6. Use the permanent function
0: jdbc:hive2://node03:10000> select myhive.myuppercase('helloworld');
-- 7. Drop the permanent function
0: jdbc:hive2://node03:10000> drop function myhive.myuppercase;
-- 8. Show functions again to confirm it is gone
show functions like 'my*';