UDTF(user-defined table-generating function,用户自定义表生成函数)作用于单个数据行,并且产生多个数据行——即以一个表作为输出,通常与 lateral view 搭配使用(例如内置的 explode())。
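编程思路:继承 GenericUDTF,并实现三个方法:initialize(校验参数并声明输出列的名称和类型)、process(处理每一行输入,通过 forward 输出结果行)、close(清理资源)。
1、编写代码,例子: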
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
/**
 * Hive UDTF that splits a string of the form {@code "key:value;key:value;"} into
 * rows with two string columns: the key and the value.
 *
 * <p>Copyright (c) 2020, zll. This program is protected by laws.
 * Date: 2020-05-28
 *
 * @author zll
 * @version 1.0
 */
public class ExplodeMap extends GenericUDTF {
    /** Separator between {@code key:value} entries in the input string. */
    private static final String ENTRY_SEPARATOR = ";";
    /** Separator between the key and the value inside one entry. */
    private static final char KEY_VALUE_SEPARATOR = ':';

    /** Inspector for the single argument, captured in {@link #initialize}. */
    private transient PrimitiveObjectInspector inputOI;

    /**
     * Validates the argument list and declares the output schema.
     *
     * @param argOIs inspectors for the call's arguments; exactly one primitive is required
     * @return a struct inspector describing two string columns, {@code col1} and {@code col2}
     * @throws UDFArgumentException if the argument count is not one or the argument is not primitive
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        if (argOIs.length != 1) {
            throw new UDFArgumentLengthException("ExplodeMap takes only one argument");
        }
        if (argOIs[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
        }
        // Keep the inspector so process() can decode the raw value correctly.
        inputOI = (PrimitiveObjectInspector) argOIs[0];

        ArrayList<String> fieldNames = new ArrayList<String>(2);
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2);
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col2");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Emits one (key, value) row per {@code key:value} entry of the input string.
     *
     * <p>A NULL input produces no rows. Entries without a {@code ':'} (including the
     * empty entry left by a trailing {@code ';'}) are skipped. Only the first
     * {@code ':'} separates key from value, so values may themselves contain colons.
     *
     * @param objects the call's arguments; only {@code objects[0]} is read
     * @throws HiveException if forwarding a row to the downstream operator fails
     */
    @Override
    public void process(Object[] objects) throws HiveException {
        if (objects == null || objects.length == 0 || objects[0] == null) {
            return;
        }
        // Decode through the inspector when available; fall back to the raw object otherwise.
        Object value = inputOI != null ? inputOI.getPrimitiveJavaObject(objects[0]) : objects[0];
        if (value == null) {
            return;
        }
        for (String entry : value.toString().split(ENTRY_SEPARATOR)) {
            int separatorIndex = entry.indexOf(KEY_VALUE_SEPARATOR);
            if (separatorIndex < 0) {
                // Malformed entry: not a key:value pair, so there is nothing to emit.
                continue;
            }
            // Always forward exactly two fields to match the schema declared in initialize().
            // Errors from forward() propagate: swallowing them would hide downstream failures.
            forward(new Object[] {
                entry.substring(0, separatorIndex),
                entry.substring(separatorIndex + 1)
            });
        }
    }

    /**
     * No resources to release.
     *
     * @throws HiveException never thrown by this implementation
     */
    @Override
    public void close() throws HiveException {
    }
}
2、打包定义函数
1)数据准备:
1 name:zll;age:18
2 name:mm;sex:男
2)建表语句:
create table user_info(
id int,
user_retail string
)
row format delimited
fields terminated by '\t';
3)加载数据
load data local inpath '/root/data/arithmetic/data4' overwrite into table user_info;
4)检查数据:
hive> select * from user_info;
OK
1 name:zll;age:18
2 name:mm;sex:男
Time taken: 0.046 seconds, Fetched: 2 row(s)
5)创建临时函数
add jar /data/hive/lib/ExplodeMap.jar;
CREATE TEMPORARY FUNCTION explodeMap as 'ExplodeMap';
6)测试自定义的udtf函数
测试1(直接select中使用)
select explodemap(user_retail) as (col1,col2) from user_info;
结果
hive> select explodemap(user_retail) as (col1,col2) from user_info;
OK
name zll
age 18
name mm
sex 男
Time taken: 0.08 seconds, Fetched: 4 row(s)
测试2(和lateral view一起使用):此方法更为方便日常使用。lateral view 会对原表的每一行调用 UDTF,并把生成的多行结果与该输入行关联(join)起来,因此可以同时查询原表的其他列(例如 id);而测试1那种直接在 select 中使用 UDTF 的写法不能同时查询其他列。
select user_info.id,t.col1,t.col2 from user_info lateral view explodemap(user_retail) t as col1,col2;
结果:
hive> select user_info.id,t.col1,t.col2 from user_info lateral view explodemap(user_retail) t as col1,col2;
OK
1 name zll
1 age 18
2 name mm
2 sex 男
Time taken: 0.057 seconds, Fetched: 4 row(s)
创建永久函数可参照上篇:hive的自定义udf函数(临时和永久函数)
创建步骤一致