1.UDF中没有引用其他资源文件
Hive UDF 函数写完后打成 jar 包,放到指定目录下,
例如:/home/hadoop/udf/
然后在该目录下启动 hive cli,执行:
add jar /home/hadoop/udf/timeOffset.jar;
create temporary function timeOffset as 'cn.gitv.analysis.TimeOffset';
之后就可以用了(调用形式为 timeOffset(time)),没有路径问题。
当然,你也可以将 jar 文件上传到 HDFS 的某个目录中:
add jar hdfs://nameservice1/udf/timeOffset.jar;(nameservice1 是 dfs 集群名称)
create temporary function timeOffset as 'cn.gitv.analysis.TimeOffset';
同样没有路径问题。
2.UDF中引用了其他资源文件
package cn.gitv.bi.log.analysis.udf;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
/**
 * Hive generic UDF that translates an (API key, partner code) pair into the
 * value stored for it in a local mapping file.
 *
 * <p>The mapping file is read line by line; each line is split on {@code |}
 * and the first field is stored as the lookup key with the second field as
 * its value. At evaluation time the lookup key is built as
 * {@code <arg1>:<arg2>}, so the file is expected to contain lines of the form
 * {@code apikey:code|mac}.
 *
 * <p>{@link #dataFile} is a relative path, so it is resolved against the
 * working directory of the JVM that runs the UDF.
 */
@Description(
name="loc 2 id",
value="_FUNC_(string arg1, string arg2) - input two string",
extended="return the MAC corresponding to arg1 and arg2 \n" +
"Example:\n" +
"> SELECT _FUNC_(\"82b27fbc27408a13c0ed34466a8a4e57\", \"AH_CMCC\") FROM dual;\n"
)
public class Key2Mac extends GenericUDF {

    /** Inspectors for the two arguments, captured in {@link #initialize}. */
    private transient ObjectInspector[] argumentOIs;

    /** Path of the mapping file (relative to the working directory). */
    public static String dataFile = "Key2Mac.txt";

    /** Regular expression that separates key and value on each line. */
    public static final String SEP = "\\|";

    /** Lookup table filled by {@link #ReadFile}; shared by all instances. */
    public static Hashtable<String, String> key2Mac = new Hashtable<String, String>();

    /**
     * Loads the mapping file into {@code tbl}.
     *
     * <p>Lines that do not contain at least two {@code |}-separated fields
     * (for example blank lines) are skipped. I/O failures are reported on
     * stderr and leave {@code tbl} with whatever was loaded so far.
     *
     * @param tbl      table that receives one entry per valid line
     * @param dataFile path of the file to read, decoded as UTF-8
     */
    public void ReadFile(Hashtable<String, String> tbl, String dataFile) {
        // try-with-resources closes both streams even when opening the file
        // fails half-way, which the hand-written finally block could not do
        // without hitting a NullPointerException.
        try (FileInputStream fis = new FileInputStream(dataFile);
             BufferedReader br = new BufferedReader(new InputStreamReader(fis, "utf-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = line.trim().split(SEP);
                if (fields.length < 2) {
                    // Blank or malformed line: nothing to map.
                    continue;
                }
                tbl.put(fields[0], fields[1]);
            }
        } catch (IOException e) {
            // Best effort: the UDF keeps working and simply echoes its input.
            e.printStackTrace();
        }
    }

    /**
     * Validates the call signature and (re)loads the mapping file.
     *
     * @param args inspectors of the call arguments
     * @return the Java string inspector describing the result
     * @throws UDFArgumentException if the call does not pass exactly two
     *         string arguments
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        // evaluate() reads args[0] and args[1], so exactly two are required.
        if (args.length != 2) {
            throw new UDFArgumentLengthException(
                    "The operator 'key2Mac' requires exactly 2 arguments.");
        }
        for (int i = 0; i < args.length; i++) {
            // evaluate() casts to StringObjectInspector; reject other types here
            // instead of failing with a ClassCastException on the first row.
            if (!(args[i] instanceof StringObjectInspector)) {
                throw new UDFArgumentException(
                        "The operator 'key2Mac' requires string arguments, but argument "
                                + (i + 1) + " is of type " + args[i].getTypeName() + ".");
            }
        }
        ReadFile(key2Mac, dataFile);
        argumentOIs = args;
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    /**
     * Looks up {@code <arg1>:<arg2>} in the mapping table.
     *
     * @param args the two deferred string arguments
     * @return {@code null} if the first argument is NULL; the first argument
     *         unchanged if it is 17 characters long or has no mapping;
     *         otherwise the mapped value
     * @throws HiveException if a deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        Object rawKey = args[0].get();
        Object rawCode = args[1].get();
        StringObjectInspector keyInspector = (StringObjectInspector) argumentOIs[0];
        StringObjectInspector codeInspector = (StringObjectInspector) argumentOIs[1];
        String strKey = keyInspector.getPrimitiveJavaObject(rawKey);
        String strCode = codeInspector.getPrimitiveJavaObject(rawCode);
        if (strKey == null) {
            // SQL NULL in, SQL NULL out.
            return null;
        }
        // NOTE(review): 17 is presumably the length of a colon-separated MAC
        // address, i.e. the input is already a MAC - confirm.
        if (strKey.length() == 17) {
            return strKey;
        }
        String mapped = key2Mac.get(strKey + ":" + strCode);
        return mapped != null ? mapped : strKey;
    }

    /**
     * Manual smoke test: loads a mapping file and prints the value stored for
     * one sample key.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     * @throws IOException declared for compatibility; not thrown by this body
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0
                ? args[0]
                : "C:/Users/Administrator/Desktop/Key2Mac.txt";
        String key = "c5861e2b62c86e142ae856950a785f22" + ":" + "AH_CMCC";
        new Key2Mac().ReadFile(key2Mac, path);
        System.out.println(key2Mac.get(key));
    }

    /**
     * Builds the text describing this call.
     *
     * @param args display strings of the two arguments
     * @return the description text
     */
    @Override
    public String getDisplayString(String[] args) {
        return "convert apikey " + args[0]
                + " to relevant mac, if " + args[0]
                + " is null returns " + args[1];
    }
}
上述代码中的 dataFile 使用的是相对路径。对于这种写法,同样将 UDF 打成 jar 包放到指定目录,并把资源文件也放到同一目录中(由于资源文件要定时更新,所以不能打到 jar 里面),
例如:/home/hadoop/udf/
然后在该目录下启动 hive cli 执行(请注意,必须在该目录下启动 cli,UDF 才能找到资源文件,否则是找不到的):
add jar /home/hadoop/udf/key2mac.jar;
add file /home/hadoop/udf/Key2Mac.txt;
create temporary function key2mac as 'cn.gitv.bi.log.analysis.udf.Key2Mac';
之后就可以用了,调用形式为 key2mac(mac, code)。
对于上述代码这种写法,不可以将 jar 和资源文件放到 dfs 上:即使二者放在 dfs 的同一目录中,也是找不到资源文件的。所以,这种在 jar 中通过相对路径引用资源文件的 UDF 写法,只能在本地文件系统上使用,并且必须在资源文件所在目录启动 cli。
另一种解决办法是让 UDF 直接读取 HDFS 上的资源文件。下面代码中的 dataFile 指的是 HDFS 中指定的文件,只要把需要的资源文件放到代码指定的目录下即可,当然文件名也要一致。这样 jar 文件的位置就可以随意放置了。你也可以在 UDF 中加一个参数,传入资源文件的路径,这样更加灵活;但是 UDF 函数传入的参数太多,虽然灵活了,操作却麻烦了。
package cn.gitv.bi.log.analysis.udf;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import cn.gitv.bi.log.analysis.utils.HDFSUtils;
/**
 * Hive generic UDF that translates an (API key, partner code) pair into the
 * value stored for it in a mapping file.
 *
 * <p>The file is opened through the project helper {@code HDFSUtils} (not
 * shown here), read line by line, and each line is split on {@code |}: the
 * first field becomes the lookup key and the second its value. At evaluation
 * time the lookup key is built as {@code <arg1>:<arg2>}, so the file is
 * expected to contain lines of the form {@code apikey:code|mac}.
 */
@Description(name = "loc 2 id", value = "_FUNC_(string arg1, string arg2) - input two string", extended = "return the MAC corresponding to arg1 and arg2 \n"
+ "Example:\n"
+ "> SELECT _FUNC_(\"82b27fbc27408a13c0ed34466a8a4e57\", \"AH_CMCC\") FROM dual;\n")
public class Key2Mac extends GenericUDF {

    /** Inspectors for the two arguments, captured in {@link #initialize}. */
    private transient ObjectInspector[] argumentOIs;

    /** Path of the mapping file handed to {@code HDFSUtils.getData}. */
    public static String dataFile = "/udf/Key2Mac.txt";

    /** Regular expression that separates key and value on each line. */
    public static final String SEP = "\\|";

    /** Lookup table filled by {@link #ReadFile}; shared by all instances. */
    public static Hashtable<String, String> key2Mac = new Hashtable<String, String>();

    /**
     * Loads the mapping file into {@code tbl}.
     *
     * <p>Lines that do not contain at least two {@code |}-separated fields
     * (for example blank lines) are skipped. I/O failures while reading are
     * reported on stderr and leave {@code tbl} with whatever was loaded so far.
     *
     * @param tbl      table that receives one entry per valid line
     * @param dataFile path passed to {@code HDFSUtils.getData}
     */
    public void ReadFile(Hashtable<String, String> tbl, String dataFile) {
        // NOTE(review): the FileSystem handle is not closed here, as in the
        // original; presumably HDFSUtils hands out a shared instance - confirm.
        FileSystem fs = HDFSUtils.getFileSystem();
        // NOTE(review): if getData can return null on failure, the
        // BufferedReader constructor below throws NullPointerException - confirm.
        InputStreamReader in = HDFSUtils.getData(fs, dataFile);
        try (BufferedReader br = new BufferedReader(in)) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = line.trim().split(SEP);
                if (fields.length < 2) {
                    // Blank or malformed line: nothing to map.
                    continue;
                }
                tbl.put(fields[0], fields[1]);
            }
        } catch (IOException e) {
            // Best effort: the UDF keeps working and simply echoes its input.
            e.printStackTrace();
        }
    }

    /**
     * Validates the call signature and (re)loads the mapping file.
     *
     * @param args inspectors of the call arguments
     * @return the Java string inspector describing the result
     * @throws UDFArgumentException if the call does not pass exactly two
     *         string arguments
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        // evaluate() reads args[0] and args[1], so exactly two are required.
        if (args.length != 2) {
            throw new UDFArgumentLengthException(
                    "The operator 'key2Mac' requires exactly 2 arguments.");
        }
        for (int i = 0; i < args.length; i++) {
            // evaluate() casts to StringObjectInspector; reject other types here
            // instead of failing with a ClassCastException on the first row.
            if (!(args[i] instanceof StringObjectInspector)) {
                throw new UDFArgumentException(
                        "The operator 'key2Mac' requires string arguments, but argument "
                                + (i + 1) + " is of type " + args[i].getTypeName() + ".");
            }
        }
        ReadFile(key2Mac, dataFile);
        argumentOIs = args;
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    /**
     * Looks up {@code <arg1>:<arg2>} in the mapping table.
     *
     * @param args the two deferred string arguments
     * @return {@code null} if the first argument is NULL; the first argument
     *         unchanged if it is 17 characters long or has no mapping;
     *         otherwise the mapped value
     * @throws HiveException if a deferred argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        Object rawKey = args[0].get();
        Object rawCode = args[1].get();
        StringObjectInspector keyInspector = (StringObjectInspector) argumentOIs[0];
        StringObjectInspector codeInspector = (StringObjectInspector) argumentOIs[1];
        String strKey = keyInspector.getPrimitiveJavaObject(rawKey);
        String strCode = codeInspector.getPrimitiveJavaObject(rawCode);
        if (strKey == null) {
            // SQL NULL in, SQL NULL out.
            return null;
        }
        // NOTE(review): 17 is presumably the length of a colon-separated MAC
        // address, i.e. the input is already a MAC - confirm.
        if (strKey.length() == 17) {
            return strKey;
        }
        String mapped = key2Mac.get(strKey + ":" + strCode);
        return mapped != null ? mapped : strKey;
    }

    /**
     * Manual smoke test: loads a mapping file and prints the value stored for
     * one sample key.
     *
     * <p>NOTE(review): the default path is a local Windows path although
     * {@link #ReadFile} opens files through {@code HDFSUtils}; pass the real
     * location as the first argument, or confirm the default is still wanted.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     * @throws IOException declared for compatibility; not thrown by this body
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0
                ? args[0]
                : "C:/Users/Administrator/Desktop/Key2Mac.txt";
        String key = "c5861e2b62c86e142ae856950a785f22" + ":" + "AH_CMCC";
        new Key2Mac().ReadFile(key2Mac, path);
        System.out.println(key2Mac.get(key));
    }

    /**
     * Builds the text describing this call.
     *
     * @param args display strings of the two arguments
     * @return the description text
     */
    @Override
    public String getDisplayString(String[] args) {
        return "convert apikey " + args[0]
                + " to relevant mac, if " + args[0]
                + " is null returns " + args[1];
    }
}