Hive UDF Resource File Path Issues

1. A UDF that does not reference any resource files
Write your Hive UDF and package it into a jar. Put the jar in a directory of your choice,
for example /home/hadoop/udf/.
Start the Hive CLI in that directory and run:

add jar /home/hadoop/udf/timeOffset.jar;
create temporary function timeOffset as 'cn.gitv.analysis.TimeOffset';

After that the function is ready to use, and there are no path problems.
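
For example, a query might look like this (the table and column names here are hypothetical):

select timeOffset(play_time) from play_log limit 10;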
You can also upload the jar to a directory on HDFS instead:

add jar hdfs://nameservice1/udf/timeOffset.jar;  (nameservice1 is the HDFS nameservice name of the cluster)
create temporary function timeOffset as 'cn.gitv.analysis.TimeOffset';

Again, there are no path problems.
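
You can check which jars are registered in the current session with the Hive CLI command:

list jars;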

2. A UDF that references other resource files

package cn.gitv.bi.log.analysis.udf;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

@Description(
        name="loc 2 id",
        value="_FUNC_(string arg1, string arg2) - input two string",
        extended="return the MAC corresponding to arg1 and arg2 \n" +
        "Example:\n" +
        "> SELECT _FUNC_(\"82b27fbc27408a13c0ed34466a8a4e57\", \"AH_CMCC\") FROM dual;\n"
)
public class Key2Mac extends GenericUDF {

    private transient ObjectInspector[] argumentOIs;
    // Relative path: resolved against the working directory of the Hive CLI process,
    // which is why the CLI must be started in the directory containing this file.
    public static String dataFile = "Key2Mac.txt";
    public static final String SEP = "\\|";
    public static Hashtable<String, String> key2Mac = new Hashtable<String, String>();

    // Load the "key|mac" mapping file from the local filesystem into the lookup table.
    public void ReadFile(Hashtable<String, String> tbl, String dataFile) {
        FileInputStream fis = null;
        InputStreamReader isr = null;
        BufferedReader br = null;
        try {
            fis = new FileInputStream(dataFile);
            isr = new InputStreamReader(fis, "utf-8");
            br = new BufferedReader(isr);
            String tp = null;
            String[] tmp = null;

            while ((tp = br.readLine()) != null) {
                tp = tp.trim();
                tmp = tp.split(SEP);
                tbl.put(tmp[0], tmp[1]);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (br != null) {
                    br.close();  // also closes the wrapped isr and fis
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentLengthException(
                    "The operator 'key2Mac' accepts exactly 2 arguments.");
        }

        // Load the lookup table once, before any rows are evaluated.
        ReadFile(key2Mac, dataFile);

        argumentOIs = args;
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        Object base = args[0].get();
        Object power = args[1].get();
        StringObjectInspector soi0 = (StringObjectInspector) argumentOIs[0];
        StringObjectInspector soi1 = (StringObjectInspector) argumentOIs[1];
        String str_key = soi0.getPrimitiveJavaObject(base);
        String str_val = soi1.getPrimitiveJavaObject(power);
        // A 17-character value is already a MAC address (aa:bb:cc:dd:ee:ff), so pass it through.
        if (str_key.length() == 17) {
            return str_key;
        }
        StringBuilder sb = new StringBuilder();
        String key = sb.append(str_key).append(":").append(str_val).toString();
        String ret = key2Mac.get(key);
        if (ret == null) {
            return str_key;
        }
        return ret;
    }

    // Local test entry point; adjust the path to wherever your copy of the file lives.
    public static void main(String[] args) throws IOException {
        StringBuilder sb = new StringBuilder();
        String key = sb.append("c5861e2b62c86e142ae856950a785f22").append(":").append("AH_CMCC").toString();

        new Key2Mac().ReadFile(key2Mac, "C:/Users/Administrator/Desktop/Key2Mac.txt");
        String value = key2Mac.get(key);
        System.out.println(value);
    }

    @Override
    public String getDisplayString(String[] args) {
        StringBuilder sb = new StringBuilder();
        sb.append("convert apikey ");
        sb.append(args[0]);
        sb.append(" to relevant mac; if no mapping is found, returns ");
        sb.append(args[0]);
        return sb.toString();
    }

}

In the code above, dataFile is written as a relative path. As before, package the UDF into a jar and put it in a directory of your choice, and put the resource file in that same directory (the resource file has to be refreshed periodically, so it cannot be bundled into the jar),
for example /home/hadoop/udf/.
Start the Hive CLI in that directory and run the following. (Note: the CLI must be started in that directory, or the UDF will not be able to find the resource file.)

add jar /home/hadoop/udf/key2mac.jar;
add file /home/hadoop/udf/Key2Mac.txt;
create temporary function key2mac as 'cn.gitv.bi.log.analysis.udf.Key2Mac';

After that the function is ready to use.
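
For example (the table and column names here are hypothetical):

select key2mac(mac, code) from device_log limit 10;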
With a UDF written this way, you cannot put the jar and the resource file on HDFS: even if both are placed in the same HDFS directory, the resource file will not be found. The relative path is resolved against the working directory of the process that loads the class, so this style only works on the local filesystem, with the CLI started in the directory that contains the resource file.

Another solution is to have the UDF read the resource from HDFS. In the code below, dataFile refers to a fixed file path in HDFS; just upload the resource file to the directory the code expects, under the same file name. The jar itself can then be placed anywhere. You could also add a parameter to the UDF and pass in the resource file path, which gives you full flexibility, but the more arguments the function takes, the more cumbersome it is to call. The helper class the code relies on is sketched right after this paragraph.
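
The code below calls a small utility class, HDFSUtils, whose source is not shown in this post. Here is a minimal sketch of what it might look like, inferred from how it is called; the implementation below is an assumption, not the original:

package cn.gitv.bi.log.analysis.utils;

import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper matching the calls made in Key2Mac below.
public class HDFSUtils {

    // Obtain a FileSystem handle from the Hadoop configuration on the classpath.
    public static FileSystem getFileSystem() {
        try {
            return FileSystem.get(new Configuration());
        } catch (IOException e) {
            throw new RuntimeException("cannot connect to HDFS", e);
        }
    }

    // Open an HDFS file as a UTF-8 character stream.
    public static InputStreamReader getData(FileSystem fs, String file) {
        try {
            return new InputStreamReader(fs.open(new Path(file)), "utf-8");
        } catch (IOException e) {
            throw new RuntimeException("cannot open " + file, e);
        }
    }
}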

package cn.gitv.bi.log.analysis.udf;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

import cn.gitv.bi.log.analysis.utils.HDFSUtils;

@Description(name = "loc 2 id", value = "_FUNC_(string arg1, string arg2) - input two string", extended = "return the MAC corresponding to arg1 and arg2 \n"
        + "Example:\n"
        + "> SELECT _FUNC_(\"82b27fbc27408a13c0ed34466a8a4e57\", \"AH_CMCC\") FROM dual;\n")
public class Key2Mac extends GenericUDF {

    private transient ObjectInspector[] argumentOIs;
    // HDFS path of the mapping file; the jar itself can now live anywhere.
    public static String dataFile = "/udf/Key2Mac.txt";
    public static final String SEP = "\\|";
    public static Hashtable<String, String> key2Mac = new Hashtable<String, String>();

    // Load the "key|mac" mapping file from HDFS into the lookup table.
    public void ReadFile(Hashtable<String, String> tbl, String dataFile) {
        FileSystem fs = HDFSUtils.getFileSystem();
        InputStreamReader in = HDFSUtils.getData(fs, dataFile);
        BufferedReader br = new BufferedReader(in);
        String line = null;
        String[] tmp = null;

        try {
            while ((line = br.readLine()) != null) {
                line = line.trim();
                tmp = line.split(SEP);
                tbl.put(tmp[0], tmp[1]);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                br.close();  // also closes the wrapped HDFS stream
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentLengthException(
                    "The operator 'key2Mac' accepts exactly 2 arguments.");
        }

        // Load the lookup table once, before any rows are evaluated.
        ReadFile(key2Mac, dataFile);

        argumentOIs = args;
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        Object base = args[0].get();
        Object power = args[1].get();
        StringObjectInspector soi0 = (StringObjectInspector) argumentOIs[0];
        StringObjectInspector soi1 = (StringObjectInspector) argumentOIs[1];
        String str_key = soi0.getPrimitiveJavaObject(base);
        String str_val = soi1.getPrimitiveJavaObject(power);
        // A 17-character value is already a MAC address (aa:bb:cc:dd:ee:ff), so pass it through.
        if (str_key.length() == 17) {
            return str_key;
        }
        StringBuilder sb = new StringBuilder();
        String key = sb.append(str_key).append(":").append(str_val).toString();
        String ret = key2Mac.get(key);
        if (ret == null) {
            return str_key;
        }
        return ret;
    }

    // Local test entry point; in this version ReadFile pulls the file from HDFS,
    // so we pass the HDFS path rather than a local one.
    public static void main(String[] args) throws IOException {
        StringBuilder sb = new StringBuilder();
        String key = sb.append("c5861e2b62c86e142ae856950a785f22").append(":")
                .append("AH_CMCC").toString();

        new Key2Mac().ReadFile(key2Mac, dataFile);
        String value = key2Mac.get(key);
        System.out.println(value);
    }

    @Override
    public String getDisplayString(String[] args) {
        StringBuilder sb = new StringBuilder();
        sb.append("convert apikey ");
        sb.append(args[0]);
        sb.append(" to relevant mac; if no mapping is found, returns ");
        sb.append(args[0]);
        return sb.toString();
    }

}
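
Since the resource file is now read from HDFS, the jar no longer needs to sit next to it; for example, both can live on HDFS (the paths here are illustrative):

add jar hdfs://nameservice1/udf/key2mac.jar;
create temporary function key2mac as 'cn.gitv.bi.log.analysis.udf.Key2Mac';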
