hive udf开发(已在公司实际开发环境使用)

一:编写udf函数

1:import org.apache.hadoop.hive.ql.exec.UDF;

2:继承UDF类

3:实现evaluate函数

我的代码是取key的md5值,代码如下:

package Udf;

import org.apache.hadoop.hive.ql.exec.UDF;

import Utils.MurmurHash;
import Utils.GetMd5Sign;


/**
 * Hive UDF that maps a string key to a bucket number in {@code [0, 999983)}.
 *
 * <p>The key is first turned into its MD5 hex string, that string is hashed
 * with {@code MurmurHash.main} using a fixed seed, and the signed 64-bit result
 * is reduced to a non-negative remainder modulo {@code 999983}.
 */
public class Getmd5 extends UDF{
    /** Number of buckets; the returned value is always in {@code [0, BUCKETS)}. */
    private static final long BUCKETS = 999983;

    /** Fixed hash seed. The int literal is widened to long exactly as written. */
    private static final long SEED = 0xe17a1465;

    // Not referenced by evaluate(); every call below goes through static methods.
    MurmurHash murmurhash = new MurmurHash();

    /**
     * Computes the bucket number for {@code key}.
     *
     * @param key the value to hash
     * @return the non-negative remainder of the hash modulo {@code 999983}
     */
    public static long evaluate(String key){
        final String md5Hex = GetMd5Sign.md5Java(key);
        final long hashed = MurmurHash.main(md5Hex, SEED);
        // floorMod with a positive divisor never yields a negative remainder.
        return Math.floorMod(hashed, BUCKETS);
    }

}
package Utils;

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;

/**
 * Static implementations of several MurmurHash variants over byte arrays,
 * plus convenience overloads that hash the UTF-8 bytes of a string.
 *
 * <p>All input bytes are treated as unsigned values in {@code [0, 255]}.
 */
public class MurmurHash {
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Hashes bytes in an array with the 32-bit MurmurHash3 (x86) algorithm.
     *
     * @param data The bytes to hash.
     * @param seed The seed for the hash.
     * @return The 32 bit hash of the bytes in question.
     */
    public static int MurmurHash3_x86_32(byte[] data, int seed) {
        final int c1 = 0xcc9e2d51;
        final int c2 = 0x1b873593;
        final int length = data.length;
        final int length4 = length >>> 2;

        int h1 = seed;

        // body: consume the input four bytes at a time, little-endian
        for (int i = 0; i < length4; ++i) {
            final int i4 = i << 2;
            int k1 = (data[i4] & 0xFF) | ((data[i4 + 1] & 0xFF) << 8)
                    | ((data[i4 + 2] & 0xFF) << 16) | ((data[i4 + 3] & 0xFF) << 24);

            k1 *= c1;
            k1 = Integer.rotateLeft(k1, 15);
            k1 *= c2;

            h1 ^= k1;
            h1 = Integer.rotateLeft(h1, 13);
            h1 = h1 * 5 + 0xe6546b64;
        }

        // tail: the remaining 1-3 bytes. Each byte is masked with 0xFF so that
        // values >= 0x80 are not sign-extended into the higher bits of k1.
        int k1 = 0;

        final int tail = length4 << 2;
        switch (length & 3) {
            case 3:
                k1 ^= (data[tail + 2] & 0xFF) << 16;
                // fall through
            case 2:
                k1 ^= (data[tail + 1] & 0xFF) << 8;
                // fall through
            case 1:
                k1 ^= (data[tail] & 0xFF);
                k1 *= c1;
                k1 = Integer.rotateLeft(k1, 15);
                k1 *= c2;
                h1 ^= k1;
                break;
            default:
                break;
        }

        // finalization
        h1 ^= length;
        // fmix32(h1)
        h1 ^= h1 >>> 16;
        h1 *= 0x85ebca6b;
        h1 ^= h1 >>> 13;
        h1 *= 0xc2b2ae35;
        h1 ^= h1 >>> 16;

        return h1;
    }

    /**
     * Computes a 64-bit hash of {@code data}.
     *
     * <p>Only the low 32 bits of {@code seed} take part in the computation.
     *
     * @param data The bytes to hash.
     * @param seed The seed for the hash; the upper 32 bits are ignored.
     * @return The 64 bit hash of the bytes in question.
     */
    public static long MurmurHash64(final byte[] data, long seed) {
        final long M = 0xc6a4a7935bd1e995L;
        final int R = 47;
        final int length = data.length;
        final int length8 = length >>> 3;
        long h = (seed & 0xffffffffL) ^ (length * M);

        // body: consume the input eight bytes at a time, little-endian
        for (int i = 0; i < length8; i++) {
            final int i8 = i << 3;
            long k = (data[i8 + 0] & 0xFFL) + ((data[i8 + 1] & 0xFFL) << 8)
                    + ((data[i8 + 2] & 0xFFL) << 16) + ((data[i8 + 3] & 0xFFL) << 24)
                    + ((data[i8 + 4] & 0xFFL) << 32) + ((data[i8 + 5] & 0xFFL) << 40)
                    + ((data[i8 + 6] & 0xFFL) << 48) + ((data[i8 + 7] & 0xFFL) << 56);

            k *= M;
            k ^= k >>> R;
            k *= M;

            h ^= k;
            h *= M;
        }

        // tail: the remaining 1-7 bytes; every case falls through to the next
        final int tail = length8 << 3;
        switch (length & 7) {
            case 7:
                h ^= (data[tail + 6] & 0xFFL) << 48;
                // fall through
            case 6:
                h ^= (data[tail + 5] & 0xFFL) << 40;
                // fall through
            case 5:
                h ^= (data[tail + 4] & 0xFFL) << 32;
                // fall through
            case 4:
                h ^= (data[tail + 3] & 0xFFL) << 24;
                // fall through
            case 3:
                h ^= (data[tail + 2] & 0xFFL) << 16;
                // fall through
            case 2:
                h ^= (data[tail + 1] & 0xFFL) << 8;
                // fall through
            case 1:
                h ^= (data[tail] & 0xFFL);
                h *= M;
                break;
            default:
                break;
        }

        h ^= h >>> R;
        h *= M;
        h ^= h >>> R;

        return h;
    }

    /** Reads eight bytes starting at {@code key[i]} as a little-endian long. */
    private static long getblock(byte[] key, int i) {
        return ((key[i + 0] & 0xFFL) << 0) | ((key[i + 1] & 0xFFL) << 8)
                | ((key[i + 2] & 0xFFL) << 16) | ((key[i + 3] & 0xFFL) << 24)
                | ((key[i + 4] & 0xFFL) << 32) | ((key[i + 5] & 0xFFL) << 40)
                | ((key[i + 6] & 0xFFL) << 48) | ((key[i + 7] & 0xFFL) << 56);
    }

    /** Final 64-bit mixing step applied to each half of the 128-bit state. */
    private static long fmix(long k) {
        k ^= k >>> 33;
        k *= 0xff51afd7ed558ccdL;
        k ^= k >>> 33;
        k *= 0xc4ceb9fe1a85ec53L;
        k ^= k >>> 33;

        return k;
    }

    /**
     * Hashes bytes in an array with the 128-bit MurmurHash3 (x64) algorithm.
     *
     * <p>The seed is interpreted as an unsigned 32-bit value, so a negative
     * {@code int} is zero-extended rather than sign-extended into the state.
     *
     * @param key  The bytes to hash.
     * @param seed The seed for the hash.
     * @return A two-element array holding the two 64-bit halves of the hash.
     */
    public static long[] MurmurHash3_x64_128(final byte[] key, final int seed) {
        final long c1 = 0x87c37b91114253d5L;
        final long c2 = 0x4cf5ad432745937fL;
        final int length = key.length, length16 = length >>> 4;
        long h1 = seed & 0xFFFFFFFFL;
        long h2 = seed & 0xFFFFFFFFL;
        long k1, k2;

        // body: consume the input sixteen bytes at a time
        for (int i = 0; i < length16; i++) {
            k1 = getblock(key, i * 2 * 8);
            k2 = getblock(key, (i * 2 + 1) * 8);

            k1 *= c1;
            k1 = Long.rotateLeft(k1, 31);
            k1 *= c2;
            h1 ^= k1;

            h1 = Long.rotateLeft(h1, 27);
            h1 += h2;
            h1 = h1 * 5 + 0x52dce729;

            k2 *= c2;
            k2 = Long.rotateLeft(k2, 33);
            k2 *= c1;
            h2 ^= k2;

            h2 = Long.rotateLeft(h2, 31);
            h2 += h1;
            h2 = h2 * 5 + 0x38495ab5;
        }

        k1 = 0;
        k2 = 0;

        final int tail = length16 << 4;

        // tail: the remaining 1-15 bytes. Each byte is masked with 0xFFL so
        // that values >= 0x80 are not sign-extended into the higher bits.
        switch (length & 15) {
            case 15:
                k2 ^= (key[tail + 14] & 0xFFL) << 48;
                // fall through
            case 14:
                k2 ^= (key[tail + 13] & 0xFFL) << 40;
                // fall through
            case 13:
                k2 ^= (key[tail + 12] & 0xFFL) << 32;
                // fall through
            case 12:
                k2 ^= (key[tail + 11] & 0xFFL) << 24;
                // fall through
            case 11:
                k2 ^= (key[tail + 10] & 0xFFL) << 16;
                // fall through
            case 10:
                k2 ^= (key[tail + 9] & 0xFFL) << 8;
                // fall through
            case 9:
                k2 ^= (key[tail + 8] & 0xFFL);
                k2 *= c2;
                k2 = Long.rotateLeft(k2, 33);
                k2 *= c1;
                h2 ^= k2;
                // fall through
            case 8:
                k1 ^= (key[tail + 7] & 0xFFL) << 56;
                // fall through
            case 7:
                k1 ^= (key[tail + 6] & 0xFFL) << 48;
                // fall through
            case 6:
                k1 ^= (key[tail + 5] & 0xFFL) << 40;
                // fall through
            case 5:
                k1 ^= (key[tail + 4] & 0xFFL) << 32;
                // fall through
            case 4:
                k1 ^= (key[tail + 3] & 0xFFL) << 24;
                // fall through
            case 3:
                k1 ^= (key[tail + 2] & 0xFFL) << 16;
                // fall through
            case 2:
                k1 ^= (key[tail + 1] & 0xFFL) << 8;
                // fall through
            case 1:
                k1 ^= (key[tail + 0] & 0xFFL);
                k1 *= c1;
                k1 = Long.rotateLeft(k1, 31);
                k1 *= c2;
                h1 ^= k1;
                break;
            default:
                break;
        }

        // finalization
        h1 ^= length;
        h2 ^= length;

        h1 += h2;
        h2 += h1;

        h1 = fmix(h1);
        h2 = fmix(h2);

        h1 += h2;
        h2 += h1;

        return new long[]{h1, h2};
    }

    /**
     * Hashes the UTF-8 bytes of {@code str} with {@link #MurmurHash3_x64_128(byte[], int)}.
     *
     * <p>The seed is {@code str.length()} (the UTF-16 length, not the byte count).
     *
     * @param str The string to hash.
     * @return The first 64-bit half of the 128-bit hash.
     */
    public static long MurmurHash3_x64_128(String str) {
        return MurmurHash3_x64_128(str.getBytes(UTF8), str.length())[0];
    }

    /**
     * Hashes {@code str} like {@link #MurmurHash3_x64_128(String)}, clears the top
     * four bits of the result and ORs {@code index << 60} into them.
     *
     * @param str   The string to hash.
     * @param index Value whose low four bits end up in bits 60-63 of the result.
     * @return The combined 64-bit value.
     */
    public static long MurmurHash3_x64_128_unsigned(String str, long index) {
        long hash = MurmurHash3_x64_128(str.getBytes(UTF8), str.length())[0] & 0X0FFFFFFFFFFFFFFFL;
        return hash | (index << 60);
    }

    /**
     * Hashes the UTF-8 bytes of {@code str} with {@link #MurmurHash3_x86_32(byte[], int)},
     * seeded with {@code str.length()}, and clears the sign bit.
     *
     * @param str The string to hash.
     * @return A non-negative 31-bit hash value.
     */
    public static int MurmurHash3_x86_32(String str) {
        return MurmurHash3_x86_32(str.getBytes(UTF8), str.length()) & 0x7FFFFFFF;
    }

    /**
     * Formats {@code signedLong} as an unsigned 64-bit decimal number.
     *
     * @param signedLong The bit pattern to format.
     * @return The decimal string of the value read as unsigned.
     */
    public static String ToUnsignedLongString(long signedLong) {
        return Long.toUnsignedString(signedLong);
    }

    /**
     * Hashes the UTF-8 bytes of {@code args} with {@link #MurmurHash64(byte[], long)}.
     * Despite its name this is an ordinary helper, not a program entry point.
     *
     * @param args The string to hash.
     * @param feed The seed passed through to {@code MurmurHash64}.
     * @return The 64 bit hash.
     */
    public static long main(String args, long feed) {
        return MurmurHash64(args.getBytes(UTF8), feed);
    }
}
package Utils;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * Helper for producing the MD5 digest of a string as lowercase hexadecimal.
 */
public class GetMd5Sign {
    /**
     * Computes the MD5 digest of the UTF-8 bytes of {@code message}.
     *
     * @param message the text to digest; must not be {@code null}
     * @return the 32-character lowercase hexadecimal digest
     * @throws NullPointerException  if {@code message} is {@code null}
     * @throws IllegalStateException if the JVM provides no MD5 digest or no
     *                               UTF-8 charset, instead of silently returning
     *                               {@code null} and failing later in the caller
     */
    public static String md5Java(String message) {
        if (message == null) {
            throw new NullPointerException("message must not be null");
        }
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] hash = md.digest(message.getBytes("UTF-8"));

            // Convert the digest to hex, two lowercase digits per byte.
            StringBuilder sb = new StringBuilder(2 * hash.length);
            for (byte b : hash) {
                sb.append(Character.forDigit((b >> 4) & 0xF, 16));
                sb.append(Character.forDigit(b & 0xF, 16));
            }
            return sb.toString();
        } catch (UnsupportedEncodingException | NoSuchAlgorithmException ex) {
            throw new IllegalStateException("MD5 digest or UTF-8 charset is unavailable", ex);
        }
    }
}

这是我的 pom 文件。需要注意 hadoop 和 hive 的版本问题:CDH 商业版本需要自定义仓库源。查看 hadoop 版本的命令是 `hadoop version`;查看 hive 版本可以执行 `hive` 命令,然后看输出的提示信息。我的 hadoop 版本是 2.6.0-cdh5.14.2,hive 版本是 1.2.1。pom 文件如下:



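<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>Udf</groupId>
    <artifactId>Getmd5</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.14.2</hadoop.version>
        <hive.version>1.2.1</hive.version>
    </properties>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>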


然后使用 IDEA 打成 jar 包。这一步我踩过坑:通过 File -> Project Structure -> Artifacts 的方式打出来的 jar 包是不对的,在 Hive 中创建函数时会提示找不到类。正确的做法是通过 View -> Tool Windows -> Maven -> package 这个流程来打包。

你可能感兴趣的:(hive,shell,编程记录)