// Hive batch import into HBase

package com.paic.pad.dp.hbase.udf;

import static org.apache.hadoop.hbase.util.Bytes.toBytes;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
* import hive data to hbase table create this function: CREATE TEMPORARY
* FUNCTION upa_default_hive2HBase as
* ‘com.unionpay.upa.hive.udf.UDFDefaultHiveOutputToHbase’;
*/
@Description(name = “pad_put_HBase”, value = “FUNC(zookeeperQuorum, hbaseTable, CF, rowKey, ‘name1, name2, name3’, c1, c2, c3, …) - read data from hive and import it to hbase, ”
+ “returns success of the import.”, extended = “The first argument is zookeeperQuorum, ”
+ “the second argument is the hbase table, ”
+ “the Third argument is the CF, ”
+ “the fourth argument is the rowKey, ”
+ “the other args should be a map, seprated by ‘,’ .”
+ “example: select FUNC(‘zookeeperQuorum’, ‘tableName’, ‘columFamily’, key, ‘columnName1,columnName2’, columnName1value,columnName2value) from dual;”)
@UDFType(deterministic = false)
public class UDFHbaseMerge extends GenericUDF {
private static final Log LOG = LogFactory.getLog(UDFHbaseMerge.class
.getName());

protected transient ObjectInspector[] argumentOI;
protected transient String hbaseTable;
protected HTable table;
protected HConnection connection;
protected static String cf = "F";
protected static String[] cols;
protected final static String NULL_FLAG = "";
protected final Text result = new Text();
protected String zookeeperQuorum;
protected String parameters;

protected static final List<Put> putList=new ArrayList<Put>();
protected final static int putListLimitNum=1000;

@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
        throws UDFArgumentTypeException {
    argumentOI = arguments;
    for (int i = 0; i < 3; i++) {
        if (arguments[i].getCategory() == ObjectInspector.Category.PRIMITIVE) {
            PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) arguments[i]);
            if (!(poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING)) {
                throw new UDFArgumentTypeException(i,
                        "The argument of function  should be \""
                                + serdeConstants.STRING_TYPE_NAME
                                + "\", but \"" + arguments[i].getTypeName()
                                + "\" is found");
            }
        }
    }
    for (int i = 3; i < arguments.length; i++) {

        if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {

            throw new UDFArgumentTypeException(i,
                    "The argument of function should be primative"
                            + ", but \"" + arguments[i].getTypeName()
                            + "\" is found");
        }
    }

    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}

@Override
public Object evaluate(DeferredObject[] arguments) {
    try {
        if (table == null) {
                parameters = getDeferredObject(arguments, 0);
            hbaseTable = getDeferredObject(arguments, 1);
            cf = getDeferredObject(arguments, 2);
            cols = getDeferredObject(arguments, 4).split(",");
            Configuration conf = HBaseConfiguration.create();

            for (String para:parameters.split(" , ")) {
                conf.set(para.split("=")[0], para.split("=")[1]);
            }

// conf.set(“hbase.zookeeper.quorum”, zookeeperQuorum);
// conf.set(“hbase.zookeeper.property.clientPort”, “2181”);
// conf.set(“mapred.task.timeout”, “3600000”); // ms
// conf.set(“dfs.socket.timeout”, “3600000”);
// conf.set(“dfs.datanode.socket.write.timeout”, “3600000”);

            connection = HConnectionManager.createConnection(conf);
            table = (HTable) connection.getTable(hbaseTable);
            table.setAutoFlush(false, false);
        }
        Put put = getPut(arguments);
        putList.add(put);

        if (putList.size()>putListLimitNum) {
            table.put(putList); 
            //table.flushCommits();
            putList.clear();
        }

        result.set("success");
    } catch (Exception ex) {
        LOG.error(ex);
        result.set(ex.toString());
        this.close();
    }
    return result;
}

@Override
public String getDisplayString(String[] children) {
    StringBuilder sb = new StringBuilder();
    sb.append("pad_default_hive2HBase(");
    if (children.length > 0) {
        sb.append(children[0]);
        for (int i = 1; i < children.length; i++) {
            sb.append(",");
            sb.append(children[i]);
        }
    }
    sb.append(")");
    return sb.toString();
}

@Override
public void close() {
    try {
        super.close();
        if (table != null) {
                table.put(putList);
            table.flushCommits();
            table.close();
            connection.close();
        }
    } catch (Exception e) {
        LOG.error(Bytes.toString(table.getTableName()) + "  close  error " + e.getMessage());
    }

}

@Override
public String[] getRequiredFiles() {
    return super.getRequiredFiles();
}

protected String getDeferredObject(DeferredObject[] arguments, int index)
        throws HiveException {
    if (arguments[index].get() == null) {
        return NULL_FLAG;
    }
    return ((PrimitiveObjectInspector) argumentOI[index])
            .getPrimitiveJavaObject(arguments[index].get()).toString();
}

protected Put getPut(DeferredObject[] arguments) throws Exception {
    String rowKey = getDeferredObject(arguments, 3);
    Put put = new Put(toBytes(rowKey));
    for (int i = 0; i < cols.length; i++) {
        put.add(toBytes(cf), toBytes(cols[i]), toBytes(getDeferredObject(
                arguments, i + 5)));
    }
    return put;
}

}

// (blog footer: "you may also be interested in: Hive batch import into HBase")