package com.paic.pad.dp.hbase.udf;
import static org.apache.hadoop.hbase.util.Bytes.toBytes;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
/**
 * Imports Hive query results into an HBase table. Register the function with:
 * CREATE TEMPORARY FUNCTION pad_put_HBase AS
 * 'com.paic.pad.dp.hbase.udf.UDFHbaseMerge';
 */
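/*
 * Example usage (a sketch; the quorum hosts, table, and column names below
 * are illustrative, not taken from a real deployment — note the " , "
 * separator between configuration properties):
 *
 * SELECT pad_put_HBase(
 * 'hbase.zookeeper.quorum=zk1,zk2,zk3 , hbase.zookeeper.property.clientPort=2181',
 * 'my_hbase_table', 'F', rowkey_col,
 * 'c1,c2', c1_col, c2_col)
 * FROM my_hive_table;
 */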
@Description(name = "pad_put_HBase", value = "_FUNC_(configList, hbaseTable, CF, rowKey, 'name1,name2,name3', c1, c2, c3, ...) - reads rows from Hive and writes them to HBase; "
+ "returns 'success', or the error message if the write fails.", extended = "The first argument is a list of HBase configuration properties as key=value pairs separated by ' , ', "
+ "the second argument is the HBase table name, "
+ "the third argument is the column family, "
+ "the fourth argument is the row key, "
+ "the fifth argument is the comma-separated list of column names, "
+ "and the remaining arguments are the column values. "
+ "Example: SELECT _FUNC_('hbase.zookeeper.quorum=zk1,zk2', 'tableName', 'columnFamily', key, 'columnName1,columnName2', columnName1value, columnName2value) FROM dual;")
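// Marked non-deterministic: every call has the side effect of an HBase
// write, so Hive must not constant-fold or cache the result.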
@UDFType(deterministic = false)
public class UDFHbaseMerge extends GenericUDF {
private static final Log LOG = LogFactory.getLog(UDFHbaseMerge.class
.getName());
protected transient ObjectInspector[] argumentOI;
protected transient String hbaseTable;
protected HTable table;
protected HConnection connection;
protected static String cf = "F";
protected static String[] cols;
protected final static String NULL_FLAG = "";
protected final Text result = new Text();
protected String zookeeperQuorum;
protected String parameters;
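// Puts are buffered and flushed in batches of putListLimitNum to reduce
// RPC round trips. The buffer is static, so it is shared by every
// instance of this UDF in the same JVM.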
protected static final List<Put> putList = new ArrayList<Put>();
protected final static int putListLimitNum = 1000;
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentTypeException {
argumentOI = arguments;
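// The first three arguments (configuration list, table name, column
// family) are expected to be strings.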
for (int i = 0; i < 3; i++) {
if (arguments[i].getCategory() == ObjectInspector.Category.PRIMITIVE) {
PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) arguments[i]);
if (!(poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING)) {
throw new UDFArgumentTypeException(i,
"The argument of function should be \""
+ serdeConstants.STRING_TYPE_NAME
+ "\", but \"" + arguments[i].getTypeName()
+ "\" is found");
}
}
}
for (int i = 3; i < arguments.length; i++) {
if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(i,
"The argument of function should be primative"
+ ", but \"" + arguments[i].getTypeName()
+ "\" is found");
}
}
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) {
try {
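// Lazily open the HBase connection on the first row, once the constant
// arguments (configuration, table, column family, column names) are bound.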
if (table == null) {
parameters = getDeferredObject(arguments, 0);
hbaseTable = getDeferredObject(arguments, 1);
cf = getDeferredObject(arguments, 2);
cols = getDeferredObject(arguments, 4).split(",");
Configuration conf = HBaseConfiguration.create();
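// The first argument carries HBase configuration as "key=value" pairs
// separated by " , " (comma padded with spaces), so values such as a
// ZooKeeper quorum list may themselves contain commas.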
for (String para : parameters.split(" , ")) {
String[] kv = para.split("=", 2); // limit 2: keep any '=' inside the value
conf.set(kv[0], kv[1]);
}
// Typical properties supplied through the first argument:
// conf.set("hbase.zookeeper.quorum", zookeeperQuorum);
// conf.set("hbase.zookeeper.property.clientPort", "2181");
// conf.set("mapred.task.timeout", "3600000"); // ms
// conf.set("dfs.socket.timeout", "3600000");
// conf.set("dfs.datanode.socket.write.timeout", "3600000");
connection = HConnectionManager.createConnection(conf);
table = (HTable) connection.getTable(hbaseTable);
table.setAutoFlush(false, false);
}
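// Buffer the put locally and hand the batch to the client once the
// limit is exceeded (autoflush is off, so the client may keep buffering
// until flushCommits() is called or its write buffer fills).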
Put put = getPut(arguments);
putList.add(put);
if (putList.size() > putListLimitNum) {
table.put(putList);
//table.flushCommits();
putList.clear();
}
result.set("success");
} catch (Exception ex) {
LOG.error("HBase import failed", ex);
result.set(ex.toString());
this.close();
}
return result;
}
@Override
public String getDisplayString(String[] children) {
StringBuilder sb = new StringBuilder();
sb.append("pad_default_hive2HBase(");
if (children.length > 0) {
sb.append(children[0]);
for (int i = 1; i < children.length; i++) {
sb.append(",");
sb.append(children[i]);
}
}
sb.append(")");
return sb.toString();
}
@Override
public void close() {
try {
super.close();
if (table != null) {
// Flush any puts still buffered before releasing resources.
if (!putList.isEmpty()) {
table.put(putList);
putList.clear();
}
table.flushCommits();
table.close();
connection.close();
}
} catch (Exception e) {
// Log via the table-name string; the table reference itself may be null here.
LOG.error(hbaseTable + " close error " + e.getMessage(), e);
} finally {
table = null;
connection = null;
}
}
@Override
public String[] getRequiredFiles() {
return super.getRequiredFiles();
}
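// Reads the argument at `index` as a string; SQL NULLs come back as the
// empty string (NULL_FLAG) so HBase never receives a null value.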
protected String getDeferredObject(DeferredObject[] arguments, int index)
throws HiveException {
if (arguments[index].get() == null) {
return NULL_FLAG;
}
return ((PrimitiveObjectInspector) argumentOI[index])
.getPrimitiveJavaObject(arguments[index].get()).toString();
}
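// Builds one Put: argument 3 is the row key, and the values starting at
// argument 5 line up positionally with the column names parsed from
// argument 4.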
protected Put getPut(DeferredObject[] arguments) throws Exception {
String rowKey = getDeferredObject(arguments, 3);
Put put = new Put(toBytes(rowKey));
for (int i = 0; i < cols.length; i++) {
put.add(toBytes(cf), toBytes(cols[i]), toBytes(getDeferredObject(
arguments, i + 5)));
}
return put;
}
}