package com.csm.data.udf.hive;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Text;
/**
 * Given the values
 *   copr_idAcont_idA
 *   copr_idAcont_idB
 *   copr_idAcont_idC
 *
 * returns the single preferred value
 *   copr_idAcont_idB
 *
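 * Example registration and invocation (a sketch; the jar name, function
 * name, and the table and column names below are hypothetical):
 *
 *   ADD JAR udaf_get_cont.jar;
 *   CREATE TEMPORARY FUNCTION get_cont AS 'com.csm.data.udf.hive.UDAFGetCont';
 *   SELECT copr_id, get_cont(cont_id) FROM cont_info GROUP BY copr_id;
 *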
 * @author hadoop_szty
 */
public class UDAFGetCont extends AbstractGenericUDAFResolver {
    /**
     * Validates that the arguments are correct before returning an evaluator.
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
        // Exactly one argument may be passed in.
        if (parameters.length != 1) {
            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
        }
        // Only primitive types can be handled here; lists, arrays and other
        // complex types are not supported.
        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted.");
        }
        return new Evaluator();
    }
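
    /*
     * Standard GenericUDAFEvaluator flow, for reference: iterate() consumes
     * raw rows on the map side, terminatePartial() emits a partial result,
     * merge() combines partials on the reduce side, and terminate() produces
     * the final value. Here the partial and final results share the same
     * string representation, which is why iterate() can delegate to merge()
     * and terminatePartial() to terminate().
     */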
    public static class Evaluator extends GenericUDAFEvaluator {
        // Final result holder, reused across groups to avoid reallocation.
        private Text resContId;
        // Inspects both the raw input and the partial result (both strings).
        private PrimitiveObjectInspector inputOI;

        public Evaluator() {
            super();
        }

        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
            super.init(m, parameters);
            resContId = new Text();
            // In PARTIAL1/COMPLETE mode parameters[0] inspects the raw input;
            // in PARTIAL2/FINAL mode it inspects the partial result. Both are
            // strings here, so a single inspector covers every mode.
            inputOI = (PrimitiveObjectInspector) parameters[0];
            // The partial and final results are both writable strings.
            return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        }

        // Intermediate aggregation state: the best contract id seen so far.
        static class ContAgg implements AggregationBuffer {
            boolean empty;
            String resCont;
        }

        // Returns a new AggregationBuffer that holds the intermediate
        // aggregation result.
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            ContAgg result = new ContAgg();
            reset(result);
            return result;
        }

        // Resets the aggregation buffer so it can be reused by mappers and
        // reducers.
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            ContAgg myagg = (ContAgg) agg;
            myagg.empty = true;
            myagg.resCont = "";
        }

        // Processes one raw input row and folds it into agg.
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            // A raw value and a partial result are both single strings, so
            // iterate can simply delegate to merge.
            merge(agg, parameters[0]);
        }

        // Returns the partial aggregation held in agg in a persistable form:
        // the value may only be a Java primitive, an array, a primitive
        // wrapper, a Hadoop Writable, a List or a Map.
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            return terminate(agg);
        }

        // Merges the partial aggregation represented by partial into agg.
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            if (partial != null) {
                ContAgg myagg = (ContAgg) agg;
                String contIdStr = PrimitiveObjectInspectorUtils.getString(partial, inputOI);
                if (!contIdStr.isEmpty()) {
                    if (contIdStr.length() == 18) {
                        // An 18-character id wins when positions 8-9 carry the
                        // flag "02" or "03".
                        String flag = contIdStr.substring(8, 10);
                        if (flag.equals("02") || flag.equals("03")) {
                            myagg.resCont = contIdStr;
                            myagg.empty = false;
                        }
                    } else if (contIdStr.length() == 22 && myagg.resCont.length() != 18) {
                        // A 22-character id is kept only while no qualifying
                        // 18-character id has been recorded.
                        myagg.resCont = contIdStr;
                        myagg.empty = false;
                    }
                }
            }
        }

        // Returns the final result, or null if no contract id qualified.
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            ContAgg myagg = (ContAgg) agg;
            if (myagg.empty) {
                return null;
            }
            resContId.set(myagg.resCont);
            return resContId;
        }
    }
}