// Hive UDAF example

package com.csm.data.udf.hive;

 

import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;

import org.apache.hadoop.hive.ql.metadata.HiveException;

import org.apache.hadoop.hive.ql.parse.SemanticException;

import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

import org.apache.hadoop.io.Text;

 

/**

 * 根据

 * copr_idAcont_idA

 * copr_idAcont_idB

 * copr_idAcont_idC

 * 

 * 得到

 * copr_idAcont_idB

 * 

 * @author hadoop_szty

 *

 */

public class UDAFGetCont extends AbstractGenericUDAFResolver {

 

 

/**

* 验证参数是否正确

*/

@Override

public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {

//只允许一个参数传入

if (parameters.length != 1) {

throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");

}

//这里只能传入原始的数据类型,其他的列表,数组不能处理

if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {

throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted.");

}

return new Evaluator();

}

 

 

public static class Evaluator extends GenericUDAFEvaluator {

//最终结果变量

private Text resContId;

private PrimitiveObjectInspector inputOI;

 

public Evaluator() {

super();

}

 

@Override

public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {

// TODO Auto-generated method stub

super.init(m, parameters);

resContId = new Text();

inputOI = (PrimitiveObjectInspector) parameters[0];

return PrimitiveObjectInspectorFactory.writableStringObjectInspector;

}

 

static class ContAgg implements AggregationBuffer {

boolean empty;

String resCont;

}

 

// 返回存储临时聚合结果的AggregationBuffer对象。

@Override

public AggregationBuffer getNewAggregationBuffer() throws HiveException {

ContAgg result = new ContAgg();

reset(result);

return result;

}

 

// 重置聚合结果对象,以支持mapper和reducer的重用。

@Override

public void reset(AggregationBuffer agg) throws HiveException {

ContAgg myagg = (ContAgg) agg;

myagg.empty = true;

myagg.resCont = "";

 

}

 

// 迭代处理原始数据parameters并保存到agg中。

@Override

public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {

// TODO Auto-generated method stub

merge(agg, parameters[0]);

}

 

// 以持久化的方式返回agg表示的部分聚合结果,

// 这里的持久化意味着返回值只能Java基础类型、数组、基础类型包装器、Hadoop的Writables、Lists和Maps。

@Override

public Object terminatePartial(AggregationBuffer agg) throws HiveException {

return terminate(agg);

}

 

// 合并由partial表示的部分聚合结果到agg中。

@Override

public void merge(AggregationBuffer agg, Object partial) throws HiveException {

if (partial != null) {

ContAgg myagg = (ContAgg) agg;

String contIdStr = PrimitiveObjectInspectorUtils.getString(partial, inputOI);

if (!contIdStr.equals("")) {

if (contIdStr.length() == 18) {

String flag = contIdStr.toString().substring(8, 10);

if (flag.equals("02") || flag.equals("03")) {

myagg.resCont = contIdStr;

myagg.empty = false;

}

} else if (contIdStr.length() == 22 && myagg.resCont.length() != 18) {

myagg.resCont = contIdStr;

myagg.empty = false;

}

}

}

}

 

// 返回最终结果。

@Override

public Object terminate(AggregationBuffer agg) throws HiveException {

ContAgg myagg = (ContAgg) agg;

if (myagg.empty) {

return null;

}

resContId.set(myagg.resCont);

return resContId;

}

}

}

// You may also be interested in: (hive, udaf)