Hive自定义函数解析

Hive允许用户自定义函数对数据信息处理,可以使用 show functions 查看 Hive 当前支持的函数。

hive支持三种类型的UDF函数:

  • 普通UDF函数
    操作单个数据行,且产生一个数据作为输出。例如(数学函数,字符串函数)
  • 聚合udf (UDAF)
    接受多个数据行,并产生一个数据行作为输出。例如(COUNT,MAX函数等)
  • 表生成UDF(UDTF)
    接受一个数据行,然后返回产生多个数据行(一个表作为输出)

UDF函数处理单行单列输入并产生单个输出,UDAF处理多行单列数据并聚合为单个输出,UDTF函数则接受单行输入、产生多行输出(即"一进多出")。

UDF函数示例

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * @program: udf-function
 * @description: 返回不小于输入值value的最小整数
 * @author zhangchenguang
 *
 */
public class CeilUDF extends UDF {

	/**
	 * Returns the smallest integer value that is not less than the input.
	 *
	 * @param value numeric string to round up; may be null or blank
	 * @return the ceiling as a Long, or null when the input is null, blank,
	 *         or not a parseable number (Hive convention: NULL in, NULL out)
	 */
	public Long evaluate(String value) {
		try {
			if (StringUtils.isNotBlank(value)) {
				// Parse, round up, and narrow to long — no intermediate boxing needed.
				return (long) Math.ceil(Double.parseDouble(value));
			}
			return null;
		} catch (NumberFormatException e) {
			// Malformed numbers yield NULL rather than failing the whole query.
			return null;
		}
	}
}

UDAF函数示例:

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
/**
 * UDAF实现加强版 MIN函数
 * @author zhangchenguang
 *
 */
/**
 * Enhanced MIN aggregate function (classic UDAF style).
 *
 * <p>One evaluator inner class per supported input type; Hive resolves the
 * matching evaluator by the {@code iterate} parameter type. Each evaluator
 * ignores NULL inputs, and the string evaluator additionally skips
 * blank/whitespace-only strings so they never win the minimum.
 */
public class MinUDF extends UDAF {

	/** MIN over SMALLINT values; NULLs are ignored. */
	static public class MinShortEvaluator implements UDAFEvaluator {
		private short mMin;      // current minimum; only valid when !mEmpty
		private boolean mEmpty;  // true until the first non-null value arrives

		public MinShortEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = 0;
			mEmpty = true;
		}

		public boolean iterate(ShortWritable o) {
			if (o != null) {
				if (mEmpty) {
					mMin = o.get();
					mEmpty = false;
				} else {
					mMin = (short) Math.min(mMin, o.get());
				}
			}
			return true;
		}

		public ShortWritable terminatePartial() {
			// NULL partial means "no rows seen yet" and is skipped by merge().
			return mEmpty ? null : new ShortWritable(mMin);
		}

		public boolean merge(ShortWritable o) {
			return iterate(o);
		}

		public ShortWritable terminate() {
			return mEmpty ? null : new ShortWritable(mMin);
		}
	}

	/** MIN over INT values; NULLs are ignored. */
	static public class MinIntEvaluator implements UDAFEvaluator {
		private int mMin;
		private boolean mEmpty;

		public MinIntEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = 0;
			mEmpty = true;
		}

		public boolean iterate(IntWritable o) {
			if (o != null) {
				if (mEmpty) {
					mMin = o.get();
					mEmpty = false;
				} else {
					mMin = Math.min(mMin, o.get());
				}
			}
			return true;
		}

		public IntWritable terminatePartial() {
			return mEmpty ? null : new IntWritable(mMin);
		}

		public boolean merge(IntWritable o) {
			return iterate(o);
		}

		public IntWritable terminate() {
			return mEmpty ? null : new IntWritable(mMin);
		}
	}

	/** MIN over BIGINT values; NULLs are ignored. */
	static public class MinLongEvaluator implements UDAFEvaluator {
		private long mMin;
		private boolean mEmpty;

		public MinLongEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = 0;
			mEmpty = true;
		}

		public boolean iterate(LongWritable o) {
			if (o != null) {
				if (mEmpty) {
					mMin = o.get();
					mEmpty = false;
				} else {
					mMin = Math.min(mMin, o.get());
				}
			}
			return true;
		}

		public LongWritable terminatePartial() {
			return mEmpty ? null : new LongWritable(mMin);
		}

		public boolean merge(LongWritable o) {
			return iterate(o);
		}

		public LongWritable terminate() {
			return mEmpty ? null : new LongWritable(mMin);
		}
	}

	/** MIN over FLOAT values; NULLs are ignored. */
	static public class MinFloatEvaluator implements UDAFEvaluator {
		private float mMin;
		private boolean mEmpty;

		public MinFloatEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = 0;
			mEmpty = true;
		}

		public boolean iterate(FloatWritable o) {
			if (o != null) {
				if (mEmpty) {
					mMin = o.get();
					mEmpty = false;
				} else {
					mMin = Math.min(mMin, o.get());
				}
			}
			return true;
		}

		public FloatWritable terminatePartial() {
			return mEmpty ? null : new FloatWritable(mMin);
		}

		public boolean merge(FloatWritable o) {
			return iterate(o);
		}

		public FloatWritable terminate() {
			return mEmpty ? null : new FloatWritable(mMin);
		}
	}

	/** MIN over DOUBLE values; NULLs are ignored. */
	static public class MinDoubleEvaluator implements UDAFEvaluator {
		private double mMin;
		private boolean mEmpty;

		public MinDoubleEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = 0;
			mEmpty = true;
		}

		public boolean iterate(DoubleWritable o) {
			if (o != null) {
				if (mEmpty) {
					mMin = o.get();
					mEmpty = false;
				} else {
					mMin = Math.min(mMin, o.get());
				}
			}
			return true;
		}

		public DoubleWritable terminatePartial() {
			return mEmpty ? null : new DoubleWritable(mMin);
		}

		public boolean merge(DoubleWritable o) {
			return iterate(o);
		}

		public DoubleWritable terminate() {
			return mEmpty ? null : new DoubleWritable(mMin);
		}
	}

	/**
	 * MIN over STRING values. The "enhanced" behavior: NULLs and
	 * blank/whitespace-only strings are skipped entirely so an empty string
	 * can never become the minimum.
	 */
	static public class MinStringEvaluator implements UDAFEvaluator {
		private Text mMin;
		private boolean mEmpty;

		public MinStringEvaluator() {
			super();
			init();
		}

		public void init() {
			mMin = null;
			mEmpty = true;
		}

		public boolean iterate(Text o) {
			// BUG FIX: the blank-string skip previously only applied while the
			// accumulator was empty, so a later whitespace-only value (e.g. " ")
			// could still win the comparison and become the MIN. It also returned
			// false for blanks, unlike every other evaluator. Now blanks are
			// skipped in all states and iterate always reports success.
			if (o != null && o.getLength() > 0 && o.toString().trim().length() > 0) {
				if (mEmpty) {
					// Copy the bytes: Hive may reuse the incoming Text instance.
					mMin = new Text(o);
					mEmpty = false;
				} else if (mMin.compareTo(o) > 0) {
					mMin.set(o);
				}
			}
			return true;
		}

		public Text terminatePartial() {
			return mEmpty ? null : mMin;
		}

		public boolean merge(Text o) {
			return iterate(o);
		}

		public Text terminate() {
			return mEmpty ? null : mMin;
		}
	}
}

UDTF函数示例模板:

package com.dtwave.udfs.xin.news;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * 将传入参数的第四列进行切割 第四列进行,以逗号进行分离,进行行转列的操作 然后输出 输出四列数据
 *
 * @author zhangchenguang
 *
 */
/**
 * UDTF that explodes the fourth column: splits it on commas and forwards one
 * four-column row per token (row-to-rows conversion). Columns 1-3 are passed
 * through unchanged on every output row.
 *
 * @author zhangchenguang
 */
public class CusDepictOptionsConvert extends GenericUDTF {

	/** Inspector for the first (string) argument, captured during initialize(). */
	private PrimitiveObjectInspector stringOI = null;

	@Override
	public void close() throws HiveException {
		// No resources to release.
	}

	/**
	 * Splits args[3] on "," and forwards (args[0], args[1], args[2], token)
	 * once per token.
	 */
	@Override
	public void process(Object[] args) throws HiveException {
		// Guard against NULL columns: emit nothing for incomplete rows instead
		// of failing the task with a NullPointerException.
		if (args[0] == null || args[1] == null || args[2] == null || args[3] == null) {
			return;
		}
		String proCode = args[0].toString();
		String cusId = args[1].toString();
		String cusDepictCfgId = args[2].toString();
		String[] cusDepictOptionIdArray = args[3].toString().split(",");
		for (String optionId : cusDepictOptionIdArray) {
			forward(new Object[] { proCode, cusId, cusDepictCfgId, optionId });
		}
	}

	@Override
	public StructObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
		// Arity check first, then type check.
		if (objectInspectors.length != 4) {
			throw new UDFArgumentException("CusDepictOptionsConvert() takes exactly four arguments");
		}
		// BUG FIX: the original combined the two checks with && (so a primitive
		// non-string argument slipped through) and cast to PrimitiveObjectInspector
		// before confirming the category (risking ClassCastException). Using ||
		// short-circuits the cast until the category is known to be PRIMITIVE.
		if (objectInspectors[0].getCategory() != ObjectInspector.Category.PRIMITIVE
				|| ((PrimitiveObjectInspector) objectInspectors[0])
						.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
			throw new UDFArgumentException("CusDepictOptionsConvert() takes a string as its first parameter");
		}
		// Input inspector.
		stringOI = (PrimitiveObjectInspector) objectInspectors[0];

		// Output schema: four string columns named c1..c4 (typed, not raw, lists).
		List<String> fieldNames = new ArrayList<>(4);
		List<ObjectInspector> fieldOIs = new ArrayList<>(4);
		for (String columnName : new String[] { "c1", "c2", "c3", "c4" }) {
			fieldNames.add(columnName);
			fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
		}
		return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
	}

}

在下篇博客里面分析一下Hive自定义函数心得体会。

 

你可能感兴趣的:(大数据,Hive)