hive函数UDF

1. 示例代码

 

package com.lixg.udf;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that extracts the run of decimal digits following a key inside a URL
 * (for example the {@code 18980} in {@code ...?topicId=18980}).
 *
 * <p>Both overloads return {@code null} when the input is {@code null}/empty or
 * when no match is found.
 */
public class GetCmsID extends UDF {

	/**
	 * Default key pattern. Compiled once for the class instead of once per call;
	 * the digits are captured in group 1.
	 */
	private static final Pattern TOPIC_ID = Pattern.compile("topicId=([0-9]+)");

	/**
	 * Extracts the digits that follow the first {@code topicId=} in {@code url}.
	 *
	 * @param url the URL to search; may be {@code null}
	 * @return the digit string, or {@code null} if {@code url} is {@code null},
	 *         empty, or contains no {@code topicId=<digits>}
	 */
	public String evaluate(String url){
		if (url == null || url.isEmpty()) {
			return null;
		}
		Matcher matcher = TOPIC_ID.matcher(url);
		return matcher.find() ? matcher.group(1) : null;
	}

	/**
	 * Extracts the digits that follow the first occurrence of {@code pattern}
	 * in {@code url}.
	 *
	 * @param pattern regular expression for the prefix that precedes the digits
	 *                (e.g. {@code "topicId="}); it is interpreted as a regex,
	 *                not as a literal string
	 * @param url     the URL to search; may be {@code null}
	 * @return the digit string, or {@code null} if either argument is
	 *         {@code null}, {@code url} is empty, or nothing matches
	 * @throws java.util.regex.PatternSyntaxException if {@code pattern} is not a
	 *         valid regular expression
	 */
	public String evaluate(String pattern,String url){
		// A null pattern would otherwise be concatenated as the text "null".
		if (pattern == null || url == null || url.isEmpty()) {
			return null;
		}
		// Capture the digits directly rather than splitting the match on the
		// prefix regex again: split() returns the wrong element for an empty
		// prefix and an empty array when the prefix consumes the whole match.
		Matcher matcher = Pattern.compile(pattern + "([0-9]+)").matcher(url);
		if (!matcher.find()) {
			return null;
		}
		// The appended group is opened last, so it always has the highest index,
		// even when the caller's pattern contains capturing groups of its own.
		// It is null if the digits group did not take part in the match.
		return matcher.group(matcher.groupCount());
	}

	/**
	 * Small manual demo of both overloads.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String url="http://www.taobao.com/cms/view.do?topicId=18980";
		GetCmsID getCmsID = new GetCmsID();

		// Prints 18980.
		System.out.println(getCmsID.evaluate(url)) ;
		// Prints null: the sample URL has no "cmsid=" parameter.
		System.out.println(getCmsID.evaluate("cmsid=",url)) ;
	}

}

2. 导出为GetCmsID.jar

3. 运行

 

hive> add jar /usr/local/hivedemo/GetCmsID.jar;
hive> create temporary function getCmsID as 'com.lixg.udf.GetCmsID';
hive> select getCmsID(t.col1) from t limit 10;
hive> drop temporary function getCmsID;
备注:
1. getCmsID为临时函数,只在当前会话中有效,所以每次进入hive都需要重新执行add jar以及create temporary function操作
2. UDF只能实现一进一出(一行输入对应一行输出)的操作,如果需要实现多进一出(多行输入聚合为一行输出),则需要实现UDAF

你可能感兴趣的:(hive udf)