Big Data: Using Hive User-Defined Functions

1. Develop a Java class that extends UDF and implements an evaluate method

package com.wlw.udf;

import com.alibaba.fastjson.JSON;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Converts a JSON record into an object and returns its fields.
 * @author Tomas
 */
public class JsonPraser extends UDF {

    public String evaluate(String str) {
        // Parse one JSON line into a MovieRateBean and return its tab-separated fields.
        MovieRateBean movieRateBean = JSON.parseObject(str, MovieRateBean.class);
        return movieRateBean.toString();
    }

}
package com.wlw.udf;

/**
 * JavaBean that models one movie rating record.
 * @author Tomas
 */
public class MovieRateBean {
    private String movie;
    private String rate;
    private String timeStamp;
    private String uid;
    public String getMovie() {
        return movie;
    }
    public void setMovie(String movie) {
        this.movie = movie;
    }
    public String getRate() {
        return rate;
    }
    public void setRate(String rate) {
        this.rate = rate;
    }
    public String getTimeStamp() {
        return timeStamp;
    }
    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }
    public String getUid() {
        return uid;
    }
    public void setUid(String uid) {
        this.uid = uid;
    }
    @Override
    public String toString() {
        return movie + "\t" + rate + "\t" + timeStamp + "\t" + uid;
    }
}

2. Package the classes into a jar (for example with your IDE or Maven) and upload it to the server, e.g. as /home/hadoop/udf.jar

3. Create the source table and load the data

create table rat_json(line string) row format delimited;
load data local inpath '/home/hadoop/rating.json' into table rat_json;
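
Each line of rating.json is expected to be one JSON object whose keys match the bean fields above (movie, rate, timeStamp, uid); the shape shown in the comment below is an assumption for illustration, not actual data. A quick check that the load worked:

-- assumed line shape: {"movie":"...","rate":"...","timeStamp":"...","uid":"..."}
select line from rat_json limit 3;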

4. Add the jar to Hive's classpath

hive> add JAR /home/hadoop/udf.jar;
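
To confirm the jar was registered for the current session, Hive's list jars command can be used (an optional check):

hive> list jars;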

5. Create a custom function bound to the compiled Java class

create temporary function jsonpar as 'com.wlw.udf.JsonPraser';
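
Note that a temporary function only exists for the current Hive session. A quick sanity check before running the full insert (the limit value below is arbitrary):

hive> select jsonpar(line) from rat_json limit 3;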

6. Use the custom function

insert overwrite table t_rating
select split(jsonpar(line),'\t')[0] as movieid,
       split(jsonpar(line),'\t')[1] as rate,
       split(jsonpar(line),'\t')[2] as timestring,
       split(jsonpar(line),'\t')[3] as uid
from rat_json limit 10;
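
The insert above writes into t_rating, which the original steps never create; a minimal sketch of a compatible target table, assuming four tab-delimited string columns:

create table t_rating(movieid string, rate string, timestring string, uid string)
row format delimited fields terminated by '\t';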

7. Use Hive's built-in function

select get_json_object(line,'$.movie') as movie, get_json_object(line,'$.rate') as rate from rat_json limit 10;
[Screenshot: output of the built-in Hive function query]
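
Hive also ships json_tuple, which extracts several JSON fields in a single call; shown here as an optional alternative to repeated get_json_object calls (the column aliases are chosen here for readability):

select json_tuple(line,'movie','rate','timeStamp','uid') as (movie,rate,ts,uid) from rat_json limit 10;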
