Hive-UDTF-函数解析Json字符串

前言:

本章主要讲解Hive的一对多"UDTF"函数解析Json字符串,根据业务需求返回对应的值;

正文:

开发工具:IDEA+JDK1.8+MAVEN

1.创建maven项目并导入UDTF函数的依赖jar包

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.1.1</version>
</dependency>

2.编写UDTF(User-Defined Table-Generating Functions)需要继承GenericUDTF类,类中部分代码如下:

/**

  * A Generic User-defined Table Generating Function (UDTF)

  *

  * Generates a variable number of output rows for a single input row. Useful for

  * explode(array)...

  */

  public abstract class GenericUDTF {

  ​

   public StructObjectInspector initialize(StructObjectInspector argOIs)

         throws UDFArgumentException {

       List inputFields = argOIs.getAllStructFieldRefs();

       ObjectInspector[] udtfInputOIs = new ObjectInspector[inputFields.size()];

       for (int i = 0; i < inputFields.size(); i++) {

         udtfInputOIs[i] = inputFields.get(i).getFieldObjectInspector();

       }

       return initialize(udtfInputOIs);

    }


   /**

      * Give a set of arguments for the UDTF to process.

      *

      * @param args

      *          object array of arguments

      */

   public abstract void process(Object[] args) throws HiveException;

  ​

   /**

      * Called to notify the UDTF that there are no more rows to process.

      * Clean up code or additional forward() calls can be made here.

      */

   public abstract void close() throws HiveException;

  }

继承GenericUDTF需要实现以上方法,其中initialize方法和UDF中类似,主要是判断输入类型并确定返回的字段类型。process方法对UDTF函数输入的每一行进行操作,通过调用forward方法返回一行或多行数据。close方法在process调用结束后调用,用于进行其它一些额外操作,只执行一次。

import com.google.common.collect.Lists;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;

import org.apache.hadoop.hive.ql.metadata.HiveException;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import org.json.JSONArray;

import org.json.JSONException;

import org.json.JSONObject;

import java.util.Iterator;

import java.util.List;

/**

* @author yangxuan

* @date 2019/08/19

*/

public class JsonUDTFextends GenericUDTF {

private String[]obj =new String[3];

/**

* process()方法

* 主要业务逻辑方法:

    *  @param objects

    *  objects接收的参数

*/

    public void process(Object[] objects)throws HiveException {

//判断objects[0]是否为null,是直接return;

        if(objects[0] ==null){

return;

}

//获取参数并转为String并用JSONObject解析字符串

        String json = objects[0].toString();

try {

JSONObject jsonObject =new JSONObject(json);

Iterator keys = jsonObject.keys();

while (keys.hasNext()){

JSONArray o = (JSONArray)jsonObject.get(String.valueOf(keys.next()));

for(int i =0; i

JSONObject j2 = (JSONObject)o.get(i);

Iterator keys1 = j2.keys();

while (keys1.hasNext()){

JSONArray o2 = (JSONArray)j2.get(String.valueOf(keys1.next()));

for(int j =0;j

JSONObject j3 = (JSONObject)o2.get(j);

obj[0]= toStr(j3.get("equipParamNameId"));

obj[1]= toStr(j3.get("deviceParamNameId"));

obj[2]= toStr(j3.get("finalValue"));

//通过调用forward方法返回一行或多行数据

                            forward(obj);

}

}

}

}

}catch (JSONException e) {

e.printStackTrace();

}

}

private String toStr(Object o){

if(o==null){

return "";

}

return String.valueOf(o);

}

//close方法在process调用结束后调用,用于进行其它一些额外操作,只执行一次。

    public void close()throws HiveException {

}

/**

*返回类型String,String,String

*init方法,主要定义输出的字段名及字段类型

*/

    @Override

    public StructObjectInspector initialize(StructObjectInspector argOIs)throws UDFArgumentException {

//设置列名

        List colName = Lists.newLinkedList();

colName.add("equipParamNameId");

colName.add("deviceValue");

colName.add("finalValue");

//设置对应每列的类型

        List resType = Lists.newLinkedList();

resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

resType.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

//返回分别为列名 和列类型

        return ObjectInspectorFactory.getStandardStructObjectInspector(colName,resType);

}

}

你可能感兴趣的:(Hive-UDTF-函数解析Json字符串)