根据手机号得到运营商和归属地的hive UDTF

package com.tq.udf.person;


import com.google.gson.JsonObject;
import com.google.gson.JsonParser;



import com.google.i18n.phonenumbers.PhoneNumberUtil;
import com.google.i18n.phonenumbers.Phonenumber;
import com.google.i18n.phonenumbers.geocoding.PhoneNumberOfflineGeocoder;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * @author tq
 * @date 2019/5/5 16:10
 */
public class GetTelDetail extends GenericUDTF {
    public static void main(String[] args) throws HiveException {


//        String[] result = getInfo("15967193899asd");
//        System.out.println(result[0]);
//        System.out.println(result[1]);
//
//        Object[] objects = {"qwe",null};
//        new GetTelDetail().process(objects);
    }

    public static String[] getInfo(String tel) {

        //正则提取数字
        String regex = "[^0-9]";
        Pattern p = Pattern.compile(regex);
        Matcher m = p.matcher(tel);
        //判断是否为11位号码
        if(m.replaceAll("").length()==11) {
            tel = m.replaceAll("");
        }else {
            tel = "123";
        }
       // System.out.println(tel);

        //第三方接口,可以返回运营商,归属地,后者只精确到省份,所以只要运营商
        String url = "https://tcc.taobao.com/cc/json/mobile_tel_segment.htm?tel=".concat(tel);

        StringBuilder json = new StringBuilder();
        try {
            URL oracel = new URL(url);
            // System.out.println(oracel);
            URLConnection yc = oracel.openConnection();
            // System.out.println(yc);
            BufferedReader in = new BufferedReader(new InputStreamReader(
                    yc.getInputStream(), "GBK"
            ));
            String inputLine = null;
            while ((inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // System.out.println(json);
        String b = json.toString().replace("__GetZoneResult_ = ", "").replace("'", "\"").replace(",", ",\"").replace("{", "{\"").replace(":", "\":").replace(" ", "");

        // System.out.println(b.contains("catName"));
        String[] result = new String[2];
        
        //谷歌的包,可以根据手机号得到归属地,精确到市
        PhoneNumberUtil phoneNumberUtil = PhoneNumberUtil.getInstance();
        PhoneNumberOfflineGeocoder phoneNumberOfflineGeocoder = PhoneNumberOfflineGeocoder.getInstance();

        String language = "CN";
        Phonenumber.PhoneNumber referencePhonenumber = null;


        try {
            referencePhonenumber = phoneNumberUtil.parse(tel, language);
        } catch (Exception e) {
            e.printStackTrace();
        }

        String city = phoneNumberOfflineGeocoder.getDescriptionForNumber(referencePhonenumber, Locale.CHINA);

        result[1] = city;
        
        //判断第三方接口返回的数据是否正确
        if (b.contains("catName")) {
            JsonParser jsonParser = new JsonParser();
            JsonObject jsonObject = jsonParser.parse(b).getAsJsonObject();

            // System.out.println(jsonObject.has("catName"));
            result[0] = jsonObject.get("catName").getAsString();
            return result;
        } else {
            result[0] = "未知";
            result[1] = "未知";
            return result;
        }
    }

    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentLengthException("ExplodeMap takes only two argument");
        }
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE || args[1].getCategory() != ObjectInspector.Category.PRIMITIVE ) {
            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
        }

        ArrayList fieldNames = new ArrayList();
        ArrayList fieldOIs = new ArrayList();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col2");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col3");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] objects) throws HiveException {

        String tel;
        if (objects[1] == null || objects[1] == "") {
            tel = "123";
        } else {
            tel = objects[1].toString();
        }


        String[] arr = getInfo(tel);


        String[] result = {objects[0].toString(), arr[0], arr[1]};
//        System.out.println(result[0]);
//        System.out.println(result[1]);
//        System.out.println(result[2]);
        forward(result);
    }

    @Override
    public void close() throws HiveException {

    }
}

由于hive的udtf只能单独使用,不能select col1,udtf(col2) from xxx
所以我多加了个参数,这个参数原样返回,这样使用 select udtf(col1,col2) from xxx,col1字段可以用做和其它表join


pom文件附上


        org.apache.hive
        hive-exec
        1.1.0-cdh5.16.0
    

    
    
        org.apache.hive
        hive-common
        1.1.0-cdh5.16.1
    

    
    
        org.apache.hadoop
        hadoop-common
        2.6.0-cdh5.16.0
    

        
            com.google.code.gson
            gson
            2.8.3
        

        
        
            com.googlecode.libphonenumber
            libphonenumber
            8.9.9
        

        
        
            com.googlecode.libphonenumber
            geocoder
            2.98
        

        
        
            com.googlecode.libphonenumber
            carrier
            1.88
        

        
        
            com.googlecode.libphonenumber
            prefixmapper
            2.97
        

你可能感兴趣的:(学习经历)