Flink 1.6.2: reading Hive table data and writing it to HBase

package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.sql.*;
import java.util.Random;

public class FlinkReadHiveToHbase {
    public static void main(String[] args) {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<JSONObject> hiveData = env.addSource(new HiveJsonSource());
        hiveData.addSink(new HBaseSink());
        hiveData.print();
        try {
            env.execute("Flink write data to hbase");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    private static class HiveJsonSource extends RichSourceFunction<JSONObject> {
        private transient Connection con = null;
        private transient Statement st = null;
        private volatile boolean running = true;
        private String dateTime = "20190718";

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            con = DriverManager.getConnection("jdbc:hive2://172.10.4.96:10000/aijiami", "hive", "hive");
            st = con.createStatement();
        }

        @Override
        public void run(SourceFunction.SourceContext<JSONObject> ctx) throws Exception {
            ResultSet rs = st.executeQuery("SELECT " +
                    "deviceId," +
                    "shortappkey," +
                    "province," +
                    "city," +
                    "factory," +
                    "phoneOS," +
                    "networkType," +
                    "deviceScreenStr," +
                    "operateTypeStr," +
                    "appKey," +
                    "user_name," +
                    "user_sex," +
                    "user_age," +
                    "user_education," +
                    "user_occupation," +
                    "source_channel," +
                    "urlTime " +
                    "FROM ods_event_detail WHERE dt = '" + dateTime + "'");

            while (running && rs.next()) {
                // 17 columns per row; skip rows without a device id
                String deviceId = rs.getString(1);
                if (StringUtils.isBlank(deviceId)) {
                    continue;
                }

                String shortappkey = blankToNull(rs.getString(2));
                String province = blankToNull(rs.getString(3));
                String city = blankToNull(rs.getString(4));
                String factory = blankToNull(rs.getString(5));
                String phoneOS = blankToNull(rs.getString(6));
                String networkType = blankToNull(rs.getString(7));
                String deviceScreenStr = blankToNull(rs.getString(8));
                String operateTypeStr = blankToNull(rs.getString(9));
                String appKey = blankToNull(rs.getString(10));
                String user_name = blankToNull(rs.getString(11));
                String user_sex = blankToNull(rs.getString(12));
                String user_age = blankToNull(rs.getString(13));
                String user_education = blankToNull(rs.getString(14));
                String user_occupation = blankToNull(rs.getString(15));
                String source_channel = blankToNull(rs.getString(16));
                String urlTime = blankToNull(rs.getString(17));

                // build the salted row key from the short app key and device id
                String rowKey = getRowKey(shortappkey, deviceId);
                JSONObject json = new JSONObject();
                json.put("rowKey", rowKey);
                json.put("deviceId", deviceId);
                json.put("province", province);
                json.put("city", city);
                json.put("factory", factory);
                json.put("phoneOS", phoneOS);
                json.put("networkType", networkType);
                json.put("deviceScreenStr", deviceScreenStr);
                json.put("operateTypeStr", operateTypeStr);
                json.put("appKey", appKey);
                json.put("user_name", user_name);
                json.put("user_sex", user_sex);
                json.put("user_age", user_age);
                json.put("user_education", user_education);
                json.put("user_occupation", user_occupation);
                json.put("source_channel", source_channel);
                json.put("urlTime", urlTime);

                ctx.collect(json);
            }

            rs.close();
        }

        /** Replaces blank column values with the literal string "null". */
        private static String blankToNull(String value) {
            return StringUtils.isBlank(value) ? "null" : value;
        }

        @Override
        public void cancel() {
            running = false;
        }

        @Override
        public void close() throws Exception {
            if (st != null) {
                st.close();
            }
            if (con != null) {
                con.close();
            }
            super.close();
        }
    }


    //todo write to HBase
    private static class HBaseSink extends RichSinkFunction<JSONObject> {

        private transient org.apache.hadoop.hbase.client.Connection connection = null;
        private transient Table queryListTable = null;
        private final Random rnd = new Random();

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "node1.hadoop,node2.hadoop,node3.hadoop");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            // the HBase connection manages its own thread pool; create it once per sink instance
            connection = ConnectionFactory.createConnection(conf);
            queryListTable = connection.getTable(TableName.valueOf("wxgz_user_data_test"));
        }

        @Override
        public void close() throws Exception {
            if (queryListTable != null) {
                queryListTable.close();
            }
            if (connection != null) {
                connection.close();
            }
            super.close();
        }


        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            // json is the record emitted by the Hive source
            try {
                String rowKey = json.getString("rowKey");
                String deviceId = json.getString("deviceId");
                String province = json.getString("province");
                String city = json.getString("city");
                String factory = json.getString("factory");
                String phoneOS = json.getString("phoneOS");
                String networkType = json.getString("networkType");
                String deviceScreenStr = json.getString("deviceScreenStr");
                String operateTypeStr = json.getString("operateTypeStr");
                String appKey = json.getString("appKey");
                String user_name = json.getString("user_name");
                String user_sex = json.getString("user_sex");
                // for this test, user_age is overwritten with a random value in [14, 53]
                String user_age = (14 + rnd.nextInt(40)) + "";
                String user_education = json.getString("user_education");
                String user_occupation = json.getString("user_occupation");
                String source_channel = json.getString("source_channel");
                String urlTime = json.getString("urlTime");

                Put put = new Put(Bytes.toBytes(rowKey));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("deviceId"), Bytes.toBytes(deviceId));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("province"), Bytes.toBytes(province));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("city"), Bytes.toBytes(city));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("factory"), Bytes.toBytes(factory));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("phoneOS"), Bytes.toBytes(phoneOS));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("networkType"), Bytes.toBytes(networkType));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("deviceScreenStr"), Bytes.toBytes(deviceScreenStr));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("operateTypeStr"), Bytes.toBytes(operateTypeStr));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("appKey"), Bytes.toBytes(appKey));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_name"), Bytes.toBytes(user_name));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_sex"), Bytes.toBytes(user_sex));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_age"), Bytes.toBytes(user_age));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_education"), Bytes.toBytes(user_education));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_occupation"), Bytes.toBytes(user_occupation));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("source_channel"), Bytes.toBytes(source_channel));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("time"), Bytes.toBytes(urlTime));
                queryListTable.put(put);
            } catch (Exception ex) {
                ex.printStackTrace();
            }

        }


    }


    // utility: prefix the key with a salt bucket so rows spread across regions
    private static String getRowKey(String shortAppKey, String deviceId) {

        int buckets = 50;

        String deviceStr = StringUtils.isNotBlank(shortAppKey)
                ? shortAppKey + "|" + deviceId
                : deviceId;

        // mask the sign bit instead of Math.abs(): abs(Integer.MIN_VALUE) is still negative
        int bucket = (deviceStr.hashCode() & Integer.MAX_VALUE) % buckets;

        return String.format("%03d", bucket) + "|" + deviceStr;
    }
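
    // Hypothetical helper (not part of the original post): pre-split the target table so
    // that the 50 salt buckets produced by getRowKey() land on separate regions.
    // Assumes the HBase 1.x admin API and that "wxgz_user_data_test" does not exist yet.
    private static void createPreSplitTable(org.apache.hadoop.conf.Configuration conf) throws Exception {
        try (org.apache.hadoop.hbase.client.Connection conn = ConnectionFactory.createConnection(conf);
             org.apache.hadoop.hbase.client.Admin admin = conn.getAdmin()) {
            org.apache.hadoop.hbase.HTableDescriptor desc =
                    new org.apache.hadoop.hbase.HTableDescriptor(TableName.valueOf("wxgz_user_data_test"));
            desc.addFamily(new org.apache.hadoop.hbase.HColumnDescriptor("cf"));
            // split points "001" .. "049" give one region per salt prefix
            byte[][] splits = new byte[49][];
            for (int i = 1; i < 50; i++) {
                splits[i - 1] = Bytes.toBytes(String.format("%03d", i));
            }
            admin.createTable(desc, splits);
        }
    }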


}
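
A note on write performance: the sink above issues one RPC per record via Table.put(). If throughput matters, the HBase client's BufferedMutator can batch mutations client-side and send them in bulk. Below is a minimal sketch of the same sink rewritten around BufferedMutator; the class is hypothetical (not from the original post), only one column write is shown, and the 4 MB buffer size is an arbitrary starting point:

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical variant of HBaseSink that batches writes client-side.
public class BufferedHBaseSink extends RichSinkFunction<JSONObject> {

    private transient Connection connection;
    private transient BufferedMutator mutator;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node1.hadoop,node2.hadoop,node3.hadoop");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        connection = ConnectionFactory.createConnection(conf);
        BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf("wxgz_user_data_test"))
                .writeBufferSize(4 * 1024 * 1024); // flush once ~4 MB of puts have accumulated
        mutator = connection.getBufferedMutator(params);
    }

    @Override
    public void invoke(JSONObject json, Context context) throws Exception {
        Put put = new Put(Bytes.toBytes(json.getString("rowKey")));
        put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("deviceId"), Bytes.toBytes(json.getString("deviceId")));
        // ... add the remaining columns exactly as in HBaseSink.invoke() ...
        mutator.mutate(put); // buffered; the client sends batches asynchronously
    }

    @Override
    public void close() throws Exception {
        if (mutator != null) {
            mutator.flush(); // push out anything still buffered
            mutator.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}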


A second way to read Hive:

This approach relies on the HCatInputFormat class (its source is listed after the example).

package com.coder.flink.core.FlinkHive


import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.hadoop.conf.Configuration
import org.apache.flink.api.scala._

import scala.util.parsing.json.JSONObject


// Read data from Hive
object ReadHive {
  def main(args: Array[String]): Unit = {

      val conf = new Configuration()
      conf.set("hive.metastore.local", "false")

      conf.set("hive.metastore.uris", "thrift://172.10.4.141:9083")
       // with a highly-available metastore, this needs to be the nameservice address
//      conf.set("hive.metastore.uris", "thrift://172.10.4.142:9083")

      val env = ExecutionEnvironment.getExecutionEnvironment


      // element type returned by the input format
      val dataset: DataSet[JSONObject] = env.createInput(new HCatInputFormat[JSONObject]("test", "test01", conf))

      dataset.first(10).print()
//      env.execute("flink hive test")


  }

}


The HCatInputFormat class:

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.coder.flink.core.FlinkHive;

import org.apache.flink.api.java.tuple.Tuple;

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.HCatRecord;


/**
 * A InputFormat to read from HCatalog tables.
 * The InputFormat supports projection (selection and order of fields) and partition filters.
 *
 * <p>Data can be returned as {@link HCatRecord} or Flink {@link Tuple}.
 * Flink tuples support only up to 25 fields.
 *
 * @param <T> The type of the returned records.
 */
public class HCatInputFormat<T> extends HCatInputFormatBase<T> {
    private static final long serialVersionUID = 1L;

    public HCatInputFormat() {}

    public HCatInputFormat(String database, String table) throws Exception {
        super(database, table);
    }

    public HCatInputFormat(String database, String table, Configuration config) throws Exception {
        super(database, table, config);
    }

    @Override
    protected int getMaxFlinkTupleSize() {
        return 25;
    }

    @Override
    protected T buildFlinkTuple(T t, HCatRecord record) throws HCatException {

        Tuple tuple = (Tuple) t;

        // Extract all fields from HCatRecord
        for (int i = 0; i < this.fieldNames.length; i++) {

            // get field value
            Object o = record.get(this.fieldNames[i], this.outputSchema);

            // Set field value in Flink tuple.
            // Partition columns are returned as String and
            // need to be converted to original type.
            switch (this.outputSchema.get(i).getType()) {
                case INT:
                    if (o instanceof String) {
                        tuple.setField(Integer.parseInt((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case TINYINT:
                    if (o instanceof String) {
                        tuple.setField(Byte.parseByte((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case SMALLINT:
                    if (o instanceof String) {
                        tuple.setField(Short.parseShort((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case BIGINT:
                    if (o instanceof String) {
                        tuple.setField(Long.parseLong((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case BOOLEAN:
                    if (o instanceof String) {
                        tuple.setField(Boolean.parseBoolean((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case FLOAT:
                    if (o instanceof String) {
                        tuple.setField(Float.parseFloat((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case DOUBLE:
                    if (o instanceof String) {
                        tuple.setField(Double.parseDouble((String) o), i);
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case STRING:
                    tuple.setField(o, i);
                    break;
                case BINARY:
                    if (o instanceof String) {
                        throw new RuntimeException("Cannot handle partition keys of type BINARY.");
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case ARRAY:
                    if (o instanceof String) {
                        throw new RuntimeException("Cannot handle partition keys of type ARRAY.");
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case MAP:
                    if (o instanceof String) {
                        throw new RuntimeException("Cannot handle partition keys of type MAP.");
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                case STRUCT:
                    if (o instanceof String) {
                        throw new RuntimeException("Cannot handle partition keys of type STRUCT.");
                    } else {
                        tuple.setField(o, i);
                    }
                    break;
                default:
                    throw new RuntimeException("Invalid Type");
            }
        }
        return (T) tuple;
    }
}
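
A minimal Java usage sketch for this class (assumptions: getFields(...) and asFlinkTuples() are inherited from Flink's HCatInputFormatBase, and the projected columns deviceId/urlTime are placeholders for a STRING and an INT column in your own table):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.conf.Configuration;

public class ReadHiveAsTuples {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("hive.metastore.uris", "thrift://172.10.4.141:9083");

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // project two columns and have them returned as Flink tuples instead of HCatRecord
        DataSet<Tuple2<String, Integer>> ds = env.createInput(
                new HCatInputFormat<Tuple2<String, Integer>>("test", "test01", conf)
                        .getFields("deviceId", "urlTime")
                        .asFlinkTuples());

        ds.first(10).print();
    }
}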
