package com.coder.flink.core.aaa_spark;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.sql.*;
import java.util.Random;

public class FlinkReadHiveToHbase {

    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<JSONObject> hbaseData = env.addSource(new sourceFromjson());
        hbaseData.addSink(new writeToHbase());
        hbaseData.print();

        try {
            env.execute("Flink write data to hbase");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Source: read rows from Hive over JDBC and emit each row as a JSON object.
    private static class sourceFromjson extends RichSourceFunction<JSONObject> {

        private transient Connection con = null;
        private transient Statement st = null;
        private String dateTime = "20190718";

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            con = DriverManager.getConnection("jdbc:hive2://172.10.4.96:10000/aijiami", "hive", "hive");
            st = con.createStatement();
        }

        @Override
        public void run(SourceContext<JSONObject> ctx) throws Exception {
            ResultSet rs = st.executeQuery("SELECT "
                    + "deviceId,"
                    + "shortappkey,"
                    + "province,"
                    + "city,"
                    + "factory,"
                    + "phoneOS,"
                    + "networkType,"
                    + "deviceScreenStr,"
                    + "operateTypeStr,"
                    + "appKey,"
                    + "user_name,"
                    + "user_sex,"
                    + "user_age,"
                    + "user_education,"
                    + "user_occupation,"
                    + "source_channel,"
                    + "urlTime "
                    + "from ods_event_detail where dt = " + dateTime);

            // 17 fields per row
            while (rs.next()) {
                String deviceId = rs.getString(1);
                if (StringUtils.isBlank(deviceId)) {
                    // skip the row entirely if deviceId is blank
                    continue;
                }
                String shortappkey = rs.getString(2);
                if (StringUtils.isBlank(shortappkey)) { shortappkey = "null"; }
                String province = rs.getString(3);
                if (StringUtils.isBlank(province)) { province = "null"; }
                String city = rs.getString(4);
                if (StringUtils.isBlank(city)) { city = "null"; }
                String factory = rs.getString(5);
                if (StringUtils.isBlank(factory)) { factory = "null"; }
                String phoneOS = rs.getString(6);
                if (StringUtils.isBlank(phoneOS)) { phoneOS = "null"; }
                String networkType = rs.getString(7);
                if (StringUtils.isBlank(networkType)) { networkType = "null"; }
                String deviceScreenStr = rs.getString(8);
                if (StringUtils.isBlank(deviceScreenStr)) { deviceScreenStr = "null"; }
                String operateTypeStr = rs.getString(9);
                if (StringUtils.isBlank(operateTypeStr)) { operateTypeStr = "null"; }
                String appKey = rs.getString(10);
                if (StringUtils.isBlank(appKey)) { appKey = "null"; }
                String user_name = rs.getString(11);
                if (StringUtils.isBlank(user_name)) { user_name = "null"; }
                String user_sex = rs.getString(12);
                if (StringUtils.isBlank(user_sex)) { user_sex = "null"; }
                String user_age = rs.getString(13);
                if (StringUtils.isBlank(user_age)) { user_age = "null"; }
                String user_education = rs.getString(14);
                if (StringUtils.isBlank(user_education)) { user_education = "null"; }
                String user_occupation = rs.getString(15);
                if (StringUtils.isBlank(user_occupation)) { user_occupation = "null"; }
                String source_channel = rs.getString(16);
                if (StringUtils.isBlank(source_channel)) { source_channel = "null"; }
                String urlTime = rs.getString(17);
                if (StringUtils.isBlank(urlTime)) { urlTime = "null"; }

                // build the salted rowkey
                String rowKey = getRowKey(shortappkey, deviceId);

                JSONObject json = new JSONObject();
                json.put("rowKey", rowKey);
                json.put("deviceId", deviceId);
                json.put("province", province);
                json.put("city", city);
                json.put("factory", factory);
                json.put("phoneOS", phoneOS);
                json.put("networkType", networkType);
                json.put("deviceScreenStr", deviceScreenStr);
                json.put("operateTypeStr", operateTypeStr);
                json.put("appKey", appKey);
                json.put("user_name", user_name);
                json.put("user_sex", user_sex);
                json.put("user_age", user_age);
                json.put("user_education", user_education);
                json.put("user_occupation", user_occupation);
                json.put("source_channel", source_channel);
                json.put("urlTime", urlTime);

                ctx.collect(json);
            }
            rs.close();
        }

        @Override
        public void close() throws Exception {
            if (st != null) { st.close(); }
            if (con != null) { con.close(); }
            super.close();
        }

        @Override
        public void cancel() {
        }
    }

    // Sink: write each JSON record into HBase.
    private static class writeToHbase extends RichSinkFunction<JSONObject> {

        private Table queryListTable = null;
        private Random rnd = new Random();

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "node1.hadoop,node2.hadoop,node3.hadoop");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            // the connection manages its own thread pool; performance not yet measured
            org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(conf);
            queryListTable = connection.getTable(TableName.valueOf("wxgz_user_data_test"));
        }

        @Override
        public void close() throws Exception {
            super.close();
        }

        @Override
        public void invoke(JSONObject json, Context context) throws Exception {
            // json is the record emitted by the source; user_age is replaced by a random value here for testing
            try {
                String rowKey = json.getString("rowKey");
                String deviceId = json.getString("deviceId");
                String province = json.getString("province");
                String city = json.getString("city");
                String factory = json.getString("factory");
                String phoneOS = json.getString("phoneOS");
                String networkType = json.getString("networkType");
                String deviceScreenStr = json.getString("deviceScreenStr");
                String operateTypeStr = json.getString("operateTypeStr");
                String appKey = json.getString("appKey");
                String user_name = json.getString("user_name");
                String user_sex = json.getString("user_sex");
                // String user_age = json.getString("user_age");
                String user_age = (14 + rnd.nextInt(40)) + "";
                String user_education = json.getString("user_education");
                String user_occupation = json.getString("user_occupation");
                String source_channel = json.getString("source_channel");
                String urlTime = json.getString("urlTime");

                Put put = new Put(Bytes.toBytes(rowKey));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("deviceId"), Bytes.toBytes(deviceId));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("province"), Bytes.toBytes(province));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("city"), Bytes.toBytes(city));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("factory"), Bytes.toBytes(factory));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("phoneOS"), Bytes.toBytes(phoneOS));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("networkType"), Bytes.toBytes(networkType));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("deviceScreenStr"), Bytes.toBytes(deviceScreenStr));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("operateTypeStr"), Bytes.toBytes(operateTypeStr));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("appKey"), Bytes.toBytes(appKey));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_name"), Bytes.toBytes(user_name));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_sex"), Bytes.toBytes(user_sex));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_age"), Bytes.toBytes(user_age));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_education"), Bytes.toBytes(user_education));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("user_occupation"), Bytes.toBytes(user_occupation));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("source_channel"), Bytes.toBytes(source_channel));
                put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("time"), Bytes.toBytes(urlTime));

                queryListTable.put(put);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

    // Utility: build a salted rowkey so writes are spread across 50 key prefixes.
    private static String getRowKey(String shortAppKey, String deviceId) {
        int number = 50;   // number of salt buckets (test value)
        String deviceStr;
        if (StringUtils.isNotBlank(shortAppKey)) {
            deviceStr = shortAppKey + "|" + deviceId;
        } else {
            deviceStr = deviceId;
        }
        int getMod = Math.abs(deviceStr.hashCode()) % number;
        String newPrefix = String.format("%03d", getMod);
        return newPrefix + "|" + deviceStr;
    }
}
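The getRowKey helper above salts the HBase rowkey: the device string (optionally prefixed with the short appkey) is hashed into one of 50 buckets, and the zero-padded bucket number becomes the key prefix, which spreads writes across regions while keeping all events of one device under one key. A minimal standalone sketch of that behaviour (the appkey and device values are invented for illustration):

public class RowKeySaltingDemo {

    // Same salting scheme as getRowKey above: 50 buckets, 3-digit zero-padded prefix.
    static String rowKey(String shortAppKey, String deviceId) {
        String deviceStr = (shortAppKey == null || shortAppKey.trim().isEmpty())
                ? deviceId
                : shortAppKey + "|" + deviceId;
        int bucket = Math.abs(deviceStr.hashCode()) % 50;     // 0..49
        return String.format("%03d", bucket) + "|" + deviceStr;
    }

    public static void main(String[] args) {
        // made-up sample values; real keys come from the Hive columns shortappkey/deviceId
        System.out.println(rowKey("ak1", "dev-0001"));   // prefix is somewhere in "000".."049"
        System.out.println(rowKey("ak1", "dev-0002"));   // a different device usually lands in a different bucket
        System.out.println(rowKey(null, "dev-0003"));    // blank appkey: only the deviceId is hashed
    }
}

Because the prefix is derived from a hash of the key itself, the same device always maps to the same prefix, so reads by device remain single-row lookups while writes avoid hotspotting a single region.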
The second way to read Hive:
This approach needs the HCatInputFormat class.
package com.coder.flink.core.FlinkHive

import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.hadoop.conf.Configuration
import org.apache.flink.api.scala._

import scala.util.parsing.json.JSONObject

// read data from Hive via the HCatalog metastore
object ReadHive {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    conf.set("hive.metastore.local", "false")
    conf.set("hive.metastore.uris", "thrift://172.10.4.141:9083")
    // if the metastore is highly available, point this at the nameservice address instead
    // conf.set("hive.metastore.uris", "thrift://172.10.4.142:9083")

    val env = ExecutionEnvironment.getExecutionEnvironment

    // todo: element type produced by the input format
    val dataset: DataSet[JSONObject] = env.createInput(new HCatInputFormat[JSONObject]("test", "test01", conf))
    dataset.first(10).print()

    // env.execute("flink hive test")
  }
}
The HCatInputFormat class:
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.coder.flink.core.FlinkHive;

import org.apache.flink.api.java.tuple.Tuple;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.HCatRecord;

/**
 * A InputFormat to read from HCatalog tables.
 * The InputFormat supports projection (selection and order of fields) and partition filters.
 *
 * <p>Data can be returned as {@link HCatRecord} or Flink {@link Tuple}.
 * Flink tuples support only up to 25 fields.
 *
 * @param <T>
 */
public class HCatInputFormat<T> extends HCatInputFormatBase<T> {
    private static final long serialVersionUID = 1L;

    public HCatInputFormat() {}

    public HCatInputFormat(String database, String table) throws Exception {
        super(database, table);
    }

    public HCatInputFormat(String database, String table, Configuration config) throws Exception {
        super(database, table, config);
    }

    @Override
    protected int getMaxFlinkTupleSize() {
        return 25;
    }

    @Override
    protected T buildFlinkTuple(T t, HCatRecord record) throws HCatException {

        Tuple tuple = (Tuple) t;

        // Extract all fields from HCatRecord
        for (int i = 0; i < this.fieldNames.length; i++) {

            // get field value
            Object o = record.get(this.fieldNames[i], this.outputSchema);

            // Set field value in Flink tuple.
            // Partition columns are returned as String and
            // need to be converted to original type.
            switch (this.outputSchema.get(i).getType()) {
                case INT:
                    if (o instanceof String) { tuple.setField(Integer.parseInt((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case TINYINT:
                    if (o instanceof String) { tuple.setField(Byte.parseByte((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case SMALLINT:
                    if (o instanceof String) { tuple.setField(Short.parseShort((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case BIGINT:
                    if (o instanceof String) { tuple.setField(Long.parseLong((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case BOOLEAN:
                    if (o instanceof String) { tuple.setField(Boolean.parseBoolean((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case FLOAT:
                    if (o instanceof String) { tuple.setField(Float.parseFloat((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case DOUBLE:
                    if (o instanceof String) { tuple.setField(Double.parseDouble((String) o), i); }
                    else { tuple.setField(o, i); }
                    break;
                case STRING:
                    tuple.setField(o, i);
                    break;
                case BINARY:
                    if (o instanceof String) { throw new RuntimeException("Cannot handle partition keys of type BINARY."); }
                    else { tuple.setField(o, i); }
                    break;
                case ARRAY:
                    if (o instanceof String) { throw new RuntimeException("Cannot handle partition keys of type ARRAY."); }
                    else { tuple.setField(o, i); }
                    break;
                case MAP:
                    if (o instanceof String) { throw new RuntimeException("Cannot handle partition keys of type MAP."); }
                    else { tuple.setField(o, i); }
                    break;
                case STRUCT:
                    if (o instanceof String) { throw new RuntimeException("Cannot handle partition keys of type STRUCT."); }
                    else { tuple.setField(o, i); }
                    break;
                default:
                    throw new RuntimeException("Invalid Type");
            }
        }

        return (T) tuple;
    }
}
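For comparison with the Scala example, here is a minimal Java usage sketch of the class above. It assumes HCatInputFormatBase mirrors Flink's flink-hcatalog connector, i.e. it produces HCatRecord by default and reports its produced type to Flink; the metastore URI, database ("test") and table ("test01") are the same example values used earlier, not anything you must keep.

package com.coder.flink.core.FlinkHive;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class ReadHiveJava {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Point the input format at the Hive metastore (example address from the Scala job).
        Configuration conf = new Configuration();
        conf.set("hive.metastore.local", "false");
        conf.set("hive.metastore.uris", "thrift://172.10.4.141:9083");

        // Records come back as generic HCatRecord objects, assuming the base class
        // behaves like Flink's flink-hcatalog connector and exposes its result type.
        DataSet<HCatRecord> records =
                env.createInput(new HCatInputFormat<HCatRecord>("test", "test01", conf));

        records.first(10).print();
    }
}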