Flink Writing to HBase

Basic flow:

Read data from Kafka, then write it to HBase.

Kafka producer code

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Date;
import java.util.Properties;

public class JProducer extends Thread{
    public static void main(String[] args) {
        JProducer jProducer = new JProducer();
        jProducer.start();
    }

    @Override
    public void run() {
        try {
            producer();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    private void producer() throws InterruptedException {
        Properties props = config();
        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 10; i++) {
            // Quote the date value so the message is valid JSON
            String json = "{\"id\":" + i + ",\"ip\":\"192.168.0." + i + "\",\"date\":\"" + new Date().toString() + "\"}";
            String k = "key" + i;
            producer.send(new ProducerRecord<>("flink_topic", k, json));
            System.out.println("Sending data: " + json);
            Thread.sleep(1000);
        }
        producer.close(); // Flush pending records and release resources
    }
    private Properties config(){
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "1");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        return props;
    }
}

Flink HBase sink code

import org.apache.commons.net.ntp.TimeStamp;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Date;
import java.util.Properties;

public class FlinkHBase {
    private static String zkServer = "localhost";
    private static String port = "2181";
    private static TableName tableName = TableName.valueOf("testFlink");
    private static final String cf = "ke";
    private static final String topic = "flink_topic";

    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//        env.enableCheckpointing(1000);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        DataStream<String> transaction = env.addSource(
                new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), configByKafka()));
        transaction.rebalance().map(new MapFunction<String, String>() {
            private static final long serialVersionUID = 1L;
            public String map(String value) throws Exception {
                System.out.println(value);
                write2Hbase(value);
                return value;
            }
        }).print();
        try{
            env.execute("Sink2Hbase");
        }catch (Exception e){
            e.printStackTrace();
        }
    }
    public static Properties configByKafka() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "kv_flink");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        return props;
    }
    // Note: this opens a new HBase connection for every record, which keeps the example simple
    // but is slow; in a real job the connection should be created once and reused.
    public static void write2Hbase(String value) throws IOException {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", zkServer);
        config.set("hbase.zookeeper.property.clientPort", port);
        config.setInt("hbase.rpc.timeout", 30000);
        config.setInt("hbase.client.operation.timeout", 30000);
        config.setInt("hbase.client.scanner.timeout.period", 30000);
        Connection connection = ConnectionFactory.createConnection(config);
        Admin admin = connection.getAdmin();
//        if (!admin.tableExists(tableName)){
//            admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(cf)));
//        }
        Table table = connection.getTable(tableName);
        TimeStamp ts = new TimeStamp(new Date());
        Date date = ts.getDate();
        // Use the current timestamp in milliseconds as the row key
        Put put = new Put(Bytes.toBytes(date.getTime()));
        put.addColumn(Bytes.toBytes(cf), Bytes.toBytes("test"), Bytes.toBytes(value));
        System.out.println("Writing: " + value);
        table.put(put);
        table.close();
        connection.close();
    }
}
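
The table creation inside write2Hbase is commented out above, so the testFlink table with column family ke has to exist before the job runs. Below is a minimal one-time setup sketch, assuming the same localhost ZooKeeper settings as the job; the class name CreateHBaseTable is only illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateHBaseTable {
    public static void main(String[] args) throws Exception {
        // Same ZooKeeper settings as the FlinkHBase job above
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "localhost");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        try (Connection connection = ConnectionFactory.createConnection(config);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("testFlink");
            // Create the table with column family "ke" only if it does not exist yet
            if (!admin.tableExists(tableName)) {
                admin.createTable(new HTableDescriptor(tableName)
                        .addFamily(new HColumnDescriptor("ke")));
            }
        }
    }
}

Alternatively, the table can be created from the HBase shell with: create 'testFlink', 'ke'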

pom.xml



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.kun</groupId>
    <artifactId>flink2hbase</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.26</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.26</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.9.2</version>
        </dependency>
    </dependencies>
</project>

Bug

java.lang.NoClassDefFoundError: Could not initialize class org.apache.hadoop.hbase.util.ByteStringer

If a higher version of protobuf is pulled in through the dependencies, it causes all kinds of problems for HBase, so the protobuf version has to be handled carefully in any project that uses HBase. Check the dependencies in the pom and remove the higher-version protobuf. (The MySQL JDBC driver will pull one in.)
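
Running mvn dependency:tree shows which dependency brings in protobuf-java. Below is a sketch of excluding it from the MySQL JDBC driver; the coordinates and version are illustrative and not part of the pom above.

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.16</version>
    <exclusions>
        <!-- Keep the protobuf version that the HBase client expects -->
        <exclusion>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
        </exclusion>
    </exclusions>
</dependency>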

HBase Bug

See my other blog post.
