使用Flink读取HBase数据

使用Flink DataStream API进行相关操作,依赖包如下:


    org.apache.hbase
    hbase-client
    ${hbase.version}


    org.apache.hbase
    hbase-common
    ${hbase.version}


    org.apache.flink
    flink-streaming-java_2.11
    ${flink.version}

代码如下:

1) 先创建类继承RichSourceFunction

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.List;


/**
 * Flink streaming source that scans an HBase table and emits one String per row,
 * where the string is the row's cell values joined with "-".
 *
 * <p>Not parallel-safe as written: each subtask would scan the full table.
 */
public class HBaseReader extends RichSourceFunction<String> {
    // Cooperative cancellation flag, flipped by cancel(); volatile so the
    // change is visible to the thread running run().
    private volatile boolean isRunning = true;
    private Connection connection = null;
    private ResultScanner rs = null;
    private Table table = null;

    /**
     * Opens the HBase connection once per subtask before run() is called.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        org.apache.hadoop.conf.Configuration hconf = HBaseConfiguration.create();
        hconf.set("hbase.zookeeper.quorum", "host1:2181,host2:2181,host3:2181");
        hconf.set("zookeeper.znode.parent", "/hbase");
        connection = ConnectionFactory.createConnection(hconf);
    }

    /**
     * Scans the "flow" column family of testDb:testTable and emits each row
     * as a "-"-joined string of its cell values. Stops early if cancel() is
     * invoked.
     */
    @Override
    public void run(SourceContext<String> sourceContext) throws Exception {
        table = connection.getTable(TableName.valueOf("testDb:testTable"));
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("flow"));
        rs = table.getScanner(scan);
        for (Result result : rs) {
            if (!isRunning) {
                break;
            }
            List<Cell> cells = result.listCells();
            if (cells == null) {
                // Result.listCells() returns null when the row has no cells.
                continue;
            }
            StringBuilder sb = new StringBuilder();
            for (Cell cell : cells) {
                String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                sb.append(value).append("-");
            }
            // Drop the trailing delimiter; guard against an empty builder so we
            // never call into a negative index.
            if (sb.length() > 0) {
                sb.setLength(sb.length() - 1);
            }
            sourceContext.collect(sb.toString());
        }
    }

    /**
     * Requests the run() loop to stop; required by the SourceFunction contract.
     */
    @Override
    public void cancel() {
        isRunning = false;
    }

    /**
     * Releases the scanner, table and connection in reverse order of creation.
     */
    @Override
    public void close() throws Exception {
        if (rs != null) rs.close();
        if (table != null) table.close();
        if (connection != null) connection.close();
    }
}

2) 运行Flink

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * Demo job: reads rows from HBase via {@link HBaseReader} and prints each
 * emitted row string to stdout.
 */
public class FlinkHBaseDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Attach the HBase source; each element is one row's "-"-joined cell values.
        DataStream<String> stream = env.addSource(new HBaseReader());
        stream.print();
        env.execute("FlinkHBaseDemo");
    }
}

你可能感兴趣的:(大数据,flink,flink,hbase)