Flink Sink to HBase

Writing data from Flink into HBase.

Flink main class

package flink.sink2hbase;

import flink.sink2hbase.deserialization.JsonDeserializationSchema;
import flink.sink.HbaseSinkFunction;
import flink.sink2hbase.map.HTableBaseMap;
import flink.sink2hbase.table.UserHTable;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import flink.sink2hbase.pojo.User;
import util.BeanUtil;
import util.Property;

import java.util.Properties;

public class FlinkSinkHbase {

    private static OutputTag<UserHTable> userOutputTag = new OutputTag<>("用户表", TypeInformation.of(UserHTable.class));

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties prop = Property.getKafkaProperties();
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g1");
        
        // Deserialize the JSON strings read from Kafka into User objects
        FlinkKafkaConsumer<User> consumer = new FlinkKafkaConsumer<>("test",new JsonDeserializationSchema<>(User.class),prop);

        DataStreamSource<User> mainStream = env.addSource(consumer);

        SingleOutputStreamOperator<User> dataStream = mainStream
            .process(new ProcessFunction<User, User>() {
            @Override
            public void processElement(User user,
                              Context context,
                              Collector<User> collector) throws Exception {
                UserHTable userHTable = UserHTable.builder().build();
                
                // Copy the matching User fields into the UserHTable object and emit it to the side output
                BeanUtil.copy(userHTable,user);
                context.output(userOutputTag,userHTable);
            }
        });

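        // Take the UserHTable records from the side output, convert them to generic HTableRows and write them to HBase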
        dataStream
                .getSideOutput(userOutputTag)
                .map(new HTableBaseMap<>())
                .addSink(new HbaseSinkFunction("t_user","info"));

        env.execute("mainStream");
    }
}
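
User POJO (sketch)

The User POJO (flink.sink2hbase.pojo.User) referenced by the consumer is not included in the original post. A minimal sketch, assuming its fields mirror UserHTable (name, age) so that BeanUtils.copyProperties can match them by name:

package flink.sink2hbase.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;

// Assumed shape of the Kafka payload: {"name":"...","age":...}
@Data
@AllArgsConstructor
@NoArgsConstructor
public class User implements Serializable {
    private String name;
    private int age;
}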

JSON deserialization schema

package flink.sink2hbase.deserialization;

import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

@Slf4j
public class JsonDeserializationSchema<T> implements DeserializationSchema<T> {
    private Class<T> clazz;

    public JsonDeserializationSchema(Class<T> clazz) {
        this.clazz = clazz;
    }

    @Override
    public T deserialize(byte[] bytes) throws IOException {
        try {
            return JSON.parseObject(new String(bytes, StandardCharsets.UTF_8), clazz);
        } catch (Throwable t) {
            log.error("json parse error", t);
        }
        return null;
    }

    @Override
    public boolean isEndOfStream(T t) {
        return false;
    }

    @Override
    public TypeInformation<T> getProducedType() {
        return TypeExtractor.getForClass(clazz);
    }
}
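
A quick way to sanity-check the schema outside of Flink (the sample JSON payload below is made up for illustration):

package flink.sink2hbase.deserialization;

import flink.sink2hbase.pojo.User;

import java.nio.charset.StandardCharsets;

public class JsonDeserializationSchemaDemo {
    public static void main(String[] args) throws Exception {
        byte[] payload = "{\"name\":\"alice\",\"age\":30}".getBytes(StandardCharsets.UTF_8);
        User user = new JsonDeserializationSchema<>(User.class).deserialize(payload);
        System.out.println(user);   // User(name=alice, age=30), via Lombok's generated toString
    }
}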

BeanUtil

package util;

import exception.BeanUtilException;
import org.apache.commons.beanutils.BeanUtils;

public class BeanUtil {
    public static void copy(Object dest, Object orig){
        try {
            BeanUtils.copyProperties(dest,orig);
        } catch (Exception e) {
            throw new BeanUtilException(e.getMessage(),e.getCause());
        }
    }
}
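
BeanUtilException (sketch)

The exception.BeanUtilException class imported above is not shown in the original post; a minimal sketch, assuming it is just an unchecked wrapper around the reflection failure:

package exception;

// Assumption: a plain RuntimeException so callers of BeanUtil.copy do not need a throws clause
public class BeanUtilException extends RuntimeException {
    public BeanUtilException(String message, Throwable cause) {
        super(message, cause);
    }
}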

HTableBase interface

package flink.sink2hbase.table;

public interface HTableBase {
    byte[] rowKey(byte[] prefix);
}

UserHTable

package flink.sink2hbase.table;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.Serializable;


@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class UserHTable implements Serializable,HTableBase {
    private String name;
    private int age;

    @Override
    public byte[] rowKey(byte[] prefix) {
        return Bytes.toBytes(name);
    }
}

HTableBaseMap

package flink.sink2hbase.map;

import flink.sink2hbase.table.HTableBase;
import flink.sink2hbase.table.HTableRow;
import org.apache.flink.api.common.functions.MapFunction;

public class HTableBaseMap<T extends HTableBase> implements MapFunction<T, HTableRow> {
    @Override
    public HTableRow map(T hTableBase) throws Exception {
        return HTableRow.builder(hTableBase,null);
    }
}

HTableRow

package flink.sink2hbase.table;


import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import util.ByteUtil;

import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;

@Slf4j
@Data
@AllArgsConstructor
public class HTableRow implements Serializable {
    private byte[] row;
    private Map<String, byte[]> colValueMap;

    public static HTableRow builder(HTableBase src, byte[] prefix) {
        byte[] rowKey = src.rowKey(prefix);
        Map<String, byte[]> colValueMap = Arrays.stream(src.getClass().getDeclaredFields())
                .filter(f -> fieldValue(src, f) != null)
                .collect(Collectors.toMap(f -> f.getName(), f -> fieldValue(src, f)));
        return new HTableRow(rowKey,colValueMap);
    }

    public static byte[] fieldValue(Object src, Field f) {
        f.setAccessible(true);
        try {
            return ByteUtil.toByte(f.get(src));
        } catch (IllegalAccessException e) {
            log.error("Method fieldValue get value failed", e.getCause());
            e.printStackTrace();
        }
        return null;
    }
}
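
ByteUtil (sketch)

util.ByteUtil, used by fieldValue above, is also not part of the original post. A minimal sketch, assuming it converts the common field types of these POJOs (String, Integer, Long, Double, Boolean) to HBase byte arrays via Bytes:

package util;

import org.apache.hadoop.hbase.util.Bytes;

public class ByteUtil {

    // Convert a field value to the byte[] representation expected by HBase.
    // Unsupported types fall back to their String form; null stays null so
    // HTableRow.builder can filter the column out.
    public static byte[] toByte(Object value) {
        if (value == null) {
            return null;
        }
        if (value instanceof String) {
            return Bytes.toBytes((String) value);
        }
        if (value instanceof Integer) {
            return Bytes.toBytes((Integer) value);
        }
        if (value instanceof Long) {
            return Bytes.toBytes((Long) value);
        }
        if (value instanceof Double) {
            return Bytes.toBytes((Double) value);
        }
        if (value instanceof Boolean) {
            return Bytes.toBytes((Boolean) value);
        }
        return Bytes.toBytes(value.toString());
    }
}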

HbaseSinkFunction

package flink.sink;

import flink.sink2hbase.table.HTableRow;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import util.Property;

import java.util.Map;

@Slf4j
public class HbaseSinkFunction extends RichSinkFunction<HTableRow> {
    private Connection conn = null;
    private byte[] cfBytes;
    private String tableNameStr;
    private BufferedMutator mutator;
    private int count;

    public HbaseSinkFunction(String tableName,String cFamily) {
        this.tableNameStr = tableName;
        this.cfBytes = Bytes.toBytes(cFamily);
    }

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        super.open(parameters);
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", Property.getContextProperties().getProperty("hbase.zookeeper.quorum"));
        conf.set("hbase.zookeeper.property.clientPort", Property.getContextProperties().getProperty("hbase.zookeeper.property.clientPort"));
        conn = ConnectionFactory.createConnection(conf);
        TableName tableName = TableName.valueOf(tableNameStr);
        
        // Use a BufferedMutator for more efficient batched writes to HBase
        BufferedMutatorParams params = new BufferedMutatorParams(tableName);
        params.maxKeyValueSize(10485760);
        params.writeBufferSize(1024 * 1024);
        mutator = conn.getBufferedMutator(params);
        count = 0;
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (mutator != null) {
            // close() flushes any mutations still buffered in the client
            mutator.close();
        }
        if (conn != null) {
            conn.close();
        }
    }

    @Override
    public void invoke(HTableRow hTableRow, Context context) throws Exception {
        Put put = new Put(hTableRow.getRow());
        for (Map.Entry<String, byte[]> entry : hTableRow.getColValueMap().entrySet()) {
            if (entry.getValue() != null)
                put.addColumn(cfBytes, Bytes.toBytes(entry.getKey()), entry.getValue());
        }
        log.info("put into hbase: \n" + put.toJSON());
        mutator.mutate(put);
        count++;
        // Flush the buffered mutations every 100 puts so records do not sit in the client buffer indefinitely
        if (count >= 100) {
            mutator.flush();
            count = 0;
        }
    }
}
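
Property (sketch)

The util.Property helper used by both the job and the sink is not shown in the original post either. A minimal sketch, assuming it simply loads two properties files from the classpath (the file names kafka.properties and context.properties are assumptions):

package util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class Property {

    public static Properties getKafkaProperties() {
        return load("kafka.properties");      // e.g. bootstrap.servers=...
    }

    public static Properties getContextProperties() {
        return load("context.properties");    // hbase.zookeeper.quorum, hbase.zookeeper.property.clientPort
    }

    private static Properties load(String fileName) {
        Properties prop = new Properties();
        try (InputStream in = Property.class.getClassLoader().getResourceAsStream(fileName)) {
            if (in == null) {
                throw new IllegalStateException(fileName + " not found on the classpath");
            }
            prop.load(in);
        } catch (IOException e) {
            throw new RuntimeException("Failed to load " + fileName, e);
        }
        return prop;
    }
}

The job also assumes the target table already exists; it can be created in the HBase shell with: create 't_user', 'info'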
