Sinking data from Flink to HBase
The Flink main class
package flink.sink2hbase;

import flink.sink2hbase.deserialization.JsonDeserializationSchema;
import flink.sink.HbaseSinkFunction;
import flink.sink2hbase.map.HTableBaseMap;
import flink.sink2hbase.pojo.User;
import flink.sink2hbase.table.UserHTable;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import util.BeanUtil;
import util.Property;

import java.util.Properties;

public class FlinkSinkHbase {

    // Side-output tag for user records; the explicit TypeInformation avoids type-erasure issues.
    private static final OutputTag<UserHTable> userOutputTag =
            new OutputTag<>("用户表", TypeInformation.of(UserHTable.class));

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        Properties prop = Property.getKafkaProperties();
        // Note: Flink consumes raw bytes and applies its own DeserializationSchema,
        // so these key/value deserializer settings are effectively ignored by the connector.
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "g1");

        FlinkKafkaConsumer<User> consumer =
                new FlinkKafkaConsumer<>("test", new JsonDeserializationSchema<>(User.class), prop);
        DataStreamSource<User> mainStream = env.addSource(consumer);

        // Route every record to the user side output; the main output stays empty,
        // which leaves room to fan records out to additional tables later.
        SingleOutputStreamOperator<User> dataStream = mainStream
                .process(new ProcessFunction<User, User>() {
                    @Override
                    public void processElement(User user, Context context, Collector<User> collector) throws Exception {
                        UserHTable userHTable = UserHTable.builder().build();
                        BeanUtil.copy(userHTable, user);
                        context.output(userOutputTag, userHTable);
                    }
                });

        dataStream
                .getSideOutput(userOutputTag)
                .map(new HTableBaseMap<>())
                .addSink(new HbaseSinkFunction("t_user", "info"));

        env.execute("mainStream");
    }
}
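The User POJO imported above (flink.sink2hbase.pojo.User) is not shown in the original. A minimal sketch, assuming its fields mirror UserHTable so that BeanUtil.copy can match properties by name:

package flink.sink2hbase.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;

// Hypothetical shape of the Kafka payload; fields must match UserHTable
// by name and type for BeanUtil.copy to transfer them.
@Data
@AllArgsConstructor
@NoArgsConstructor
public class User implements Serializable {
    private String name;
    private int age;
}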
Deserialization
package flink.sink2hbase.deserialization;

import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;

import java.io.IOException;

@Slf4j
public class JsonDeserializationSchema<T> implements DeserializationSchema<T> {

    private final Class<T> clazz;

    public JsonDeserializationSchema(Class<T> clazz) {
        this.clazz = clazz;
    }

    @Override
    public T deserialize(byte[] bytes) throws IOException {
        try {
            return JSON.parseObject(new String(bytes), clazz);
        } catch (Throwable t) {
            // Log the exception itself, not its (possibly null) cause,
            // and skip the malformed record instead of failing the job.
            log.error("json parse error", t);
        }
        return null;
    }

    @Override
    public boolean isEndOfStream(T t) {
        return false;
    }

    @Override
    public TypeInformation<T> getProducedType() {
        return TypeExtractor.getForClass(clazz);
    }
}
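For reference, a message such as {"name":"alice","age":30} on the test topic parses into a User, while a malformed payload is logged and yields null, which the Flink Kafka consumer skips. A hypothetical smoke test (not part of the original code):

import flink.sink2hbase.deserialization.JsonDeserializationSchema;
import flink.sink2hbase.pojo.User;

// Hypothetical standalone check of the schema's two paths.
public class SchemaSmokeTest {
    public static void main(String[] args) throws Exception {
        JsonDeserializationSchema<User> schema = new JsonDeserializationSchema<>(User.class);
        User ok  = schema.deserialize("{\"name\":\"alice\",\"age\":30}".getBytes()); // User(name=alice, age=30)
        User bad = schema.deserialize("not json".getBytes());                        // null, error logged
        System.out.println(ok + " / " + bad);
    }
}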
BeanUtil
package util;

import exception.BeanUtilException;
import org.apache.commons.beanutils.BeanUtils;

public class BeanUtil {

    // Thin wrapper over commons-beanutils that rethrows checked exceptions unchecked.
    // Mind the argument order: destination first, source second.
    public static void copy(Object dest, Object orig) {
        try {
            BeanUtils.copyProperties(dest, orig);
        } catch (Exception e) {
            throw new BeanUtilException(e.getMessage(), e.getCause());
        }
    }
}
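BeanUtilException is referenced above but its source is not included. A plausible minimal definition, assuming it is just an unchecked wrapper:

package exception;

// Assumed definition: a plain RuntimeException wrapper so callers
// of BeanUtil.copy need no try/catch.
public class BeanUtilException extends RuntimeException {
    public BeanUtilException(String message, Throwable cause) {
        super(message, cause);
    }
}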
The HTableBase interface
package flink.sink2hbase.table;

public interface HTableBase {

    // Builds the HBase row key for this record; the prefix parameter lets callers salt the key.
    byte[] rowKey(byte[] prefix);
}
UserHTable
package flink.sink2hbase.table;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.Serializable;

@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class UserHTable implements Serializable, HTableBase {

    private String name;
    private int age;

    @Override
    public byte[] rowKey(byte[] prefix) {
        // The prefix is ignored here; the user name alone serves as the row key.
        return Bytes.toBytes(name);
    }
}
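Using the raw name as the row key can hotspot a single region when keys are skewed; the unused prefix parameter leaves room for salting. A hedged sketch of what a salted variant of rowKey could look like (not in the original):

// Hypothetical salted variant: prepend a salt prefix (e.g. a hash bucket)
// so writes spread across regions instead of piling onto one.
@Override
public byte[] rowKey(byte[] prefix) {
    byte[] key = Bytes.toBytes(name);
    return prefix == null ? key : Bytes.add(prefix, key);
}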
HTableBaseMap
package flink.sink2hbase.map;

import flink.sink2hbase.table.HTableBase;
import flink.sink2hbase.table.HTableRow;
import org.apache.flink.api.common.functions.MapFunction;

// Turns any HTableBase record into a generic HTableRow (row key plus column/value map),
// so a single sink implementation can serve every table type.
public class HTableBaseMap<T extends HTableBase> implements MapFunction<T, HTableRow> {

    @Override
    public HTableRow map(T hTableBase) throws Exception {
        // No salt prefix is used here, hence the null argument.
        return HTableRow.builder(hTableBase, null);
    }
}
HTableRow
package flink.sink2hbase.table;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import util.ByteUtil;

import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;

@Slf4j
@Data
@AllArgsConstructor
public class HTableRow implements Serializable {

    private byte[] row;
    private Map<String, byte[]> colValueMap;

    public static HTableRow builder(HTableBase src, byte[] prefix) {
        byte[] rowKey = src.rowKey(prefix);
        // Map each non-null field to a column via reflection. Collectors.toMap
        // rejects null values, so null fields must be filtered out first.
        Map<String, byte[]> colValueMap = Arrays.stream(src.getClass().getDeclaredFields())
                .filter(f -> fieldValue(src, f) != null)
                .collect(Collectors.toMap(Field::getName, f -> fieldValue(src, f)));
        return new HTableRow(rowKey, colValueMap);
    }

    public static byte[] fieldValue(Object src, Field f) {
        f.setAccessible(true);
        try {
            return ByteUtil.toByte(f.get(src));
        } catch (IllegalAccessException e) {
            // Log the exception itself, not its (possibly null) cause.
            log.error("Method fieldValue get value failed", e);
        }
        return null;
    }
}
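ByteUtil.toByte is used above but its source is not included. A minimal sketch, assuming it dispatches on the field's runtime type to HBase's Bytes helpers:

package util;

import org.apache.hadoop.hbase.util.Bytes;

// Assumed implementation: convert a boxed field value to bytes,
// dispatching on its runtime type.
public class ByteUtil {
    public static byte[] toByte(Object value) {
        if (value == null) return null;
        if (value instanceof String)  return Bytes.toBytes((String) value);
        if (value instanceof Integer) return Bytes.toBytes((Integer) value);
        if (value instanceof Long)    return Bytes.toBytes((Long) value);
        if (value instanceof Double)  return Bytes.toBytes((Double) value);
        if (value instanceof Boolean) return Bytes.toBytes((Boolean) value);
        // Fall back to the string representation for unknown types.
        return Bytes.toBytes(value.toString());
    }
}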
HbaseSinkFunction
package flink.sink;

import flink.sink2hbase.table.HTableRow;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import util.Property;

import java.util.Map;

@Slf4j
public class HbaseSinkFunction extends RichSinkFunction<HTableRow> {

    private Connection conn = null;
    private byte[] cfBytes;
    private String tableNameStr;
    private BufferedMutator mutator;
    private int count;

    public HbaseSinkFunction(String tableName, String cFamily) {
        this.tableNameStr = tableName;
        this.cfBytes = Bytes.toBytes(cFamily);
    }

    @Override
    public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
        super.open(parameters);
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", Property.getContextProperties().getProperty("hbase.zookeeper.quorum"));
        conf.set("hbase.zookeeper.property.clientPort", Property.getContextProperties().getProperty("hbase.zookeeper.property.clientPort"));
        conn = ConnectionFactory.createConnection(conf);

        // A BufferedMutator batches puts client-side instead of issuing one RPC per record.
        TableName tableName = TableName.valueOf(tableNameStr);
        BufferedMutatorParams params = new BufferedMutatorParams(tableName);
        params.maxKeyValueSize(10485760);    // 10 MB cap per cell
        params.writeBufferSize(1024 * 1024); // 1 MB client-side write buffer
        mutator = conn.getBufferedMutator(params);
        count = 0;
    }

    @Override
    public void close() throws Exception {
        super.close();
        // Close the mutator first: closing flushes any puts still sitting in the buffer.
        if (mutator != null) {
            mutator.close();
        }
        if (conn != null) {
            conn.close();
        }
    }

    @Override
    public void invoke(HTableRow hTableRow, Context context) throws Exception {
        Put put = new Put(hTableRow.getRow());
        for (Map.Entry<String, byte[]> entry : hTableRow.getColValueMap().entrySet()) {
            if (entry.getValue() != null) {
                put.addColumn(cfBytes, Bytes.toBytes(entry.getKey()), entry.getValue());
            }
        }
        // Per-record logging is verbose, so keep it at debug level.
        log.debug("put into hbase: \n" + put.toJSON());
        mutator.mutate(put);

        // Flush every 100 puts so writes become visible without waiting for the buffer to fill.
        count++;
        if (count >= 100) {
            mutator.flush();
            count = 0;
        }
    }
}
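Property.getKafkaProperties() and Property.getContextProperties() are used throughout but their source is not included. A minimal sketch, assuming they load kafka.properties and context.properties from the classpath (both file names are assumptions):

package util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

// Assumed implementation: load .properties files from the classpath.
// The file names kafka.properties / context.properties are guesses.
public class Property {

    public static Properties getKafkaProperties() {
        return load("kafka.properties");
    }

    public static Properties getContextProperties() {
        return load("context.properties");
    }

    private static Properties load(String name) {
        Properties props = new Properties();
        try (InputStream in = Property.class.getClassLoader().getResourceAsStream(name)) {
            if (in != null) {
                props.load(in);
            }
        } catch (IOException e) {
            throw new RuntimeException("failed to load " + name, e);
        }
        return props;
    }
}

Also note that the sink does not create the target table: t_user with column family info must already exist in HBase before the job starts.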