<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>flink-kafka-hbase</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <flink.version>1.13.6</flink.version>
        <scala.binary.version>2.11</scala.binary.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.34</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.28</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>23.0</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
            <version>2.4.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.9.9</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.4.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_${scala.binary.version}</artifactId>
            <version>1.9.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.cwf.kafka.hbasedemo.KafkaHBaseStreamWriteMain</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*:*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
package com.cwf.kafka.hbasedemo;

import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

/**
 * Writes test data into Kafka, simulating a producer.
 */
@Slf4j
public class KafkaUtilsProducer {
    public static final String broker_list = "10.252.92.4:9092";
    public static final String topic = "zhisheng"; // the Kafka topic; must be the same topic the Flink job reads from

    public static void writeToKafka() throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", broker_list);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);

        int i = 0;
        while (true) {
            Thread.sleep(100L); // send one record every 100 ms
            ProducerRecord<String, String> record = new ProducerRecord<>(
                    topic, null, null, String.valueOf(System.currentTimeMillis()));
            producer.send(record);
            log.info("record:{}", record);
            if (i % 10 == 0) {
                producer.flush();
                log.info("flush");
            }
            i++;
        }
    }

    public static void main(String[] args) throws InterruptedException {
        writeToKafka();
    }
}
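Before wiring in the Flink job, it can help to confirm that this producer is actually publishing to the topic. The following is a minimal sketch of a plain Kafka consumer that prints whatever arrives; it is not part of the original project (the class name KafkaTopicChecker and the group.id are made up here), and it reuses the broker address and topic from the producer above.

package com.cwf.kafka.hbasedemo;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

/**
 * Hypothetical helper for manually checking that KafkaUtilsProducer is publishing.
 * Broker address and topic are taken from the producer above; adjust for your environment.
 */
public class KafkaTopicChecker {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.252.92.4:9092");
        props.put("group.id", "topic-check-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "earliest");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("zhisheng"));
            while (true) {
                // Poll and print every record the producer has written so far
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset=%d, value=%s%n", record.offset(), record.value());
                }
            }
        }
    }
}

If timestamps start printing, the producer side is working and the Flink job below can be tested against the same topic.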
package com.cwf.kafka.hbasedemo;

import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Properties;

@Slf4j
public class KafkaHBaseStreamWriteMain {
    public static String TOPIC = "zhisheng";

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(new FlinkKafkaConsumer<>(
                TOPIC, // must match the topic used by the producer utility above
                new SimpleStringSchema(),
                getKafkaProps()))
                .writeUsingOutputFormat(new HBaseOutputFormat());
        env.execute("Flink HBase connector sink");
    }

    private static Properties getKafkaProps() {
        // Kafka consumer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.252.92.4:9092");
        props.put("zookeeper.connect", "10.252.92.4:2181");
        props.put("group.id", "metric-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "latest");
        return props;
    }

    private static class HBaseOutputFormat implements OutputFormat<String> {
        private org.apache.hadoop.conf.Configuration configuration;
        private Connection connection = null;
        private Table table = null;

        @Override
        public void configure(Configuration parameters) {
            // HBase client configuration
            configuration = HBaseConfiguration.create();
            configuration.set("hbase.zookeeper.quorum", "10.252.92.4:2181");
            configuration.set("hbase.zookeeper.property.clientPort", "2181");
            configuration.set("hbase.rpc.timeout", "30000");
            configuration.set("hbase.client.operation.timeout", "30000");
            configuration.set("hbase.client.scanner.timeout.period", "30000");
        }

        @Override
        public void open(int taskNumber, int numTasks) throws IOException {
            connection = ConnectionFactory.createConnection(configuration);
            TableName tableName = TableName.valueOf("zhisheng_stream");
            Admin admin = connection.getAdmin();
            if (!admin.tableExists(tableName)) { // create the table if it does not exist yet
                log.info("table does not exist: {}", tableName);
                admin.createTable(
                        new HTableDescriptor(tableName)
                                .addFamily(new HColumnDescriptor("info_stream")));
            }
            admin.close();
            table = connection.getTable(tableName);
        }

        @Override
        public void writeRecord(String record) throws IOException {
            // The record is a millisecond timestamp string; characters 6-9 become the rowkey
            // (e.g. "1556522162000" -> rowkey "2162"), the full record is the column qualifier,
            // and "cwf_" + record is the cell value.
            log.info("rowkey->{},column->info_stream:{},value->{}", record.substring(6, 10), record, "cwf_" + record);
            Put put = new Put(Bytes.toBytes(record.substring(6, 10)));
            put.addColumn(Bytes.toBytes("info_stream"), Bytes.toBytes(record), Bytes.toBytes("cwf_" + record));
            table.put(put);
        }

        @Override
        public void close() throws IOException {
            if (table != null) {
                table.close();
            }
            if (connection != null) {
                connection.close();
            }
        }
    }
}
Console output
HBase console output
This shows the job runs successfully on the local machine, completing the pipeline: producer -> Kafka -> consumer (Flink) -> data store (HBase).
Looking up rowkey 2162 in HBase:
Success.
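The same rowkey lookup can also be reproduced programmatically instead of through the HBase shell. Below is a minimal read-back sketch using the HBase client Get API; the class HBaseRowkeyChecker is a hypothetical helper (not part of the original project), and it assumes the same ZooKeeper address, table name, and column family as the sink above.

package com.cwf.kafka.hbasedemo;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Hypothetical verification helper: reads back one rowkey written by HBaseOutputFormat.
 * Connection settings mirror the sink above; adjust them to your environment.
 */
public class HBaseRowkeyChecker {
    public static void main(String[] args) throws Exception {
        org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "10.252.92.4:2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("zhisheng_stream"))) {
            // Fetch every column the sink wrote under rowkey "2162"
            Result result = table.get(new Get(Bytes.toBytes("2162")));
            for (Cell cell : result.rawCells()) {
                System.out.printf("column=%s:%s, value=%s%n",
                        Bytes.toString(CellUtil.cloneFamily(cell)),
                        Bytes.toString(CellUtil.cloneQualifier(cell)),
                        Bytes.toString(CellUtil.cloneValue(cell)));
            }
        }
    }
}

Each printed line should show a timestamp as the qualifier and the matching "cwf_"-prefixed value, confirming that the Flink sink wrote the records end to end.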