This post walks through consuming a Kafka topic with Java and writing the messages into HBase. First, create the target namespace and table in the HBase shell:
create_namespace 'events_db'
create 'events_db:user_friend','uf'
The source data has already been pushed into a Kafka topic by Flume, so the topic holds everything that needs to be processed. The code below consumes and processes that data and is organized into three modules, plus a test class:
1. Writer module: consumes the Kafka records, calls the handler's ICustopo method to turn them into Puts, and writes them into the HBase table.
The IWriter interface:
import org.apache.kafka.clients.consumer.ConsumerRecords;

public interface IWriter {
    // Consume one polled batch of Kafka records and write it to the sink; returns a status code
    int write(ConsumerRecords<String, String> records);
}
The KafkaWriter2 implementation:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.io.IOException;
import java.util.List;

public class KafkaWriter2 implements IWriter {
    private IHandler handler;
    private Connection con = null;
    private Table table = null;

    public KafkaWriter2(IHandler handler, String tableName) {
        this.handler = handler;
        // Build the HBase client configuration and open the target table
        final Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.126.166");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.rootdir", "hdfs://192.168.126.166:9000/hbase");
        try {
            con = ConnectionFactory.createConnection(conf);
            table = con.getTable(TableName.valueOf(tableName));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public int write(ConsumerRecords<String, String> records) {
        try {
            // Let the handler turn the batch into Puts, then write them to HBase in one call
            List<Put> datas = handler.ICustopo(records);
            table.put(datas);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }
}
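The writer above opens an HBase Connection and Table but never releases them. A close method could be added to KafkaWriter2 for that; the sketch below is a hypothetical addition that simply reuses the con and table fields shown above:
// Hypothetical addition to KafkaWriter2: release HBase resources when the job stops
public void close() {
    try {
        if (table != null) table.close(); // release the Table handle
        if (con != null) con.close();     // tear down the underlying HBase connection
    } catch (IOException e) {
        e.printStackTrace();
    }
}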
2. Handler module: turns each polled batch into a List<Put>, taking a ConsumerRecords object as input. A KafkaConsumer returns at most 500 records per poll by default; this batch size is controlled by the max.poll.records consumer property (see the snippet after KafkaParentWorker below).
The IHandler interface:
import org.apache.hadoop.hbase.client.Put;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.util.List;

public interface IHandler {
    // Convert one batch of Kafka records into HBase Puts
    List<Put> ICustopo(ConsumerRecords<String, String> records);
}
The UserFriendHandler implementation:
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import java.util.ArrayList;
import java.util.List;

/**
 * @Description builds one Put per "userid,friendid" record
 * @Author: WuYou
 * @date 2020/6/1
 */
public class UserFriendHandler implements IHandler {
    @Override
    public List<Put> ICustopo(ConsumerRecords<String, String> records) {
        List<Put> datas = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record.value());
            // Each record value is "userid,friendid"; the row key is the hash of the two ids combined
            final String[] infos = record.value().split(",");
            Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
            put.addColumn("uf".getBytes(), "userid".getBytes(), infos[0].getBytes());
            put.addColumn("uf".getBytes(), "friendid".getBytes(), infos[1].getBytes());
            datas.add(put);
        }
        return datas;
    }
}
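To sanity-check the handler without a running Kafka cluster or HBase table, a batch can be built by hand. The sketch below is only an illustration: the sample value "1001,2002" is made up, UserFriendHandlerCheck is a hypothetical class name, and it assumes the kafka-clients version used here still exposes the public ConsumerRecords(Map) constructor:
import org.apache.hadoop.hbase.client.Put;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.TopicPartition;
import java.util.Collections;
import java.util.List;

public class UserFriendHandlerCheck {
    public static void main(String[] args) {
        // Build a single fake record in the same "userid,friendid" format used by the topic
        TopicPartition tp = new TopicPartition("user_friends", 0);
        ConsumerRecord<String, String> rec =
                new ConsumerRecord<>("user_friends", 0, 0L, null, "1001,2002");
        ConsumerRecords<String, String> records =
                new ConsumerRecords<>(Collections.singletonMap(tp, Collections.singletonList(rec)));
        // Run the handler and inspect the generated Puts (no HBase connection is needed for this)
        List<Put> puts = new UserFriendHandler().ICustopo(records);
        System.out.println(puts.size() + " put(s) generated"); // expect 1, with family "uf" and columns userid/friendid
    }
}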
3. Worker module: configures the Kafka consumer, polls the topic in a loop, and hands every batch to the writer, which stores it in HBase.
The IWorker interface:
public interface IWorker {
void pushData();
}
The KafkaParentWorker abstract class:
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Properties;

/**
 * @Description base worker that holds the common Kafka consumer configuration
 * @Author: WuYou
 * @date 2020/6/1
 */
public abstract class KafkaParentWorker implements IWorker {
    private Properties prop = new Properties();
    private long pollTime = 0;
    private String topicName;

    public KafkaParentWorker(String topicName, long pollTime, String groupId) {
        this.pollTime = pollTime;
        this.topicName = topicName;
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.126.166:9092");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
        // Auto-commit is disabled; offsets are committed in pushData() after each successful write
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // Only takes effect if auto-commit is re-enabled
        prop.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
    }

    public KafkaParentWorker(String topicName) {
        // Default: 100 ms poll timeout, consumer group "groupId1"
        this(topicName, 100, "groupId1");
    }

    public Properties getProp() {
        return prop;
    }

    public void setProp(Properties prop) {
        this.prop = prop;
    }

    public long getPollTime() {
        return pollTime;
    }

    public void setPollTime(long pollTime) {
        this.pollTime = pollTime;
    }

    public String getTopicName() {
        return topicName;
    }

    public void setTopicName(String topicName) {
        this.topicName = topicName;
    }
}
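As mentioned in the handler section, a consumer returns at most 500 records per poll by default. If a different batch size is wanted, one more property could be set in the constructor above; this is a hedged sketch, and the value 1000 is only an example:
// Hypothetical addition to the KafkaParentWorker constructor: change the per-poll batch size
prop.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "1000"); // Kafka's default is 500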
KafkaSonWorker, the concrete subclass:
import com.Test.writer.IWriter;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Collections;

/**
 * @Description polls the topic in a loop and hands every batch to the writer
 * @Author: WuYou
 * @date 2020/6/1
 */
public class KafkaSonWorker extends KafkaParentWorker {
    private IWriter writer;

    public KafkaSonWorker(String topicName, long pollTime, String groupId, IWriter writer) {
        super(topicName, pollTime, groupId);
        this.writer = writer;
    }

    public KafkaSonWorker(String topicName, IWriter writer) {
        super(topicName);
        this.writer = writer;
    }

    @Override
    public void pushData() {
        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(super.getProp());
        consumer.subscribe(Collections.singleton(this.getTopicName()));
        while (true) {
            final ConsumerRecords<String, String> records = consumer.poll(this.getPollTime());
            writer.write(records);
            // Auto-commit is disabled in KafkaParentWorker, so commit the offsets
            // only after the batch has been handed to the writer
            consumer.commitSync();
        }
    }
}
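The loop above never exits and the consumer is never closed. A hypothetical variant of pushData() with a clean shutdown path could register a JVM shutdown hook that wakes the consumer up; this is only a sketch, not part of the original code, and it additionally requires import org.apache.kafka.common.errors.WakeupException:
@Override
public void pushData() {
    final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(super.getProp());
    consumer.subscribe(Collections.singleton(this.getTopicName()));
    // wakeup() is safe to call from another thread and makes the blocked poll() throw WakeupException
    Runtime.getRuntime().addShutdownHook(new Thread(consumer::wakeup));
    try {
        while (true) {
            final ConsumerRecords<String, String> records = consumer.poll(this.getPollTime());
            writer.write(records);
            consumer.commitSync();
        }
    } catch (WakeupException e) {
        // expected during shutdown, nothing to do
    } finally {
        consumer.close(); // leave the consumer group cleanly
    }
}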
4. Test class
public class testHb {
    public static void main(String[] args) {
        // Wire the handler into the writer, point the writer at the HBase table created above,
        // and start consuming the user_friends topic
        final UserFriendHandler userFriendHandler = new UserFriendHandler();
        final KafkaWriter2 kafkaWriter = new KafkaWriter2(userFriendHandler, "events_db:user_friend");
        final KafkaSonWorker kafkaSonWorker = new KafkaSonWorker("user_friends", kafkaWriter);
        kafkaSonWorker.pushData();
    }
}
With this running, the data is written into HBase. To check whether the rows actually arrived, go to the bin directory of the HBase installation on the Linux host and run:
./hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'events_db:user_friend'
This prints the number of rows in the table, which confirms that the import completed.
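To spot-check actual column values rather than just the row count, a small scan over the same table can also be run from Java. This is a sketch that assumes the same ZooKeeper address used by the writer above; the class name ScanUserFriend and the limit of 5 rows are arbitrary:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanUserFriend {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.126.166"); // same quorum as the writer
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (Connection con = ConnectionFactory.createConnection(conf);
             Table table = con.getTable(TableName.valueOf("events_db:user_friend"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int shown = 0;
            for (Result result : scanner) {
                // Read back the two columns written by UserFriendHandler
                String userid = Bytes.toString(result.getValue(Bytes.toBytes("uf"), Bytes.toBytes("userid")));
                String friendid = Bytes.toString(result.getValue(Bytes.toBytes("uf"), Bytes.toBytes("friendid")));
                System.out.println(userid + " -> " + friendid);
                if (++shown >= 5) break; // only look at a handful of rows
            }
        }
    }
}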