I. Kafka Project Application Architecture
1. Kafka ecosystem architecture
The speed at which data is collected does not necessarily match the speed at which it can be processed, so Kafka is used as middleware, acting as the access point between collection and processing: it receives the data collected by Flume and hands it to Storm for processing through a KafkaSpout.
2. Kafka message producer
3. Kafka message consumer
II. Kafka Producer
1. Configuring the Flume cluster to send data to Kafka
# configure the data source (spooling directory) and the Flume channel
producer.sources.s.type=spooldir
producer.sources.s.spoolDir=/home/dir/logdfs
producer.sources.s.channels=c
# configure the Flume sink type (Kafka sink plugin) and target topic
producer.sinks.r.type=org.apache.flume.plugins.KafkaSink
producer.sinks.r.metadata.broker.list=dn1:9092,dn2:9092,dn3:9092
producer.sinks.r.custom.topic.name=kafka-ubas
A script on the server keeps generating data; Flume picks it up and, with the Kafka sink configured above, forwards the data from the specified channel/sink to Kafka. In the KafkaMonitor management page (similar to the ActiveMQ console) you can watch the producer's message count grow.
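Besides feeding the topic through Flume, it can be useful to push a few test messages into kafka-ubas directly to verify the topic end to end. Below is a minimal sketch of a standalone producer using the old Kafka 0.8 producer API, which matches the consumer API used later in this article; the broker list is taken from the Flume sink config above, while the class name and the message contents are purely illustrative.

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class TestKafkaProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        // broker list as configured for the Flume Kafka sink above
        props.put("metadata.broker.list", "dn1:9092,dn2:9092,dn3:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("request.required.acks", "1");

        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        for (int i = 0; i < 10; i++) {
            // send a dummy message to the same topic the Flume sink writes to
            producer.send(new KeyedMessage<String, String>("kafka-ubas", "test-message-" + i));
        }
        producer.close();
    }
}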
2. Kafka client consumption test
/*
 * Kafka consumer test
 */
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class TestKafkaConsumer extends Thread {
    private ConsumerConnector consumer;
    private String topic;
    private final int SLEEP = 1000 * 3;

    public static void main(String[] args) {
        TestKafkaConsumer consumer = new TestKafkaConsumer(KafkaParam.TOPIC); // create a consumer with the connection settings
        consumer.start(); // runs the consumer's run() method
    }

    /*
     * Constructor: creates the consumer instance.
     * consumerConfig() supplies the cluster connection settings.
     */
    public TestKafkaConsumer(String topic) {
        consumer = Consumer.createJavaConsumerConnector(this.consumerConfig());
        this.topic = topic;
    }

    private ConsumerConfig consumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", KafkaParam.ZK_HOSTS); // constant taken from the configuration class below
        props.put("group.id", KafkaParam.GROUP_ID);
        props.put("zookeeper.session.timeout.ms", "40000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return new ConsumerConfig(props);
    }

    /*
     * Receive the Kafka message stream.
     */
    @Override
    public void run() {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(1));
        // create the stream map that holds the fetched messages
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            System.out.println("Received message -> " + new String(it.next().message()));
            try {
                sleep(SLEEP);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }
}
A configuration class that abstracts the Kafka connection settings:
public class KafkaConfigure {
    private static String zkHosts = "";
    static {
        String flag = SystemConfig.getProperty("dev.tag"); // load the environment flag from system-config.properties (shown below)
        zkHosts = SystemConfig.getProperty(flag + ".kafka.zk.host"); // ZooKeeper connection string for that environment
    }
    /* CLI flags of the Kafka topic administration tool */
    public interface KafkaConf {
        public static final String CREATE = "--create";
        public static final String DELETE = "--delete";
        public static final String LIST = "--list";
        public static final String PARTITIONS = "--partitions";
        public static final String REPLICATION = "--replication-factor";
        public static final String TOPIC = "--topic";
        public static final String ZK = "--zookeeper";
    }
    public interface KafkaParam {
        public static final String GROUP_ID = "kafkaUbasGroup";
        public static final String TOPIC = "kafka-ubas"; // same topic as custom.topic.name in the Flume sink config above
        public static String ZK_HOSTS = zkHosts;
    }
    public interface StormParam {
        public static final String PV = "pv";
        public static final String APP_ID = "appid";
    }
}
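The KafkaConf constants above mirror the flags of Kafka's topic administration tool. As an illustration only, here is a sketch that creates the kafka-ubas topic programmatically by delegating to kafka.admin.TopicCommand (available in Kafka 0.8.1+); the partition and replication-factor values are assumptions, not values given in this article, and KafkaConf/KafkaParam are referenced as in the rest of the code (e.g. via KafkaConfigure or static imports).

import kafka.admin.TopicCommand;

public class CreateTopic {
    public static void main(String[] args) {
        // equivalent to: kafka-topics.sh --create --zookeeper <zk> --topic kafka-ubas --partitions 3 --replication-factor 2
        String[] options = new String[] {
                KafkaConf.CREATE,
                KafkaConf.ZK, KafkaParam.ZK_HOSTS,
                KafkaConf.TOPIC, KafkaParam.TOPIC,
                KafkaConf.PARTITIONS, "3",   // assumed partition count
                KafkaConf.REPLICATION, "2"   // assumed replication factor
        };
        TopicCommand.main(options);
    }
}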
The system-config.properties file loaded by SystemConfig is as follows:
# environment flag read by KafkaConfigure; "test" or "pro" selects the matching *.kafka.zk.host below
dev.tag=test
# set hdfs input path
hdfs.input.path.ubas=/home/hdfs/kafka/in/%s
# set hdfs output path
hdfs.output.path.ubas=/home/hdfs/kafka/out/%s
# set test hdfs host
test.kafka.zk.host=192.168.55.15:2181,192.168.55.17:2181,192.168.55.18:2181
# set pro hdfs host
pro.kafka.zk.host=192.168.10.101:2181,192.168.10.103:2181,192.168.10.104:2181
# set redis host
real-time.redis.host=192.168.55.18,192.168.55.17,192.168.55.15
real-time.redis.port=6379
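The SystemConfig helper used in KafkaConfigure is not shown in the article. A minimal sketch, assuming it simply loads system-config.properties from the classpath (the actual project class may do more):

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class SystemConfig {
    private static final Properties PROPS = new Properties();

    static {
        // load system-config.properties from the classpath once
        InputStream in = SystemConfig.class.getClassLoader()
                .getResourceAsStream("system-config.properties");
        try {
            if (in != null) {
                PROPS.load(in);
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static String getProperty(String key) {
        return PROPS.getProperty(key);
    }
}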
III. Kafka Consumer
Data flows from the Kafka consumer into Storm for real-time processing.
For the core Storm processing classes, Spout and Bolt, see: the Storm cluster component programming model.
1. Spout
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaSpout implements IRichSpout {
    private static final long serialVersionUID = -7107773519958260350L;
    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaSpout.class);
    SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private String topic;

    // Kafka connection settings
    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", KafkaParam.ZK_HOSTS);
        props.put("group.id", KafkaParam.GROUP_ID);
        props.put("zookeeper.session.timeout.ms", "40000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return new ConsumerConfig(props);
    }

    // constructor
    public KafkaSpout(String topic) {
        this.topic = topic;
    }

    @SuppressWarnings("rawtypes")
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    public void activate() {
        try {
            this.consumer = Consumer.createJavaConsumerConnector(createConsumerConfig());
            Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
            topicCountMap.put(topic, new Integer(1));
            Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topicCountMap);
            KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0);
            ConsumerIterator<byte[], byte[]> it = stream.iterator();
            while (it.hasNext()) {
                String value = new String(it.next().message());
                LOGGER.info("[ Consumer ] Message is : " + value);
                collector.emit(new Values(value), value); // the message itself is used as the message id
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            LOGGER.error("Spout has error, msg is " + ex.getMessage());
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("KafkaSpout"));
    }

    // remaining IRichSpout methods are not used here
    public void nextTuple() { }
    public void deactivate() { }
    public void close() { }
    public void ack(Object msgId) { }
    public void fail(Object msgId) { }
    public Map<String, Object> getComponentConfiguration() { return null; }
}
2. Two bolts: one splits the data, the other computes and persists the statistics
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Bolt that splits the incoming log line into fields.
 */
public class MessageBlots implements IRichBolt {
    private static final long serialVersionUID = -2025360103997307370L;
    OutputCollector collector;

    @SuppressWarnings("rawtypes")
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        String[] line = input.getString(0).split(",");
        for (int i = 0; i < line.length; i++) {
            List<Tuple> anchors = new ArrayList<Tuple>();
            anchors.add(input); // anchor the emitted tuple to the input tuple
            switch (i) {
                case 0:
                case 3:
                case 4:
                case 6:
                    // only fields 0, 3, 4 and 6 are of interest downstream
                    this.collector.emit(anchors, new Values(line[i]));
                    break;
                default:
                    break;
            }
        }
        this.collector.ack(input);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("attribute"));
    }

    public void cleanup() { }

    public Map<String, Object> getComponentConfiguration() { return null; }
}
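The exact log format is not given in the article; assuming a comma-separated line whose fields 0, 3, 4 and 6 carry the client IP and the attributes of interest (such as an appid token), the selection logic of MessageBlots can be exercised in isolation as below. The class name and the sample record are purely illustrative.

public class MessageBlotsSplitDemo {
    public static void main(String[] args) {
        // hypothetical comma-separated record; only the field indexes matter, not the actual field names
        String record = "192.168.55.20,2015-08-01 10:00:00,GET,/index.html,appid=1001,200,Chrome";
        String[] line = record.split(",");
        for (int i = 0; i < line.length; i++) {
            if (i == 0 || i == 3 || i == 4 || i == 6) {
                // these are the fields the bolt would emit as the "attribute" field
                System.out.println("emit attribute -> " + line[i]);
            }
        }
    }
}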
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

import redis.clients.jedis.Jedis;

/**
 * Bolt that aggregates the statistics and persists them.
 * InetAddressUtils, JedisFactory and CalendarUtils are project helper classes (not shown here).
 */
public class StatsBlots implements IRichBolt {
    private static final long serialVersionUID = -619395076356762569L;
    private static Logger LOG = LoggerFactory.getLogger(StatsBlots.class);
    OutputCollector collector;
    Map<String, Integer> counter;

    @SuppressWarnings("rawtypes")
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counter = new HashMap<String, Integer>();
    }

    public void execute(Tuple input) {
        String key = input.getString(0);
        // other KPIs
        if (!InetAddressUtils.isIPv4(key) && !key.contains(StormParam.APP_ID)) {
            Integer integer = this.counter.get(key);
            if (integer != null) {
                this.counter.put(key, integer + 1);
            } else {
                this.counter.put(key, 1);
            }
        }
        // PV: every IPv4 address counts as one page view
        if (InetAddressUtils.isIPv4(key)) {
            Integer pvInt = this.counter.get(StormParam.PV);
            if (pvInt != null) {
                this.counter.put(StormParam.PV, pvInt + 1);
            } else {
                this.counter.put(StormParam.PV, 1);
            }
            System.out.println(key + ",pv=" + this.counter.get(StormParam.PV));
        }
        // AppId
        if (key.contains(StormParam.APP_ID)) {
            Integer appIdInt = this.counter.get(key);
            if (appIdInt != null) {
                this.counter.put(key, appIdInt + 1);
            } else {
                this.counter.put(key, 1);
            }
        }
        /*
         * Persist the counters.
         */
        try {
            Jedis jedis = JedisFactory.getJedisInstance("real-time");
            for (Entry<String, Integer> entry : this.counter.entrySet()) {
                LOG.info("Bolt stats kpi is [" + entry.getKey() + "|" + entry.getValue().toString() + "]");
                // write result to redis
                jedis.set(CalendarUtils.today() + "_" + entry.getKey(), entry.getValue().toString());
                // write result to mysql
                // ...
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            LOG.error("Jedis error, msg is " + ex.getMessage());
        }
        this.collector.ack(input);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: no output fields declared
    }

    public void cleanup() { }

    public Map<String, Object> getComponentConfiguration() { return null; }
}
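JedisFactory is another helper the article does not show. A minimal sketch, assuming it builds a Jedis connection from the real-time.redis.host / real-time.redis.port entries in system-config.properties; the actual project implementation may pool connections or shard across the listed hosts.

import redis.clients.jedis.Jedis;

public class JedisFactory {
    public static Jedis getJedisInstance(String prefix) {
        // e.g. prefix = "real-time" -> real-time.redis.host / real-time.redis.port
        String hosts = SystemConfig.getProperty(prefix + ".redis.host");
        int port = Integer.parseInt(SystemConfig.getProperty(prefix + ".redis.port"));
        // connect to the first host in the list; a real implementation might pool or shard
        String host = hosts.split(",")[0];
        return new Jedis(host, port);
    }
}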
3. Submitting the Storm topology
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class KafkaTopology {
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafkaUbasGroup", new KafkaSpout("kafka-ubas"));
        builder.setBolt("messageBlots", new MessageBlots()).shuffleGrouping("kafkaUbasGroup");
        builder.setBolt("kpiCounter", new StatsBlots(), 2).fieldsGrouping("messageBlots", new Fields("attribute"));
        Config config = new Config();
        // config.setDebug(true);
        // storm.messaging.netty.max_retries
        // storm.messaging.netty.max_wait_ms
        if (args != null && args.length > 0) {
            // submit the topology to the cluster
            config.put(Config.NIMBUS_HOST, args[0]);
            config.setNumWorkers(3);
            try {
                StormSubmitter.submitTopologyWithProgressBar(KafkaTopology.class.getSimpleName(), config,
                        builder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            // run the topology in local mode
            LocalCluster local = new LocalCluster();
            local.submitTopology("stats", config, builder.createTopology());
            // try {
            //     Thread.sleep(50);
            // } catch (InterruptedException e) {
            //     e.printStackTrace();
            // }
            // local.shutdown();
        }
    }
}
IV. Persisting the Storm results to the DB