文章出处:http://blog.csdn.net/lili72/article/details/42246671
1 应用背景: 需要实时统计用户的登陆数,在线人数,活跃时间,下载等指标的数据,或者清洗后移到hdfs上。
2 设计架构:
1) 客户端产生数据---
2) kafka-生产者实时采集数据(保留7天)-----
3) storm实时消费数据,处理数据
4)把实时数据统计结果缓存到memcached 中
5) 把数据保存到mysql
3 组件之间的通信.
3.1 客户端发送数据---Nginx接收 分布式放在多台服务器上。
3.2 (flume读取并收集文件信息传给kafka),或由kafka生产者直接收集文件信息。
3.3 kafka与storm 通过插件storm-kafka 通信
3.4 storm 与缓存 memcached java程序 读取mysql的结果缓存到 memcached
3.5 zookeeper 用工具 curator-client,锁同步机制。
(对应的插件可以在github上找到 https://github.com/)
4 场景再现:即席查询用户注册数、用户登录数、当前在线人数
4.1 Storm 处理:
4.1.1 数据清理阶段:
Storm从kafka得到对应的topic数据,然后对数据进行清洗。Storm获取实时JSON数据,然后通过解析JSON数据,格式化之后利用storm-hdfs把数据传到HDFS上。或者实时统计数据存放到关系型数据库中。
package com.ks.topology;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import com.google.common.collect.ImmutableList;
import com.ks.bolt.ConJsonToData;
import com.ks.bolt.CounterBolt;
/**
 * Topology entry point: wires a Kafka spout (topic "userlogin") to a counting
 * bolt and a JSON-parsing bolt. With a command-line argument the topology is
 * submitted to a cluster under that name; without one it runs in a local
 * in-process cluster for a fixed period and then shuts down.
 */
public class CountUserLogin {

    /**
     * @param args optional; args[0] is the topology name for cluster submission
     */
    public static void main(String[] args) {
        try {
            // Zookeeper ensemble used both by the Kafka spout and for offset storage.
            final String zkConnect =
                    "192.168.119.131:2181,192.168.119.132:2181,192.168.119.133:2181";
            BrokerHosts hosts = new ZkHosts(zkConnect);

            // Read topic "userlogin"; offsets are kept under zk root "/userlogin"
            // with consumer id "id". Tuples are decoded as plain strings.
            SpoutConfig spoutConf = new SpoutConfig(hosts, "userlogin", "/userlogin", "id");
            spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
            spoutConf.zkServers =
                    ImmutableList.of("192.168.119.131", "192.168.119.132", "192.168.119.133");
            spoutConf.zkPort = 2181;
            //spoutConf.forceFromStart = true;

            // spout(x2) -> counter(x1) -> ConJsonToData(x1), shuffle-grouped.
            TopologyBuilder topology = new TopologyBuilder();
            topology.setSpout("spout", new KafkaSpout(spoutConf), 2);
            topology.setBolt("counter", new CounterBolt(), 1).shuffleGrouping("spout");
            topology.setBolt("ConJsonToData", new ConJsonToData(), 1).shuffleGrouping("counter");

            Config conf = new Config();
            conf.setDebug(true);

            if (args != null && args.length > 0) {
                // Cluster mode: args[0] names the topology.
                conf.setNumWorkers(2);
                StormSubmitter.submitTopology(args[0], conf, topology.createTopology());
            } else {
                // Local mode: run in-process, sleep, then tear down.
                conf.setMaxTaskParallelism(3);
                LocalCluster local = new LocalCluster();
                local.submitTopology("CountUserLogin-topology", conf, topology.createTopology());
                Thread.sleep(500000);
                local.shutdown();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package com.ks.bolt;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Pass-through bolt that logs every incoming message with a running count and
 * forwards the raw message string downstream under the field "userloginStr".
 */
public class CounterBolt extends BaseBasicBolt {

    private static final long serialVersionUID = -5508421065181891596L;

    // Running tuple count. NOTE(review): static, so it is shared by all
    // CounterBolt executors in the same worker JVM and ++ is not atomic —
    // acceptable only because it is used for debug logging.
    private static long counter = 0;

    /**
     * Logs the message and its sequence number, then emits the message string.
     */
    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String msg = tuple.getString(0);
        System.out.println("msg = " + msg + " -------------counter = " + (counter++));
        // BUG FIX: the original emitted the Tuple object itself
        // (new Values(tuple)), so the declared field "userloginStr" carried a
        // Tuple and the downstream bolt's tuple.getString(0) would fail.
        // Emit the message string instead.
        collector.emit(new Values(msg));
    }

    /** Declares the single string field consumed by ConJsonToData. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("userloginStr"));
    }
}
package com.ks.bolt;
import java.io.IOException;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;
import com.ks.model.UserModel;
/**
 * Terminal bolt: parses the incoming JSON string into a {@link UserModel} and
 * logs the result. Emits nothing downstream.
 */
public class ConJsonToData extends BaseBasicBolt {

    // Jackson ObjectMapper is thread-safe and expensive to build; share one.
    private static final ObjectMapper mapper = new ObjectMapper();
    private static final long serialVersionUID = 5596476183440049414L;

    /**
     * Deserializes tuple field 0 as a UserModel and prints it. Malformed or
     * unmappable records are logged and dropped rather than failing the tuple.
     */
    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String str = tuple.getString(0);
        System.out.println("str------------" + str + " str------------");
        if (str != null) {
            try {
                UserModel bean = mapper.readValue(str, UserModel.class);
                System.out.println(bean.toString());
            } catch (IOException e) {
                // IDIOM FIX: JsonParseException and JsonMappingException are
                // subclasses of IOException, so one catch replaces the original
                // three identical handlers.
                e.printStackTrace();
            }
        }
    }

    /** No output fields: this bolt is a sink. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
model
package com.ks.model;
/**
 * Bean for one user-login event record, as mapped from JSON like:
 * {"serverid":"1001","time":"2014-12-11 00:00:51","userid":12345678,"appid":8888,"client_ip":"192.136.20.210"}
 * All fields default to empty/zero rather than null.
 */
public class UserLog {

    private String serverid = "";
    private String time = "";
    private String userid = "";
    private Integer appid = 0;
    private String client_ip = "";

    /** Default constructor (used by the JSON mapping layer). */
    public UserLog() {
    }

    /** Full-field constructor. */
    public UserLog(String serverid, String time, String userid, Integer appid,
            String client_ip) {
        this.serverid = serverid;
        this.time = time;
        this.userid = userid;
        this.appid = appid;
        this.client_ip = client_ip;
    }

    public String getServerid() {
        return serverid;
    }

    public void setServerid(String serverid) {
        this.serverid = serverid;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public Integer getAppid() {
        return appid;
    }

    public void setAppid(Integer appid) {
        this.appid = appid;
    }

    public String getClient_ip() {
        return client_ip;
    }

    public void setClient_ip(String client_ip) {
        this.client_ip = client_ip;
    }

    /** Pipe-delimited rendering: serverid|userid|appid|time|client_ip. */
    @Override
    public String toString() {
        return String.join("|", serverid, userid, String.valueOf(appid), time, client_ip);
    }
}
package com.ks.model;
/**
 * Envelope for a user event: the parsed {@link UserLog} payload plus a type
 * discriminator string.
 */
public class UserModel {

    // Payload from the JSON "data" field; null when the field is absent.
    private UserLog data;
    // Event type; defaults to empty rather than null.
    private String type = "";

    public UserModel() {
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public UserLog getData() {
        return data;
    }

    public void setData(UserLog data) {
        this.data = data;
    }

    /**
     * Pipe-delimited rendering: "&lt;data&gt;|&lt;type&gt;".
     * BUG FIX: the original dereferenced data unconditionally and threw
     * NullPointerException for JSON lacking a "data" field (ConJsonToData
     * calls toString() right after deserialization).
     */
    @Override
    public String toString() {
        return (data == null ? "null" : data.toString()) + "|" + type;
    }
}