1 Background: We need to compute metrics such as user login counts, online user counts, active time, and downloads in real time, or clean the raw data and move it to HDFS.
2 Architecture:
1) Clients generate data.
2) Kafka producers collect the data in real time (Kafka retains it for 7 days); a minimal producer sketch follows this list.
3) Storm consumes and processes the data in real time.
4) Real-time aggregation results are cached in Memcached.
5) Results are persisted to MySQL.
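The original does not show the producer side of step 2); below is a minimal sketch assuming the Kafka 0.8 producer API (the one that matches the storm-kafka version used later). The class name, broker list, and JSON payload shape are illustrative assumptions.

package com.ks.producer;

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Hypothetical producer for step 2): sends one login event as a JSON
// string to the "userlogin" topic that the Storm topology consumes.
public class UserLoginProducer {

    public static void main(String[] args) {
        Properties props = new Properties();
        // broker list and ports are assumptions; adjust to the actual cluster
        props.put("metadata.broker.list",
                "192.168.119.131:9092,192.168.119.132:9092,192.168.119.133:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        Producer<String, String> producer =
                new Producer<String, String>(new ProducerConfig(props));
        // payload shape mirrors the UserModel/UserLog classes shown later
        String json = "{\"type\":\"login\",\"data\":{\"serverid\":\"1001\","
                + "\"time\":\"2014-12-11 00:00:51\",\"userid\":\"12345678\","
                + "\"appid\":8888,\"client_ip\":\"192.136.20.210\"}}";
        producer.send(new KeyedMessage<String, String>("userlogin", json));
        producer.close();
    }
}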
3 Communication between components
3.1 Clients send data to Nginx, which is deployed across multiple servers.
3.2 Flume reads the collected log files and forwards them to Kafka (alternatively, a Kafka producer collects the file data directly).
3.3 Kafka and Storm communicate through the storm-kafka plugin.
3.4 Storm and the cache: a Java program reads the aggregated results from MySQL and caches them in Memcached, as in the sketch below.
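The original does not name a Memcached client; assuming spymemcached and MySQL over JDBC, the bridge program for 3.4 could look like this sketch (the table, column, and key names are assumptions):

package com.ks.cache;

import java.net.InetSocketAddress;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

import net.spy.memcached.MemcachedClient;

// Hypothetical bridge for 3.4: reads an aggregate from MySQL and
// caches it in Memcached. Table, column, and key names are assumptions.
public class StatCacheLoader {

    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        MemcachedClient mc =
                new MemcachedClient(new InetSocketAddress("192.168.119.131", 11211));
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.119.131:3306/stats", "user", "password");
        try {
            Statement st = conn.createStatement();
            // assumed table: user_login_stat(stat_date, login_count)
            ResultSet rs = st.executeQuery(
                    "SELECT login_count FROM user_login_stat WHERE stat_date = CURDATE()");
            if (rs.next()) {
                // cache today's login count for 60 seconds
                mc.set("login_count_today", 60, rs.getString(1));
            }
        } finally {
            conn.close();
            mc.shutdown();
        }
    }
}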
3.5 ZooKeeper is accessed with the curator-client toolkit, using its lock-based synchronization mechanism; a lock sketch follows the note below.
(The corresponding plugins can be found on GitHub: https://github.com/)
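Note that the lock recipes actually ship in the curator-recipes artifact (curator-client is the lower-level module it builds on). A minimal distributed-lock sketch for 3.5, with the lock path and class name assumed:

package com.ks.sync;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.locks.InterProcessMutex;
import org.apache.curator.retry.ExponentialBackoffRetry;

// Hypothetical sketch for 3.5: a distributed lock so that only one
// worker at a time flushes a counter. The lock path is an assumption.
public class CounterLock {

    public static void main(String[] args) throws Exception {
        CuratorFramework client = CuratorFrameworkFactory.newClient(
                "192.168.119.131:2181,192.168.119.132:2181,192.168.119.133:2181",
                new ExponentialBackoffRetry(1000, 3));
        client.start();

        InterProcessMutex lock = new InterProcessMutex(client, "/locks/userlogin-counter");
        lock.acquire();
        try {
            // critical section: e.g. write the counter to MySQL exactly once
            System.out.println("lock held, flushing counter");
        } finally {
            lock.release();
            client.close();
        }
    }
}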
4 Scenario walkthrough: ad-hoc queries for the number of registered users, the number of logins, and the current number of online users.
4.1 Storm processing:
4.1.1 Data cleaning stage:
Storm reads the data for the corresponding topic from Kafka and cleans it: it receives the real-time JSON events, parses and formats them, and then either writes them to HDFS via storm-hdfs or stores the real-time aggregates in a relational database.
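For the HDFS branch, storm-hdfs provides an HdfsBolt that is wired into the topology like any other bolt. A configuration sketch following the storm-hdfs README (the NameNode URL, output path, and class name HdfsSink are assumptions):

package com.ks.topology;

import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;

public class HdfsSink {

    // Builds an HdfsBolt that writes "|"-delimited records, syncs to the
    // NameNode every 1000 tuples, and rotates files at 5 MB.
    public static HdfsBolt build() {
        RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
        FileNameFormat fileNameFormat =
                new DefaultFileNameFormat().withPath("/user/storm/userlogin/");
        return new HdfsBolt()
                .withFsUrl("hdfs://192.168.119.131:8020")   // assumed NameNode
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);
    }
}

The counting topology and the bolts it uses follow.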
package com.ks.topology;

import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import com.google.common.collect.ImmutableList;
import com.ks.bolt.ConJsonToData;
import com.ks.bolt.CounterBolt;

/**
 * Topology that counts user logins from the "userlogin" Kafka topic.
 */
public class CountUserLogin {

    public static void main(String[] args) {
        try {
            // ZooKeeper quorum that the Kafka brokers register with
            String kafkaZookeeper = "192.168.119.131:2181,192.168.119.132:2181,192.168.119.133:2181";
            BrokerHosts brokerHosts = new ZkHosts(kafkaZookeeper);
            // topic, ZooKeeper root for offset storage, consumer id
            SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "userlogin", "/userlogin", "id");
            kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
            kafkaConfig.zkServers = ImmutableList.of("192.168.119.131", "192.168.119.132", "192.168.119.133");
            kafkaConfig.zkPort = 2181;
            //kafkaConfig.forceFromStart = true;

            // spout -> counter -> JSON-to-bean converter
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout("spout", new KafkaSpout(kafkaConfig), 2);
            builder.setBolt("counter", new CounterBolt(), 1).shuffleGrouping("spout");
            builder.setBolt("ConJsonToData", new ConJsonToData(), 1).shuffleGrouping("counter");

            Config config = new Config();
            config.setDebug(true);

            if (args != null && args.length > 0) {
                // submit to a cluster; the topology name comes from the command line
                config.setNumWorkers(2);
                StormSubmitter.submitTopology(args[0], config, builder.createTopology());
            } else {
                // run in a local cluster for testing
                config.setMaxTaskParallelism(3);
                LocalCluster cluster = new LocalCluster();
                cluster.submitTopology("CountUserLogin-topology", config, builder.createTopology());
                Thread.sleep(500000);
                cluster.shutdown();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
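Submitted to a cluster with, for example, storm jar ks-analytics.jar com.ks.topology.CountUserLogin CountUserLogin (the jar name is an assumption); run with no arguments, it starts a LocalCluster, executes for roughly 500 seconds, and shuts down.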
package com.ks.bolt;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Counts incoming messages and forwards the raw JSON string downstream.
 */
public class CounterBolt extends BaseBasicBolt {

    private static final long serialVersionUID = -5508421065181891596L;
    private static long counter = 0;

    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        System.out.println("msg = " + tuple.getString(0) + " -------------counter = " + (counter++));
        // emit the JSON string itself, not the Tuple object, so the next
        // bolt's tuple.getString(0) actually receives a String
        collector.emit(new Values(tuple.getString(0)));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("userloginStr"));
    }
}
package com.ks.bolt;

import java.io.IOException;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;
import com.ks.model.UserModel;

/**
 * Deserializes each JSON string into a UserModel bean.
 */
public class ConJsonToData extends BaseBasicBolt {

    private static final ObjectMapper mapper = new ObjectMapper();
    private static final long serialVersionUID = 5596476183440049414L;

    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        String str = tuple.getString(0);
        System.out.println("str------------" + str + " str------------");
        UserModel bean = null;
        if (str != null) {
            try {
                bean = mapper.readValue(str, UserModel.class);
                System.out.println(bean.toString());
            } catch (JsonParseException e) {
                e.printStackTrace();
            } catch (JsonMappingException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: declares no output fields
    }
}

The model classes:
package com.ks.model;

/**
 * One raw log record, e.g.
 * {"serverid":"1001","time":"2014-12-11 00:00:51","userid":12345678,"appid":8888,"client_ip":"192.136.20.210"}
 */
public class UserLog {

    private String serverid = "";
    private String time = "";
    private String userid = "";
    private Integer appid = 0;
    private String client_ip = "";

    public UserLog() {
    }

    public UserLog(String serverid, String time, String userid, Integer appid, String client_ip) {
        this.serverid = serverid;
        this.time = time;
        this.userid = userid;
        this.appid = appid;
        this.client_ip = client_ip;
    }

    public String getServerid() { return serverid; }
    public void setServerid(String serverid) { this.serverid = serverid; }
    public String getTime() { return time; }
    public void setTime(String time) { this.time = time; }
    public String getUserid() { return userid; }
    public void setUserid(String userid) { this.userid = userid; }
    public Integer getAppid() { return appid; }
    public void setAppid(Integer appid) { this.appid = appid; }
    public String getClient_ip() { return client_ip; }
    public void setClient_ip(String client_ip) { this.client_ip = client_ip; }

    @Override
    public String toString() {
        return serverid + "|" + userid + "|" + appid + "|" + time + "|" + client_ip;
    }
}
package com.ks.model;

/**
 * Wrapper that pairs a UserLog record with its event type.
 */
public class UserModel {

    private UserLog data;
    private String type = "";

    public UserModel() {
    }

    public String getType() { return type; }
    public void setType(String type) { this.type = type; }
    public UserLog getData() { return data; }
    public void setData(UserLog data) { this.data = data; }

    @Override
    public String toString() {
        return data.toString() + "|" + type;
    }
}
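As a quick local sanity check (not part of the topology), the same ObjectMapper call that ConJsonToData makes can be run against a hand-written event. The top-level {"type": ..., "data": ...} shape is an assumption inferred from the UserModel class; the class name UserModelTest is hypothetical.

package com.ks.model;

import org.codehaus.jackson.map.ObjectMapper;

// Parses a hand-written event with the same ObjectMapper call that
// ConJsonToData makes; the wrapper shape is an assumption.
public class UserModelTest {

    public static void main(String[] args) throws Exception {
        String json = "{\"type\":\"login\",\"data\":{\"serverid\":\"1001\","
                + "\"time\":\"2014-12-11 00:00:51\",\"userid\":\"12345678\","
                + "\"appid\":8888,\"client_ip\":\"192.136.20.210\"}}";
        UserModel bean = new ObjectMapper().readValue(json, UserModel.class);
        // prints: 1001|12345678|8888|2014-12-11 00:00:51|192.136.20.210|login
        System.out.println(bean);
    }
}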