storm准实时应用

文章出处:http://blog.csdn.net/lili72/article/details/42246671


应用背景: 需要实时统计用户的登陆数,在线人数,活跃时间,下载等指标的数据,或者清洗后移到hdfs上。

 

设计架构:

        1) 客户端产生数据---

        2) kafka-生产者实时采集数据(保留7天)-----

        3) storm实时消费数据,处理数据

        4)把实时数据统计结果缓存到memcached 

        5) 把数据保存到mysql

 

组件之间的通信.

3.1  客户端发送数据---Nginx接收 分布式放在多台服务器上。 

3.2  flume读取并收集文件信息传给kafka,或由kafka生产者直接收集文件信息。 

3.3  kafka与storm 通过插件storm-kafka 通信

3.4  storm 与缓存 memcached   java程序 读取mysql的结果缓存到 memcached

3.5   zookeeper 用工具 curator-client,锁同步机制。

        (对应的插件可以在github上找到:https://github.com/)

4  场景再现:即席查询用户注册数,用户登录数,当前在线人数

   4.1   Storm 处理:

          4.1.1 数据清理阶段:

          Storm从kafka得到对应的topic数据,然后对数据进行清洗。Storm获取实时JSON数据,然后通过解析JSON数据,格式化之后利用storm-hdfs把数据传到HDFS上。或者实时统计数据存放到关系型数据库中。

      

     

package com.ks.topology;

import java.util.List;

import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;

import com.google.common.collect.ImmutableList;
import com.ks.bolt.ConJsonToData;
import com.ks.bolt.CounterBolt;

/**
 * @author root
 *
 */
/**
 * Entry point that wires and submits the user-login counting topology:
 * KafkaSpout ("userlogin" topic) -> CounterBolt -> ConJsonToData.
 *
 * <p>Run with a topology name as {@code args[0]} to submit to a real Storm
 * cluster; run with no arguments to execute in a LocalCluster for ~500 s
 * and then shut down.
 */
public class CountUserLogin {

	/** Zookeeper ensemble used both for broker discovery and offset storage. */
	private static final List<String> ZK_SERVERS =
			ImmutableList.of("192.168.119.131", "192.168.119.132", "192.168.119.133");

	private static final int ZK_PORT = 2181;

	/**
	 * @param args optional; args[0] is the topology name for cluster mode
	 */
	public static void main(String[] args) {
		try {
			// Derive "host:port,host:port,..." from the single server list so the
			// connect string and kafkaConfig.zkServers can never drift apart
			// (the original hard-coded both independently).
			StringBuilder zkConnect = new StringBuilder();
			for (String server : ZK_SERVERS) {
				if (zkConnect.length() > 0) {
					zkConnect.append(',');
				}
				zkConnect.append(server).append(':').append(ZK_PORT);
			}
			BrokerHosts brokerHosts = new ZkHosts(zkConnect.toString());

			// topic "userlogin", zk root "/userlogin", consumer id "id"
			SpoutConfig kafkaConfig = new SpoutConfig(brokerHosts, "userlogin", "/userlogin", "id");
			kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
			kafkaConfig.zkServers = ZK_SERVERS;
			kafkaConfig.zkPort = ZK_PORT;

			//kafkaConfig.forceFromStart = true;

			TopologyBuilder builder = new TopologyBuilder();
			builder.setSpout("spout", new KafkaSpout(kafkaConfig), 2);
			builder.setBolt("counter", new CounterBolt(), 1).shuffleGrouping("spout");
			builder.setBolt("ConJsonToData", new ConJsonToData(), 1).shuffleGrouping("counter");

			Config config = new Config();
			config.setDebug(true);

			if (args != null && args.length > 0) {
				// Cluster mode: args[0] names the topology.
				config.setNumWorkers(2);
				StormSubmitter.submitTopology(args[0], config, builder.createTopology());
			} else {
				// Local test mode: run briefly, then tear down.
				config.setMaxTaskParallelism(3);

				LocalCluster cluster = new LocalCluster();
				cluster.submitTopology("CountUserLogin-topology", config, builder.createTopology());

				Thread.sleep(500000);

				cluster.shutdown();
			}
		} catch (Exception e) {
			// Top-level boundary of main: log and fall through to exit.
			e.printStackTrace();
		}
	}

}

package com.ks.bolt;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Pass-through bolt that logs every incoming message with a running count
 * and forwards the raw message string downstream on field "userloginStr".
 */
public class CounterBolt extends BaseBasicBolt {

	private static final long serialVersionUID = -5508421065181891596L;

	// NOTE(review): static and non-atomic — shared by all executors in this
	// JVM without synchronization, so with parallelism > 1 the printed count
	// is only approximate. Acceptable for debug logging only.
	private static long counter = 0;

	@Override
	public void execute(Tuple tuple, BasicOutputCollector collector) {
		String msg = tuple.getString(0);
		System.out.println("msg = "+msg+" -------------counter = "+(counter++));
		// Bug fix: the original emitted new Values(tuple), wrapping the whole
		// Tuple object as the payload; the downstream ConJsonToData bolt calls
		// tuple.getString(0) and would fail on a Tuple element. Emit the
		// message string itself.
		collector.emit(new Values(msg));
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("userloginStr"));
	}

}

package com.ks.bolt;

import java.io.IOException;

import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

import com.ks.model.UserModel;

/**
 * Terminal bolt: deserializes each incoming JSON string into a
 * {@link UserModel} and prints it. Emits nothing, so no output fields
 * are declared.
 */
public class ConJsonToData extends BaseBasicBolt {

	private static final long serialVersionUID = 5596476183440049414L;

	// ObjectMapper is thread-safe and expensive to build: share one instance.
	private static final ObjectMapper mapper = new ObjectMapper();

	@Override
	public void execute(Tuple tuple, BasicOutputCollector collector) {
		String str = tuple.getString(0);
		System.out.println("str------------"  +str+"  str------------");
		if (str != null) {
			try {
				UserModel bean = mapper.readValue(str, UserModel.class);
				System.out.println(bean.toString());
			} catch (IOException e) {
				// JsonParseException and JsonMappingException both extend
				// IOException, so this single catch replaces the original
				// three identical handlers. Malformed records are logged
				// and skipped (best-effort), not retried.
				e.printStackTrace();
			}
		}
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer arg0) {
		// Intentionally empty: this bolt has no downstream consumers.
	}

}
以下为 model 类定义:

package com.ks.model;

/**
 * Bean for one user-login log record, bound from JSON by Jackson in
 * ConJsonToData. Example payload:
 * {"serverid":"1001","time":"2014-12-11 00:00:51","userid":12345678,"appid":8888,"client_ip":"192.136.20.210"}
 */
public class UserLog {

	private String serverid = "";

	private String time = "";

	private String userid = "";

	private Integer appid = 0;

	private String client_ip = "";

	/** No-arg constructor required for Jackson data binding. */
	public UserLog() {
	}

	/** Convenience constructor populating every field. */
	public UserLog(String serverid, String time, String userid, Integer appid,
			String client_ip) {
		this.serverid = serverid;
		this.time = time;
		this.userid = userid;
		this.appid = appid;
		this.client_ip = client_ip;
	}

	public String getServerid() {
		return serverid;
	}

	public void setServerid(String serverid) {
		this.serverid = serverid;
	}

	public String getTime() {
		return time;
	}

	public void setTime(String time) {
		this.time = time;
	}

	public String getUserid() {
		return userid;
	}

	public void setUserid(String userid) {
		this.userid = userid;
	}

	public Integer getAppid() {
		return appid;
	}

	public void setAppid(Integer appid) {
		this.appid = appid;
	}

	public String getClient_ip() {
		return client_ip;
	}

	public void setClient_ip(String client_ip) {
		this.client_ip = client_ip;
	}

	/** Pipe-delimited rendering: serverid|userid|appid|time|client_ip. */
	@Override
	public String toString() {
		return String.join("|", serverid, userid, String.valueOf(appid), time, client_ip);
	}

}

package com.ks.model;

/**
 * Top-level JSON envelope bound by Jackson in ConJsonToData:
 * a {@link UserLog} payload plus an event type string.
 */
public class UserModel {

	// Parsed payload; remains null when the incoming JSON has no "data" field.
	private UserLog data;

	private String type = "";

	/** No-arg constructor required for Jackson data binding. */
	public UserModel() {
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}

	public UserLog getData() {
		return data;
	}

	public void setData(UserLog data) {
		this.data = data;
	}

	/** Pipe-delimited rendering: data|type. */
	@Override
	public String toString() {
		// Bug fix: data defaults to null (e.g. JSON without a "data" field),
		// and the original data.toString() threw NullPointerException here.
		return String.valueOf(data) + "|" + type;
	}

}



你可能感兴趣的:(学习笔记,工作总结,maven,hadoop)