storm 实时

首先创建一个Topology主类,

然后spout是从kafka接收数据流的 KafkaSpout,

第一个bolt会从前面的spout接收数据,做一些初步的处理,传输给下一个bolt

不适合重量级的计算。

实时UI无法准确地查看数据的执行情况,准确的性能调优存在一定困难。

所以在这里创建了一个抽象类继承BaseBasicBolt,然后其他的bolt会继承创建的这个抽象类。

可以在bolt执行前和执行后记录时间。

并记录每个数据的执行流程和各个环节bolt的执行状态和耗时。

import java.util.ArrayList;
import java.util.List;

import org.apache.zookeeper.ZooKeeper;

import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

/**
 * Entry point that wires the Kafka spout to the tracking bolts and launches the topology.
 *
 * <p>Usage: {@code BasicTopology <zkHosts> <nimbusHost> [extra...]} where {@code zkHosts} is a
 * comma-separated {@code host:port} list. With exactly two arguments the topology is submitted
 * to the cluster through {@code StormSubmitter}; with more than two it runs for ten seconds in
 * an in-process {@code LocalCluster}.
 */
public class BasicTopology
{

	/**
	 * Builds and launches the topology.
	 *
	 * @param args {@code args[0]} = ZooKeeper hosts, {@code args[1]} = nimbus host; any further
	 *             argument switches to local mode
	 * @throws IllegalArgumentException if fewer than two arguments are given
	 * @throws Exception if building or submitting the topology fails, so the launcher exits
	 *                   with a non-zero status instead of silently printing a stack trace
	 */
	public static void main(String[] args)
			throws Exception
	{
		if (args == null || args.length < 2) {
			throw new IllegalArgumentException(
					"************** Topology needs at least two args: <zkHosts> <nimbusHost>");
		}

		String zkhosts = args[0];
		String nimbusHost = args[1];
		String name = "user_profile_full_log_test";

		TopologyBuilder builder = new TopologyBuilder();

		ZkHosts zkhost = new ZkHosts(zkhosts);
		String topic = "tracker";
		String spoutId = "kafkaSpout";
		SpoutConfig spoutConfig = new SpoutConfig(zkhost, topic, "", spoutId);

		// Only the host part is kept; the port is fixed to 2181 below.
		List<String> zkServers = new ArrayList<String>();
		if (zkhosts != null && !zkhosts.isEmpty()) {
			for (String hostAndPort : zkhosts.split(",")) {
				String[] parts = hostAndPort.trim().split(":");
				// Skip blank or malformed entries (e.g. ":") instead of failing on parts[0].
				if (parts.length > 0 && !parts[0].isEmpty()) {
					zkServers.add(parts[0]);
				}
			}
		}
		spoutConfig.zkServers = zkServers;
		spoutConfig.zkPort = Integer.valueOf(2181);
		spoutConfig.socketTimeoutMs = 60 * 1000;
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		String zkRoot = "/consumers/" + name + "0";
		spoutConfig.zkRoot = zkRoot;

		// Best effort: remove the spout's ZooKeeper root node before starting. A failure here
		// (for example the node does not exist) is reported and otherwise ignored.
		ZooKeeper zk = null;
		try {
			zk = new ZKUtils().createZKInstance(zkhosts);
			zk.delete(zkRoot, -1);
		} catch (Exception e) {
			System.out.println("e:" + e.getMessage());
		} finally {
			// The handle is only needed for the delete above; close it so the session
			// is not left open for the lifetime of the launcher.
			if (zk != null) {
				try {
					zk.close();
				} catch (InterruptedException e) {
					Thread.currentThread().interrupt();
				}
			}
		}

		builder.setSpout("kafka_reader", new KafkaSpout(spoutConfig), 1);
		builder.setBolt("get_usertrack", new GetTrackInfoBolt(), 2).shuffleGrouping("kafka_reader");
		builder.setBolt("save_userstat", new SaveUserStatBolt(), 2).shuffleGrouping("get_usertrack");

		Config conf = new Config();
		if (args.length == 2) {
			// Cluster mode.
			conf.put("topology.max.spout.pending", Integer.valueOf(1024));
			conf.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 60000);
			conf.put(Config.STORM_ZOOKEEPER_RETRY_TIMES, 10);
			conf.put(Config.STORM_ZOOKEEPER_RETRY_INTERVAL, 1000);
			conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 0);
			conf.setNumWorkers(12);
			conf.setMaxTaskParallelism(100);

			conf.put("nimbus.host", nimbusHost);
			conf.put("nimbus.thrift.port", Integer.valueOf(6627));
			conf.put("storm.zookeeper.servers", zkServers);
			conf.setMessageTimeoutSecs(300);

			StormSubmitter.submitTopology(name, conf, builder.createTopology());
		} else {
			// Local mode: run briefly in-process, then tear the cluster down.
			System.out.println("local:");
			UserProfileModel model = new UserProfileModel();
			String modelString = JSON.toJSONString(model);
			conf.put(UserProfileConstants.USER_PROFILE_MODEL, modelString);

			conf.setMaxTaskParallelism(1);
			LocalCluster cluster = new LocalCluster();
			try {
				cluster.submitTopology("local_user_profile", conf, builder.createTopology());
				Thread.sleep(10000);
			} finally {
				// Shut down even when submit or sleep fails, so no worker threads linger.
				cluster.shutdown();
			}
		}
	}
}

 

上面是topology主类,主要是实现bolt流程的衔接,本地运行和线上环境的切换。

下面是创建的基础拦截继承类

public abstract class MonitorBaseBolt extends BaseBasicBolt {

	private static final long serialVersionUID = 1L;
	final public static String SPLIT_TAG = new String(new byte[] { 1 });
	private static Logger log = Logger.getLogger(MonitorBaseBolt.class);

	public void execute(Tuple input, BasicOutputCollector collector) {
		MessageId mi = input.getMessageId();
		Map<Long, Long> map = mi.getAnchorsToIds();
		String uid = UUID.randomUUID().toString();
		String key = getRootId(map);
		MonitorLogExecutor.getInstance().put("s" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime());
		preExecute(input, collector);
		MonitorLogExecutor.getInstance().put("e" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime());
	}
	
	public String getRootId(Map<Long, Long> map) {
		if(map == null || map.keySet() == null || map.keySet().isEmpty()) {
			return "";
		}
		String result = "";
		for(Long root : map.keySet()) {
			result += "k" + root;
		}
		return result;
	}

	public void preExecute(Tuple input, BasicOutputCollector collector) {

	}

 

还有执行日志存储

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.hadoop.hbase.client.Put;

import com.yhd.common.hbase.UserProfileDBHelper;


/**
 * Process-wide singleton that buffers monitoring records in a bounded queue and writes them
 * to the {@code real_log} table from a background thread.
 *
 * <p>Records are strings of five fields separated by {@code MonitorBaseBolt.SPLIT_TAG}:
 * marker, key, uid, class name, timestamp.
 */
public class MonitorLogExecutor {
	/** Number of separator-delimited fields a well-formed record has. */
	private static final int FIELD_COUNT = 5;

	BlockingQueue<String> queue = new LinkedBlockingQueue<String>(1000);
	public static MonitorLogExecutor executor;

	/**
	 * Returns the singleton, creating it and starting its writer thread on first use.
	 *
	 * <p>The drain loop runs on a daemon thread. Running it inline would never return and
	 * would keep this method's class lock forever, blocking every caller.
	 *
	 * @return the shared executor
	 */
	public static synchronized MonitorLogExecutor getInstance() {
		if(executor == null) {
			final MonitorLogExecutor created = new MonitorLogExecutor();
			Thread writer = new Thread(new Runnable() {
				public void run() {
					created.execute();
				}
			}, "monitor-log-writer");
			// Daemon so the writer never keeps the JVM alive on shutdown.
			writer.setDaemon(true);
			writer.start();
			executor = created;
		}
		return executor;
	}

	/**
	 * Drains the queue and stores each record; blocks while the queue is empty.
	 * Returns only when the calling thread is interrupted. A failure on one record is
	 * reported and does not stop the loop.
	 */
	public void execute() {
		while(!Thread.currentThread().isInterrupted()) {
			try {
				String ml = queue.take();
				// Layout: marker, key, uid, className, timestamp
				String[] vals = ml.split(MonitorBaseBolt.SPLIT_TAG);
				if(vals.length < FIELD_COUNT) {
					System.err.println("malformed monitor record, field count: " + vals.length);
					continue;
				}
				// Row = key; column = className SEP uid SEP marker; value = timestamp.
				Put put = new Put(vals[1].getBytes());
				put.add("log".getBytes(), (vals[3] + MonitorBaseBolt.SPLIT_TAG + vals[2] + MonitorBaseBolt.SPLIT_TAG
						+ vals[0]).getBytes(), vals[4].getBytes());
				UserProfileDBHelper.getInstance().save(put, "real_log");
			} catch (InterruptedException e) {
				// Preserve the interrupt status and stop draining.
				Thread.currentThread().interrupt();
				return;
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Queues a record without blocking; the record is dropped when the queue is full.
	 *
	 * @param data the record to store
	 */
	public void put(String data) {
		queue.offer(data);
	}
}

 

然后就是第一个bolt的创建

 

/**
 * First bolt of the topology: splits each incoming message into tab-separated track lines,
 * resolves the user action of every line and emits one {@code UserActionTuple} per line that
 * maps to a known action and a usable user key.
 */
public class GetTrackInfoBolt extends MonitorBaseBolt
{
	private static final long serialVersionUID = 1L;
	/** Minimum field count of a usable track line; the highest index read is 41. */
	private static final int MIN_TRACK_FIELDS = 42;
	private int trackNumber = 0;
	private static Logger log = Logger.getLogger(GetTrackInfoBolt.class);

	/**
	 * Picks the identifier used as row key: the user id when present, otherwise the guid.
	 *
	 * @param userId user id field; {@code null}, empty or the literal {@code \N} counts as missing
	 * @param guId   guid field used as fallback
	 * @return the chosen key, or {@code null} when neither value is usable
	 */
	private String getRowKey(String userId, String guId) {
		boolean userIdMissing = null == userId || userId.equals("\\N") || userId.isEmpty();
		if (userIdMissing) {
			if (null == guId) {
				return null;
			}
			Matcher matcher = UserActionConstants.NOT_GUID_PATTERN.matcher(guId);
			if (matcher.find()) {
				log.error("invail guid:" + guId );
				return null;
			}
			userId = guId;
		}

		// Applies to both a real user id and the guid fallback.
		if (userId.equals("\\N") || userId.isEmpty() || userId.equals("null")) {
			return null;
		}
		return userId;
	}

	/**
	 * Classifies a URL as mobile or PC.
	 *
	 * @param url the tracked URL; may be {@code null}
	 * @return {@code UserStatConstants.MOBILE_TAG} when the URL starts with
	 *         {@code UserStatConstants.MOBILE_URL_TAG}, otherwise {@code UserStatConstants.PC_TAG}
	 */
	public String GetDev(String url) {
		boolean mobile = null != url && url.startsWith(UserStatConstants.MOBILE_URL_TAG);
		return mobile ? UserStatConstants.MOBILE_TAG : UserStatConstants.PC_TAG;
	}

	/**
	 * Parses the message in field 0 of the tuple and emits one tuple per usable track line.
	 * Lines with too few fields are logged and skipped; any exception is logged and ends
	 * processing of the current message.
	 *
	 * @param input     tuple whose first field is the newline-separated track message
	 * @param collector collector used to emit the resulting {@code UserActionTuple}s
	 */
	@Override
	public void preExecute(Tuple input, BasicOutputCollector collector)
	{
		try
		{
			String mesg = input.getString(0);
			if (mesg == null || mesg.isEmpty()) {
				return;
			}

			for (String track : mesg.split("\n")) {
				// The appended blank keeps trailing empty fields from being dropped by split();
				// it is trimmed off the last field again when that field is read.
				String[] trackInfo = (track + " ").split("\t");

				if (trackInfo.length < MIN_TRACK_FIELDS) {
					// A line may have fewer than two fields (e.g. an empty line); indexing
					// trackInfo[1] unconditionally would throw and drop the rest of the message.
					String sample = trackInfo.length > 1 ? trackInfo[1] : track;
					log.error(trackInfo.length + " " + sample);
					continue;
				}

				UserActionTuple userActionTuple = toUserActionTuple(trackInfo);
				if (null == userActionTuple) {
					continue;
				}

				trackNumber += 1;
				collector.emit(new Values(userActionTuple));
			}
		}
		catch (Exception e) {
			log.error("split track wrong:" + e.toString(), e);
		}
	}

	/**
	 * Converts one split track line into a tuple.
	 *
	 * @param trackInfo fields of the line; must hold at least {@code MIN_TRACK_FIELDS} entries
	 * @return the tuple, or {@code null} when the line has no recognised action or no usable key
	 */
	private UserActionTuple toUserActionTuple(String[] trackInfo) {
		String url = trackInfo[1];
		String referer = trackInfo[2];
		String guId = trackInfo[5];
		String sessionId = trackInfo[10];
		String trackTime = trackInfo[17];
		String userId = trackInfo[18];
		String productIds = trackInfo[21];
		String ieVersion = trackInfo[29];
		String platform = trackInfo[30];
		String linkPositon = trackInfo[34];
		String buttonPosition = trackInfo[35];
		String provinceId = trackInfo[38];
		String cityId = trackInfo[41].trim();

		// Per-record trace output belongs at debug level, not error.
		if (log.isDebugEnabled()) {
			log.debug("trackTime:" + trackTime );
		}

		StringBuilder strProductIds = new StringBuilder( productIds );
		UserActionQualifier userActionQualifier = ProActionAnalyzer.getProductActionType(url, strProductIds,
				linkPositon, buttonPosition);
		if (null == userActionQualifier) {
			userActionQualifier = ProSetActionAnalyzer.getProductSetActionType( url );
		}
		if (null == userActionQualifier) {
			return null;
		}

		String strKey = getRowKey(userId, guId);
		if (null == strKey) {
			return null;
		}

		UserAction userAction = new UserAction();
		userAction.setS(sessionId);
		userAction.setP(provinceId);
		userAction.setC(cityId);
		userAction.setO(platform);
		userAction.setB(ieVersion);
		userAction.setR(referer);
		userAction.setL(linkPositon);
		userAction.setBP(buttonPosition);
		userAction.setU(url);
		userAction.setA(userActionQualifier.getActionType());

		String userKey = GetDev( url ) + strKey;
		String userType = userActionQualifier.getLogType() + CommonConstants.TRACK_SPLIT
				+ userActionQualifier.getActionType()
				+ CommonConstants.TRACK_SPLIT + userActionQualifier.getActionObject();

		UserActionTuple userActionTuple = new UserActionTuple();
		userActionTuple.setUserKey(userKey);
		userActionTuple.setUserType(userType);
		userActionTuple.setTrackTime(trackTime);
		userActionTuple.setUserAction(userAction);
		return userActionTuple;
	}

	/**
	 * Declares the single output field {@code trackInfos}.
	 *
	 * @param declarer the declarer to register the output field with
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer)
	{
		declarer.declare(new Fields("trackInfos"));
	}
}

 描述了大致的storm实现流程。

你可能感兴趣的:(storm)