首先创建一个Topology主类,
然后spout是从kafka接收数据流的 KafkaSpout,
第一个bolt会从前面的spout接收数据,做一些初步的处理,传输给下一个bolt
不适应重量级的计算。
实时UI无法准确地查看数据的执行情况,准确的性能调优存在一定困难。
所以在这里创建了一个抽象类继承BaseBasicBolt ,然后其他的bolt会继承创建的这个抽象类。
可以在bolt执行前和执行后记录时间。
并记录每个数据的执行流程和各个环节bolt的执行状态和耗时。
import java.util.ArrayList; import java.util.List; import org.apache.zookeeper.ZooKeeper; import storm.kafka.KafkaSpout; import storm.kafka.SpoutConfig; import storm.kafka.StringScheme; import storm.kafka.ZkHosts; public class BasicTopology { public static void main(String[] args) throws Exception { if (args == null || args.length < 2) { throw new NullPointerException("************** Topology args number must be three!"); } String zkhosts = args[0]; String nimbusHost = args[1]; String name = "user_profile_full_log_test"; TopologyBuilder builder = new TopologyBuilder(); //ZkHosts zkhost = new ZkHosts("192.168.112.138:2181,192.168.112.139:2181,192.168.112.140:2181"); ZkHosts zkhost = new ZkHosts(zkhosts); String topic = "tracker"; String spoutId = "kafkaSpout"; SpoutConfig spoutConfig = new SpoutConfig(zkhost, topic, "", spoutId); List<String> zkServers = new ArrayList<String>(); if (zkhosts != null && !zkhosts.isEmpty()) { for (String host : zkhosts.split(",")) { zkServers.add(host.split(":")[0]); } } spoutConfig.zkServers = zkServers; spoutConfig.zkPort = Integer.valueOf(2181); // spoutConfig.forceFromStart = true; spoutConfig.socketTimeoutMs = 60 * 1000; spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); String zkRoot = "/consumers/" + name + "0"; spoutConfig.zkRoot=zkRoot; ZKUtils dm=new ZKUtils(); try{ ZooKeeper zk = dm.createZKInstance( zkhosts ); zk.delete(zkRoot, -1); }catch(Exception e){ System.out.println("e:"+e.getMessage()); } // builder.setSpout("kafka_reader_test", new RandomSentenceSpout(), Integer.valueOf(1)); builder.setSpout("kafka_reader", new KafkaSpout(spoutConfig), 1); builder.setBolt("get_usertrack", new GetTrackInfoBolt(),2).shuffleGrouping("kafka_reader"); builder.setBolt("save_userstat", new SaveUserStatBolt(),2).shuffleGrouping("get_usertrack"); // builder.setBolt("save_userprofile", new SaveUserProfileBolt(),16).shuffleGrouping("save_userstat"); //builder.setBolt("save_useraction", new 
SaveUserActionBolt(),4).shuffleGrouping("get_usertrack"); // builder.setSpout("order_reader", new OrderSpout(), Integer.valueOf(1)); //builder.setBolt("save_useraction_order", new SaveUserActionBolt(),2).shuffleGrouping("order_reader"); // builder.setBolt("order_save_userstat", new SaveUserStatBolt(),8).shuffleGrouping("order_reader"); // builder.setBolt("order_save_userprofile", new SaveUserProfileBolt(),14).shuffleGrouping("order_save_userstat"); // builder.setBolt("save_usermobileprofile", new SaveMobileUserProfileBolt(),4).shuffleGrouping("save_userprofile"); // builder.setBolt("save_mergeuserprofile", new MobileMergePcProfileBolt(),4).shuffleGrouping("save_usermobileprofile"); Config conf = new Config(); //conf.setDebug(true); // conf.registerMetricsConsumer(MonitorLogConsumer.class, 1); // List list = new ArrayList(); // list.add("com.yhd.monitor.genlog.TraceTaskHook"); try { if (args != null && args.length == 2 ) { //UserProfileModel model = new UserProfileModel(); //String modelString = JSON.toJSONString(model); //conf.put(UserProfileConstants.USER_PROFILE_MODEL, modelString); conf.put("topology.max.spout.pending", Integer.valueOf(1024)); conf.put(Config.STORM_ZOOKEEPER_SESSION_TIMEOUT, 60000); conf.put(Config.STORM_ZOOKEEPER_RETRY_TIMES, 10); conf.put(Config.STORM_ZOOKEEPER_RETRY_INTERVAL, 1000); conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 0); // conf.put(Config.TOPOLOGY_AUTO_TASK_HOOKS, list);// conf.setNumWorkers(12); conf.setMaxTaskParallelism(100); conf.put("nimbus.host", nimbusHost); conf.put("nimbus.thrift.port", Integer.valueOf(6627)); conf.put("storm.zookeeper.servers", zkServers); conf.setMessageTimeoutSecs(300); StormSubmitter.submitTopology(name, conf, builder.createTopology()); } else { System.out.println("local:" ); UserProfileModel model = new UserProfileModel(); String modelString = JSON.toJSONString(model); conf.put(UserProfileConstants.USER_PROFILE_MODEL, modelString); conf.setMaxTaskParallelism(1); LocalCluster cluster = new LocalCluster(); 
cluster.submitTopology("local_user_profile", conf, builder.createTopology()); Thread.sleep(10000); cluster.shutdown(); } } catch (Exception e) { e.printStackTrace(); } } }
上面是topology主类,主要是实现bolt流程的衔接,本地运行和线上环境的切换。
下面是创建的基础拦截继承类
public abstract class MonitorBaseBolt extends BaseBasicBolt { private static final long serialVersionUID = 1L; final public static String SPLIT_TAG = new String(new byte[] { 1 }); private static Logger log = Logger.getLogger(MonitorBaseBolt.class); public void execute(Tuple input, BasicOutputCollector collector) { MessageId mi = input.getMessageId(); Map<Long, Long> map = mi.getAnchorsToIds(); String uid = UUID.randomUUID().toString(); String key = getRootId(map); MonitorLogExecutor.getInstance().put("s" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime()); preExecute(input, collector); MonitorLogExecutor.getInstance().put("e" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() + SPLIT_TAG + new Date().getTime()); } public String getRootId(Map<Long, Long> map) { if(map == null || map.keySet() == null || map.keySet().isEmpty()) { return ""; } String result = ""; for(Long root : map.keySet()) { result += "k" + root; } return result; } public void preExecute(Tuple input, BasicOutputCollector collector) { }
还有执行日志存储
import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import org.apache.hadoop.hbase.client.Put; import com.yhd.common.hbase.UserProfileDBHelper; public class MonitorLogExecutor { BlockingQueue<String> queue = new LinkedBlockingQueue<String>(1000); public static MonitorLogExecutor executor; public static synchronized MonitorLogExecutor getInstance() { if(executor == null) { executor = new MonitorLogExecutor(); executor.execute(); } return executor; } public void execute() { while(true) { try { String ml = queue.poll(); if(ml == null) { Thread.sleep(5000); continue; } //"s" + SPLIT_TAG + key + SPLIT_TAG + uid + SPLIT_TAG + super.getClass().getName() //+ SPLIT_TAG + new Date().getTime() String[] vals = ml.split(MonitorBaseBolt.SPLIT_TAG); Put put = new Put(vals[1].getBytes()); put.add("log".getBytes(), (vals[3] + MonitorBaseBolt.SPLIT_TAG + vals[2] + MonitorBaseBolt.SPLIT_TAG + vals[0]).getBytes(), vals[4].getBytes()); UserProfileDBHelper.getInstance().save(put, "real_log"); //batch save } catch (Exception e) { e.printStackTrace(); } } } public void put(String data) { queue.offer(data); } }
然后就是第一个bolt的创建
public class GetTrackInfoBolt extends MonitorBaseBolt { private static final long serialVersionUID = 1L; private String strDev = UserStatConstants.PC_TAG; private int trackNumber = 0; private static Logger log = Logger.getLogger(GetTrackInfoBolt.class); private String getRowKey(String userId, String guId) { if (null == userId || userId.equals("\\N") || userId.isEmpty()) { if(null == guId) return null; Matcher matcher = UserActionConstants.NOT_GUID_PATTERN.matcher(guId); if(matcher.find()){ log.error("invail guid:" + guId ); return null; } userId = guId; } if(userId.equals("\\N") || userId.isEmpty() || userId.equals("null")) { return null; } return userId; } public String GetDev(String url) { boolean bMobile = false; if(null != url && url.startsWith(UserStatConstants.MOBILE_URL_TAG)) { bMobile = true; } String strDev = bMobile ? UserStatConstants.MOBILE_TAG: UserStatConstants.PC_TAG; return strDev; } @Override public void preExecute(Tuple input, BasicOutputCollector collector) { try { String mesg = input.getString(0); if ((mesg != null) && (!mesg.isEmpty())) { String[] trackList = mesg.split("\n"); // List<UserActionTuple> infos = new ArrayList<UserActionTuple>(); for (String track : trackList) { //flume中是\t,测试是byte 1 String[] trackInfo = (track + " ").split("\t"); //很多时候只发送39个 if ( trackInfo.length < 42 ) { log.error(trackInfo.length + " " + trackInfo[1]); //FileUtil.write(CommonConstants.NORMAL_LOG, trackInfo[1]); //System.out.println("item count is wrong, size:" + trackInfo.length + trackInfo[1]); continue; } String url = trackInfo[1]; String referer = trackInfo[2]; String guId = trackInfo[5]; String sessionId = trackInfo[10]; String trackTime = trackInfo[17]; String userId = trackInfo[18]; String productIds = trackInfo[21]; String provinceId = trackInfo[38]; String cityId = trackInfo[41].trim(); String ieVersion = trackInfo[29]; String platform = trackInfo[30]; String linkPositon = trackInfo[34]; String buttonPosition = trackInfo[35]; log.error("trackTime:" + 
trackTime ); // System.out.println("######:" + userId + " " + guId + " " + // url + " " + linkPositon + " " + buttonPosition); StringBuilder strProductIds = new StringBuilder( productIds ); UserActionQualifier userActionQualifier = null; userActionQualifier = ProActionAnalyzer.getProductActionType(url,strProductIds, linkPositon, buttonPosition); if(null == userActionQualifier) { userActionQualifier = ProSetActionAnalyzer.getProductSetActionType( url ); } // if(null == userActionQualifier) { // userActionQualifier = OtherActionAnalyzer.getOtherActionType( url ); // } if ( null == userActionQualifier ) { //userActionQualifier = new UserActionQualifier(UserActionConstants.LEAVEACTION); //System.out.println("no action:" + url); continue; } //String strKey = getRowKey(userId, guId, trackTime); String strKey = getRowKey(userId, guId); if (null == strKey) continue; strDev = GetDev( url ); UserAction userAction = new UserAction(); userAction.setS(sessionId); userAction.setP(provinceId); userAction.setC(cityId); userAction.setO(platform); userAction.setB(ieVersion); userAction.setR(referer); userAction.setL(linkPositon); userAction.setBP(buttonPosition); userAction.setU(url); userAction.setA(userActionQualifier.getActionType()); String userKey = strDev + strKey; String userType = userActionQualifier.getLogType() + CommonConstants.TRACK_SPLIT + userActionQualifier.getActionType() + CommonConstants.TRACK_SPLIT + userActionQualifier.getActionObject(); //collector.emit(new Values(userKey,userType,trackTime,userAction)); UserActionTuple userActionTuple = new UserActionTuple(); userActionTuple.setUserKey(userKey); userActionTuple.setUserType(userType); userActionTuple.setTrackTime(trackTime); userActionTuple.setUserAction(userAction); trackNumber += 1; //infos.add(userActionTuple); collector.emit(new Values(userActionTuple)); } // if ((infos != null) && (!infos.isEmpty())) { // log.error("The number of useful track is " + trackNumber + " " + infos.size()); // collector.emit(new 
Values(new Object[] { infos })); // } } } catch (Exception e) { log.error("split track wrong:" + e.toString()); e.printStackTrace(); } } public void declareOutputFields(OutputFieldsDeclarer declarer) { //declarer.declare(new Fields("userkey","type","time","userAction")); declarer.declare(new Fields("trackInfos")); } }
描述了大致的storm实现流程。