jstorm源码之 RichSpoutBatchExecutor

阅读更多
一、作用
RichSpoutBatchExecutor是把普通IRichSpout(及其子类Spout)包装成Trident spout的执行器,主要包括:
        1、RichSpoutEmitter:用于以batch为单位执行tuple的emit,并负责对应message id的ack/fail
        2、RichSpoutCoordinator:用于spout的batch调度
        3、CaptureCollector:用于拦截spout的emit,记录message id和已发射的tuple数,并把tuple转发给Trident的collector

二、源码解析
package storm.trident.spout;

import backtype.storm.Config;

public class RichSpoutBatchExecutor implements ITridentSpout {
    public static final String MAX_BATCH_SIZE_CONF = "topology.spout.max.batch.size";

    IRichSpout _spout;
    // 构造RichSpouBatchExecutor 初始化Spout
    public RichSpoutBatchExecutor(IRichSpout spout) {
        _spout = spout;
    }

    // 获取Spout配置信息
    @Override
    public Map getComponentConfiguration() {
        return _spout.getComponentConfiguration();
    }

    // 获取向下级投递的stream fields
    @Override
    public Fields getOutputFields() {
        return TridentUtils.getSingleOutputStreamFields(_spout);

    }
    // 构建batch的调度器
    @Override
    public BatchCoordinator getCoordinator(String txStateId, Map conf, TopologyContext context) {
        return new RichSpoutCoordinator();
    }

   //  构建emitter
    @Override
    public Emitter getEmitter(String txStateId, Map conf, TopologyContext context) {
        return new RichSpoutEmitter(conf, context);
    }

    // Emitter的实现
    //  指定emit的参数
    class RichSpoutEmitter implements ITridentSpout.Emitter {
        // batch的大小
        int _maxBatchSize;
        // 是否完成初始化
        boolean prepared = false;
        // filed collect
        CaptureCollector _collector;
        // 记录fail的id
        RotatingMap> idsMap;
        //  spout 的配置
        Map _conf;
        // topology的上下文
        TopologyContext _context;
        // 记录emit上一次的时间
        long lastRotate = System.currentTimeMillis();
        //  记录emit的当前时间  主要是根据message timeout进行计算
        long rotateTime;

        public RichSpoutEmitter(Map conf, TopologyContext context) {
            _conf = conf;
            _context = context;
             // spout的batch的size 根据配置文件中max_batch_size的内容得来的
            Number batchSize = (Number) conf.get(MAX_BATCH_SIZE_CONF);
            //  默认大小为1000
            if (batchSize == null)
                batchSize = 1000;
            _maxBatchSize = batchSize.intValue();            
            _collector = new CaptureCollector();
            idsMap = new RotatingMap(3);
            rotateTime = 1000L * ((Number) conf.get(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS)).intValue();
        }

        // batch执行emit操作
        //  获取batch对应的transaction的id 和 当前系统时间与lastRotate的差值 
       //   若是超过timeout指定的时间即认为batch的emit的操作失败  通过循环遍历idsMap中最后一个桶中 并将对应桶中的id执行fail   最后更新lastRotate对应的时间  同时也要将这一batch的txId执行fail
        //   若是没有超时或者提出超时的那部分id 
        @Override
        public void emitBatch(TransactionAttempt tx, Object coordinatorMeta, TridentCollector collector) {
            long txid = tx.getTransactionId();

            long now = System.currentTimeMillis();
            if (now - lastRotate > rotateTime) {
                Map> failed = idsMap.rotate();
                for (Long id : failed.keySet()) {
                    // TODO: this isn't right... it's not in the map anymore
                    fail(id);
                }
                lastRotate = now;
            }

            if (idsMap.containsKey(txid)) {
                fail(txid);
            }
             // 重置collect 使用batch的collector  同时ids重置
            _collector.reset(collector);
             //  判断当前spout是否已经完成初始化 否则执行初始化  调整状态
            if (!prepared) {
                _spout.open(_conf, _context, new SpoutOutputCollector(_collector));
                prepared = true;
            }
            //  循环判断batchs中的batch是否处理完成  根据collect中的emitted的个数和当前的下标进行比对 若是小于当前下标则为后续的batch未处理
            //  最终将当前完成的id存放idsMap
            for (int i = 0; i < _maxBatchSize; i++) {
                _spout.nextTuple();
                if (_collector.numEmitted < i) {
                    break;
                }
            }
            idsMap.put(txid, _collector.ids);

        }

        // batch指定成功后 执行ack
        @Override
        public void success(TransactionAttempt tx) {
            ack(tx.getTransactionId());
        }
        // 循环执行spout的ack
        private void ack(long batchId) {
            List ids = (List) idsMap.remove(batchId);
            if (ids != null) {
                for (Object id : ids) {
                    _spout.ack(id);
                }
            }
        }
       // 循环执行spout的fail
        private void fail(long batchId) {
            List ids = (List) idsMap.remove(batchId);
            if (ids != null) {
                for (Object id : ids) {
                    _spout.fail(id);
                }
            }
        }

        // 关闭spout
        @Override
        public void close() {
            _spout.close();
        }

    }

    // spout的coordinator
    class RichSpoutCoordinator implements ITridentSpout.BatchCoordinator {
         // 事务初始化
        @Override
        public Object initializeTransaction(long txid, Object prevMetadata, Object currMetadata) {
            return null;
        }
        // 事务完成
        @Override
        public void success(long txid) {
        }
        //  是否已经准备
        @Override
        public boolean isReady(long txid) {
            return true;
        }
         //  关闭
        @Override
        public void close() {
        }
    }

    //  spout的collect  主要是针对batch的操作 进行collect的
    static class CaptureCollector implements ISpoutOutputCollector {
      
        TridentCollector _collector;
        public List ids;
        public int numEmitted;

        public void reset(TridentCollector c) {
            _collector = c;
            ids = new ArrayList();
        }

        @Override
        public void reportError(Throwable t) {
            _collector.reportError(t);
        }

        @Override
        public List emit(String stream, List values, Object id) {
            if (id != null)
                ids.add(id);
            numEmitted++;
            _collector.emit(values);
            return null;
        }

        @Override
        public void emitDirect(int task, String stream, List values, Object id) {
            throw new UnsupportedOperationException("Trident does not support direct streams");
        }

    }

}

你可能感兴趣的:(jstorm源码之 RichSpoutBatchExecutor)