聊聊flink的SpoutWrapper

序

本文主要研究一下flink的SpoutWrapper

SpoutWrapper

flink-storm_2.11-1.6.2-sources.jar!/org/apache/flink/storm/wrappers/SpoutWrapper.java

/**
 * A {@link SpoutWrapper} wraps an {@link IRichSpout} in order to execute it within a Flink Streaming program. It
 * takes the spout's output tuples and transforms them into Flink tuples of type {@code OUT} (see
 * {@link SpoutCollector} for supported types).
 *
 * <p>Per default, {@link SpoutWrapper} calls the wrapped spout's {@link IRichSpout#nextTuple() nextTuple()} method in
 * an infinite loop.
 *
 * <p>Alternatively, {@link SpoutWrapper} can call {@link IRichSpout#nextTuple() nextTuple()} for a finite number of
 * times and terminate automatically afterwards (for finite input streams). The number of {@code nextTuple()} calls can
 * be specified as a certain number of invocations or can be undefined. In the undefined case, {@link SpoutWrapper}
 * terminates if no record was emitted to the output collector for the first time during a call to
 * {@link IRichSpout#nextTuple() nextTuple()}.
 *
 * <p>If the given spout implements {@link FiniteSpout} interface and {@link #numberOfInvocations} is not provided or
 * is {@code null}, {@link SpoutWrapper} calls {@link IRichSpout#nextTuple() nextTuple()} method until
 * {@link FiniteSpout#reachedEnd()} returns true.
 *
 * <p>NOTE(review): this listing is an excerpt. The type parameter {@code OUT} mentioned above is not declared on the
 * class header shown here and several types are used raw; the generic arguments were presumably lost when the
 * listing was extracted -- confirm against the upstream source.
 */
public final class SpoutWrapper extends RichParallelSourceFunction implements StoppableFunction {
    //......

    /** The number of {@link IRichSpout#nextTuple()} calls. */
    private Integer numberOfInvocations; // do not use int -> null indicates an infinite loop

    /**
     * Instantiates a new {@link SpoutWrapper} that calls the {@link IRichSpout#nextTuple() nextTuple()} method of
     * the given {@link IRichSpout spout} a finite number of times. The output type will be one of {@link Tuple0} to
     * {@link Tuple25} depending on the spout's declared number of attributes.
     *
     * <p>Delegates to a three-argument constructor that is not part of this excerpt, passing {@code null} as the
     * collection argument.
     *
     * @param spout
     *            The {@link IRichSpout spout} to be used.
     * @param numberOfInvocations
     *            The number of calls to {@link IRichSpout#nextTuple()}. If value is negative, {@link SpoutWrapper}
     *            terminates if no tuple was emitted for the first time. If value is {@code null}, finite invocation is
     *            disabled.
     * @throws IllegalArgumentException
     *             If the number of declared output attributes is not within range [0;25].
     */
    public SpoutWrapper(final IRichSpout spout, final Integer numberOfInvocations)
            throws IllegalArgumentException {
        this(spout, (Collection) null, numberOfInvocations);
    }

    /**
     * Instantiates a new {@link SpoutWrapper} that calls the {@link IRichSpout#nextTuple() nextTuple()} method of
     * the given {@link IRichSpout spout} in an infinite loop. The output type will be one of {@link Tuple0} to
     * {@link Tuple25} depending on the spout's declared number of attributes.
     *
     * <p>Delegates to a three-argument constructor that is not part of this excerpt, passing {@code null} both as
     * the collection argument and as the number of invocations.
     *
     * @param spout
     *            The {@link IRichSpout spout} to be used.
     * @throws IllegalArgumentException
     *             If the number of declared output attributes is not within range [0;25].
     */
    public SpoutWrapper(final IRichSpout spout) throws IllegalArgumentException {
        this(spout, (Collection) null, null);
    }

    /**
     * Opens and activates the wrapped spout, then drives its {@code nextTuple()} method until one of the
     * termination conditions below is met.
     *
     * <p>The loop that is executed depends on {@link #numberOfInvocations}:
     * <ul>
     * <li>{@code null} and the spout is a {@link FiniteSpout}: loop until {@code reachedEnd()} returns true or
     * {@code isRunning} becomes false;</li>
     * <li>{@code null} otherwise: loop until {@code isRunning} becomes false;</li>
     * <li>zero or positive: call {@code nextTuple()} at most that many times, stopping early if {@code isRunning}
     * becomes false;</li>
     * <li>negative: call {@code nextTuple()} at least once and keep calling it while {@code collector.tupleEmitted}
     * is true after the call and {@code isRunning} is true.</li>
     * </ul>
     *
     * @param ctx
     *            The Flink source context that is handed to the {@link SpoutCollector}.
     * @throws Exception
     *             Declared by the overridden method; nothing in this body throws explicitly.
     */
    @Override
    public final void run(final SourceContext ctx) throws Exception {
        final GlobalJobParameters config = super.getRuntimeContext().getExecutionConfig()
                .getGlobalJobParameters();
        // Start from an empty Storm configuration; it stays empty when no global job parameters are set.
        StormConfig stormConfig = new StormConfig();

        if (config != null) {
            if (config instanceof StormConfig) {
                // The job parameters already are a StormConfig: use the instance as-is.
                stormConfig = (StormConfig) config;
            } else {
                // Any other parameter type is copied into the Storm configuration via its map view.
                stormConfig.putAll(config.toMap());
            }
        }

        final TopologyContext stormTopologyContext = WrapperSetupHelper.createTopologyContext(
                (StreamingRuntimeContext) super.getRuntimeContext(), this.spout, this.name,
                this.stormTopology, stormConfig);

        SpoutCollector collector = new SpoutCollector(this.numberOfAttributes,
                stormTopologyContext.getThisTaskId(), ctx);

        // The spout receives a Storm SpoutOutputCollector that wraps the Flink-side collector created above.
        this.spout.open(stormConfig, stormTopologyContext, new SpoutOutputCollector(collector));
        this.spout.activate();

        if (numberOfInvocations == null) {
            if (this.spout instanceof FiniteSpout) {
                final FiniteSpout finiteSpout = (FiniteSpout) this.spout;

                while (this.isRunning && !finiteSpout.reachedEnd()) {
                    finiteSpout.nextTuple();
                }
            } else {
                while (this.isRunning) {
                    this.spout.nextTuple();
                }
            }
        } else {
            int counter = this.numberOfInvocations;
            if (counter >= 0) {
                // The counter is decremented before isRunning is checked, so exactly `counter` calls are made
                // unless the source is cancelled or stopped first.
                while ((--counter >= 0) && this.isRunning) {
                    this.spout.nextTuple();
                }
            } else {
                do {
                    // Reset the flag before each call; presumably the collector sets it when a tuple is
                    // emitted (the collector's emit path is not part of this excerpt -- TODO confirm).
                    collector.tupleEmitted = false;
                    this.spout.nextTuple();
                } while (collector.tupleEmitted && this.isRunning);
            }
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Sets the {@link #isRunning} flag to {@code false}.
     */
    @Override
    public void cancel() {
        this.isRunning = false;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Sets the {@link #isRunning} flag to {@code false}.
     */
    @Override
    public void stop() {
        this.isRunning = false;
    }

    /**
     * Closes the wrapped spout by delegating to its {@code close()} method.
     *
     * @throws Exception
     *             Declared by the overridden method.
     */
    @Override
    public void close() throws Exception {
        this.spout.close();
    }
}

SpoutWrapper继承了RichParallelSourceFunction类，实现了StoppableFunction接口的stop方法
SpoutWrapper的run方法创建了flink的SpoutCollector作为storm的SpoutOutputCollector的构造器参数，之后调用spout的open方法，把包装了SpoutCollector(flink)的SpoutOutputCollector传递给spout，用来收集spout发射的数据
之后就是根据numberOfInvocations参数来调用spout.nextTuple()方法来发射数据；numberOfInvocations是控制调用spout的nextTuple的次数，它可以在创建SpoutWrapper的时候在构造器中设置，如果使用没有numberOfInvocations参数的构造器，则该值为null，表示infinite loop
flink对storm的spout进行了封装，提供了FiniteSpout接口，该接口有个reachedEnd方法用来判断数据是否发送完毕，从而将storm的spout改造为finite模式；这里如果使用的是storm原始的spout（未实现FiniteSpout），则会一直循环调用nextTuple方法
如果有设置numberOfInvocations而且大于等于0，则根据指定的次数来调用nextTuple方法；如果该值小于0，则根据collector.tupleEmitted值来判断是否终止循环

SpoutCollector

flink-storm_2.11-1.6.2-sources.jar!/org/apache/flink/storm/wrappers/SpoutCollector.java

/**
 * A {@link SpoutCollector} is used by {@link SpoutWrapper} to provide a Storm
 * compatible output collector to the wrapped spout. It transforms the emitted Storm tuples into
 * Flink tuples and emits them via the provided {@link SourceContext} object.
 */
class SpoutCollector extends AbstractStormCollector implements ISpoutOutputCollector {

    /** The Flink source context object. */
    private final SourceContext flinkContext;

    /**
     * Instantiates a new {@link SpoutCollector} that emits Flink tuples to the given Flink source context. If the
     * number of attributes is specified as zero, any output type is supported. If the number of attributes is between 0
     * to 25, the output type is {@link Tuple0} to {@link Tuple25}, respectively.
     *
     * <p>NOTE(review): the text above describes {@code numberOfAttributes} as a single number, but the parameter
     * is declared as a {@code HashMap} (its type arguments are not visible in this listing); presumably it holds
     * one attribute count per output stream -- confirm against the superclass.
     *
     * @param numberOfAttributes
     *            The number of attributes of the emitted tuples.
     * @param taskId
     *            The ID of the producer task (negative value for unknown).
     * @param flinkContext
     *            The Flink source context to be used.
     * @throws UnsupportedOperationException
     *             if the specified number of attributes is greater than 25
     */
    SpoutCollector(final HashMap numberOfAttributes, final int taskId,
            final SourceContext flinkContext) throws UnsupportedOperationException {
        super(numberOfAttributes, taskId);
        // Only checked when the JVM runs with assertions enabled; a null context is not rejected otherwise.
        assert (flinkContext != null);
        this.flinkContext = flinkContext;
    }

    /**
     * Forwards the given Flink tuple to the Flink source context.
     *
     * <p>NOTE(review): the type {@code OUT} is not declared anywhere in this listing; presumably it is a class
     * type parameter that was lost during extraction -- confirm against the upstream source.
     *
     * @param flinkTuple
     *            The tuple to be collected by the source context.
     * @return always {@code null}
     */
    @Override
    protected List doEmit(final OUT flinkTuple) {
        this.flinkContext.collect(flinkTuple);
        // TODO (left open by the original author): no list is built here, null is returned unconditionally
        return null;
    }

    /**
     * Does nothing: the reported error is neither logged, stored, nor rethrown.
     *
     * @param error
     *            The error reported by the spout; ignored by this implementation.
     */
    @Override
    public void reportError(final Throwable error) {
        // not sure, if Flink can support this
    }

    @Override
    public List emit(final String streamId, final List

聊聊flink的SpoutWrapper

序

SpoutWrapper

SpoutCollector

AbstractStormCollector.tansformAndEmit

Task.run

StreamTask

StoppableSourceStreamTask

SourceStreamTask

StreamSource

小结

doc

你可能感兴趣的:(聊聊flink的SpoutWrapper)