/**
 * A {@link BoltWrapper} wraps an {@link IRichBolt} in order to execute the Storm bolt within a Flink Streaming program.
 * It takes the Flink input tuples of type {@code IN} and transforms them into {@link StormTuple}s that the bolt can
 * process. Furthermore, it takes the bolt's output tuples and transforms them into Flink tuples of type {@code OUT}
 * (see {@link AbstractStormCollector} for supported types).
 *
 * <p>Works for single input streams only! See {@link MergedInputsBoltWrapper} for multi-input stream
 * Bolts.
 *
 * @param <IN> The type of the Flink input tuples.
 * @param <OUT> The type of the Flink output tuples.
 */
public class BoltWrapper<IN, OUT> extends AbstractStreamOperator<OUT> implements OneInputStreamOperator<IN, OUT> {

    // NOTE(review): the fields used below (bolt, name, stormTopology, numberOfAttributes,
    // topologyContext, flinkCollector, inputComponentIds, inputStreamIds, inputSchemas) are not
    // declared in the visible part of this class - confirm their declarations match these usages.

    /**
     * Creates the Flink-side collector, derives the Storm configuration from the job's global
     * parameters, builds the topology context, records per-producer-task input metadata when a
     * Storm topology is present, and finally calls {@code prepare} on the wrapped bolt.
     *
     * @throws Exception if the super class or any of the setup steps fails
     */
    @Override
    public void open() throws Exception {
        super.open();

        this.flinkCollector = new TimestampedCollector<>(this.output);

        // Global job parameters double as Storm configuration: they are used directly if they
        // already are a StormConfig, otherwise their map view is copied into a fresh StormConfig.
        GlobalJobParameters config = getExecutionConfig().getGlobalJobParameters();
        StormConfig stormConfig = new StormConfig();
        if (config != null) {
            if (config instanceof StormConfig) {
                stormConfig = (StormConfig) config;
            } else {
                stormConfig.putAll(config.toMap());
            }
        }

        this.topologyContext = WrapperSetupHelper.createTopologyContext(
                getRuntimeContext(), this.bolt, this.name, this.stormTopology, stormConfig);

        final OutputCollector stormCollector = new OutputCollector(new BoltCollector<OUT>(
                this.numberOfAttributes, this.topologyContext.getThisTaskId(), this.flinkCollector));

        if (this.stormTopology != null) {
            // Only the keys (the input streams) are needed here, so the value type stays open.
            Map<GlobalStreamId, ?> inputs = this.topologyContext.getThisSources();

            // Index component id, stream id and schema by every task id of each producing component,
            // so processElement() can look them up from the producer task id carried in the tuple.
            for (GlobalStreamId inputStream : inputs.keySet()) {
                for (Integer tid : this.topologyContext.getComponentTasks(inputStream
                        .get_componentId())) {
                    this.inputComponentIds.put(tid, inputStream.get_componentId());
                    this.inputStreamIds.put(tid, inputStream.get_streamId());
                    this.inputSchemas.put(tid,
                            this.topologyContext.getComponentOutputFields(inputStream));
                }
            }
        }

        this.bolt.prepare(stormConfig, this.topologyContext, stormCollector);
    }

    /**
     * Disposes the operator and cleans up the wrapped bolt.
     *
     * <p>The bolt's {@code cleanup()} runs in a {@code finally} block so that it is still executed
     * when {@code super.dispose()} throws. If both fail, the exception from {@code cleanup()} is
     * the one that propagates.
     *
     * @throws Exception if disposing the operator or cleaning up the bolt fails
     */
    @Override
    public void dispose() throws Exception {
        try {
            super.dispose();
        } finally {
            this.bolt.cleanup();
        }
    }

    /**
     * Wraps the incoming record's value into a {@link StormTuple} and passes it to the bolt.
     *
     * <p>With a Storm topology present, the last field of the input tuple is read as the producer
     * task id and used to look up schema, stream id and component id. Without a topology, the
     * schema registered under the {@code null} key is used together with task id {@code -1} and
     * no stream/component id.
     *
     * @param element the record to process; its timestamp (if any) is applied to emitted records
     * @throws Exception if the bolt fails while executing the tuple
     */
    @Override
    public void processElement(final StreamRecord<IN> element) throws Exception {
        this.flinkCollector.setTimestamp(element);

        IN value = element.getValue();

        if (this.stormTopology != null) {
            Tuple tuple = (Tuple) value;
            Integer producerTaskId = tuple.getField(tuple.getArity() - 1);

            this.bolt.execute(new StormTuple<>(value, this.inputSchemas.get(producerTaskId),
                    producerTaskId, this.inputStreamIds.get(producerTaskId), this.inputComponentIds
                            .get(producerTaskId), MessageId.makeUnanchored()));
        } else {
            this.bolt.execute(new StormTuple<>(value, this.inputSchemas.get(null), -1, null, null,
                    MessageId.makeUnanchored()));
        }
    }
}
/**
* A {@link BoltCollector} is used by {@link BoltWrapper} to provide a Storm-compatible
* output collector to the wrapped bolt. It transforms the emitted Storm tuples into Flink tuples
* and emits them via the provided {@link Output} object.
*/
class BoltCollector extends AbstractStormCollector implements IOutputCollector {
/** The Flink output Collector. */
private final Collector flinkOutput;
/**
* Instantiates a new {@link BoltCollector} that emits Flink tuples to the given Flink output object. If the
* number of attributes is negative, any output type is supported (i.e., raw type). If the number of attributes is
* between 0 and 25, the output type is {@link Tuple0} to {@link Tuple25}, respectively.
*
* @param numberOfAttributes
* The number of attributes of the emitted tuples per output stream.
* @param taskId
* The ID of the producer task (negative value for unknown).
* @param flinkOutput
* The Flink output object to be used.
* @throws UnsupportedOperationException
* if the specified number of attributes is greater than 25
*/
BoltCollector(final HashMap numberOfAttributes, final int taskId,
        final Collector flinkOutput) throws UnsupportedOperationException {
    super(numberOfAttributes, taskId);

    // Sanity check via assert only: it is evaluated solely when assertions are enabled.
    assert flinkOutput != null;

    // Keep the Flink collector that doEmit() forwards every tuple to.
    this.flinkOutput = flinkOutput;
}
/**
 * Forwards the given Flink tuple to the wrapped Flink collector.
 *
 * @param flinkTuple the tuple to emit
 * @return always {@code null}; no result list is produced yet
 */
@Override
protected List doEmit(final OUT flinkTuple) {
    flinkOutput.collect(flinkTuple);

    // TODO: a real result list is not produced yet, so null is handed back.
    return null;
}
/**
 * Accepts an error reported by the bolt. Currently a no-op: the error is neither logged nor
 * propagated.
 *
 * @param error the error reported by the bolt (ignored)
 */
@Override
public void reportError(final Throwable error) {
// not sure, if Flink can support this
}
@Override
public List emit(final String streamId, final Collection anchors, final List
/**
* Wrapper around an {@link Output} for user functions that expect a {@link Collector}.
* Before giving the {@link TimestampedCollector} to a user function you must set
* the timestamp that should be attached to emitted elements. Most operators
* would set the timestamp of the incoming
* {@link org.apache.flink.streaming.runtime.streamrecord.StreamRecord} here.
*
* @param The type of the elements that can be emitted.
*/
@Internal
public class TimestampedCollector implements Collector {
private final Output> output;
private final StreamRecord reuse;
/**
* Creates a new {@link TimestampedCollector} that wraps the given {@link Output}.
*/
public TimestampedCollector(Output> output) {
this.output = output;
this.reuse = new StreamRecord(null);
}
@Override
public void collect(T record) {
output.collect(reuse.replace(record));
}
public void setTimestamp(StreamRecord> timestampBase) {
if (timestampBase.hasTimestamp()) {
reuse.setTimestamp(timestampBase.getTimestamp());
} else {
reuse.eraseTimestamp();
}
}
public void setAbsoluteTimestamp(long timestamp) {
reuse.setTimestamp(timestamp);
}
public void eraseTimestamp() {
reuse.eraseTimestamp();
}
@Override
public void close() {
output.close();
}
}
/**
* The Task represents one execution of a parallel subtask on a TaskManager.
* A Task wraps a Flink operator (which may be a user function) and
* runs it, providing all services necessary for example to consume input data,
* produce its results (intermediate result partitions) and communicate
* with the JobManager.
*
* <p>The Flink operators (implemented as subclasses of
* {@link AbstractInvokable}) have only data readers, -writers, and certain event callbacks.
* The task connects those to the network stack and actor messages, and tracks the state
* of the execution and handles exceptions.
*
* <p>Tasks have no knowledge about how they relate to other tasks, or whether they
* are the first attempt to execute the task, or a repeated attempt. All of that
* is only known to the JobManager. All the task knows are its own runnable code,
* the task's configuration, and the IDs of the intermediate results to consume and
* produce (if any).
*
* <p>Each Task is run by one dedicated thread.
*/
public class Task implements Runnable, TaskActions, CheckpointListener {
//......
/**
* The core work method that bootstraps the task and executes its code.
*
* <p>The part shown here instantiates the invokable, publishes it through the
* {@code invokable} field, switches the task from {@code DEPLOYING} to {@code RUNNING}
* (throwing a {@code CancelTaskException} if that transition fails), notifies the observers
* and the task manager, installs the user code class loader on the executing thread and
* finally calls {@code invoke()} on the invokable. Code before and after is elided.
*/
@Override
public void run() {
//......
// now load and instantiate the task's invokable code
// NOTE(review): 'invokable' has no visible declaration here; presumably a local declared in
// the elided part above - confirm.
invokable = loadAndInstantiateInvokable(userCodeClassLoader, nameOfInvokableClass, env);
// ----------------------------------------------------------------
// actual task core work
// ----------------------------------------------------------------
// we must make strictly sure that the invokable is accessible to the cancel() call
// by the time we switched to running.
this.invokable = invokable;
// switch to the RUNNING state, if that fails, we have been canceled/failed in the meantime
if (!transitionState(ExecutionState.DEPLOYING, ExecutionState.RUNNING)) {
throw new CancelTaskException();
}
// notify everyone that we switched to running
notifyObservers(ExecutionState.RUNNING, null);
taskManagerActions.updateTaskExecutionState(new TaskExecutionState(jobId, executionId, ExecutionState.RUNNING));
// make sure the user code classloader is accessible thread-locally
executingThread.setContextClassLoader(userCodeClassLoader);
// run the invokable
invokable.invoke();
//......
}
}
/**
* Base class for all streaming tasks. A task is the unit of local processing that is deployed
* and executed by the TaskManagers. Each task runs one or more {@link StreamOperator}s which form
* the Task's operator chain. Operators that are chained together execute synchronously in the
* same thread and hence on the same stream partition. A common case for these chains
* are successive map/flatmap/filter tasks.
*
*
The task chain contains one "head" operator and multiple chained operators.
* The StreamTask is specialized for the type of the head operator: one-input and two-input tasks,
* as well as for sources, iteration heads and iteration tails.
*
*
The Task class deals with the setup of the streams read by the head operator, and the streams
* produced by the operators at the ends of the operator chain. Note that the chain may fork and
* thus have multiple ends.
*
*
The life cycle of the task is set up as follows:
*
{@code
* -- setInitialState -> provides state of all operators in the chain
*
* -- invoke()
* |
* +----> Create basic utils (config, etc) and load the chain of operators
* +----> operators.setup()
* +----> task specific init()
* +----> initialize-operator-states()
* +----> open-operators()
* +----> run()
* +----> close-operators()
* +----> dispose-operators()
* +----> common cleanup
* +----> task specific cleanup()
* }
*
*
The {@code StreamTask} has a lock object called {@code lock}. All calls to methods on a
* {@code StreamOperator} must be synchronized on this lock object to ensure that no methods
* are called concurrently.
*
* @param
* @param
*/
@Internal
public abstract class StreamTask>
extends AbstractInvokable
implements AsyncExceptionHandler {
//......
@Override
public final void invoke() throws Exception {
boolean disposed = false;
try {
//......
// let the task do its work
isRunning = true;
run();
// if this left the run() method cleanly despite the fact that this was canceled,
// make sure the "clean shutdown" is not attempted
if (canceled) {
throw new CancelTaskException();
}
LOG.debug("Finished task {}", getName());
//......
}
finally {
// clean up everything we initialized
isRunning = false;
//......
}
}
}
/**
 * Drives the input processor until the task stops running or {@code processInput()} returns
 * {@code false}.
 *
 * @throws Exception if processing the input fails
 */
@Override
protected void run() throws Exception {
    // Keep the processor in a local so the loop does not re-read the field (JIT friendly).
    final StreamInputProcessor processor = this.inputProcessor;

    // All the work happens inside processInput(); the loop itself only decides when to stop.
    // The running flag is checked first, so processInput() is not called once it is false.
    while (true) {
        if (!running || !processor.processInput()) {
            return;
        }
    }
}