flatMap、map、filter 都是转换操作,都通过 DataStream 的 transform 方法实现。以最经典的 flatMap 为例,transform 需要三个参数:操作名字、输出类型、对输入数据的操作(operator)。返回值是
SingleOutputStreamOperator,它是 DataStream 的子类,因此可以链式调用。transform 内部会创建一个 OneInputTransformation,它是 StreamTransformation 的子类。
也就是说,map、flatMap、filter 等用到了 transform 的 DataStream 操作,只是生成一个 StreamTransformation;每个 StreamTransformation 都保存着上一个 StreamTransformation 的引用 input,相当于一棵带有 parentId 的 StreamTransformation tree。
也就是说,通过 DataStream 的链式调用,就是在绘制这棵 StreamTransformation tree 的过程。
每个 StreamTransformation 都保存了 INPUT、OUTPUT 的数据类型,以及具体的转换逻辑 StreamOperator。
最后绘制的这个转换的tree,由StreamGraphGenerator来绘制。
结论就是:一个 transformation 持有上一个 transformation 的引用和自己的 operator 逻辑(join 时有多个 operator,单流时有一个),以及并发度、bufferTimeout 等具体计算节点的上下文。
从 operator 的接口来看,除了要处理数据外,还要处理 WATERMARK。也就是说,数据在流动的过程中,会有 WATERMARK 伴随着一起流动。而 map、flatMap、filter 等纯计算的操作并不关心 WATERMARK,而是直接转发给下一步。
/**
 * Applies the given flat-map function to this stream by wrapping it in a
 * {@code StreamFlatMap} operator and handing that operator to {@code transform}
 * under the operator name "Flat Map".
 *
 * @param flatMapper the user function to wrap
 * @return the stream returned by {@code transform} for the new operator
 */
public SingleOutputStreamOperator flatMap(FlatMapFunction flatMapper) {
    // Resolve the output type first, from the cleaned function and getType().
    // NOTE(review): Utils.getCallLocationName() presumably inspects the call stack,
    // so it is kept as a direct call from this method -- confirm before extracting it.
    final TypeInformation resultType = TypeExtractor.getFlatMapReturnTypes(
            clean(flatMapper),
            getType(),
            Utils.getCallLocationName(),
            true);

    // The function is cleaned a second time for the operator, exactly as before.
    final StreamFlatMap flatMapOperator = new StreamFlatMap<>(clean(flatMapper));
    return transform("Flat Map", resultType, flatMapOperator);
}
/**
 * Creates a {@code OneInputTransformation} that applies the given operator on top of
 * this stream's transformation, registers it with the execution environment, and
 * returns it wrapped in a {@code SingleOutputStreamOperator}.
 *
 * @param operatorName name given to the new transformation
 * @param outTypeInfo output type information handed to the new transformation
 * @param operator the operator the new transformation applies
 * @return a stream wrapping the newly created transformation
 */
@PublicEvolving
public SingleOutputStreamOperator transform(String operatorName, TypeInformation outTypeInfo, OneInputStreamOperator operator) {
    // Query the input's output type eagerly and discard the result: this is done only
    // to coax out errors about MissingTypeInfo at this point.
    transformation.getOutputType();

    final int parallelism = environment.getParallelism();
    OneInputTransformation oneInputTransform = new OneInputTransformation<>(
            this.transformation, operatorName, operator, outTypeInfo, parallelism);

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SingleOutputStreamOperator resultStream =
            new SingleOutputStreamOperator(environment, oneInputTransform);

    // Register the new transformation with the environment before handing the stream back.
    getExecutionEnvironment().addOperator(oneInputTransform);
    return resultStream;
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.transformations;
import com.google.common.collect.Lists;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.operators.ChainingStrategy;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import java.util.Collection;
import java.util.List;
/**
* This Transformation represents the application of a
* {@link org.apache.flink.streaming.api.operators.OneInputStreamOperator} to one input
* {@link org.apache.flink.streaming.api.transformations.StreamTransformation}.
*
* @param The type of the elements in the nput {@code StreamTransformation}
* @param The type of the elements that result from this {@code OneInputTransformation}
*/
@Internal
public class OneInputTransformation extends StreamTransformation {
private final StreamTransformation input;
private final OneInputStreamOperator operator;
private KeySelector stateKeySelector;
private TypeInformation> stateKeyType;
/**
* Creates a new {@code OneInputTransformation} from the given input and operator.
*
* @param input The input {@code StreamTransformation}
* @param name The name of the {@code StreamTransformation}, this will be shown in Visualizations and the Log
* @param operator The {@code TwoInputStreamOperator}
* @param outputType The type of the elements produced by this {@code OneInputTransformation}
* @param parallelism The parallelism of this {@code OneInputTransformation}
*/
public OneInputTransformation(
StreamTransformation input,
String name,
OneInputStreamOperator operator,
TypeInformation outputType,
int parallelism) {
super(name, outputType, parallelism);
this.input = input;
this.operator = operator;
}
/**
* Returns the input {@code StreamTransformation} of this {@code OneInputTransformation}.
*/
public StreamTransformation getInput() {
return input;
}
/**
* Returns the {@code TypeInformation} for the elements of the input.
*/
public TypeInformation getInputType() {
return input.getOutputType();
}
/**
* Returns the {@code TwoInputStreamOperator} of this Transformation.
*/
public OneInputStreamOperator getOperator() {
return operator;
}
/**
* Sets the {@link KeySelector} that must be used for partitioning keyed state of this operation.
*
* @param stateKeySelector The {@code KeySelector} to set
*/
public void setStateKeySelector(KeySelector stateKeySelector) {
this.stateKeySelector = stateKeySelector;
}
/**
* Returns the {@code KeySelector} that must be used for partitioning keyed state in this
* Operation.
*
* @see #setStateKeySelector
*/
public KeySelector getStateKeySelector() {
return stateKeySelector;
}
public void setStateKeyType(TypeInformation> stateKeyType) {
this.stateKeyType = stateKeyType;
}
public TypeInformation> getStateKeyType() {
return stateKeyType;
}
@Override
public Collection> getTransitivePredecessors() {
List> result = Lists.newArrayList();
result.add(this);
result.addAll(input.getTransitivePredecessors());
return result;
}
@Override
public final void setChainingStrategy(ChainingStrategy strategy) {
operator.setChainingStrategy(strategy);
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.operators;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
@Internal
public class StreamFlatMap
extends AbstractUdfStreamOperator>
implements OneInputStreamOperator {
private static final long serialVersionUID = 1L;
private transient TimestampedCollector collector;
public StreamFlatMap(FlatMapFunction flatMapper) {
super(flatMapper);
chainingStrategy = ChainingStrategy.ALWAYS;
}
@Override
public void open() throws Exception {
super.open();
collector = new TimestampedCollector(output);
}
@Override
public void processElement(StreamRecord element) throws Exception {
collector.setTimestamp(element);
userFunction.flatMap(element.getValue(), collector);
}
@Override
public void processWatermark(Watermark mark) throws Exception {
output.emitWatermark(mark);
}
}