07_Flink Streaming transform

flatmap,map,filter都是转换,都通过DataStream的transform方法实现。以最经典的flatMap为例,transform需要三个参数:操作名字,输出类型,对输入数据的操作(operator)。返回

SingleOutputStreamOperator,它是DataStream的子类,也就是可以链式地调用。transform内部会创建一个OneInputTransformation,它是StreamTransformation的子类。

也就是说,map,flatmap,filter等用到了transform的DataStream上的操作,只是生成一个StreamTransformation,每个StreamTransformation都保存着上一个StreamTransformation的引用input,相当于一棵带有parentId的StreamTransformation的tree。

也就是说,通过DataStream的链式调用,就是在绘制这棵StreamTransformation的tree的过程。

每个StreamTransformation都保存了INPUT,OUTPUT的数据类型,和具体的转换逻辑StreamOperator。

最后绘制的这个转换的tree,由StreamGraphGenerator来绘制。

结论就是一个transformation,持有上一个transformation的引用,和自己的operator逻辑(join时候有多个operator,单流时有一个),以及并发度,buffertimeout等具体计算节点的上下文。

从operator的接口来看,除了要处理数据外,还要处理WATERMARK。也就是数据在流的过程中,会有WATERMARK伴随着一起。而map,flatMap,filter等纯计算的并不关心WATERMARK,而是直接转发给下一步。

/**
 * Registers a flat-map step on this stream.
 *
 * <p>The output type is resolved via {@code TypeExtractor.getFlatMapReturnTypes}, then the
 * function is wrapped in a {@code StreamFlatMap} operator and handed to
 * {@code transform} under the operator name {@code "Flat Map"}.
 *
 * <p>NOTE(review): the generic type arguments of this signature appear to have been lost
 * in this excerpt; confirm against the upstream source before relying on the raw types.
 *
 * @param flatMapper The user function to apply; it is passed through {@code clean(...)}
 *                   before each use (presumably closure cleaning -- confirm)
 * @return The stream returned by {@code transform}
 */
public  SingleOutputStreamOperator flatMap(FlatMapFunction flatMapper) {
		// Work out what the function emits before building the operator.
		final TypeInformation resultType = TypeExtractor.getFlatMapReturnTypes(
				clean(flatMapper),
				getType(),
				Utils.getCallLocationName(),
				true);

		// The function is cleaned a second time for the operator, as in the original code.
		final StreamFlatMap flatMapOperator = new StreamFlatMap<>(clean(flatMapper));

		return transform("Flat Map", resultType, flatMapOperator);
	}

 
	/**
	 * Appends a one-input operator to this stream.
	 *
	 * <p>A new {@code OneInputTransformation} is created whose input is this stream's
	 * transformation, it is wrapped in a {@code SingleOutputStreamOperator}, and the
	 * transformation is registered with the execution environment via {@code addOperator}.
	 *
	 * <p>NOTE(review): the generic type arguments of this signature appear to have been lost
	 * in this excerpt; confirm against the upstream source before relying on the raw types.
	 *
	 * @param operatorName Name given to the new transformation
	 * @param outTypeInfo Type information for the elements the operator produces
	 * @param operator The operator that carries the transformation logic
	 * @return The stream wrapping the newly created transformation
	 */
	@PublicEvolving
	public  SingleOutputStreamOperator transform(String operatorName, TypeInformation outTypeInfo, OneInputStreamOperator operator) {
		// Query the input's output type up front; the result is deliberately discarded.
		// The call exists only to coax out errors about MissingTypeInfo early.
		transformation.getOutputType();

		final int parallelism = environment.getParallelism();
		OneInputTransformation oneInputTransformation =
				new OneInputTransformation<>(this.transformation, operatorName, operator, outTypeInfo, parallelism);

		@SuppressWarnings({ "unchecked", "rawtypes" })
		SingleOutputStreamOperator downstream = new SingleOutputStreamOperator(environment, oneInputTransformation);

		// Register the transformation only after the wrapping stream has been built.
		getExecutionEnvironment().addOperator(oneInputTransformation);
		return downstream;
	}

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.streaming.api.transformations;

import com.google.common.collect.Lists;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.operators.ChainingStrategy;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;

import java.util.Collection;
import java.util.List;

/**
 * This Transformation represents the application of a
 * {@link org.apache.flink.streaming.api.operators.OneInputStreamOperator} to one input
 * {@link org.apache.flink.streaming.api.transformations.StreamTransformation}.
 *
 * @param  The type of the elements in the nput {@code StreamTransformation}
 * @param  The type of the elements that result from this {@code OneInputTransformation}
 */
@Internal
public class OneInputTransformation extends StreamTransformation {

	private final StreamTransformation input;

	private final OneInputStreamOperator operator;

	private KeySelector stateKeySelector;
	
	private TypeInformation stateKeyType;

	/**
	 * Creates a new {@code OneInputTransformation} from the given input and operator.
	 *
	 * @param input The input {@code StreamTransformation}
	 * @param name The name of the {@code StreamTransformation}, this will be shown in Visualizations and the Log
	 * @param operator The {@code TwoInputStreamOperator}
	 * @param outputType The type of the elements produced by this {@code OneInputTransformation}
	 * @param parallelism The parallelism of this {@code OneInputTransformation}
	 */
	public OneInputTransformation(
			StreamTransformation input,
			String name,
			OneInputStreamOperator operator,
			TypeInformation outputType,
			int parallelism) {
		super(name, outputType, parallelism);
		this.input = input;
		this.operator = operator;
	}

	/**
	 * Returns the input {@code StreamTransformation} of this {@code OneInputTransformation}.
	 */
	public StreamTransformation getInput() {
		return input;
	}

	/**
	 * Returns the {@code TypeInformation} for the elements of the input.
	 */
	public TypeInformation getInputType() {
		return input.getOutputType();
	}

	/**
	 * Returns the {@code TwoInputStreamOperator} of this Transformation.
	 */
	public OneInputStreamOperator getOperator() {
		return operator;
	}

	/**
	 * Sets the {@link KeySelector} that must be used for partitioning keyed state of this operation.
	 *
	 * @param stateKeySelector The {@code KeySelector} to set
	 */
	public void setStateKeySelector(KeySelector stateKeySelector) {
		this.stateKeySelector = stateKeySelector;
	}

	/**
	 * Returns the {@code KeySelector} that must be used for partitioning keyed state in this
	 * Operation.
	 *
	 * @see #setStateKeySelector
	 */
	public KeySelector getStateKeySelector() {
		return stateKeySelector;
	}

	public void setStateKeyType(TypeInformation stateKeyType) {
		this.stateKeyType = stateKeyType;
	}

	public TypeInformation getStateKeyType() {
		return stateKeyType;
	}

	@Override
	public Collection> getTransitivePredecessors() {
		List> result = Lists.newArrayList();
		result.add(this);
		result.addAll(input.getTransitivePredecessors());
		return result;
	}

	@Override
	public final void setChainingStrategy(ChainingStrategy strategy) {
		operator.setChainingStrategy(strategy);
	}
}


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.operators;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

@Internal
public class StreamFlatMap
		extends AbstractUdfStreamOperator>
		implements OneInputStreamOperator {

	private static final long serialVersionUID = 1L;

	private transient TimestampedCollector collector;

	public StreamFlatMap(FlatMapFunction flatMapper) {
		super(flatMapper);
		chainingStrategy = ChainingStrategy.ALWAYS;
	}

	@Override
	public void open() throws Exception {
		super.open();
		collector = new TimestampedCollector(output);
	}

	@Override
	public void processElement(StreamRecord element) throws Exception {
		collector.setTimestamp(element);
		userFunction.flatMap(element.getValue(), collector);
	}

	@Override
	public void processWatermark(Watermark mark) throws Exception {
		output.emitWatermark(mark);
	}
}





你可能感兴趣的:(flink,streaming)