01_Flink Streaming env

package com.alibaba.flink.train.streaming;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class HelloWorld {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment
				.getExecutionEnvironment();
		// env.setParallelism(4); // parallelism

		// 1: reads lines such as "flink storm" and "hadoop hive"
		DataStream<String> dataStream = env
				.readTextFile("D:/flinkdata/helloworld");

		dataStream
				.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
					@Override
					public void flatMap(String input,
							Collector<Tuple2<String, Integer>> collector)
							throws Exception {
						String[] words = input.split(" ");
						for (String word : words) {
							// crucial: position 0 holds the word, position 1 holds a count of 1
							collector.collect(new Tuple2<String, Integer>(word, 1));
						}
					}
				}) // 2: (flink 1) (storm 1)
				.keyBy(0) // 3: partition by the value at position 0
				.sum(1) // (flink: 8) (storm: 5) -- sum over the value at position 1
				.printToErr();

		env.execute(); // launch the job; blocks until the job finishes
	}
}

Sample input (the contents of D:/flinkdata/helloworld):
storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
 flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive  storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase storm flink spark
hadoop hive hbase 

At the core of a Flink streaming program are two objects:

1: The StreamExecutionEnvironment object, env for short, which serves as the context of the streaming computation. Through env you can (a minimal sketch follows this list):

1: attach data sources

2: set the parallelism and other runtime parameters (indirectly, through the ExecutionConfig object it holds)

3: launch the job
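
A minimal sketch of these three responsibilities in one program; the socket hostname and port are placeholder assumptions, not taken from the original example:

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class EnvResponsibilities {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// 2: runtime parameters, set directly or via the held ExecutionConfig
		env.setParallelism(4);
		env.getConfig().setAutoWatermarkInterval(200);

		// 1: attach a data source (hostname and port are placeholders)
		DataStream<String> lines = env.socketTextStream("localhost", 9999);
		lines.printToErr();

		// 3: launch the job
		env.execute("env-responsibilities-demo");
	}
}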

2: The DataStream object, which carries the streaming transformations themselves, including (a combined sketch follows this list):

1: writing results out (sinks)

2: computation logic

3: data partitioning

4: filtering

5: windows

6: joins
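
A sketch combining several of these capabilities in one pipeline; the sample tuples and the five-second window are made up for illustration. A join works analogously via stream1.join(stream2).where(...).equalTo(...).window(...):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;

public class DataStreamFeatures {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<Tuple2<String, Integer>> counts = env
				.fromElements(Tuple2.of("flink", 1), Tuple2.of("storm", 1), Tuple2.of("flink", 1))
				.filter(t -> t.f1 > 0)            // 4: filtering
				.keyBy(0)                         // 3: data partitioning
				.timeWindow(Time.seconds(5))      // 5: windows
				.sum(1);                          // 2: computation logic

		counts.printToErr();                      // 1: writing results out (a sink)
		env.execute("datastream-features-demo");
	}
}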


Beyond these job-facing features, the Flink core provides lower-level capabilities that keep stream processing stable (a configuration sketch follows this list):

1: event-time extraction

2: exactly-once semantics

3: backpressure

4: fault tolerance

5: state management

6: checkpointing
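
Backpressure and state management work without explicit configuration; the others are switched on through env. A configuration sketch, where the interval and retry values are arbitrary choices, not recommendations:

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class StabilityFeatures {
	public static void main(String[] args) {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// 1: base windows and timers on event time extracted from the records
		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

		// 2 + 6: checkpoint every 5 seconds with exactly-once guarantees
		env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);

		// 4: fault tolerance: restart up to 3 times, waiting 10 seconds in between
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 10000));
	}
}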


The object to study first is env. Below is the source of StreamExecutionEnvironment, abridged here after its fields and a few representative methods; the three methods this article focuses on are quoted in full afterwards.

/*

 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.environment;

import com.esotericsoftware.kryo.Serializer;
import com.google.common.base.Preconditions;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.functions.InvalidTypesException;
import org.apache.flink.api.common.functions.StoppableFunction;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.ClosureCleaner;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.typeutils.MissingTypeInfo;
import org.apache.flink.api.java.typeutils.PojoTypeInfo;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.client.program.ContextEnvironment;
import org.apache.flink.client.program.OptimizerPlanEnvironment;
import org.apache.flink.client.program.PreviewPlanEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.functions.source.FileMonitoringFunction;
import org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType;
import org.apache.flink.streaming.api.functions.source.FileReadFunction;
import org.apache.flink.streaming.api.functions.source.FileSourceFunction;
import org.apache.flink.streaming.api.functions.source.FromElementsFunction;
import org.apache.flink.streaming.api.functions.source.FromIteratorFunction;
import org.apache.flink.streaming.api.functions.source.FromSplittableIteratorFunction;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.source.StatefulSequenceSource;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.flink.streaming.api.graph.StreamGraphGenerator;
import org.apache.flink.streaming.api.operators.StoppableStreamSource;
import org.apache.flink.streaming.api.operators.StreamSource;
import org.apache.flink.runtime.state.AbstractStateBackend;
import org.apache.flink.streaming.api.transformations.StreamTransformation;
import org.apache.flink.util.SplittableIterator;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import static java.util.Objects.requireNonNull;

/**
 * The StreamExecutionEnvironment is the context in which a streaming program is executed. A
 * {@link LocalStreamEnvironment} will cause execution in the current JVM, a
 * {@link RemoteStreamEnvironment} will cause execution on a remote setup.
 *
 * <p>The environment provides methods to control the job execution (such as setting the
 * parallelism or the fault tolerance/checkpointing parameters) and to interact with the
 * outside world (data access).
 *
 * @see org.apache.flink.streaming.api.environment.LocalStreamEnvironment
 * @see org.apache.flink.streaming.api.environment.RemoteStreamEnvironment
 */
@Public
public abstract class StreamExecutionEnvironment {

	/** The default name to use for a streaming job if no other name has been specified */
	public static final String DEFAULT_JOB_NAME = "Flink Streaming Job";

	/** The time characteristic that is used if none other is set */
	private static final TimeCharacteristic DEFAULT_TIME_CHARACTERISTIC = TimeCharacteristic.ProcessingTime;

	/** The default buffer timeout (max delay of records in the network stack) */
	private static final long DEFAULT_NETWORK_BUFFER_TIMEOUT = 100L;

	/** The environment of the context (local by default, cluster if invoked through command line) */
	private static StreamExecutionEnvironmentFactory contextEnvironmentFactory;

	/** The default parallelism used when creating a local environment */
	private static int defaultLocalParallelism = Runtime.getRuntime().availableProcessors();

	// ------------------------------------------------------------------------

	/** The execution configuration for this environment */
	private final ExecutionConfig config = new ExecutionConfig();

	/** Settings that control the checkpointing behavior */
	private final CheckpointConfig checkpointCfg = new CheckpointConfig();

	protected final List<StreamTransformation<?>> transformations = new ArrayList<>();

	private long bufferTimeout = DEFAULT_NETWORK_BUFFER_TIMEOUT;

	protected boolean isChainingEnabled = true;

	/** The state backend used for storing k/v state and state snapshots */
	private AbstractStateBackend defaultStateBackend;

	/** The time characteristic used by the data streams */
	private TimeCharacteristic timeCharacteristic = DEFAULT_TIME_CHARACTERISTIC;

	/**
	 * Sets the parallelism for operations executed through this environment.
	 * Setting a parallelism of x here will cause all operators (such as map,
	 * batchReduce) to run with x parallel instances. This method overrides the
	 * default parallelism for this environment.
	 *
	 * @param parallelism The parallelism
	 */
	public StreamExecutionEnvironment setParallelism(int parallelism) {
		if (parallelism < 1) {
			throw new IllegalArgumentException("parallelism must be at least one.");
		}
		config.setParallelism(parallelism);
		return this;
	}

	/**
	 * Enables checkpointing for the streaming job. The distributed state of the
	 * streaming dataflow will be periodically snapshotted. In case of a failure,
	 * the streaming dataflow will be restarted from the latest completed
	 * checkpoint. This method selects {@link CheckpointingMode#EXACTLY_ONCE}
	 * guarantees.
	 *
	 * @param interval Time interval between state checkpoints in milliseconds.
	 */
	public StreamExecutionEnvironment enableCheckpointing(long interval) {
		checkpointCfg.setCheckpointInterval(interval);
		return this;
	}

	/**
	 * Creates a data stream that represents the Strings produced by reading the
	 * given file line wise. The file will be read with the system's default
	 * character set.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file"
	 *                 or "hdfs://host:port/file/path").
	 * @return The data stream that represents the data read from the given file as text lines
	 */
	public DataStreamSource<String> readTextFile(String filePath) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");
		TextInputFormat format = new TextInputFormat(new Path(filePath));
		TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO;

		return createInput(format, typeInfo, "Read Text File Source");
	}

	// ... the remaining members (buffer timeout, operator chaining, state backend,
	// restart strategies, Kryo serializer registration, time characteristic, the
	// fromElements/fromCollection/fromParallelCollection/socketTextStream/createInput
	// source factories, getStreamGraph/getExecutionPlan/clean/addOperator, and the
	// local/remote environment factory methods) are elided here; the three methods
	// this article focuses on, addSource, getExecutionEnvironment, and execute,
	// are quoted in full below.
}


env has three core methods:

DataStreamSource<OUT> addSource(SourceFunction<OUT> function), which attaches a source;

JobExecutionResult execute(), which launches the job;

static StreamExecutionEnvironment getExecutionEnvironment(), which obtains the environment itself.
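
Put together in one minimal sketch; the anonymous counting source is a made-up stand-in for any SourceFunction:

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class ThreeCoreMethods {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStreamSource<Long> numbers = env.addSource(new SourceFunction<Long>() {
			private volatile boolean running = true;

			@Override
			public void run(SourceContext<Long> ctx) throws Exception {
				long n = 0;
				while (running && n < 100) {
					ctx.collect(n++); // emit the next number
				}
			}

			@Override
			public void cancel() {
				running = false; // invoked when the job is cancelled
			}
		});

		numbers.printToErr();
		env.execute("three-core-methods-demo");
	}
}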

/**
	 * Adds a data source with a custom type information, thus opening a
	 * {@link DataStream}. Only in very special cases does the user need to
	 * support type information. Otherwise use
	 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}
	 *
	 * @param function
	 * 		the user defined function
	 * @param sourceName
	 * 		Name of the data source
	 * @param <OUT>
	 * 		type of the returned stream
	 * @param typeInfo
	 * 		the user defined type information for the stream
	 * @return the data stream constructed
	 */
	@SuppressWarnings("unchecked")
	public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {

		if (typeInfo == null) {
			if (function instanceof ResultTypeQueryable) {
				typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
			} else {
				try {
					typeInfo = TypeExtractor.createTypeInfo(
							SourceFunction.class,
							function.getClass(), 0, null, null);
				} catch (final InvalidTypesException e) {
					typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
				}
			}
		}

		boolean isParallel = function instanceof ParallelSourceFunction;

		clean(function);
		StreamSource<OUT, ?> sourceOperator;
		if (function instanceof StoppableFunction) {
			sourceOperator = new StoppableStreamSource<>(cast2StoppableSourceFunction(function));
		} else {
			sourceOperator = new StreamSource<>(function);
		}

		return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
	}
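
Two details of the implementation above are easy to demonstrate: the typeInfo fallback chain (ResultTypeQueryable first, then TypeExtractor reflection) and the isParallel flag, which depends only on whether the function implements ParallelSourceFunction. A hypothetical source illustrating both:

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;

// Hypothetical source: every parallel instance emits "tick" once per second.
// Implementing ParallelSourceFunction makes addSource() set isParallel = true,
// so the source runs with the environment's parallelism; implementing
// ResultTypeQueryable short-circuits the TypeExtractor reflection step.
public class TickSource implements ParallelSourceFunction<String>, ResultTypeQueryable<String> {

	private volatile boolean running = true;

	@Override
	public void run(SourceContext<String> ctx) throws Exception {
		while (running) {
			ctx.collect("tick");
			Thread.sleep(1000);
		}
	}

	@Override
	public void cancel() {
		running = false;
	}

	@Override
	public TypeInformation<String> getProducedType() {
		return BasicTypeInfo.STRING_TYPE_INFO;
	}
}

Calling env.addSource(new TickSource()) then yields a parallel DataStreamSource<String> without any reflection on the generic parameter.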

/**
	 * Creates an execution environment that represents the context in which the
	 * program is currently executed. If the program is invoked standalone, this
	 * method returns a local execution environment, as returned by
	 * {@link #createLocalEnvironment()}.
	 *
	 * @return The execution environment of the context in which the program is
	 * executed.
	 */
	public static StreamExecutionEnvironment getExecutionEnvironment() {
		if (contextEnvironmentFactory != null) {
			return contextEnvironmentFactory.createExecutionEnvironment();
		}

		// because the streaming project depends on "flink-clients" (and not the other way around)
		// we currently need to intercept the data set environment and create a dependent stream env.
		// this should be fixed once we rework the project dependencies
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		if (env instanceof ContextEnvironment) {
			return new StreamContextEnvironment((ContextEnvironment) env);
		} else if (env instanceof OptimizerPlanEnvironment | env instanceof PreviewPlanEnvironment) {
			return new StreamPlanEnvironment(env);
		} else {
			return createLocalEnvironment();
		}
	}

/**
	 * Triggers the program execution. The environment will execute all parts of
	 * the program that have resulted in a "sink" operation. Sink operations are
	 * for example printing results or forwarding them to a message queue.
	 *
	 * <p>The program execution will be logged and displayed with a generated
	 * default name.
	 *
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws Exception which occurs during job execution.
	 */
	public JobExecutionResult execute() throws Exception {
		return execute(DEFAULT_JOB_NAME);
	}