Flink: reading data from Kafka (Java)

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * kafkaSource
 *
 *    Consume Kafka starting from specified offsets
 */
public class StreamingKafkaSource {

    public static void main(String[] args) throws Exception {
        //get the Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //checkpoint configuration
        env.enableCheckpointing(5000);                                  //trigger a checkpoint every 5 seconds
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointTimeout(60000);          //abort a checkpoint if it takes longer than 1 minute
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(500);   //at least 500 ms between checkpoints
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);       //only one checkpoint in flight at a time
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
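        //with RETAIN_ON_CANCELLATION the externalized checkpoint is kept after the job is
        //cancelled, so a later run can be restored from it (e.g. flink run -s <checkpointPath> ...)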

        //configure the state backend (left commented out here; requires an HDFS cluster reachable at hadoop100:9000)
        //env.setStateBackend(new RocksDBStateBackend("hdfs://hadoop100:9000/flink/checkpoints",true));
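        //note: RocksDBStateBackend additionally requires the flink-statebackend-rocksdb
        //dependency (matching the Flink version) on the classpath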

        //basic Kafka connection settings
        String topic = "test";
        Properties prop = new Properties();
        prop.setProperty("bootstrap.servers","192.168.200.10:9092");
        prop.setProperty("group.id","con1");

        //the offset at which to start consuming, per partition of the topic
        Map<KafkaTopicPartition, Long> kafkaTopicPartitionMap = new HashMap<>();
        kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,0),10L);   //partition 0: start at offset 10
        kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,1),0L);    //partition 1: start at offset 0
        kafkaTopicPartitionMap.put(new KafkaTopicPartition(topic,2),0L);    //partition 2: start at offset 0

        FlinkKafkaConsumer011<String> myConsumer = new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), prop);

        //choose the startup mode
        //myConsumer.setStartFromGroupOffsets();     //default mode: resume from the offsets committed for this consumer group
        myConsumer.setStartFromSpecificOffsets(kafkaTopicPartitionMap);     //start from the offsets specified above
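        //other startup modes available on the consumer:
        //myConsumer.setStartFromEarliest();     //start from the earliest offset of each partition
        //myConsumer.setStartFromLatest();       //start from the latest offset of each partition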
        DataStreamSource<String> text = env.addSource(myConsumer);
       /* text.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                System.out.println("raw record received: " + value);
                Thread.sleep(10000000); //only to slow things down so the output is easy to inspect
                return value;
            }
        });*/
        text.print().setParallelism(1);

        env.execute("StreamingKafkaSource");


    }
}
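Note: FlinkKafkaConsumer011 is the old per-Kafka-version connector and has since been deprecated and removed from newer Flink releases. On Flink 1.12+, the same "start from specific offsets" behavior can be written with the unified KafkaSource API. The snippet below is a minimal sketch, not a drop-in replacement: it assumes the matching flink-connector-kafka dependency is on the classpath and reuses the broker address, topic, and group id placeholders from the example above.

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.kafka.common.TopicPartition;

import java.util.HashMap;
import java.util.Map;

        //per-partition start offsets, mirroring kafkaTopicPartitionMap above
        //(the new API uses Kafka's own TopicPartition instead of KafkaTopicPartition)
        Map<TopicPartition, Long> offsets = new HashMap<>();
        offsets.put(new TopicPartition("test", 0), 10L);
        offsets.put(new TopicPartition("test", 1), 0L);
        offsets.put(new TopicPartition("test", 2), 0L);

        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("192.168.200.10:9092")
                .setTopics("test")
                .setGroupId("con1")
                .setStartingOffsets(OffsetsInitializer.offsets(offsets))
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();

        //fromSource replaces addSource; no watermarks are needed for this simple print job
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka-source")
                .print().setParallelism(1);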
