Spark Development in Java - Reading Kafka Data

1 - Dependencies

2 - Source code

Dependencies in the pom



<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>3.0.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
        <version>3.0.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
        <version>3.0.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.10.0.0</version>
    </dependency>
</dependencies>

If you run into version problems, you can delete the last kafka-clients dependency, because spark-streaming-kafka-0-10_2.12 already pulls in kafka-clients as a transitive dependency.
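If you are not sure which kafka-clients version the connector brings in, one way to check (a generic Maven command, not something from the original post) is to print the filtered dependency tree:

mvn dependency:tree -Dincludes=org.apache.kafka:kafka-clients

The complete source code follows.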
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import scala.Tuple2;

import java.util.*;
// Read data from Kafka

public class KafakSpark {

    public static void SparkReadProduct() throws InterruptedException {
        SparkConf conf = new SparkConf();// Spark configuration (master and app name are usually supplied via spark-submit)
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(5));
        ssc.checkpoint("checkpoint path");// checkpoint directory

        // Kafka parameters
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "Kafka broker address");// Kafka broker list
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "consumer group name");// consumer group
        Collection<String> topics = Arrays.asList("topic name");// topics to subscribe to
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        // Read data from Kafka and create a DStream
        JavaInputDStream<ConsumerRecord<String, String>> javaInputDStream = KafkaUtils.createDirectStream(
                ssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));


        JavaPairDStream<String, String> DStream = javaInputDStream.mapToPair(new PairFunction<ConsumerRecord<String, String>, String, String>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Tuple2<String, String> call(ConsumerRecord<String, String> consumerRecord) throws Exception {
                return new Tuple2<>(consumerRecord.key(), consumerRecord.value());
            }
        });
        DStream.foreachRDD(new VoidFunction<JavaPairRDD<String, String>>() {
            @Override
            public void call(JavaPairRDD<String, String> javaPairRDD) throws Exception {
                javaPairRDD.foreach(new VoidFunction<Tuple2<String, String>>() {
                    @Override
                    public void call(Tuple2<String, String> tuple2) throws Exception {
                        // print the message value of each record
                        System.out.println(tuple2._2);
                    }
                });
            }
        });
        ssc.start();
        ssc.awaitTermination();
    }

    public static void main(String[] args) throws InterruptedException {
        SparkReadProduct();

    }

}
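Since enable.auto.commit is set to false above, this example never records which offsets it has processed, so a restart simply resumes according to auto.offset.reset. If you want to track progress, the kafka-0-10 integration lets you commit offsets back to Kafka after each batch. The sketch below only illustrates that pattern and is not part of the original program; the class name OffsetCommitExample and the helper processAndCommit are made up for the example.

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;

public class OffsetCommitExample {

    // Hypothetical helper: process each micro-batch, then commit its offsets back to Kafka.
    // "stream" is the JavaInputDStream returned by KafkaUtils.createDirectStream above.
    static void processAndCommit(JavaInputDStream<ConsumerRecord<String, String>> stream) {
        stream.foreachRDD(rdd -> {
            // RDDs produced by the direct stream carry the Kafka offset ranges they cover.
            OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();

            // Process the batch (here: just print the values, as in the original example).
            rdd.foreach(record -> System.out.println(record.value()));

            // Asynchronously commit this batch's offsets to Kafka under the configured group.id.
            ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
        });
    }
}

Note that commitAsync only records the offsets with Kafka; the output itself (the println here) must still be idempotent if you need at-least-once behavior.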
