Spark Streaming 获取 Kafka 数据(Java 版本)

环境

spark-2.2.0

kafka_2.11-0.10.0.1

jdk1.8

配置好 JDK,创建项目,并将 Kafka 和 Spark 的 jar 包添加到项目中。除此之外,还需要添加与所用 Kafka 版本对应的 spark-streaming-kafka-*.jar;笔者这里用的是 spark-streaming-kafka-0-10_2.11-2.2.0.jar,可在 Spark 官网上自行下载。

sparkStreaming获取kafka数据(java版本)_第1张图片

 1 import java.util.Arrays;
 2 import java.util.Collection;
 3 import java.util.HashMap;
 4 import java.util.Map;
 5 
 6 import org.apache.kafka.clients.consumer.ConsumerRecord;
 7 import org.apache.kafka.common.serialization.StringDeserializer;
 8 import org.apache.spark.SparkConf;
 9 import org.apache.spark.api.java.JavaPairRDD;
10 import org.apache.spark.api.java.JavaRDD;
11 import org.apache.spark.api.java.function.Function;
12 import org.apache.spark.api.java.function.PairFunction;
13 import org.apache.spark.api.java.function.VoidFunction;
14 import org.apache.spark.rdd.RDD;
15 import org.apache.spark.streaming.Durations;
16 import org.apache.spark.streaming.api.java.JavaDStream;
17 import org.apache.spark.streaming.api.java.JavaInputDStream;
18 import org.apache.spark.streaming.api.java.JavaPairDStream;
19 import org.apache.spark.streaming.api.java.JavaStreamingContext;
20 import org.apache.spark.streaming.kafka010.ConsumerStrategies;
21 import org.apache.spark.streaming.kafka010.KafkaUtils;
22 import org.apache.spark.streaming.kafka010.LocationStrategies;
23 
24 import scala.Tuple2;
25 
26 public class SparkStreamingFromkafka {
27 
28     public static void main(String[] args) throws Exception {
29         // TODO Auto-generated method stub
30         SparkConf sparkConf  = new SparkConf().setMaster("local[*]").setAppName("SparkStreamingFromkafka");
31         JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf , Durations.seconds(1));
32         
33         Map kafkaParams = new HashMap<>();
34         kafkaParams.put("bootstrap.servers", "192.168.246.134:9092");//多个可用ip可用","隔开
35         kafkaParams.put("key.deserializer", StringDeserializer.class);
36         kafkaParams.put("value.deserializer", StringDeserializer.class);
37         kafkaParams.put("group.id", "sparkStreaming");
38         Collection topics = Arrays.asList("video");//配置topic,可以是数组
39 
40         JavaInputDStream> javaInputDStream =KafkaUtils.createDirectStream(
41                 streamingContext, 
42                 LocationStrategies.PreferConsistent(), 
43                 ConsumerStrategies.Subscribe(topics, kafkaParams));
44     
45         JavaPairDStream javaPairDStream = javaInputDStream.mapToPair(new PairFunction, String, String>(){
46             private static final long serialVersionUID = 1L;
47             @Override
48             public Tuple2 call(ConsumerRecord consumerRecord) throws Exception {
49                 return new Tuple2<>(consumerRecord.key(), consumerRecord.value());
50             }
51         });
52         javaPairDStream.foreachRDD(new VoidFunction>() {
53             @Override
54             public void call(JavaPairRDD javaPairRDD) throws Exception {
55                 // TODO Auto-generated method stub
56                 javaPairRDD.foreach(new VoidFunction>() {
57                     @Override
58                     public void call(Tuple2 tuple2)
59                             throws Exception {
60                         // TODO Auto-generated method stub
61                         System.out.println(tuple2._2);
62                     }
63                 });
64             }
65         });
66         streamingContext.start();
67         streamingContext.awaitTermination();
68     }
69 
70 }

 

转载于:https://www.cnblogs.com/timeTraveler/p/9869694.html

你可能感兴趣的:(java,大数据,scala)