package com.spark.test;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import scala.Tuple2;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Durations;
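
/**
 * Word count over a Kafka topic using Spark Streaming's direct (receiver-less)
 * Kafka 0.8 integration: every 2-second batch is split into words, counted, and printed.
 */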
public class JavaDirectKafkaWordCount {
    public static void main(String[] args) throws Exception {
        // String brokers = args[0];
        // String topics = args[1];

        // Create context with a 2-second batch interval.
        // setMaster("local[2]"): at least two local threads must be specified,
        // one to receive messages and one to process them.
        SparkConf sparkConf = new SparkConf()
                .setAppName("JavaDirectKafkaWordCount")
                .setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));

        // Topics to subscribe to and the Kafka broker to read from.
        Set<String> topicsSet = new HashSet<>(Arrays.asList("test"));
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "192.168.168.200:9092");
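        // For a multi-broker cluster, metadata.broker.list takes a comma-separated list of host:port pairs.
        // Assumption: to start from the earliest available offsets instead of the latest,
        // the Kafka 0.8 consumer property below could be added as well.
        // kafkaParams.put("auto.offset.reset", "smallest");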

        // Create the direct Kafka stream with the brokers and topics.
        JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
                jssc,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaParams,
                topicsSet
        );
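
        // The direct (receiver-less) approach used above creates one RDD partition per Kafka
        // partition and has Spark Streaming track the consumed offsets itself rather than ZooKeeper.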

        // Get the lines, split them into words, count the words, and print.
        JavaDStream<String> lines = messages.map(
                new Function<Tuple2<String, String>, String>() {
                    @Override
                    public String call(Tuple2<String, String> tuple2) {
                        // The message value is the second element of the (key, value) tuple.
                        return tuple2._2();
                    }
                });
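        // flatMap emits zero or more output elements per input element, here one element per word.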
        JavaDStream<String> words = lines.flatMap(
                new FlatMapFunction<String, String>() {
                    @Override
                    public Iterator<String> call(String line) {
                        return Arrays.asList(line.split(" ")).iterator();
                    }
                });
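        // Map each word to a (word, 1) pair, then sum the counts per key within each 2-second batch.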
        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
                new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<>(s, 1);
                    }
                }).reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer i1, Integer i2) {
                        return i1 + i2;
                    }
                });
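
        // print() writes the first ten elements of each batch's RDD to the driver's stdout.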
        wordCounts.print();

        // Start the computation and block until it terminates.
        jssc.start();
        jssc.awaitTermination();
    }
}