Spark Streaming API usage examples (Java)
/**
 * Entry point: wires up a 5-second-batch streaming job that reads lines from a
 * local socket and prints 10-second windowed word counts.
 *
 * @param args unused command-line arguments
 * @throws InterruptedException if the awaiting thread is interrupted
 */
public static void main(String[] args) throws InterruptedException {
    SparkSession spark = SparkSession.builder()
            .appName("test streaming")
            .master("local[2]") // need >= 2 threads: one for the receiver, one for processing
            .getOrCreate();
    JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
    JavaStreamingContext jssc = new JavaStreamingContext(sc, Durations.seconds(5));
    jssc.sparkContext().setLogLevel("WARN");
    // Checkpointing is required by the stateful/window operators used in this file.
    jssc.checkpoint("hdfs://hdp01:8020/user/zhangruichao/streaming/checkpoint");
    JavaReceiverInputDStream<String> stream = jssc.socketTextStream("localhost", 9999);
    reduceByKeyAndWindow(stream);
    jssc.start();
    try {
        jssc.awaitTermination();
    } finally {
        // Fix: close() previously ran only after a normal return from awaitTermination;
        // release the context even if the wait is interrupted or fails.
        jssc.close();
    }
}
/**
 * Prints word counts over a sliding window.
 * Window length and slide interval are both 10s, so consecutive windows do not overlap.
 *
 * @param stream raw lines from the socket receiver
 */
private static void reduceByKeyAndWindow(JavaReceiverInputDStream<String> stream) {
    JavaPairDStream<String, Integer> pairDStream = streamToPair(stream);
    // Idiom fix: the anonymous Function2 that summed two Integers is exactly Integer::sum.
    JavaPairDStream<String, Integer> windowedCounts =
            pairDStream.reduceByKeyAndWindow(Integer::sum, Durations.seconds(10), Durations.seconds(10));
    windowedCounts.print();
}
/**
 * Demonstrates transform(): left-joins each micro-batch against a static
 * flag RDD and keeps only words whose flag is absent or false.
 *
 * @param stream raw lines from the socket receiver
 * @param jssc   streaming context used to build the static flag RDD
 */
private static void transform(JavaReceiverInputDStream<String> stream, JavaStreamingContext jssc) {
    JavaPairDStream<String, Integer> wordPairs = streamToPair(stream);
    List<Tuple2<String, Boolean>> flags =
            Arrays.asList(new Tuple2<>("java", true), new Tuple2<>("python", true), new Tuple2<>("spark", false));
    JavaPairRDD<String, Boolean> flagRdd = jssc.sparkContext().parallelizePairs(flags);
    JavaDStream<String> kept = wordPairs.transform(batch -> {
        JavaPairRDD<String, Tuple2<Integer, Optional<Boolean>>> joined = batch.leftOuterJoin(flagRdd);
        // Drop entries whose flag is present AND true; everything else passes through.
        return joined
                .filter(entry -> !(entry._2._2.isPresent() && entry._2._2.get()))
                .keys();
    });
    kept.print();
}
/**
 * Splits each input line on single spaces and emits a (word, 1) pair per token.
 *
 * Fix: the original built Tuple2 objects inside flatMap and then pushed them
 * through an identity mapToPair, constructing every pair twice. Now flatMap
 * emits plain words and mapToPair builds each pair exactly once.
 *
 * @param stream raw lines from the socket receiver
 * @return pair stream of (word, 1)
 */
private static JavaPairDStream<String, Integer> streamToPair(JavaReceiverInputDStream<String> stream) {
    return stream
            .flatMap(line -> Arrays.asList(line.split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1));
}
/**
 * Concatenates all lines within a batch into one comma-separated string and prints it.
 *
 * NOTE(review): despite the method name, this uses DStream.reduce (no keys are
 * involved) — consider renaming at the call sites.
 *
 * @param stream raw lines from the socket receiver
 */
private static void reduceByKey(JavaReceiverInputDStream<String> stream) {
    JavaDStream<String> joined = stream.reduce((left, right) -> left.concat(",").concat(right));
    joined.print();
}
/**
 * Maintains a running word-count total across batches via updateStateByKey.
 * Requires checkpointing to be enabled on the streaming context (done in main).
 *
 * Idiom fix: the anonymous Function2 is replaced with a lambda. Note that
 * Optional here is Spark's org.apache.spark.api.java.Optional (per the
 * updateStateByKey signature), not java.util.Optional.
 *
 * @param stream raw lines from the socket receiver
 */
private static void updateStateByKey(JavaReceiverInputDStream<String> stream) {
    JavaPairDStream<String, Integer> pairs = streamToPair(stream);
    JavaPairDStream<String, Integer> wordCount = pairs.updateStateByKey(
            (List<Integer> values, Optional<Integer> state) -> {
                // Start from the previous total (0 for a brand-new key) and add this batch.
                int total = state.isPresent() ? state.get() : 0;
                total += values.stream().mapToInt(Integer::intValue).sum();
                return Optional.of(total);
            });
    wordCount.print();
}
/**
 * Counts how many times each distinct input line occurs within a batch and prints it.
 *
 * @param stream raw lines from the socket receiver
 */
private static void countByValue(JavaReceiverInputDStream<String> stream) {
    JavaPairDStream<String, Long> counts = stream.countByValue();
    counts.print();
}
/**
 * Classic per-batch word count: split lines into (word, 1) pairs, then sum per word.
 *
 * @param stream raw lines from the socket receiver
 */
private static void wordCountByFlatMap(JavaReceiverInputDStream<String> stream) {
    JavaPairDStream<String, Integer> pairs = streamToPair(stream);
    JavaPairDStream<String, Integer> counts = pairs.reduceByKey((a, b) -> a + b);
    counts.print();
}
```