Flink_WordCount_代码实例

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;


public class WindowWordCount {

    public static void main(String [] args) throws Exception{

        // get env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream> dataStream = env
                // source
                .socketTextStream("localhost", 9999)
                // flatmap DataStream->DataStream 将读入的一份数据,转换成0到n个。这里就是拆分。
                .flatMap(new FlatMapFunction() {
                    @Override
                    public void flatMap(String s, Collector collector) throws Exception {
                        String [] words = s.split(" ");
                        for(String word : words){
                            collector.collect(word);
                        }
                    }
                })
                // map  DataStream->DataStream 读入一份,转换成一份,这里是组装成tuple对
                .map(new MapFunction>() {
                    @Override
                    public Tuple2 map(String s) throws Exception {
                        return new Tuple2(s, 1l);
                    }
                })
                // keyBy DataStream->KeyedStream 逻辑上将数据根据key进行分区,保证相同的key分到一起。默认是hash分区
                .keyBy(0)
                // window
                .timeWindow(Time.seconds(2))
                // sum WindowedStream->DataStream 聚合窗口内容。另外还有min,max等
                .sum(1);
        //sink
        dataStream.print();
        env.execute("Word Count!");
    }
}

你可能感兴趣的:(Flink)