Flink Stream Processing Learning Notes

1. Writing a stream processing application in Java
Open a socket to provide the input data (reference: https://blog.csdn.net/qq_37585545/article/details/82250984)
nc -lp 9999

Code:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class StreamingWCJavaApp {

    public static void main(String[] args) throws Exception {

        //step1: get the execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //step2: read the data from the socket
        DataStreamSource<String> test = env.socketTextStream("localhost", 9999);
        //step3: transform
        //1. read the data
        test.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                //2. split each line on the given delimiter
                String[] splits = s.toLowerCase().split(",");
                for (String split : splits) {
                    if (split.length() > 0) {
                        //3. assign an initial count of 1 to every word, e.g. (hello,1)
                        collector.collect(new Tuple2<>(split, 1));
                    }
                }
            }
            //4. aggregate: key by word, 5-second tumbling window, sum the counts
        }).keyBy(0).timeWindow(Time.seconds(5)).sum(1).print().setParallelism(1); // parallelism of the print sink
        //step4: execute
        env.execute("StreamingWCJavaApp");
    }
}
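To try the job out: start the netcat listener first (nc -lp 9999; on BSD/macOS netcat the form is nc -l 9999), then run the program and type comma-separated words such as hello,hello,world into the netcat session. Roughly every 5 seconds the job should print the counts accumulated in that window, for example (hello,2) and (world,1).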

2. Reading parameters from the command line

    ParameterTool parameterTool = ParameterTool.fromArgs(args);
    // getInt(key, defaultValue) falls back to 9999 when --port is not passed;
    // getInt("port") alone would throw if the argument is missing, so wrapping it
    // in Optional.of(...).orElse(9999) never reaches the default
    int port = parameterTool.getInt("port", 9999);
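
Putting the two pieces together, here is a minimal sketch (the class name ParamPortApp is a placeholder, not from the original post) of feeding the parsed port into the socket source; start the job with --port 9999, or omit the flag to fall back to the default:

import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ParamPortApp {

    public static void main(String[] args) throws Exception {
        // parse --port from the program arguments, default to 9999
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        int port = parameterTool.getInt("port", 9999);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // read from the socket on the configured port and echo each line
        DataStreamSource<String> text = env.socketTextStream("localhost", port);
        text.print().setParallelism(1);

        env.execute("ParamPortApp");
    }
}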

3. Stream processing demo in Scala

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time

object StreamWCScalaApp {

  def main(args: Array[String]): Unit = {

    //step1: get the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //step2: read the data from the socket
    val test = env.socketTextStream("localhost", 9999)
    //step3: transform
    //1. read the data
    //2. split each line on the given delimiter
    //3. assign an initial count of 1 to every word, e.g. (hello,1)
    //4. aggregate: key by word, 6-second tumbling window, sum the counts
    import org.apache.flink.api.scala._ // implicit TypeInformation required by the Scala API
    test.flatMap(_.toLowerCase.split(","))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .timeWindow(Time.seconds(6))
      .sum(1)
      .print().setParallelism(1)

    env.execute("StreamWCScalaApp")
  }
}
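
Two notes on the Scala version: the import org.apache.flink.api.scala._ inside main is required because it supplies the implicit TypeInformation that flatMap/map need; without it the program does not compile. Also, keyBy(0) and timeWindow(...) are the older positional-key and windowing APIs; newer Flink releases deprecate them in favor of keyBy(_._1) and window(TumblingProcessingTimeWindows.of(...)), so adjust accordingly on a recent version.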
