Flink Local Environment: Writing Demos in Scala and Java

Flink Demo

Preface

Apache Flink is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams. Flink is designed to run in all common cluster environments and to perform computations at in-memory speed and at any scale.
Bounded: the data stream has a beginning and an end.
Unbounded: the data stream has a beginning but no end.
Difference from Spark: Spark Streaming processes a stream as a series of micro-batches on top of a batch engine, whereas Flink is a native streaming engine that treats batch processing as a special case of streaming.
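As a quick illustration (a minimal sketch, not part of the original post), the two kinds of sources look like this in Flink's Scala API:

import org.apache.flink.streaming.api.scala._

val env = StreamExecutionEnvironment.getExecutionEnvironment

// bounded: a fixed collection has a beginning and an end
val bounded: DataStream[String] = env.fromElements("hello", "flink")

// unbounded: a socket keeps producing records until it is closed
val unbounded: DataStream[String] = env.socketTextStream("localhost", 9999)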

I. Preparation

  • jar dependencies (the XML tags were lost in extraction; reconstructed below)

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.6.2</version>
</dependency>

II. Socket WordCount

Scala implementation of socket WordCount
  • Code
package com.imooc.spark.flink

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._
object SocketWindowWordCount {

  def main(args: Array[String]) : Unit = {

    // the port to connect to
   /* val port: Int = try {
      ParameterTool.fromArgs(args).getInt("port")
    } catch {
      case e: Exception => {
        System.err.println("No port specified. Please run 'SocketWindowWordCount --port '")
        return
      }
    }*/

    // get the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to the socket
    val text = env.socketTextStream("192.168.52.130", 9999, '\n')

    // parse the data, group it, window it, and aggregate the counts
    val windowCounts = text
      .flatMap { w => w.split("\\s") }
      .map { w => WordWithCount(w, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(5), Time.seconds(1))
      .sum("count")

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1)
    env.execute("Socket Window WordCount")
  }

  // Data type for words with count
  case class WordWithCount(word: String, count: Long)

}
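The commented-out block in main hints at reading the connection info from the command line. A working variant (a sketch using Flink's ParameterTool; the --host flag and the default values are assumptions, not from the original) could replace the hard-coded socketTextStream call:

import org.apache.flink.api.java.utils.ParameterTool

// inside main: parse --host and --port, falling back to the demo's defaults
val params = ParameterTool.fromArgs(args)
val host = params.get("host", "192.168.52.130")
val port = params.getInt("port", 9999)
val text = env.socketTextStream(host, port, '\n')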

  • Result (each word falls into several overlapping windows, because the 5-second window slides every second, so the same counts are printed repeatedly)
[hadoop@hadoop001 conf]$ nc -lk 9999
hello flink
hello world
hello spark
hello spark
hellp java
hello hive
hello hadoop


WordWithCount(hello,1)
WordWithCount(flink,1)
WordWithCount(flink,1)
WordWithCount(hello,1)
WordWithCount(hello,1)
WordWithCount(flink,1)
WordWithCount(flink,1)
WordWithCount(hello,1)

Java implementation of socket WordCount

1. Code

package com.imooc.spark.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

/**
 * @ClassName SocketWindowWordCount
 * @Description TODO
 * @Author lxp
 * @Date 2018/11/16 0016
 **/
public class SocketWindowWordCount1 {
    public static void main(String[] args) throws Exception {

        // the port to connect to
       /* final int port;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            port = params.getInt("port");
        } catch (Exception e) {
            System.err.println("No port specified. Please run 'SocketWindowWordCount --port '");
            return;
        }*/

        // get the execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // get input data by connecting to the socket
        DataStream<String> text = env.socketTextStream("192.168.52.130", 9999, "\n");

        // parse the data, group it, window it, and aggregate the counts
        DataStream<WordWithCount> windowCounts = text
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String value, Collector<WordWithCount> out) {
                        // split on whitespace, matching the space-separated nc input
                        for (String word : value.split("\\s")) {
                            out.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(5), Time.seconds(1))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        // print the results with a single thread, rather than in parallel
        windowCounts.print().setParallelism(1);

        env.execute("Socket Window WordCount");
    }

    // Data type for words with count
    public static class WordWithCount {
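        // keyBy("word") treats this class as a POJO: it needs public fields
        // (or getters/setters) and a public no-argument constructor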

        public String word;
        public long count;

        public WordWithCount() {}

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}

III. Reading External Data

1. Reading a text file
  • Code
import org.apache.flink.api.scala._

val env = ExecutionEnvironment.getExecutionEnvironment
val text = env.readTextFile("file:///E:/testData/flink/")
// in the batch (DataSet) API, print() triggers execution itself,
// so there is no need to call env.execute() afterwards
text.print()
  • Enable recursive reading of nested subdirectories

import org.apache.flink.configuration.Configuration

// recursively enumerate files in subdirectories
val parameters = new Configuration()
parameters.setBoolean("recursive.file.enumeration", true)
val text = env.readTextFile("file:///E:/testData/flink/", "GBK").withParameters(parameters)
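Conversely, when the job ends in a sink such as writeAsText instead of print(), env.execute() is required to actually run it. A minimal sketch (the output path here is a made-up example):

import org.apache.flink.core.fs.FileSystem.WriteMode

// a sink does not trigger execution by itself, so execute() is needed
text.writeAsText("file:///E:/testData/flink-out", WriteMode.OVERWRITE)
env.execute("WriteTextSink")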
  • Result

2. Reading a CSV file

  • Code
package com.imooc.spark.flink

import org.apache.flink.api.scala._

/**
  * Read data from an external source
  **/
object ReadDataSourceOfCsv {

  def main(args: Array[String]): Unit = {

    // create the (batch) execution environment
    //val env = StreamExecutionEnvironment.getExecutionEnvironment
    val env = ExecutionEnvironment.getExecutionEnvironment

    // read the CSV file into 3-tuples, skipping the header line
    //val text = env.readTextFile("file:///E:/testData/SparkStreaming/9.txt")
    //val text = env.readTextFile("file:///E:/testData/flink/")
    val text = env.readCsvFile[(String, Int, String)]("file:///E:/testData/flintTest.csv"
      , ignoreFirstLine = true)

    // print the result; in the DataSet API, print() triggers execution,
    // so no env.execute() call is needed
    text.print()

  }
  case class User(name: String, age: Int, addr: String)
}
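The User case class above is declared but never used. readCsvFile can also map each row directly onto a case class instead of a tuple (a sketch, assuming the CSV columns are name, age, addr in that order):

// map each CSV row onto the User case class positionally
val users = env.readCsvFile[User]("file:///E:/testData/flintTest.csv",
  ignoreFirstLine = true)
users.print()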

  • Result

3. Reading via the Table API

  • Code
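The original post leaves this section empty. Below is a minimal sketch of what reading the same CSV file through the Table API might look like on Flink 1.6; it assumes the flink-table_2.11 dependency has been added to the pom, and it reuses the file path and schema from the CSV example above.

package com.imooc.spark.flink

import org.apache.flink.api.scala._
import org.apache.flink.table.api.{TableEnvironment, Types}
import org.apache.flink.table.api.scala._
import org.apache.flink.table.sources.CsvTableSource
import org.apache.flink.types.Row

object ReadDataSourceOfTable {

  def main(args: Array[String]): Unit = {

    // batch environment plus its table environment
    val env = ExecutionEnvironment.getExecutionEnvironment
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    // describe the CSV file as a table source
    val csvSource = CsvTableSource.builder()
      .path("file:///E:/testData/flintTest.csv")
      .field("name", Types.STRING)
      .field("age", Types.INT)
      .field("addr", Types.STRING)
      .ignoreFirstLine()
      .build()

    // register the source and query it with the Table API
    tableEnv.registerTableSource("users", csvSource)
    val result = tableEnv.scan("users").select('name, 'age)

    // convert back to a DataSet to print; print() triggers execution
    tableEnv.toDataSet[Row](result).print()
  }
}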
  • Result
