【Flink流式计算框架】flink入门程序实现(单词计数)_实时统计_离线统计

003Flink

Flink程序
步骤一:获取执行环境,获取的是程序入口
步骤二:获取数据源
步骤三:数据处理
Out.collect(new Tuple2<>(word,1))
Out,collect(Tuple2.of(word,1))这俩结果一样
步骤四:数据的输出
步骤五:启动应用程序
工作中算子参数,用面向对象的
抽离复杂的算子

实时需求分析
       实时统计每隔1秒统计最近2秒单词出现的次数
开发环境部署

  1. 官网建议使用IDEA,IDEA默认集成了scala和maven,使用起来方便
  2. pom依赖如下:
<properties>
   <flink.version>1.9.0</flink.version>
    <scala.version>2.11.8</scala.version>
</properties>

<dependencies>
    <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>${flink.version}</version>
    </dependency>
</dependencies>

实时代码开发(java)

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WindowWordCountJava {
    public static void main(String[] args) throws Exception {
        //flink提供的工具类,获取传递的参数
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        String hostname = parameterTool.get("hostname");
        int port = parameterTool.getInt("port");
        //步骤一:获取执行环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //步骤二:获取数据源
        DataStream<String> dataStream = env.socketTextStream(hostname, port);
        //步骤三:执行逻辑操作
        DataStream<WordCount> wordAndOneStream = dataStream.flatMap(new FlatMapFunction<String, WordCount>() {
            public void flatMap(String line, Collector<WordCount> out) {
                String[] fields = line.split(",");
                for (String word : fields) {
                    out.collect(new WordCount(word, 1L));
                }
            }
        });

        DataStream<WordCount> resultStream = wordAndOneStream.keyBy("word")
                .timeWindow(Time.seconds(2), Time.seconds(1))//每隔1秒计算最近2秒
                .sum("count");
        //步骤四:结果打印
        resultStream.print();
        //步骤五:任务启动
        env.execute("WindowWordCountJava");
    }

    public static class WordCount{
        public String word;
        public long count;
        //记得要有这个空构建
        public WordCount(){

        }
        public WordCount(String word,long count){
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return "WordCount{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }
    }
}

实时代码开发(scala)
添加依赖

<dependency>
         <groupId>org.apache.flink</groupId>
          <artifactId>flink-streaming-scala_2.11</artifactId>
          <version>${flink.version}</version>
</dependency>

scala开发依赖和编译插件

<properties>
        <flink.version>1.9.0</flink.version>
        <scala.version>2.11.8</scala.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <!-- scala插件 -->
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                </plugin>
                <!-- maven 插件 -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5.1</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF
                                        META-INF/*.DSA
                                        META-INF/*.RSA
                                    
                                
                            
                        
                    
                
            
        
    

代码实现

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time

/**
  * 滑动窗口
  * 每隔1秒钟统计最近2秒内的数据,打印到控制台。
  */
object WindowWordCountScala {
  def main(args: Array[String]): Unit = {
    //获取参数
    val hostname = ParameterTool.fromArgs(args).get("hostname")
    val port = ParameterTool.fromArgs(args).getInt("port")
    //TODO 导入隐式转换
    import org.apache.flink.api.scala._
    //步骤一:获取执行环境
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //步骤二:获取数据源
    val textStream = env.socketTextStream(hostname,port)
    //步骤三:数据处理
    val wordCountStream = textStream.flatMap(line => line.split(","))
      .map((_, 1))
      .keyBy(0)
      .timeWindow(Time.seconds(2), Time.seconds(1))
      .sum(1)
    //步骤四:数据结果处理
    wordCountStream.print()
    //步骤六:启动程序
    env.execute("WindowWordCountScala")
  }
}

离线代码开发(java)

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class WordCount {
    public static void main(String[] args)throws  Exception {
        //步骤一:获取离线的程序入口
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        String inputPath="D:\\input\\hello.txt";
        //步骤二:获取数据源
        DataSource<String> dataSet = env.readTextFile(inputPath);
        //步骤三:数据处理
        FlatMapOperator<String, Tuple2<String, Integer>> wordAndOneDataSet = dataSet.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
                String[] fileds = line.split(",");
                for (String word : fileds) {
                    collector.collect(new Tuple2<String, Integer>(word, 1));
                }
            }
        });

        AggregateOperator<Tuple2<String, Integer>> result = wordAndOneDataSet.groupBy(0)
                .sum(1);
        //步骤四:数据结果处理
        result.writeAsText("D:\\kkb\\flinklesson\\src\\output\\result").setParallelism(1);
        //步骤五:启动程序
        env.execute("word count");
    }
}

换一种写法,把flink的算子抽离出来,代码看起来会更清晰。

public class WordCount {
    public static void main(String[] args)throws  Exception {
        //步骤一:获取离线的程序入口
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        String inputPath="D:\\input\\hello.txt";
        //步骤二:获取数据源
        DataSource<String> dataSet = env.readTextFile(inputPath);
        //步骤三:数据处理
        FlatMapOperator<String, Tuple2<String, Integer>> wordAndOneDataSet = dataSet.flatMap(new MySplitWordsTask());

        AggregateOperator<Tuple2<String, Integer>> result = wordAndOneDataSet.groupBy(0)
                .sum(1);
        //步骤四:数据结果处理
        result.writeAsText("D:\\kkb\\flinklesson\\src\\output\\result1").setParallelism(1);
        //步骤五:启动程序
        env.execute("word count");
    }


    public static class MySplitWordsTask implements   FlatMapFunction<String,Tuple2<String,Integer>>{
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
            String[] fileds = line.split(",");
            for (String word : fileds) {
                collector.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }
}

你可能感兴趣的:(flink)