Flink Java编程:统计字数和读取本地文件

一、第一个程序wordcount(实时流处理)

1、pom.xml文件

<properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <flink.version>1.5.1</flink.version>
</properties>

     <dependencies>
           <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-java</artifactId>
                <version>${flink.version}</version>
           </dependency>
           <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_2.11</artifactId>
                <version>${flink.version}</version>
           </dependency>
           <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-clients_2.11</artifactId>
                <version>${flink.version}</version>
           </dependency>
     </dependencies>

     

     <build>
           <plugins>
                <plugin>
                     <artifactId>maven-compiler-plugin</artifactId>
                     <version>3.1</version>
                     <configuration>
                           <source>1.8</source>
                           <target>1.8</target>
                     </configuration>
                </plugin>
                <plugin>
                     <groupId>org.apache.maven.plugins</groupId>
                     <artifactId>maven-jar-plugin</artifactId>
                     <version>2.4</version>
                     <configuration>
                           <archive>
                                <manifest>
                                     <addClasspath>true</addClasspath>
                                     <classpathPrefix>lib/</classpathPrefix>
                                     <mainClass>com.tydic.SocketWindowWordCount</mainClass>
                                </manifest>
                           </archive>
                     </configuration>
                </plugin>
                <plugin>
                     <groupId>org.apache.maven.plugins</groupId>
                     <artifactId>maven-dependency-plugin</artifactId>
                     <executions>
                           <execution>
                                <id>copy</id>
                                <phase>package</phase>
                                <goals>
                                     <goal>copy-dependencies</goal>
                                </goals>
                                <configuration>
                                     <outputDirectory>${project.build.directory}/lib</outputDirectory>
                                </configuration>
                           </execution>
                     </executions>
                </plugin>
           </plugins>
     </build>

2、代码

import org.apache.flink.api.common.functions.FlatMapFunction;

import org.apache.flink.api.common.functions.ReduceFunction;

import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.streaming.api.datastream.DataStream;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import org.apache.flink.streaming.api.windowing.time.Time;

import org.apache.flink.util.Collector;

public class SocketWindowWordCount {

     public static void main(String[] args) throws Exception {

          final int port;

          try {

              final ParameterTool params = ParameterTool.fromArgs(args);

              port = params.getInt("port");

          } catch (Exception e) {

              System.err.println("No port specified.Pleas run 'SocketWindowCount --port '");

              return;

          }

          final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

          // local模式

//        final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

          DataStream text = env.socketTextStream("192.168.128.146", port, "\n");

          @SuppressWarnings("serial")

          DataStream windowCounts = text

                   .flatMap(new FlatMapFunction() {

                        public void flatMap(String value, Collector out) throws Exception {

                             for (String word : value.split("\\s")) {

                                  out.collect(new WordWithCount(word, 1L));

                             }

                        }

                   })

                   .keyBy("word").timeWindow(Time.seconds(5), Time.seconds(1))

                   .reduce(new ReduceFunction() {

                        public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {

                             return new WordWithCount(a.word, a.count + b.count);

                        }

                   });

          windowCounts.print().setParallelism(1);

          env.execute("Socket Window WordCount(zyl_test)");

     }

     public static class WordWithCount {

          public String word;

          public long count;

          public WordWithCount() {

          }

          public WordWithCount(String word, long count) {

              this.word = word;

              this.count = count;

          }

          @Override

          public String toString() {

              return word + " : " + count;

          }

     }

}

3、打jar包,提交jar到flink集群

/opt/flink-1.5.1/bin/flink run FlinkMaven-0.0.1-SNAPSHOT.jar --port 9000

4、测试

启动服务(需在提交作业之前,在程序所连接的主机 192.168.128.146 上执行,否则作业会因无法连接 socket 而失败):nc -l 9000

5、日志查看

[root@rhel6-147 log]# tail -f flink-root-taskexecutor-0-rhel6-147.out

hello : 1

tert : 1

...

 


 

二、读取本地文件(类似批处理)

package com.tydic;

import org.apache.flink.api.common.functions.FlatMapFunction;

import org.apache.flink.api.java.DataSet;

import org.apache.flink.api.java.ExecutionEnvironment;

import org.apache.flink.api.java.aggregation.Aggregations;

import org.apache.flink.api.java.tuple.Tuple2;

import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.util.Collector;

public class LocalFileWordCount {

     public static void main(String[] args) throws Exception {

          final ParameterTool params = ParameterTool.fromArgs(args);

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          env.getConfig().setGlobalJobParameters(params);

          // get input data

          DataSet text = env.readTextFile(params.get("input"));

          DataSet> counts = text.flatMap(new Splitter()) // split up the lines in pairs (2-tuples) containing: (word,1)

                   .groupBy(0).aggregate(Aggregations.SUM, 1);// group by the tuple field "0" and sum up tuple field "1"

          counts.writeAsText(params.get("output"));

          env.execute("WordCount Example");

     }

}

// The operations are defined by specialized classes, here the Splitter class.

@SuppressWarnings("serial")

class Splitter implements FlatMapFunction> {

     @Override

     public void flatMap(String value, Collector> out) {

          // normalize and split the line into words

          String[] tokens = value.split("\\W+");

          // emit the pairs

          for (String token : tokens) {

              if (token.length() > 0) {

                   out.collect(new Tuple2(token, 1));

              }

          }

     }

}

 

运行:

/opt/flink-1.5.1/bin/flink run --class com.tydic.LocalFileWordCount FlinkMaven-0.0.1-SNAPSHOT.jar  --input file:///tmp/zyl/input.txt --output file:///tmp/zyl/output.txt

 

问题:

org.apache.flink.client.program.ProgramInvocationException: java.io.IOException: Error opening the Input Split file:/tmp/zyl/input.txt [0,24]: /tmp/zyl/input.txt (No such file or directory)

        at org.apache.flink.client.program.rest.RestClusterClient.submitJob(RestClusterClient.java:264)

        at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:464)

        at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:452)

        at org.apache.flink.client.program.ContextEnvironment.execute(ContextEnvironment.java:62)

        at com.tydic.LocalFileWordCount.main(LocalFileWordCount.java:25)

        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)

        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        ...

 

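解决方法:file:// 路径是在实际执行任务的 TaskManager 所在机器上解析的,因此输入文件必须存在于 TaskManager 对应的机器上(而不只是提交作业的客户端机器上);集群有多个节点时,可在每个 TaskManager 节点上放置该文件,或改用 HDFS 等共享文件系统。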

你可能感兴趣的:(Flink)