I. First program: WordCount (real-time stream processing)
1. The pom.xml file
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <flink.version>1.5.1</flink.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <mainClass>com.tydic.SocketWindowWordCount</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <executions>
                <execution>
                    <id>copy</id>
                    <phase>package</phase>
                    <goals>
                        <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                        <outputDirectory>${project.build.directory}/lib</outputDirectory>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
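With this pom, the jar used in step 3 can be built with a standard Maven package run (a sketch; the jar name FlinkMaven-0.0.1-SNAPSHOT.jar is taken from the submit command in step 3):
mvn clean package
The maven-jar-plugin writes Main-Class and a lib/ classpath prefix into the manifest, and the maven-dependency-plugin copies all dependencies into target/lib/ during the package phase, so both the application jar and its lib/ directory end up under target/.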
2. Code
package com.tydic;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
public class SocketWindowWordCount {

    public static void main(String[] args) throws Exception {
        final int port;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            port = params.getInt("port");
        } catch (Exception e) {
            System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
            return;
        }

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Local mode (run inside the IDE instead of on a cluster):
        // final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

        // NOTE: the hostname was lost from the original snippet; "localhost" is assumed here,
        // use the host on which nc is started.
        DataStream<String> text = env.socketTextStream("localhost", port, "\n");

        @SuppressWarnings("serial")
        DataStream<WordWithCount> windowCounts = text
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    public void flatMap(String value, Collector<WordWithCount> out) {
                        for (String word : value.split("\\s")) {
                            out.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                // sliding window: count the last 5 seconds of input, updated every second
                .keyBy("word").timeWindow(Time.seconds(5), Time.seconds(1))
                .reduce(new ReduceFunction<WordWithCount>() {
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        windowCounts.print().setParallelism(1);

        env.execute("Socket Window WordCount(zyl_test)");
    }

    // Simple POJO holding a word and its count.
    public static class WordWithCount {
        public String word;
        public long count;

        public WordWithCount() {
        }

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}
3. Package the jar and submit it to the Flink cluster
/opt/flink-1.5.1/bin/flink run FlinkMaven-0.0.1-SNAPSHOT.jar --port 9000
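Once submitted, the job should show up as RUNNING; this can be verified with the Flink CLI (or in the web dashboard, which listens on port 8081 by default):
/opt/flink-1.5.1/bin/flink list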
4. Test
Start the service: nc -l 9000
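Then type words into the nc session; each line is sent to the job when Enter is pressed. For example (hypothetical input matching the log output shown in step 5):
hello tert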
5. View the logs
[root@rhel6-147 log]# tail -f flink-root-taskexecutor-0-rhel6-147.out
hello : 1
tert : 1
...
II. Reading a local file (batch-style processing)
package com.tydic;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.util.Collector;
public class LocalFileWordCount {

    public static void main(String[] args) throws Exception {
        final ParameterTool params = ParameterTool.fromArgs(args);
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setGlobalJobParameters(params);

        // get input data (readTextFile on --input reconstructed from the run command and the exception below)
        DataSet<String> text = env.readTextFile(params.get("input"));

        DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Splitter())
                .groupBy(0).aggregate(Aggregations.SUM, 1); // group by the tuple field "0" and sum up tuple field "1"

        counts.writeAsText(params.get("output"));
        env.execute("WordCount Example");
    }
}

// The operations are defined by specialized classes, here the Splitter class.
@SuppressWarnings("serial")
class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into words
        String[] tokens = value.split("\\W+");
        // emit the pairs
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<String, Integer>(token, 1));
            }
        }
    }
}
Run:
/opt/flink-1.5.1/bin/flink run --class com.tydic.LocalFileWordCount FlinkMaven-0.0.1-SNAPSHOT.jar --input file:///tmp/zyl/input.txt --output file:///tmp/zyl/output.txt
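If the job succeeds, writeAsText writes one element per line using Tuple2's default (word,count) format. For a hypothetical input line "hello flink hello", the output would look roughly like:
(flink,1)
(hello,2)
Note that when the sink runs with parallelism greater than 1, /tmp/zyl/output.txt is created as a directory containing one file per parallel task.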
Problem:
org.apache.flink.client.program.ProgramInvocationException: java.io.IOException: Error opening the Input Split file:/tmp/zyl/input.txt [0,24]: /tmp/zyl/input.txt (No such file or directory)
at org.apache.flink.client.program.rest.RestClusterClient.submitJob(RestClusterClient.java:264)
at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:464)
at org.apache.flink.client.program.ClusterClient.run(ClusterClient.java:452)
at org.apache.flink.client.program.ContextEnvironment.execute(ContextEnvironment.java:62)
at com.tydic.LocalFileWordCount.main(LocalFileWordCount.java:25)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.jav
Solution: the local file must be present on the machine(s) where the TaskManager runs!
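A quick way to satisfy this (a sketch, assuming a single TaskManager on rhel6-147 as in the log section above) is to copy the input file to the same path on that host before submitting:
scp /tmp/zyl/input.txt root@rhel6-147:/tmp/zyl/
Alternatively, put the file on a filesystem that every TaskManager can reach, such as HDFS, and pass that path as --input.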