Apache Flink is a framework and distributed processing engine for stateful computations over unbounded and bounded data streams. Flink is designed to run in all common cluster environments, performing computations at in-memory speed and at any scale.
Bounded stream: the data has a defined start and a defined end.
Unbounded stream: the data has a start but no defined end.
Difference from Spark: Spark Streaming processes a stream as a series of micro-batches on top of a batch engine, while Flink is a native streaming engine that treats batch processing as a special case of streaming.
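As a minimal sketch of the bounded vs. unbounded distinction (the localhost/9999 socket and the sample elements are placeholders, not part of the original example), the same StreamExecutionEnvironment can consume both kinds of sources:

import org.apache.flink.streaming.api.scala._

object BoundedVsUnbounded {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Bounded: a fixed collection has a clear start and end
    val bounded: DataStream[String] = env.fromElements("hello", "flink")

    // Unbounded: a socket keeps producing records with no defined end
    val unbounded: DataStream[String] = env.socketTextStream("localhost", 9999)

    bounded.print()
    unbounded.print()

    env.execute("BoundedVsUnbounded")
  }
}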
Maven dependencies (Scala 2.11 build, Flink 1.6.2):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.6.2</version>
</dependency>
package com.imooc.spark.flink

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

object SocketWindowWordCount {

  def main(args: Array[String]): Unit = {

    // the port to connect to
    /* val port: Int = try {
      ParameterTool.fromArgs(args).getInt("port")
    } catch {
      case e: Exception => {
        System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'")
        return
      }
    } */

    // get the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to the socket
    val text = env.socketTextStream("192.168.52.130", 9999, '\n')

    // parse the data, group it, window it, and aggregate the counts
    val windowCounts = text
      .flatMap { w => w.split("\\s") }
      .map { w => WordWithCount(w, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(5), Time.seconds(1))
      .sum("count")

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1)

    // println(windowCounts) would only print the DataStream reference, not the records

    env.execute("Socket Window WordCount")
  }

  // Data type for words with count
  case class WordWithCount(word: String, count: Long)
}
[hadoop@hadoop001 conf]$ nc -lk 9999
hello flink
hello world
hello spark
hello spark
hellp java
hello hive
hello hadoop
WordWithCount(hello,1)
WordWithCount(flink,1)
WordWithCount(flink,1)
WordWithCount(hello,1)
WordWithCount(hello,1)
WordWithCount(flink,1)
WordWithCount(flink,1)
WordWithCount(hello,1)
1. Code (Java version)
package com.imooc.spark.flink;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

/**
 * @ClassName SocketWindowWordCount
 * @Description Java version of the socket window word count
 * @Author lxp
 * @Date 2018/11/16 0016
 **/
public class SocketWindowWordCount1 {

    public static void main(String[] args) throws Exception {

        // the port to connect to
        /* final int port;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            port = params.getInt("port");
        } catch (Exception e) {
            System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
            return;
        } */

        // get the execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // get input data by connecting to the socket
        DataStream<String> text = env.socketTextStream("192.168.52.130", 9999, "\n");

        // parse the data, group it, window it, and aggregate the counts
        DataStream<WordWithCount> windowCounts = text
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String value, Collector<WordWithCount> out) {
                        // split each line on whitespace, emitting one WordWithCount per word
                        for (String word : value.split("\\s")) {
                            out.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(5), Time.seconds(1))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        // print the results with a single thread, rather than in parallel
        windowCounts.print().setParallelism(1);

        env.execute("Socket Window WordCount");
    }

    // Data type for words with count
    public static class WordWithCount {

        public String word;
        public long count;

        public WordWithCount() {}

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}
// Read a text file (or a whole directory) with the DataSet API
val env = ExecutionEnvironment.getExecutionEnvironment
val text = env.readTextFile("file:///E:/testData/flink/")
// print() triggers execution in the DataSet API, so no separate env.execute() call is needed
text.print()

// Read nested sub-directories recursively (requires org.apache.flink.configuration.Configuration)
val parameters = new Configuration()
parameters.setBoolean("recursive.file.enumeration", true)
val text = env.readTextFile("file:///E:/testData/flink/", "GBK").withParameters(parameters)
package com.imooc.spark.flink

import org.apache.flink.api.scala._

/**
 * Read an external data source (CSV file) with the DataSet API
 **/
object ReadDataSourceOfCsv {

  def main(args: Array[String]): Unit = {
    // create the execution environment
    //val env = StreamExecutionEnvironment.getExecutionEnvironment
    val env = ExecutionEnvironment.getExecutionEnvironment

    // read the data source
    //val text = env.readTextFile("file:///E:/testData/SparkStreaming/9.txt")
    //val text = env.readTextFile("file:///E:/testData/flink/")

    // read a CSV file into tuples, skipping the header line
    val text = env.readCsvFile[(String, Int, String)]("file:///E:/testData/flintTest.csv",
      ignoreFirstLine = true)

    // output; print() also triggers execution for the DataSet API
    //text.print().setParallelism(1)
    text.print()

    // print() already ran the job, so an extra env.execute() is not needed here
    //env.execute("ReadDataSourceOfCsv")
  }

  case class User(name: String, age: Int, addr: String)
}
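The User case class is declared above but not used. As a minimal sketch (assuming the three CSV columns line up with name, age, addr, and using the same hypothetical file path), the file could also be read directly into the case class instead of a tuple:

import org.apache.flink.api.scala._

object ReadCsvIntoCaseClass {

  case class User(name: String, age: Int, addr: String)

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // map the three CSV columns directly onto the case class fields, skipping the header
    val users = env.readCsvFile[User]("file:///E:/testData/flintTest.csv", ignoreFirstLine = true)
    users.print() // print() triggers execution for the DataSet API
  }
}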