一、Transformation操作
1,map、flatMap、filter
public class transTest1_Base {
    public static void main(String[] args) throws Exception {
        // Set up the execution environment; parallelism 1 keeps the printed output in order.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read the sample text file line by line.
        DataStream<String> source = env.readTextFile("C:\\Users\\Administrator\\IdeaProjects\\FlinkTutorial\\src\\main\\resources\\hello.txt");

        // map: one input line -> one output value (here, the line's length).
        DataStream<Integer> lineLengths = source.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String line) throws Exception {
                return line.length();
            }
        });

        // flatMap: one input line -> zero or more output values (here, the space-separated tokens).
        DataStream<String> words = source.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                for (String token : line.split(" ")) {
                    collector.collect(token);
                }
            }
        });

        // filter: keep only lines whose predicate returns true (lines starting with "h").
        DataStream<String> startsWithH = source.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String line) throws Exception {
                return line.startsWith("h");
            }
        });

        lineLengths.print("map");
        words.print("FlatMap");
        startsWithH.print("Filter");

        // Submit the job; nothing runs until execute() is called.
        env.execute();
    }
}
2,keyBy
将DataStream变成KeyedStream,之后才能使用sum等聚合操作。
1)
sum()/min()/max()/minBy()/maxBy() 通过这些算子对KeyedStream的每一个支流做聚合。
/**
 * Demonstrates rolling aggregation (maxBy) on a KeyedStream built from a bounded collection.
 * keyBy partitions the stream by sensor id; maxBy emits, per key, the element with the
 * largest temperature seen so far.
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<SensorReading> dataStream = env.fromCollection(Arrays.asList(
            new SensorReading("s1", 1728493489L, 37.1),
            new SensorReading("s2", 1728493439L, 36.1),
            new SensorReading("s3", 1728493489L, 38.0),
            new SensorReading("s2", 1728493439L, 36.1),
            new SensorReading("s3", 1728493431L, 36.6),
            new SensorReading("s3", 1728493423L, 36.3)
    ));
    // Field-name keyBy returns KeyedStream<SensorReading, Tuple>.
    KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id");
    // Java 8 lambda alternative (key type becomes String instead of Tuple):
    // KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(data -> data.getId());
    keyedStream.maxBy("temperature").print("max");
    // BUG FIX: env.execute() was missing — without it the job graph is never submitted
    // and nothing is printed. It throws Exception, hence the added `throws` clause.
    env.execute();
}
3,Reduce算子
package com.cys.transformation;
import com.cys.apitest.beans.SensorReading;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates the reduce operator on a KeyedStream: per sensor id, keeps the latest
 * timestamp together with the maximum temperature seen so far.
 */
public class TransTest3_Reduce {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> dataStream = env.readTextFile("filePath");
        // Parse CSV lines "id,timestamp,temperature" into SensorReading beans.
        DataStream<SensorReading> sensorData = dataStream.map(line -> {
            String[] fields = line.split(",");
            // FIX: new Long(...)/new Double(...) are deprecated boxing constructors;
            // parseLong/parseDouble parse directly without creating wrapper objects.
            return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        });
        // keyBy: partition the stream by sensor id before the stateful reduce.
        KeyedStream<SensorReading, Tuple> keyedStream = sensorData.keyBy("id");
        // FIX: use the parameterized DataStream<SensorReading> instead of a raw DataStream.
        DataStream<SensorReading> result = keyedStream.reduce(new ReduceFunction<SensorReading>() {
            @Override
            public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
                // value1 is the running aggregate, value2 the new element:
                // keep the newest timestamp and the running maximum temperature.
                return new SensorReading(value1.getId(), value2.getTimestamp(),
                        Math.max(value1.getTemperature(), value2.getTemperature()));
            }
        });
        /* Lambda form — unlike Scala, Java still needs an explicit return:
        keyedStream.reduce((value1, value2) -> {
            return new SensorReading(value1.getId(), value2.getTimestamp(), Math.max(value1.getTemperature(),value2.getTemperature()));
        });*/
        // FIX: result was built but never attached to a sink; print it so the example is observable.
        result.print("reduce");
        env.execute();
    }
}
4,Split和Select && Connect和CoMap && Union
package com.cys.transformation;
import com.cys.apitest.beans.SensorReading;
import com.cys.apitest.source.Source_Selfdefinition;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Random;
/**
 * Demonstrates multi-stream operators: split/select (tag-based stream routing),
 * connect + CoMapFunction (joining two streams of different types), and union
 * (merging streams of the same type).
 */
public class TransTest4_MultipleStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<SensorReading> dataStream = env.addSource(new Source_Selfdefinition.MySensorSource());

        // 1. split tags each record so sub-streams can be routed apart; returns a SplitStream.
        SplitStream<SensorReading> split = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading sensorReading) {
                return sensorReading.getTemperature() > 60
                        ? Collections.singletonList("high")
                        : Collections.singletonList("low");
            }
        });
        // select extracts records carrying the given tag(s), back as DataStream.
        DataStream<SensorReading> highTempStream = split.select("high");
        DataStream<SensorReading> lowTempStream = split.select("low");
        DataStream<SensorReading> allTemp = split.select("high", "low");
        highTempStream.print("high");
        lowTempStream.print("low");

        /* 2. connect joins two streams whose element types may differ:
           here the high-temp stream (mapped to a Tuple2) with the low-temp
           stream (still SensorReading). */
        // Map the high-temp stream to (id, temperature) tuples.
        DataStream<Tuple2<String, Double>> warningStream = highTempStream.map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> map(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), value.getTemperature());
            }
        });
        // connect yields a ConnectedStreams holding both sub-streams.
        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectedStreams = warningStream.connect(lowTempStream);
        // CoMapFunction merges the two sub-streams; each map method may return a
        // different shape, so the common output type must be a shared supertype (Object).
        DataStream<Object> result = connectedStreams.map(new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
            // Handles elements of the first (high-temp) sub-stream.
            @Override
            public Object map1(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return new Tuple3<>(stringDoubleTuple2.f0, stringDoubleTuple2.f1, "high temp");
            }

            // Handles elements of the second (low-temp) sub-stream.
            @Override
            public Object map2(SensorReading sensorReading) throws Exception {
                return new Tuple2<>(sensorReading.getId(), "normal");
            }
        });
        result.print();

        // 3. union merges any number of streams of the SAME element type.
        // FIX: keep the result in a typed variable so the merged stream is usable.
        DataStream<SensorReading> unionStream = highTempStream.union(lowTempStream, allTemp);

        env.execute();
    }

    /** Self-defined SourceFunction emitting a random reading per sensor every second. */
    public static class MySensorSource implements SourceFunction<SensorReading> {
        // FIX: volatile — cancel() is invoked from a different thread than run(),
        // so the flag write must be visible to the emitting loop.
        private volatile boolean flag = true;

        @Override
        public void run(SourceContext<SensorReading> sourceContext) throws Exception {
            Random random = new Random();
            // Seed 10 sensors with an initial temperature around 60 (Gaussian spread).
            HashMap<String, Double> sensorTempMap = new HashMap<String, Double>();
            for (int i = 0; i < 10; i++) {
                sensorTempMap.put("sensor_" + (i + 1), 60 + random.nextGaussian() * 20);
            }
            while (flag) {
                for (String sensorId : sensorTempMap.keySet()) {
                    // Random-walk each sensor's temperature and emit a reading.
                    Double newtemp = sensorTempMap.get(sensorId) + random.nextGaussian();
                    sensorTempMap.put(sensorId, newtemp);
                    sourceContext.collect(new SensorReading(sensorId, System.currentTimeMillis(), newtemp));
                }
                Thread.sleep(1000L);
            }
        }

        @Override
        public void cancel() {
            // BUG FIX: this was empty, so cancelling the job never stopped run()'s
            // loop; the SourceFunction contract requires cancel() to break run() out.
            flag = false;
        }
    }
}