FlinkAPI(二)

一、Transformation操作
1,map、flatmap、filter

public class transTest1_Base {
    public static void main(String[] args) throws Exception{
        // Single parallel task keeps the printed output in a stable order.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read the sample text file line by line.
        DataStream<String> lines = env.readTextFile("C:\\Users\\Administrator\\IdeaProjects\\FlinkTutorial\\src\\main\\resources\\hello.txt");

        // map: transform each line into its character count (implement
        // MapFunction and override map()).
        DataStream<Integer> lengthStream = lines.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String line) throws Exception {
                return line.length();
            }
        });

        // flatMap: split each line on spaces and emit every word as its own
        // element via the Collector.
        DataStream<String> wordStream = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                for (String word : line.split(" ")) {
                    collector.collect(word);
                }
            }
        });

        // filter: keep only the lines that start with "h".
        DataStream<String> hLines = lines.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String line) throws Exception {
                return line.startsWith("h");
            }
        });

        lengthStream.print("map");
        wordStream.print("FlatMap");
        hLines.print("Filter");
        env.execute();
    }
}

2,keyBy
将DataStream变成KeyedStream,之后才能使用sum等聚合操作。
1)
sum()/min()/max()/minBy()/maxBy() 通过这些算子对KeyedStream的每一个支流做聚合。

    /**
     * keyBy demo: partitions the sensor stream by "id", then keeps, per key,
     * the reading with the highest temperature seen so far (maxBy).
     *
     * Fix: the original never called env.execute(), so the job graph was
     * built but the job was never submitted and print() produced no output.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<SensorReading> dataStream = env.fromCollection(Arrays.asList( new SensorReading("s1", 1728493489L, 37.1),
                new SensorReading("s2", 1728493439L, 36.1),
                new SensorReading("s3", 1728493489L, 38.0),
                new SensorReading("s2", 1728493439L, 36.1),
                new SensorReading("s3", 1728493431L, 36.6),
                new SensorReading("s3", 1728493423L, 36.3)
        ));

        // keyBy turns the DataStream into a KeyedStream; rolling aggregations
        // such as sum/min/max/minBy/maxBy are only available on keyed streams.
        KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id");
        // Java 8 lambda alternative:
        //KeyedStream keyedStream1 = dataStream.keyBy(data -> data.getId());

        keyedStream.maxBy("temperature").print("max");

        // Required: nothing runs until the job is submitted.
        env.execute();
    }

3,Reduce算子

package com.cys.transformation;

import com.cys.apitest.beans.SensorReading;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TransTest3_Reduce {
    /**
     * Reduce demo: per sensor id, emits a running record carrying the most
     * recent timestamp together with the maximum temperature seen so far.
     */
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> dataStream = env.readTextFile("filePath");
        // Parse "id,timestamp,temperature" lines.
        // Fix: use Long.parseLong / Double.parseDouble instead of the
        // deprecated boxing constructors new Long(...) / new Double(...).
        DataStream<SensorReading> sensorData = dataStream.map(line -> {
           String[] fields = line.split(",");
           return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
        });

        // keyBy groups the stream by sensor id.
        KeyedStream<SensorReading, Tuple> keyedStream = sensorData.keyBy("id");

        // Fix: declare the element type instead of using a raw DataStream.
        DataStream<SensorReading> result = keyedStream.reduce(new ReduceFunction<SensorReading>() {
            @Override
            public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
                // Keep the newest timestamp but the max temperature.
                return new SensorReading(value1.getId(), value2.getTimestamp(), Math.max(value1.getTemperature(),value2.getTemperature()));
            }
        });

        // Fix: without a sink the reduce pipeline is never materialized,
        // so execute() would run an empty job.
        result.print("reduce");

/*        // Lambda form: unlike Scala, Java still needs an explicit return.
        keyedStream.reduce((value1, value2) -> {
            return new SensorReading(value1.getId(), value2.getTimestamp(), Math.max(value1.getTemperature(),value2.getTemperature()));
        });*/

        env.execute();
    }
}

4,Split和Select && Connect和CoMap && Union

package com.cys.transformation;

import com.cys.apitest.beans.SensorReading;
import com.cys.apitest.source.Source_Selfdefinition;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Random;

/**
 * Multi-stream demo: split/select to fork one stream by a tag,
 * connect/CoMap to join two streams of different element types,
 * and union to merge streams of the same type.
 */
public class TransTest4_MultipleStream {
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<SensorReading> dataStream = env.addSource(new Source_Selfdefinition.MySensorSource());

        // 1. split tags each record so different sub-streams can be told
        // apart; it returns a SplitStream.
        SplitStream<SensorReading> split = dataStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading sensorReading) {
                // Tag readings above 60 as "high", the rest as "low".
                return sensorReading.getTemperature() > 60 ? Collections.singletonList("high"):Collections.singletonList("low");
            }
        });

        // SplitStream.select extracts the tagged records back out as
        // plain DataStreams.
        DataStream<SensorReading> highTempStream = split.select("high");
        DataStream<SensorReading> lowTempStream = split.select("low");
        DataStream<SensorReading> allTemp = split.select("high", "low");

        highTempStream.print("high");
        lowTempStream.print("low");

        /* 2. connect joins two streams whose element types may differ;
           here the high-temp stream is first mapped to a Tuple2 and then
           connected with the low-temp stream (SensorReading). */

        // Map the high-temp stream to (id, temperature) tuples.
        DataStream<Tuple2<String, Double>> warningStream = highTempStream.map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> map(SensorReading value) throws Exception {
                return new Tuple2<>(value.getId(), value.getTemperature());
            }
        });

        // connect returns a ConnectedStreams holding both sub-streams.
        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectedStreams = warningStream.connect(lowTempStream);

        // CoMapFunction merges the two sub-streams; the shared output type
        // must be a common supertype of both map results (Object here,
        // since map1 emits Tuple3 and map2 emits Tuple2).
        DataStream<Object> result = connectedStreams.map(new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {

            // Handles the first (high-temp) sub-stream.
            @Override
            public Object map1(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return new Tuple3<>(stringDoubleTuple2.f0, stringDoubleTuple2.f1, "high temp");
            }

            // Handles the second (low-temp) sub-stream.
            @Override
            public Object map2(SensorReading sensorReading) throws Exception {
                return new Tuple2<>(sensorReading.getId(), "normal");
            }
        });

        result.print();

        // 3. union merges any number of streams, but all element types
        // must be identical.
        highTempStream.union(lowTempStream,allTemp);

        env.execute();
    }


    /**
     * Self-defined SourceFunction emitting a random-walk temperature for
     * ten simulated sensors once per second.
     */
    public static class MySensorSource implements SourceFunction<SensorReading> {

        // Controls the emit loop in run(); volatile because cancel() is
        // invoked from a different thread than run().
        private volatile boolean flag = true;

        @Override
        public void run(SourceContext<SensorReading> sourceContext) throws Exception {
            // Random generator driving the temperature walk.
            Random random = new Random();

            // Initial temperatures for 10 sensors, Gaussian around 60.
            HashMap<String, Double> sensorTempMap = new HashMap<String, Double>();
            for(int i = 0; i < 10; i++){
                sensorTempMap.put("sensor_" + (i+1), 60 + random.nextGaussian() * 20);
            }

            while (flag){
                for(String sensorId: sensorTempMap.keySet()){
                    // Random-walk each sensor's temperature by one step.
                    Double newtemp = sensorTempMap.get(sensorId) + random.nextGaussian();
                    sensorTempMap.put(sensorId, newtemp);
                    sourceContext.collect(new SensorReading(sensorId, System.currentTimeMillis(), newtemp));
                }
                Thread.sleep(1000L);
            }
        }

        @Override
        public void cancel() {
            // Fix: the original left this empty, so the run() loop could
            // never be stopped by the framework.
            flag = false;
        }
    }
}

你可能感兴趣的:(Flink)