Flink13基础-DataStream API(Source算子)

一、创建基础类

import java.sql.Timestamp;

/**
 * A single click record: which user visited which URL, and when.
 *
 * <p>The fields are public and a public no-argument constructor is provided, so an
 * instance may exist with every field still {@code null}.
 */
public class Event {

    /** Name of the user who produced the click; may be {@code null}. */
    public String user;

    /** URL that was visited; may be {@code null}. */
    public String url;

    /** Event time as epoch milliseconds; may be {@code null} on a default-constructed instance. */
    public Long timestamp;

    /** Creates an empty event with all fields {@code null}. */
    public Event() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param user      name of the user
     * @param url       visited URL
     * @param timestamp event time in epoch milliseconds
     */
    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }

    /**
     * Renders the event with the timestamp formatted via {@link Timestamp#toString()}.
     *
     * @return a string of the form {@code Event{user='..', url='..', timestamp=..}};
     *         a {@code null} timestamp is rendered as {@code null} instead of throwing
     */
    @Override
    public String toString() {
        // Unboxing a null Long for new Timestamp(long) would throw a NullPointerException,
        // which happens for any instance created through the no-arg constructor.
        String formattedTime = (timestamp == null) ? "null" : new Timestamp(timestamp).toString();
        return "Event{" +
                "user='" + user + '\'' +
                ", url='" + url + '\'' +
                ", timestamp=" + formattedTime +
                '}';
    }
}

1. 从文件中读取数据

// Create a local execution environment and run everything with parallelism 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
env.setParallelism(1);

// 1. Read data from a text file; the records are Strings, so the stream is typed
//    explicitly instead of using the raw DataStreamSource type.
DataStreamSource<String> streamFile = env.readTextFile("input/clicks.txt");

streamFile.print();

// Nothing runs until execute() is called.
env.execute();

2. 从集合中读取数据

// 2. Read data from in-memory collections.
// A collection of integers; the element type is declared so add() is type-checked.
ArrayList<Integer> nums = new ArrayList<>();
nums.add(2);
nums.add(5);
DataStreamSource<Integer> stream1 = env.fromCollection(nums);

// A collection of Event objects.
ArrayList<Event> events = new ArrayList<>();
events.add(new Event("Mary","./home",1000L));
events.add(new Event("Bob","./cart",2000L));
DataStreamSource<Event> stream2 = env.fromCollection(events);

3. 从元素读取数据

// 3. Read data directly from a fixed list of elements (typed as Event rather than raw).
DataStreamSource<Event> stream3 = env.fromElements(
  new Event("Mary", "./home", 1000L),
  new Event("Bob", "./cart", 2000L)
);

4. 从socket文本流中读取

// 4. Read text from a socket; the records are Strings, so the stream is typed explicitly.
// NOTE(review): port 9092 is also the Kafka bootstrap port configured for hadoop102 in the
// Kafka example of this article — confirm the socket server really listens on this port.
DataStreamSource<String> stream4 = env.socketTextStream("hadoop102", 9092);

5. 从 Kafka 中读取数据

// 5. Read data from Kafka: consumer configuration.
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "hadoop102:9092");
properties.setProperty("group.id", "consumer-group");
// NOTE(review): the Flink Kafka consumer is understood to install its own byte-array
// deserializers and decode records through the DeserializationSchema passed below, so
// these two entries may have no effect — confirm against the connector version in use.
properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("auto.offset.reset", "latest");

// SimpleStringSchema yields String records, so both the consumer and the stream are typed as String.
DataStreamSource<String> kafkaStream = env.addSource(new FlinkKafkaConsumer<>("clicks", new SimpleStringSchema(), properties));

6. 从自定义数据源读取数据

自定义数据源

import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.Calendar;
import java.util.Random;

/**
 * Custom source that emits one randomly generated {@link Event} and then sleeps for one
 * second, repeating until {@link #cancel()} is called.
 *
 * <p>The element type is declared as {@code SourceFunction<Event>}; with the raw interface the
 * produced type cannot be derived from the class signature.
 */
public class ClickSource implements SourceFunction<Event> {
    // Stop flag: written by cancel() and read by the loop in run(). It is volatile because
    // cancel() is expected to be invoked from a different thread than the one running run(),
    // and a plain field gives no guarantee that the loop ever observes the update.
    private volatile boolean running = true;

    /**
     * Generates events until the source is cancelled.
     *
     * @param sourceContext context used to emit the generated events
     * @throws Exception if the thread is interrupted while sleeping between events
     */
    @Override
    public void run(SourceContext<Event> sourceContext) throws Exception {
        Random random = new Random();
        // Candidate values from which each field is picked at random.
        String[] users = {"Mary","Alice","Bob","Cary"};
        String[] urls = {"./home","./cart","./fav","./prod?id=100","./prod?id=10"};

        while (running) {
            String user = users[random.nextInt(users.length)];
            String url = urls[random.nextInt(urls.length)];
            // Current wall-clock time in epoch milliseconds (same value Calendar would report).
            long timestamp = System.currentTimeMillis();
            sourceContext.collect(new Event(user, url, timestamp));
            // Throttle to roughly one event per second.
            Thread.sleep(1000);
        }
    }

    /** Requests termination of the loop in {@link #run(SourceContext)}. */
    @Override
    public void cancel() {
        running = false;
    }
}
// Read data from the custom source; ClickSource emits Event records, so the stream is typed as Event.
DataStreamSource<Event> clickSource = env.addSource(new ClickSource());
clickSource.print();

7. 定义并行数据源

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;

import java.util.Random;

/**
 * Demonstrates a user-defined parallel source: random integers are produced by
 * {@link ParallelCustomSource} with a source parallelism of 2 and printed.
 */
public class SourceCustomTest {
    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        // DataStreamSource<Event> customStream = env.addSource(new ClickSource());
        // The source operator's parallelism is set explicitly to 2, overriding the environment default.
        DataStreamSource<Integer> customStream = env.addSource(new ParallelCustomSource()).setParallelism(2);

        customStream.print();

        env.execute();
    }

    /**
     * Parallel source that emits random integers in a tight loop until cancelled.
     */
    public static class ParallelCustomSource implements ParallelSourceFunction<Integer> {

        // Stop flag: written by cancel() and read by run(). It is volatile because cancel() is
        // expected to be invoked from a different thread, and a plain field gives no guarantee
        // that the loop ever observes the update.
        private volatile boolean running = true;
        private final Random random = new Random();

        /**
         * Emits random integers until the source is cancelled.
         *
         * @param sourceContext context used to emit the generated values
         * @throws Exception declared by the interface; not thrown by this implementation itself
         */
        @Override
        public void run(SourceContext<Integer> sourceContext) throws Exception {
            while (running) {
                sourceContext.collect(random.nextInt());
            }
        }

        /** Requests termination of the loop in {@link #run(SourceContext)}. */
        @Override
        public void cancel() {
            running = false;
        }
    }
}

你可能感兴趣的:(flink)