Flink 之输出算子Sink

Flink 之输出算子Sink

  • 一、StreamingFileSink
    • 1.1、SinkUtils类
    • 1.2、SinkStream类
  • 二、KafkaSink
    • 2.1、SinkUtils类
    • 2.2、SinkStream类
  • 三、RedisSink
    • 3.1、SinkUtils类
    • 3.2、SinkStream类
  • 四、ElasticsearchSink
    • 4.1 SinkUtils类
    • 4.2 SinkStream类
  • 五、MysqlSink
    • 5.1 SinkUtils类
    • 5.2 SinkStream类


一、StreamingFileSink

1.1、SinkUtils类

package com.hpsk.flink.sink;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import java.util.concurrent.TimeUnit;

public class SinkUtils {
    /**
     * Builds a row-format {@link StreamingFileSink} for string records.
     *
     * <p>Records are encoded with a UTF-8 {@link SimpleStringEncoder}. A new part file is
     * started when the current one reaches 1 GiB, has been open for 15 minutes, or has
     * received no data for 5 minutes.
     *
     * @param path base path handed to {@code StreamingFileSink.forRowFormat}
     * @return the configured StreamingFileSink
     */
    public StreamingFileSink<String> getStreamingFileSink(String path) {
        return StreamingFileSink
                .<String>forRowFormat(new Path(path),
                        new SimpleStringEncoder<>("UTF-8"))
                .withRollingPolicy( // rolling policy: any one of these limits starts a new part file
                        DefaultRollingPolicy
                                .builder()
                                .withMaxPartSize(1024 * 1024 * 1024) // maximum part size in bytes
                                .withRolloverInterval(TimeUnit.MINUTES.toMillis(15)) // maximum time a part stays open
                                .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) // maximum idle time
                                .build()
                )
                .build();
    }

    /**
     * Legacy spelling kept so existing callers continue to compile.
     *
     * @param path base path handed to {@code StreamingFileSink.forRowFormat}
     * @return the configured StreamingFileSink
     * @deprecated use {@link #getStreamingFileSink(String)}, which follows Java method naming.
     */
    @Deprecated
    public StreamingFileSink<String> GetStreamingFileSink(String path) {
        return getStreamingFileSink(path);
    }
}

1.2、SinkStream类

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    /**
     * Demo job: turns a fixed list of {@link Event}s into strings and writes them
     * through the file sink provided by {@link SinkUtils}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        // Fixed in-memory sample records.
        DataStreamSource<Event> events = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );

        // Write to files: each event is converted to its string form first.
        SinkUtils sinkUtils = new SinkUtils();
        events.map(Event::toString)
                .addSink(sinkUtils.GetStreamingFileSink("output/output.txt"));

        env.execute();
    }
}

二、KafkaSink

2.1、SinkUtils类

package com.hpsk.flink.sink;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import java.util.concurrent.TimeUnit;

public class SinkUtils {
    /**
     * Creates a Kafka producer sink for string records, serialized with
     * {@link SimpleStringSchema}.
     *
     * @return FlinkKafkaProducer writing to topic {@code event} on {@code hadoop102:9092}
     */
    public FlinkKafkaProducer<String> getFlinkKafkaProducer() {
        String brokerList = "hadoop102:9092";
        String topic = "event";
        SimpleStringSchema schema = new SimpleStringSchema();
        return new FlinkKafkaProducer<>(brokerList, topic, schema);
    }
}

2.2、SinkStream类

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    /**
     * Demo job: converts a fixed list of {@link Event}s to strings and sends them to Kafka.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        DataStreamSource<Event> events = sampleEvents(env);

        // Write to Kafka.
        events.map(Event::toString)
                .addSink(new SinkUtils().getFlinkKafkaProducer());

        env.execute();
    }

    /** Creates the fixed in-memory sample source used by this demo. */
    private static DataStreamSource<Event> sampleEvents(StreamExecutionEnvironment env) {
        return env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
    }
}

三、RedisSink

3.1、SinkUtils类

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import java.util.concurrent.TimeUnit;

public class SinkUtils {
    /**
     * Builds a {@link RedisSink} that writes each {@link Event} with the HSET command,
     * using {@code "event"} as the additional key, the event's user as the mapper key
     * and the event's url as the value.
     *
     * @return RedisSink connected to host {@code hadoop102}
     */
    public RedisSink<Event> getRedisSink() {
        // Jedis connection configuration.
        FlinkJedisPoolConfig config = new FlinkJedisPoolConfig
                .Builder()
                .setHost("hadoop102")
                .build();
        return new RedisSink<>(config, new EventRedisMapper());
    }

    /**
     * Maps an {@link Event} to the Redis command, key and value.
     *
     * <p>Declared as a static nested class rather than an anonymous class: an anonymous
     * class created inside an instance method may keep a hidden reference to the enclosing
     * {@code SinkUtils}, which is not serializable, while the mapper is shipped as part of
     * the sink.
     */
    private static final class EventRedisMapper implements RedisMapper<Event> {
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.HSET, "event");
        }

        @Override
        public String getKeyFromData(Event event) {
            return event.user;
        }

        @Override
        public String getValueFromData(Event event) {
            return event.url;
        }
    }
}

3.2、SinkStream类

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    /**
     * Demo job: sends a fixed list of {@link Event}s to the Redis sink from {@link SinkUtils}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        // Fixed in-memory sample records.
        DataStreamSource<Event> events = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );

        // Write to Redis.
        SinkUtils sinkUtils = new SinkUtils();
        events.addSink(sinkUtils.getRedisSink());

        env.execute();
    }
}

四、ElasticsearchSink

4.1 SinkUtils类

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;

public class SinkUtils {
    /**
     * Builds an {@link ElasticsearchSink} that indexes each {@link Event} into index
     * {@code event} with type {@code type}.
     *
     * @return ElasticsearchSink connected to {@code hadoop102:9200}
     */
    public ElasticsearchSink<Event> getElasticsearchSink() {
        // Elasticsearch connection configuration.
        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("hadoop102", 9200));
        return new ElasticsearchSink.Builder<>(httpHosts, new EventIndexFunction()).build();
    }

    /**
     * Turns one {@link Event} into an index request.
     *
     * <p>Declared as a static nested class rather than an anonymous class: an anonymous
     * class created inside an instance method may keep a hidden reference to the enclosing
     * {@code SinkUtils}, which is not serializable, and the sink function must be
     * serializable to be shipped with the job.
     */
    private static final class EventIndexFunction implements ElasticsearchSinkFunction<Event> {
        @Override
        public void process(Event event, RuntimeContext ctx, RequestIndexer indexer) {
            // Document source: a single field named after the user, holding the url.
            HashMap<String, String> dataSource = new HashMap<>();
            dataSource.put(event.user, event.url);

            // Build the index request that carries the write to Elasticsearch.
            IndexRequest indexRequest = Requests.indexRequest()
                    .index("event")
                    .type("type")
                    .source(dataSource);

            // Hand the request to the indexer for sending.
            indexer.add(indexRequest);
        }
    }
}

4.2 SinkStream类

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    /**
     * Demo job: sends a fixed list of {@link Event}s to Elasticsearch. The sinks from the
     * earlier sections are left commented out for reference.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        DataStreamSource<Event> events = sampleEvents(env);

        // Write to files
//        events
//                .map(Event::toString)
//                .addSink(new SinkUtils().getStreamingFileSink("output/output.txt"));

        // Write to Kafka
//        events
//                .map(Event::toString)
//                .addSink(new SinkUtils().getFlinkKafkaProducer());

        // Write to Redis
//        events.addSink(new SinkUtils().getRedisSink());

        // Write to Elasticsearch
        SinkUtils sinkUtils = new SinkUtils();
        events.addSink(sinkUtils.getElasticsearchSink());

        env.execute();
    }

    /** Creates the fixed in-memory sample source used by this demo. */
    private static DataStreamSource<Event> sampleEvents(StreamExecutionEnvironment env) {
        return env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
    }
}

五、MysqlSink

5.1 SinkUtils类

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;

public class SinkUtils {
    /**
     * Custom sink that writes {@link Event}s to the MySQL {@code event} table over plain JDBC.
     *
     * <p>For every record it first tries to update the row of the event's user; if no row
     * was updated it inserts a new one.
     */
    public static class JdbcSink extends RichSinkFunction<Event> {
        // JDBC connection and prepared statements; created in open(), released in close().
        Connection connection = null;
        PreparedStatement insertStmt = null;
        PreparedStatement updateStmt = null;

        /**
         * Opens the JDBC connection and prepares the insert and update statements.
         *
         * @param parameters Flink configuration (unused)
         * @throws Exception if the connection or a statement cannot be created
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            connection = DriverManager.getConnection("jdbc:mysql://hadoop104:3306/test", "root", "****");
            insertStmt = connection.prepareStatement("insert into event (user, url) values (?, ?)");
            updateStmt = connection.prepareStatement("update event set url = ? where user = ?");
        }

        /**
         * Writes one event: update the existing row for the user, or insert if none was updated.
         *
         * @param event   record to write
         * @param context sink context (unused)
         * @throws Exception if a statement fails
         */
        @Override
        public void invoke(Event event, Context context) throws Exception {
            // Bind in placeholder order of "set url = ? where user = ?": url first, user second.
            updateStmt.setString(1, event.url);
            updateStmt.setString(2, event.user);
            int updatedRows = updateStmt.executeUpdate();
            if (updatedRows == 0) {
                insertStmt.setString(1, event.user);
                insertStmt.setString(2, event.url);
                insertStmt.executeUpdate();
            }
        }

        /**
         * Releases the statements and the connection.
         *
         * <p>Each resource is null-checked because open() may have failed part-way, and the
         * nested finally blocks make sure a failure while closing one resource does not
         * prevent the remaining ones from being closed.
         *
         * @throws Exception if closing a resource fails
         */
        @Override
        public void close() throws Exception {
            try {
                if (insertStmt != null) {
                    insertStmt.close();
                }
            } finally {
                try {
                    if (updateStmt != null) {
                        updateStmt.close();
                    }
                } finally {
                    if (connection != null) {
                        connection.close();
                    }
                }
            }
        }
    }
}

5.2 SinkStream类

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        // 将数据写入mysql,方式一
//        inputDS.addSink(JdbcSink.sink(
//                "INSERT INTO event (user, url) values (?, ?)",
//                ((statement, event) -> {
//                    statement.setString(1, event.user);
//                    statement.setString(2, event.url);
//                }),
//                new JdbcConnectionOptions
//                        .JdbcConnectionOptionsBuilder()
//                        .withUrl("jdbc:mysql://hadoop104:3306/test")
//                        .withDriverName("com.mysql.cj.jdbc.Driver")
//                        .withUsername("root")
//                        .withPassword("****")
//                        .build()
//        ));
        // 将数据写入mysql,方式二
        inputDS.addSink(new SinkUtils.JdbcSink());
        env.execute();
    }

你可能感兴趣的:(Flink,flink,java,大数据)