Flink Sink Operators
- 1. StreamingFileSink
  - 1.1 SinkUtils class
  - 1.2 SinkStream class
- 2. KafkaSink
  - 2.1 SinkUtils class
  - 2.2 SinkStream class
- 3. RedisSink
  - 3.1 SinkUtils class
  - 3.2 SinkStream class
- 4. ElasticsearchSink
  - 4.1 SinkUtils class
  - 4.2 SinkStream class
- 5. MysqlSink
  - 5.1 SinkUtils class
  - 5.2 SinkStream class
1. StreamingFileSink

1.1 SinkUtils class

package com.hpsk.flink.sink;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

import java.util.concurrent.TimeUnit;

public class SinkUtils {
    // Row-format file sink that rolls a part file when it reaches 1 GB,
    // every 15 minutes, or after 5 minutes without new records.
    public StreamingFileSink<String> getStreamingFileSink(String path) {
        return StreamingFileSink
                .<String>forRowFormat(new Path(path), new SimpleStringEncoder<>("UTF-8"))
                .withRollingPolicy(
                        DefaultRollingPolicy
                                .builder()
                                .withMaxPartSize(1024 * 1024 * 1024)
                                .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
                                .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
                                .build()
                )
                .build();
    }
}
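One caveat worth knowing: StreamingFileSink only commits (finalizes) part files when a checkpoint completes, so a job without checkpointing leaves its output in in-progress files. A minimal sketch of enabling it in the driver below; the 10-second interval is an arbitrary choice for illustration:

// assumption: checkpoint every 10 seconds so pending part files get committed
env.enableCheckpointing(10_000L);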
1.2 SinkStream class

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        inputDS
                .map(Event::toString)
                // the path is treated as a base directory; part files are created inside it
                .addSink(new SinkUtils().getStreamingFileSink("output/output.txt"));
        env.execute();
    }
}
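Every driver in this post depends on an Event bean from com.hpsk.flink.beans that is never shown. A minimal sketch consistent with how it is used here (public fields and a no-arg constructor so Flink treats it as a POJO; the name of the third field is an assumption):

package com.hpsk.flink.beans;

public class Event {
    public String user;
    public String url;
    public Long timestamp;

    // Flink's POJO serializer requires a public no-arg constructor
    public Event() { }

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }

    @Override
    public String toString() {
        return "Event{user='" + user + "', url='" + url + "', timestamp=" + timestamp + "}";
    }
}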
2. KafkaSink

2.1 SinkUtils class

package com.hpsk.flink.sink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class SinkUtils {
    // Writes each String record to the Kafka topic "event" on hadoop102:9092.
    public FlinkKafkaProducer<String> getFlinkKafkaProducer() {
        return new FlinkKafkaProducer<>(
                "hadoop102:9092",        // broker list
                "event",                 // target topic
                new SimpleStringSchema()
        );
    }
}
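The three-argument constructor used above takes the broker list as a plain string. When more producer settings are needed, FlinkKafkaProducer also has a constructor that accepts a java.util.Properties object; a sketch of the same sink written that way:

Properties props = new Properties();
props.setProperty("bootstrap.servers", "hadoop102:9092");
// any further standard Kafka producer options can be set here as well

FlinkKafkaProducer<String> producer =
        new FlinkKafkaProducer<>("event", new SimpleStringSchema(), props);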
2.2 SinkStream class

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        inputDS
                .map(Event::toString)
                .addSink(new SinkUtils().getFlinkKafkaProducer());
        env.execute();
    }
}
3. RedisSink

3.1 SinkUtils class

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

public class SinkUtils {
    public RedisSink<Event> getRedisSink() {
        FlinkJedisPoolConfig config = new FlinkJedisPoolConfig
                .Builder()
                .setHost("hadoop102")
                .build();
        return new RedisSink<>(config, new RedisMapper<Event>() {
            @Override
            public RedisCommandDescription getCommandDescription() {
                // HSET into the Redis hash named "event"
                return new RedisCommandDescription(RedisCommand.HSET, "event");
            }

            @Override
            public String getKeyFromData(Event event) {
                return event.user;   // hash field
            }

            @Override
            public String getValueFromData(Event event) {
                return event.url;    // hash value
            }
        });
    }
}
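Only the host is set above because the defaults fit: port 6379 and no authentication. If your Redis differs, the same builder exposes the usual connection knobs; a sketch with placeholder values:

FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
        .setHost("hadoop102")
        .setPort(6379)            // default port, shown for completeness
        .setPassword("******")    // placeholder: only needed if Redis requires AUTH
        .setDatabase(0)           // logical database index
        .build();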
3.2 SinkStream class

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        inputDS.addSink(new SinkUtils().getRedisSink());
        env.execute();
    }
}
4. ElasticsearchSink

4.1 SinkUtils class

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.util.ArrayList;
import java.util.HashMap;

public class SinkUtils {
    public ElasticsearchSink<Event> getElasticsearchSink() {
        ArrayList<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("hadoop102", 9200));
        return new ElasticsearchSink.Builder<>(httpHosts, new ElasticsearchSinkFunction<Event>() {
            @Override
            public void process(Event event, RuntimeContext ctx, RequestIndexer indexer) {
                // one document per record: { user : url }
                HashMap<String, String> dataSource = new HashMap<>();
                dataSource.put(event.user, event.url);
                IndexRequest indexRequest = Requests.indexRequest()
                        .index("event")
                        .type("type")    // mapping types are still required in Elasticsearch 6
                        .source(dataSource);
                indexer.add(indexRequest);
            }
        }).build();
    }
}
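By default the Elasticsearch sink buffers actions and sends them in bulk, so in a small demo like this nothing may show up in the index until the job finishes. For testing, the Builder can be told to flush after every action; a sketch (sinkFunction stands for the anonymous ElasticsearchSinkFunction above):

ElasticsearchSink.Builder<Event> builder =
        new ElasticsearchSink.Builder<>(httpHosts, sinkFunction);
builder.setBulkFlushMaxActions(1);  // flush each record immediately -- demo only
return builder.build();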
4.2 SinkStream class

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        inputDS.addSink(new SinkUtils().getElasticsearchSink());
        env.execute();
    }
}
5. MysqlSink

5.1 SinkUtils class

package com.hpsk.flink.sink;

import com.hpsk.flink.beans.Event;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class SinkUtils {
    // Hand-written upsert sink: try an UPDATE first; if no row matched, INSERT.
    public static class JdbcSink extends RichSinkFunction<Event> {
        Connection connection = null;
        PreparedStatement insertStmt = null;
        PreparedStatement updateStmt = null;

        @Override
        public void open(Configuration parameters) throws Exception {
            connection = DriverManager.getConnection("jdbc:mysql://hadoop104:3306/test", "root", "****");
            insertStmt = connection.prepareStatement("insert into event (user, url) values (?, ?)");
            updateStmt = connection.prepareStatement("update event set url = ? where user = ?");
        }

        @Override
        public void invoke(Event event, Context context) throws Exception {
            // parameter 1 is the new url, parameter 2 the user in the WHERE clause
            updateStmt.setString(1, event.url);
            updateStmt.setString(2, event.user);
            updateStmt.execute();
            if (updateStmt.getUpdateCount() == 0) {
                insertStmt.setString(1, event.user);
                insertStmt.setString(2, event.url);
                insertStmt.execute();
            }
        }

        @Override
        public void close() throws Exception {
            insertStmt.close();
            updateStmt.close();
            connection.close();
        }
    }
}
5.2 SinkStream class

package com.hpsk.flink.stream;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.sink.SinkUtils;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SinkStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStreamSource<Event> inputDS = env.fromElements(
                new Event("Mary", "./home", 2000L),
                new Event("Bob", "./home", 3000L),
                new Event("Mary", "./home", 2000L),
                new Event("Alice", "./home", 1000L),
                new Event("Alice", "./home", 2000L),
                new Event("Mary", "./home", 3000L),
                new Event("Bob", "./home", 5000L),
                new Event("Bob", "./home", 6000L)
        );
        inputDS.addSink(new SinkUtils.JdbcSink());
        env.execute();
    }
}
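The hand-rolled RichSinkFunction above works, but Flink also ships an official JDBC connector (the flink-connector-jdbc artifact) whose JdbcSink.sink factory handles statement setup and batching. A sketch of the plain-insert path against the same assumed test.event table, usable in place of the addSink call above:

import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;

// replaces inputDS.addSink(new SinkUtils.JdbcSink()):
inputDS.addSink(JdbcSink.sink(
        "insert into event (user, url) values (?, ?)",
        (statement, event) -> {
            statement.setString(1, event.user);
            statement.setString(2, event.url);
        },
        new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:mysql://hadoop104:3306/test")
                .withDriverName("com.mysql.cj.jdbc.Driver")
                .withUsername("root")
                .withPassword("****")   // same masked password as above
                .build()
));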