Flink: Consuming Kafka Data and Writing It to Elasticsearch

This example uses Flink to consume data from Kafka and write it to Elasticsearch, letting Elasticsearch create the index automatically. The Flink version is 1.10.0 and the Elasticsearch version is 6.4.3.
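
Assuming a Maven build, the two connectors used below would come from dependencies like these (the _2.11 Scala suffix is an assumption about the build and should match your Flink distribution):

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
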
package com.bigdata.flink.sinks;

import com.bigdata.project.utils.FlinkUtils;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch6.RestClientFactory;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.RestClientBuilder;

import java.util.*;

/**
 * @author spencer
 * @date 2020/6/4 17:56
 */
public class FlinkToEs {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "hadoop00:9092,hadoop01:9092,hadoop01:9092");
        properties.setProperty("group.id", "gwc10");
        // If the consumer group has no committed offset yet, start from the earliest records
        properties.setProperty("auto.offset.reset", "earliest");
        // Do not let the Kafka consumer auto-commit offsets
        properties.setProperty("enable.auto.commit", "false");

        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<>(
                "wc10",
                new SimpleStringSchema(),
                properties
        );

        // Alternative setup: read the Kafka configuration from a properties file
        // through the project's FlinkUtils helper instead of hard-coding it:
//        ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);
//        StreamExecutionEnvironment env = FlinkUtils.getEnv();
//        DataStream<String> kafkaSource = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class);

        List<HttpHost> esHttphost = new ArrayList<>();
        esHttphost.add(new HttpHost("hadoop00", 9200));

        ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
                esHttphost,
                new ElasticsearchSinkFunction<String>() {

                    @Override
                    public void process(String data, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {

                        Map<String, String> json = new HashMap<>();
                        json.put("data", data);
                        System.out.println(json); // debug output; remove in production
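                        // Elasticsearch creates flink_index automatically on the first
                        // index request, so no mapping needs to be set up in advance.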
                        IndexRequest indexRequest = Requests.indexRequest()
                                .index("flink_index")
                                .type("_doc")
                                .source(json);

                        requestIndexer.add(indexRequest);
                    }
                }
        );

        // Built-in bulk settings: flush after every single element so records
        // show up immediately (fine for a demo, too chatty for production)
        esSinkBuilder.setBulkFlushMaxActions(1);
        esSinkBuilder.setRestClientFactory(
                restClientBuilder -> {
//                    restClientBuilder.setDefaultHeaders();
//                    restClientBuilder.setMaxRetryTimeoutMillis(Integer.MAX_VALUE);
//                    restClientBuilder.setPathPrefix(...)
//                    restClientBuilder.setHttpClientConfigCallback(...)
                }
        );

        // Build the stream once so the Elasticsearch sink and the console
        // printer share a single Kafka source instead of consuming twice
        DataStreamSource<String> kafkaStream = env.addSource(kafkaSource);
        kafkaStream.addSink(esSinkBuilder.build());

        kafkaStream.print();

        env.execute("FlinkToEs");
    }
}
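
Once the job is running, every message from the wc10 topic should land in the flink_index index as a document of the form {"data": "<message>"}. Because setBulkFlushMaxActions(1) flushes after each element, every record is sent to Elasticsearch immediately.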

Higher Elasticsearch versions are still being tested...
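
For Elasticsearch 7.x, Flink 1.10 ships a separate connector (flink-connector-elasticsearch7). Below is a minimal, untested sketch of the sink construction with it; the job structure stays the same, the connector package changes, and the .type(...) call goes away because ES 7 removed mapping types:

import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink;

        ElasticsearchSink.Builder<String> es7SinkBuilder = new ElasticsearchSink.Builder<>(
                esHttphost,
                new ElasticsearchSinkFunction<String>() {
                    @Override
                    public void process(String data, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                        Map<String, String> json = new HashMap<>();
                        json.put("data", data);
                        // no .type(...) here: mapping types were removed in ES 7
                        requestIndexer.add(Requests.indexRequest()
                                .index("flink_index")
                                .source(json));
                    }
                }
        );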
