SparkStreaming 消费kafka数据,并存入Elasticsearch(java语言)

  1. 开发环境:spark2.3.1,es6.6.1,jdk1.8

  2. maven依赖:见末尾

  3. 完整代码

package spark;

import com.google.gson.Gson;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.elasticsearch.spark.streaming.api.java.JavaEsSparkStreaming;

import java.util.*;

/**
 * ZhangYaotong
 * 2019-02-26
 */

public class KafkaSpark2Es
{
    public static void main(String[] args) throws InterruptedException {

        SparkConf conf = new SparkConf()
                .setMaster("spark://master:7077")
                .setAppName("KafkaSpark2Es")
                .set("spark.executor.memory", "2g")
                .set("spark.dynamicAllocation.enabled", "false");
        
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

        String brokers = "master:9092";//指定kafka地址
        String groupId = "kafka-01";//指定消费者组id
        String topics = "topic1";//指定topic
        Set topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
        Map kafkaParams = new HashMap<>();
        kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
        kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        JavaInputDStream> messages = KafkaUtils.createDirectStream(ssc,
                LocationStrategies.PreferConsistent(), ConsumerStrategies.Subscribe(topicsSet, kafkaParams));

        JavaDStream lines = messages.map(new Function, String>() {
            Map map = new HashMap<>();
            @Override
            public String call(ConsumerRecord record) throws Exception {
                String [] splits = record.value().split("\\|\\|");
                for (String date : splits){
                    if(date.lastIndexOf("=") != -1 && date.indexOf("=")!= date.length()){
                        map.put(date.substring(0, date.indexOf("=")), (date.substring(date.indexOf("=")+1, date.length())));
                    }
                }
                Gson gson = new Gson();
                return gson.toJson(map);
            }
        });
        lines.print();

        JavaEsSparkStreaming.saveJsonToEs(lines, "/log/doc1");

        ssc.start();

        ssc.awaitTermination();

    }
}

  4. pom文件



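<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>spark</groupId>
  <artifactId>spark</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>spark</name>
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <java.version>1.8</java.version>
    <spark.version>2.2.3</spark.version>
    <scala.version>2.11.8</scala.version>
    <elasticsearch.version>6.6.1</elasticsearch.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch-spark-20_2.11</artifactId>
      <version>${elasticsearch.version}</version>
    </dependency>

    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>

    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement>
      <plugins>
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>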


你可能感兴趣的:(projectDemo)