Reading Elasticsearch data with Spark

Environment: Spark 2.0.2, Scala 2.11.8



Maven dependencies:

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-spark-20_2.11</artifactId>
        <version>6.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>6.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>6.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.json</groupId>
        <artifactId>json</artifactId>
        <version>20090211</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.7</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.7</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.7</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>io.netty</groupId>
        <artifactId>netty-all</artifactId>
        <version>4.1.13.Final</version>
    </dependency>
</dependencies>
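
If you build with sbt instead of Maven, roughly equivalent dependencies would look like the sketch below (same coordinates and versions as the Maven list above; spark-sql is an assumed addition because the SparkSession-based examples further down need it):

// build.sbt sketch (assumption: sbt project on Scala 2.11.8)
scalaVersion := "2.11.8"

libraryDependencies ++= Seq(
  "org.apache.spark"  %% "spark-core"             % "2.0.2",
  "org.apache.spark"  %% "spark-sql"              % "2.0.2", // not in the Maven list above, but needed for SparkSession
  "org.elasticsearch" %% "elasticsearch-spark-20" % "6.0.0"
)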
Scala example:

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
import org.elasticsearch.spark.rdd.EsSpark

// Read an index with esRDD, then write the documents into another index with EsSpark.saveToEs
def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    Logger.getLogger("org.apache.hadoop").setLevel(Level.OFF)
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("aaa")
    conf.set("cluster.name", "name")
    conf.set("es.nodes", "ip")
    conf.set("es.port", "port")
    conf.set("es.scroll.size", "10000")
    conf.set("spark.broadcast.compress", "true") // compress broadcast variables
    conf.set("spark.rdd.compress", "true") // compress serialized RDD partitions
    conf.set("spark.io.compression.codec", "org.apache.spark.io.LZFCompressionCodec")
    conf.set("spark.shuffle.file.buffer", "1280k")
    conf.set("spark.reducer.maxSizeInFlight", "1024m")
    conf.set("spark.es.nodes.wan.only", "false")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("index.mapper.dynamic", "false")
    val sc = new SparkContext(conf)
    val read = sc.esRDD("index/type") // read: RDD[(String, Map[String, AnyRef])] keyed by document _id
    EsSpark.saveToEs(read.values, "index1/type1", Map("es.mapping.id" -> "id")) // write, using the documents' "id" field as the _id
    sc.stop()
  }
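
esRDD above scans the whole index. The connector also accepts a query as a second argument, so the filter is executed by Elasticsearch rather than in Spark. A minimal sketch; the index name, field and value are placeholders:

// URI-style query string ("?q=...")
val filtered = sc.esRDD("index/type", "?q=status:active")
// or the full query DSL as a JSON string
val filteredDsl = sc.esRDD("index/type", """{"query": {"match": {"status": "active"}}}""")
println(filtered.count())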
Java example:

import static org.elasticsearch.spark.rdd.api.java.JavaEsSpark.esRDD;

import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

SparkConf conf = new SparkConf().setAppName("Test").setMaster("local[*]");
conf.set("cluster.name", "elasticsearch");
conf.set("es.nodes", "ip");
conf.set("es.port", "port");
conf.set("es.scroll.size", "10000");
conf.set("es.nodes.wan.only", "true");
conf.set("es.index.auto.create", "true");
SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();
JavaSparkContext jsc = new JavaSparkContext(sparkSession.sparkContext()); // adapt the existing SparkContext for the Java API
JavaRDD<Map<String, Object>> searchRdd = esRDD(jsc, "index").values();    // keep the documents, drop the _id keys
for (Map<String, Object> item : searchRdd.collect()) {
    item.forEach((key, value) -> {
        System.out.println("search key:" + key + ", search value:" + value);
    });
}
sparkSession.stop();
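
The same connector also works through Spark SQL, which is handy when a DataFrame is more convenient than an RDD of maps. A rough Scala sketch, reusing the es.nodes/es.port placeholders from above:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("es-sql-read")
  .master("local[*]")
  .config("es.nodes", "ip")
  .config("es.port", "port")
  .getOrCreate()

// "org.elasticsearch.spark.sql" is the data source shipped in elasticsearch-spark-20
val df = spark.read
  .format("org.elasticsearch.spark.sql")
  .load("index/type")

df.printSchema()
df.show(10)
spark.stop()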
