Spark 广播变量的使用(Java版)

 原文地址:https://www.cnblogs.com/learn-bigdata/p/10794881.html

广播变量:主要应用于在Driver端创建、但需要在Executor端使用的变量,此时可以使用广播变量

package SparkStreaming;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;

import java.util.Arrays;
import java.util.List;

/**
 * Minimal example of a Spark broadcast variable using the Java API.
 *
 * <p>A value created on the driver ({@code "dwj3"}) is wrapped in a
 * {@link Broadcast} and then read inside an RDD {@code filter} function to
 * drop the matching element from a small list of names.
 */
public class BroadCast {
    /**
     * Builds a local Spark context, broadcasts a single "blacklisted" name,
     * filters it out of a three-element RDD and prints the remaining names
     * ({@code dwj1} and {@code dwj2}) to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local")
                .setAppName("BroadCastTest");
        // JavaSparkContext is Closeable; try-with-resources stops the context
        // even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Broadcast variables must be created on the driver: the context
            // (sc) is not serialized and therefore cannot be shipped to the
            // executors.
            final Broadcast<String> blackname = sc.broadcast("dwj3");
            List<String> name = Arrays.asList(
                    "dwj1",
                    "dwj2",
                    "dwj3");
            // The broadcast's type parameter is chosen to fit the data being
            // shared; e.g. a map built on the driver can be broadcast the
            // same way:
            //   Broadcast<...> labelLogicMapBroadCast = sc.broadcast(labelLogicMap);
            // Without a broadcast, the filter below could instead capture a
            // plain local String holding "dwj3".

            JavaRDD<String> nameRDD = sc.parallelize(name);
            JavaRDD<String> namefilter = nameRDD.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) throws Exception {
                    // Read the broadcast value inside the filter function.
                    String blacknames = blackname.getValue();
                    // Keep every element that is NOT the blacklisted name.
                    return !blacknames.equals(s);
                }
            });
            List<String> lastname = namefilter.collect();
            for (String str : lastname) {
                System.out.println(str);
            }
        }
    }
}

 

你可能感兴趣的:(Spark)