Java Spark example: adding a random prefix to Tuple2 keys with mapToPair and a lambda

 

 

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

/**
 * Demonstrates {@code mapToPair} with a lambda: each (Long, Long) tuple's first
 * element is prefixed with a random digit, yielding a (String, Long) pair RDD.
 * A common trick for mitigating data skew before a keyed aggregation.
 */
public class Test1 {
    public static void main(String[] args) {
        // Prepend a random numeric prefix (0-9) to the first element of each (x, y) tuple.

        SparkConf conf = new SparkConf().setAppName("appName").setMaster("local[*]");

        // JavaSparkContext is Closeable; try-with-resources guarantees the
        // context is stopped even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {

            // Typed (no raw types) so mapToPair infers Tuple2<Long, Long> for x.
            List<Tuple2<Long, Long>> data =
                    Arrays.asList(new Tuple2<>(111L, 222L), new Tuple2<>(100L, 200L));
            JavaRDD<Tuple2<Long, Long>> rdd = sc.parallelize(data);

            // (Long, Long) -> (String, Long): key becomes "<prefix>_<key>".
            // ThreadLocalRandom.current() is evaluated per call on the executor,
            // avoiding both a new Random per element and lambda-serialization issues.
            JavaPairRDD<String, Long> randomPrefixRdd = rdd.mapToPair(
                    x -> new Tuple2<>(ThreadLocalRandom.current().nextInt(10) + "_" + x._1, x._2));

            System.out.println(randomPrefixRdd.collect());
        }
    }
}

 

 

 

You may also be interested in: (java-spark)