An example of RDD flatMap in Spark

package examples;

import constant.Constants;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;

import java.util.Arrays;

/**
 * RDD.flatMap(T input): Iterable<U>
 * Takes a single input element and returns a collection of 0..n output elements,
 * which are flattened into the resulting RDD.
 */
public class FlatMapExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local[3]")
                .setAppName("FlatMapExample");
        JavaSparkContext ctx = new JavaSparkContext(conf);

        // Read the input text file; each element of the RDD is one line.
        JavaRDD<String> linesRDD = ctx.textFile(Constants.TEXT_FILE_PATH);

        // Split each line into words; flatMap flattens the per-line lists
        // into a single RDD of words. (Spark 1.x API: call returns an Iterable.)
        JavaRDD<String> wordsRDD = linesRDD.flatMap(
                new FlatMapFunction<String, String>() {
                    @Override
                    public Iterable<String> call(String line) throws Exception {
                        return Arrays.asList(line.split(" "));
                    }
                }
        );

        // collect() pulls all words back to the driver; only do this on small data.
        System.out.println(wordsRDD.collect());

        ctx.close();
    }
}
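
Note that the class above targets the Spark 1.x Java API, where FlatMapFunction.call returns an Iterable. From Spark 2.0 on, call returns a java.util.Iterator and is usually written as a lambda. Below is a minimal sketch of the same word-splitting job under the Spark 2.x contract; the class name FlatMapLambdaExample and the input path "input.txt" are placeholders, not part of the original example.

package examples;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

public class FlatMapLambdaExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local[3]")
                .setAppName("FlatMapLambdaExample");
        JavaSparkContext ctx = new JavaSparkContext(conf);

        // "input.txt" is a placeholder path, not from the original example.
        JavaRDD<String> linesRDD = ctx.textFile("input.txt");

        // In Spark 2.x+ the FlatMapFunction contract returns an Iterator,
        // so the lambda must end with .iterator().
        JavaRDD<String> wordsRDD = linesRDD.flatMap(
                line -> Arrays.asList(line.split(" ")).iterator());

        System.out.println(wordsRDD.collect());
        ctx.close();
    }
}

The only behavioral difference is the trailing .iterator() call required by the newer interface; the flattening semantics of flatMap are unchanged.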
