SPARK-SQL - RDD/Dataset/DataFrame的互相转换

转换用到的方法如下:
rdd()、as()、toDF();从 RDD 创建 DataFrame/Dataset 时还需要用到 createDataFrame()、createDataset()

代码示例

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.*;
import pojo.Dog;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates converting between RDD, Dataset and DataFrame in Spark SQL
 * using a local SparkSession and two {@code Dog} beans.
 *
 * <p>Conversions shown, in order:
 * <ol>
 *   <li>JavaRDD to DataFrame via {@code createDataFrame}</li>
 *   <li>DataFrame to RDD via {@code rdd()}</li>
 *   <li>RDD to typed Dataset via {@code createDataset} and a bean encoder</li>
 *   <li>typed Dataset to RDD via {@code rdd()}</li>
 *   <li>DataFrame to typed Dataset via {@code as(encoder)}</li>
 *   <li>typed Dataset to DataFrame via {@code toDF(columnNames...)}</li>
 * </ol>
 */
public class test_23 {
    /**
     * Runs the conversion walkthrough and prints each intermediate result.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .config("spark.driver.host", "localhost")
                .appName("RDDDatasetTransform")
                .master("local")
                .getOrCreate();

        // Stop the session even when one of the steps below throws.
        try {
            JavaSparkContext javaSparkContext = new JavaSparkContext(spark.sparkContext());

            spark.sparkContext().setLogLevel("ERROR");

            Dog dog1 = new Dog("jitty", "red");
            Dog dog2 = new Dog("mytty", "yellow");
            List<Dog> dogs = Arrays.asList(dog1, dog2);

            JavaRDD<Dog> dogJavaRDD = javaSparkContext.parallelize(dogs);

            // A JavaRDD cannot be converted to a Dataset directly; it has to go
            // through one of the create* factory methods on the session.
            // 1: build a DataFrame from the JavaRDD
            Dataset<Row> dogDf = spark.createDataFrame(dogJavaRDD, Dog.class);
            dogDf.show();

            // 2: DataFrame to RDD (elements are Row); kept only to show the call
            RDD<Row> dogRowRdd = dogDf.rdd();

            // 3: an RDD cannot be converted to a Dataset directly either;
            //    create the Dataset from the RDD with a bean encoder
            Encoder<Dog> dogEncoder = Encoders.bean(Dog.class);
            Dataset<Dog> javaBeanDS = spark.createDataset(dogJavaRDD.rdd(), dogEncoder);
            javaBeanDS.show();

            // 4: typed Dataset to RDD (elements are Dog); kept only to show the call
            RDD<Dog> dogRdd = javaBeanDS.rdd();

            // 5: DataFrame to typed Dataset
            Dataset<Dog> dsFromDf = dogDf.as(Encoders.bean(Dog.class));
            dsFromDf.show();

            // 6: typed Dataset to DataFrame, renaming the columns.
            // NOTE(review): toDF assigns the new names by column position. The
            // Dog bean is not visible here, so confirm that the first column
            // really holds the name and the second the color - TODO confirm.
            Dataset<Row> dfFromDs = dsFromDf.toDF("first_name", "lovest_color");
            dfFromDs.show();
        } finally {
            spark.stop();
        }
    }
}

 

你可能感兴趣的:(#,spark,spark)