Spark SQL 基本操作



<!-- parent pom.xml (root of the multi-module project) -->
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>

    <groupId>demo.spark</groupId>
    <artifactId>spark.learning</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>

    <modules>
        <module>core</module>
        <module>sparksql</module>
        <module>sparkstreaming</module>
        <module>sparkmlib</module>
    </modules>

    <properties>
        <spark.version>2.3.1</spark.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>
</project>



    
<!-- sparksql module pom.xml -->
<project xmlns="http://maven.apache.org/POM/4.0.0">
    <parent>
        <artifactId>spark.learning</artifactId>
        <groupId>demo.spark</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>spark-sql</artifactId>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.*;

/**
 * Spark SQL basics: builds a DataFrame from a CSV-style text file via an RDD
 * of {@link Person} beans, and (in {@link #jsontest}) a typed Dataset from JSON.
 */
public class SparkSqlDemo {
    /** Paths to the sample data sets — adjust to your local checkout. */
    public static final String jsonpath = "D:\\workspace\\demos\\sparklearning\\sparksql\\src\\main\\resources\\data.json";
    public static final String textpath = "D:\\workspace\\demos\\sparklearning\\sparksql\\src\\main\\resources\\data.txt";

    /**
     * Reads {@code textpath} ("name, age" per line), maps each line to a
     * {@link Person}, registers the result as temp view {@code user} and
     * runs a simple SQL query against it.
     */
    public static void main(String[] args) {
        // Windows-only workarounds (warehouse dir + winutils location).
        System.setProperty("spark.sql.warehouse.dir", "d:\\");
        System.setProperty("hadoop.home.dir", "D:\\hadoop_dir_path\\hadoop-common-2.2.0-bin-master\\");

        SparkSession sparkSession = SparkSession.builder()
                .master("local[*]")
                .appName("Java Spark SQL")
                .getOrCreate();

        try {
            // FIX: the original used the raw FlatMapFunction type; its erased
            // abstract method is call(Object), so a raw anonymous class
            // declaring call(String) with @Override does not compile.
            JavaRDD<Person> jr = sparkSession.read().textFile(textpath).toJavaRDD()
                    .flatMap(new FlatMapFunction<String, Person>() {
                        @Override
                        public Iterator<Person> call(String line) throws Exception {
                            List<Person> result = new ArrayList<>();
                            String[] fields = line.split(",");
                            if (fields.length >= 2) {            // skip malformed/blank lines
                                Person p = new Person();
                                p.setName(fields[0].trim());
                                // FIX: trim — the file is "Andy, 30", so the raw
                                // token carries a leading space.
                                p.setAge(fields[1].trim());
                                result.add(p);
                            }
                            return result.iterator();
                        }
                    });

            // Convert the RDD of beans into a DataFrame and query it via SQL.
            Dataset<Row> personDataset = sparkSession.createDataFrame(jr, Person.class);
            personDataset.createOrReplaceTempView("user");
            sparkSession.sql("select * from user").show();
        } finally {
            sparkSession.stop();    // release the local Spark context
        }
    }

    /**
     * Loads {@code jsonpath} as a typed {@code Dataset<Person>} (one JSON
     * object per line; missing fields become null) and queries it via SQL.
     *
     * @param sparkSession an active session, created and owned by the caller
     * @throws AnalysisException if the view registration or query fails
     */
    public static void jsontest(SparkSession sparkSession) throws AnalysisException {
        Encoder<Person> personEncoder = Encoders.bean(Person.class);
        Dataset<Person> ds = sparkSession.read().json(jsonpath).as(personEncoder);
        ds.createOrReplaceTempView("user");
        sparkSession.sql("select * from user").show();
    }
}
/**
 * JavaBean carrying one record of the sample data.
 *
 * Implements {@link Serializable} because Spark ships bean instances between
 * driver and executors when building a DataFrame from an RDD; fields are
 * private per JavaBean convention (Spark's bean encoder and
 * {@code createDataFrame} use only the getters/setters).
 */
public class Person implements Serializable {
    private String name;
    // Age kept as String: the text file is parsed without numeric conversion,
    // and the JSON sample may omit the field entirely (null).
    private String age;

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getAge() {
        return age;
    }

    public void setAge(String age) {
        this.age = age;
    }
}

数据集JSON

{"name":"Michael"}
{"name":"Andy", "age":30}
{"name":"Justin", "age":19}
数据集TXT
Michael, 29
Andy, 30
Justin, 19

 

结果

+---+-------+
|age|   name|
+---+-------+
| 29|Michael|
| 30|   Andy|
| 19| Justin|
+---+-------+
 

你可能感兴趣的:(Spark,Java)