spark读取Excel文件:

spark读取Excel文件:

pom.xml文件:



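<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.wgy.project</groupId>
    <artifactId>ProjectTest0428</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.4.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.4.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>

        <dependency>
            <groupId>com.cloudera.sparkts</groupId>
            <artifactId>sparkts</artifactId>
            <version>0.4.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>2.4.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <dependency>
            <groupId>com.crealytics</groupId>
            <artifactId>spark-excel_2.11</artifactId>
            <version>0.13.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-collections4</artifactId>
            <version>4.4</version>
        </dependency>
    </dependencies>
</project>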

读取Excel文件:

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, DataFrameReader, SparkSession}
import com.crealytics.spark.excel._

/**
 * Minimal example that loads an Excel workbook from HDFS into a DataFrame
 * through the spark-excel implicit reader and prints it to stdout.
 */
object T {

  /**
   * Entry point: builds the session, reads `ss.xlsx` from HDFS and shows it.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = getSpark2
    try {
      // `excel(...)` is provided by the `com.crealytics.spark.excel._` implicits;
      // the header row supplies column names and column types are inferred.
      val frame: DataFrame = spark.read
        .excel(header = true, inferSchema = true)
        .load("hdfs://hadoop102:9000/ss.xlsx")
      frame.show()
    } finally {
      // Always release the session, even when the read or show fails.
      spark.stop()
    }
  }

  /**
   * Builds (or reuses) a local-mode SparkSession with Hive support enabled and
   * the SQL warehouse directory pointed at HDFS.
   *
   * @return the active or newly created SparkSession
   */
  def getSpark2: SparkSession =
    SparkSession.builder()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop102:9000/user/hive/warehouse")
      .master("local")
      .enableHiveSupport()
      .getOrCreate()
}

你可能感兴趣的:(大数据专栏,spark,大数据,hadoop)