Importing CSV files into ClickHouse with Spark 3.0.0

The pom.xml file:
Spark's bundled Jackson and Guava versions conflict with the ones pulled in here, so those dependencies need to be isolated by shading (package relocation):

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>spark_clickhouse</groupId>
    <artifactId>spark_clickhouse</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>3.0.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>ru.yandex.clickhouse</groupId>
            <artifactId>clickhouse-jdbc</artifactId>
            <version>0.2.4</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>2.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>2.10.2</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.module</groupId>
            <artifactId>jackson-module-scala_2.12</artifactId>
            <version>2.10.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <relocations>
                                <relocation>
                                    <pattern>com.fasterxml.jackson</pattern>
                                    <shadedPattern>noc.com.fasterxml.jackson</shadedPattern>
                                </relocation>
                                <relocation>
                                    <pattern>com.google.guava</pattern>
                                    <shadedPattern>noc.com.google.guava</shadedPattern>
                                </relocation>
                            </relocations>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

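Building with mvn clean package then produces the shaded jar under target/: the relocations rewrite com.fasterxml.jackson and com.google.guava classes into the noc.* packages, so they no longer clash with the versions Spark itself ships.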

There is also a net.jpountz.lz4:lz4:1.3.0 jar that conflicts with org.lz4:lz4-java:1.7.1; the 1.3.0 jar was removed.
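If you would rather let Maven drop the conflicting jar than remove it by hand, a dependency exclusion does the same job. A minimal sketch, assuming the 1.3.0 artifact is pulled in transitively through clickhouse-jdbc (verify the actual path with mvn dependency:tree before relying on this):

    <dependency>
        <groupId>ru.yandex.clickhouse</groupId>
        <artifactId>clickhouse-jdbc</artifactId>
        <version>0.2.4</version>
        <exclusions>
            <!-- assumption: net.jpountz.lz4:lz4:1.3.0 arrives transitively here -->
            <exclusion>
                <groupId>net.jpountz.lz4</groupId>
                <artifactId>lz4</artifactId>
            </exclusion>
        </exclusions>
    </dependency>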

import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.storage.StorageLevel

object SparkWriteCk {
  // JDBC connection settings for ClickHouse; batchsize and socket_timeout
  // matter most for large loads (bigger insert batches, longer server timeout).
  val properties = new Properties()
  properties.put("driver", "ru.yandex.clickhouse.ClickHouseDriver")
  properties.put("user", "default")
  properties.put("password", "*****")
  properties.put("batchsize", "100000")           // rows per JDBC batch insert
  properties.put("socket_timeout", "300000")      // ms; raise for slow or large writes
  properties.put("numPartitions", "8")            // parallel JDBC connections
  properties.put("rewriteBatchedStatements", "true")
  val url = "jdbc:clickhouse://<server-ip>:8123/default"
  val table = "fact_customer_qty"

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    val session = SparkSession.builder().master("local[*]").config(conf).appName("write-to-ck").getOrCreate()

    // Explicit schema: must match the CSV column order and the
    // column types of the target ClickHouse table.
    val columns = StructType(
      List(
        StructField("ymd", StringType, false),
        StructField("sup_name", StringType, false),
        StructField("item_name", StringType, false),
        StructField("need_qty", IntegerType, false),
        StructField("qty", IntegerType, false),
        StructField("unitcode", StringType, false)
      )
    )

    // An explicit schema is supplied, so inferSchema is unnecessary.
    val df = session.read.format("csv")
      .option("header", false)
      .option("sep", ",")
      .schema(columns)
      .load("C:\\Users\\86136\\IdeaProjects\\spark_learning\\spark_scala\\resources\\fact_customer_qty.csv")
      .persist(StorageLevel.MEMORY_ONLY_SER_2) // cached: the DataFrame is used twice below

    println(df.schema)
    df.write.mode(SaveMode.Append).jdbc(url, table, properties)
    println("write done")
    df.unpersist(true)
    session.stop()
  }
}
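As a sanity check after the write, the table can be read back over the same JDBC connection and counted. A minimal sketch, meant to sit at the end of main before the session is stopped, reusing the url, table, and properties defined above:

  // Read the table back through JDBC and count rows -- a quick check
  // that the Append write actually landed in ClickHouse.
  val written = session.read.jdbc(url, table, properties)
  println(s"rows in $table: ${written.count()}")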

A screenshot of the CSV file is shown below:

[image.png: screenshot of fact_customer_qty.csv]
