SparkSql篇3:SparkSql读写kudu

SparkSql篇3:SparkSql读写kudu

spark操作kudu的方式有很多,spark封装了KuduContext,里面有kudu的增删改查
本文主要针对sparksql,利用外部数据源方式,直接从sql层面读写kudu

废话不多说,直接上干货

package kudu

import org.apache.kudu.spark.kudu.KuduContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.kudu.spark.kudu

/**
 * Example job that copies a Kudu table into another Kudu table through Spark SQL.
 *
 * The source table is loaded with the kudu-spark data source, registered as a
 * Spark temporary view, queried with plain SQL, and the result is appended to
 * the target Kudu table.
 */
object sparkKuduReadAndWrite {

  // Data source name of the kudu-spark integration.
  private val KuduFormat = "org.apache.kudu.spark.kudu"
  // Address of the Kudu master used for both the read and the write.
  private val KuduMaster = "cdh2:7051"
  // Kudu table that is read.
  private val SourceTable = "spark_kudu"
  // Kudu table that receives the rows.
  private val TargetTable = "xytest"
  // Name of the Spark temporary view placed over the source table.
  private val TempViewName = "xydate1"

  /**
   * Runs the copy job on a local Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder()
      .master("local")
      .appName("appName")
      // NOTE(review): presumably set so that a small local JVM passes Spark's
      // minimum-memory check -- confirm before using outside local runs.
      .config("spark.testing.memory", "471859200")
      .getOrCreate()

    try {
      // Read the source table from Kudu.
      val kuduReader = sparkSession.read
        .format(KuduFormat)
        .option("kudu.master", KuduMaster)
        .option("kudu.table", SourceTable)
        .load()

      // Register a Spark temporary view over the Kudu data. The "replace"
      // variant does not fail when a view with this name already exists in a
      // reused session.
      kuduReader.createOrReplaceTempView(TempViewName)
      sparkSession.sqlContext.cacheTable(TempViewName)

      try {
        // Query the Kudu-backed view with ordinary SQL.
        val datas = sparkSession.sql(s"select * from $TempViewName")

        // Append the query result to the target Kudu table.
        datas.write
          .format(KuduFormat)
          .option("kudu.master", KuduMaster)
          .option("kudu.table", TargetTable)
          .mode("append")
          .save()
      } finally {
        // Release the cached view even when the query or the write fails.
        sparkSession.sqlContext.uncacheTable(TempViewName)
      }
    } finally {
      // Always shut the session down so a failed job does not leak it.
      sparkSession.stop()
    }
  }
}

pom如下:

      <!-- Kudu Java client. ${kudu.version} is not defined in this snippet; it is
           presumably declared under <properties> elsewhere in the pom (confirm). -->
      <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-client</artifactId>
            <version>${kudu.version}</version>
        </dependency>
        <!-- Kudu/Spark integration that provides the "org.apache.kudu.spark.kudu" data source.
             NOTE(review): the artifact suffix suggests Spark 2 with Scala 2.11; confirm it
             matches the Spark and Scala versions of the project. -->
        <dependency>
            <groupId>org.apache.kudu</groupId>
            <artifactId>kudu-spark2_2.11</artifactId>
            <version>${kudu.version}</version>
        </dependency>

你可能感兴趣的:(sparksql,spark,kudu)