Exporting data from MySQL to HDFS

A simple example program.

Putting the pieces together bit by bit; the end goal is:

flume->kafka->spark->hbase
                |--->hive
package pers.machi

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
import org.apache.spark.SparkConf


object Mysql2spark {

    def main(args: Array[String]): Unit = {
        // Run HDFS operations as the hadoop user so the write at the end is permitted
        System.setProperty("HADOOP_USER_NAME", "hadoop")

        val conf = new SparkConf()
            .setMaster("local[*]")
            .setAppName("mysql2spark")

        val spark = SparkSession.builder.config(conf).getOrCreate()
        spark.sparkContext.setLogLevel("WARN")

        // JDBC connection parameters
        val driver = "com.mysql.jdbc.Driver"
        val url = "jdbc:mysql://192.168.30.132:3306"
        val db = "testDB"
        val user = "root"
        val pwd = "0"

        // Load the vote_record table into a DataFrame over JDBC
        val df = spark.read
            .format("jdbc")
            .option("driver", driver)
            .option("url", url)
            .option("dbtable", db + "." + "vote_record")
            .option("user", user)
            .option("password", pwd)
            .load()

        // Preview two columns of the loaded table
        df.select("user_id", "id").show(5)

        // Convert the DataFrame to an RDD and check how many partitions it has
        val dfrdd = df.rdd
        println(dfrdd.getNumPartitions)

        // mapPartitions is invoked once per partition; this demo simply
        // replaces every row with the string "changed"
        val ret = dfrdd.mapPartitions[String] { iter =>
            for (_ <- iter) yield "changed"
        }

        // Print the original rows and the transformed values
        dfrdd.foreach(row => println(row))
        ret.foreach(s => println(s))

        // Write the DataFrame to HDFS as JSON, overwriting any existing output
        df.write.format("json").mode("overwrite").save("hdfs://hadoop01:8020/0")
        spark.stop()

    }

}
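
A note on parallelism: with only url and dbtable set, the JDBC source reads the whole table through a single partition, which is what the getNumPartitions call above typically reports. The snippet below is a minimal sketch, not part of the original program, showing a partitioned read split on the numeric id column; the lowerBound/upperBound values are placeholders that would need to match the real id range in vote_record.

// Hypothetical partitioned JDBC read: Spark issues numPartitions range
// queries over partitionColumn instead of one big query
val partitionedDf = spark.read
    .format("jdbc")
    .option("driver", driver)
    .option("url", url)
    .option("dbtable", db + "." + "vote_record")
    .option("user", user)
    .option("password", pwd)
    .option("partitionColumn", "id")   // must be a numeric, date, or timestamp column
    .option("lowerBound", "1")         // placeholder bounds, adjust to the actual id range
    .option("upperBound", "1000000")
    .option("numPartitions", "4")
    .load()

println(partitionedDf.rdd.getNumPartitions)   // expected to print 4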

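Related to the mapPartitions demo, a quick way to see how many rows each partition actually received is mapPartitionsWithIndex. Again a small sketch assuming the same SparkSession and DataFrame as above, not code from the original post:

// Count rows per partition; handy for checking how the JDBC source was split
val partitionSizes = df.rdd
    .mapPartitionsWithIndex((idx, iter) => Iterator((idx, iter.size)))
    .collect()
partitionSizes.foreach { case (idx, n) => println(s"partition $idx holds $n rows") }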