[Spark Application] -- Spark SQL: reading a MySQL data source and writing the results back to MySQL

I. Create the test tables t_user2, user_t, and t_result

 

1. The t_user2 table structure is as follows:

 

CREATE TABLE `t_user2` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'user name',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

 

 

2. The user_t table structure is as follows:

 

CREATE TABLE `user_t` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'name',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `address` varchar(64) DEFAULT NULL COMMENT 'address',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

3. The t_result table structure is as follows (it matches user_t column for column, so the query result can be appended to it directly):

 

 

CREATE TABLE `t_result` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'name',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `address` varchar(64) DEFAULT NULL COMMENT 'address',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

 

4. Insert the test data:

 

INSERT INTO `t_user2` VALUES (12, 'cassie', '1234562', 25);
INSERT INTO `t_user2` VALUES (11, 'zhangs', '123456', 25);
INSERT INTO `t_user2` VALUES (23, 'zhangs', '2321312', 34);
INSERT INTO `t_user2` VALUES (22, 'tom', 'sadfdsa', 23);


INSERT INTO `user_t` VALUES (1, 'zhangs', '123456', NULL, 25);
INSERT INTO `user_t` VALUES (2, 'zhangs', '123456', NULL, 252);

 

 

II. Create a Maven project and import the MySQL driver and the Spark jars

 

mysql-connector-java-5.1.24.jar
spark-assembly-1.6.2-hadoop2.6.0.jar
spark-examples-1.6.2-hadoop2.6.0.jar

 

Note: the contents of pom.xml are as follows. The pom does not declare Spark itself; the Spark assembly and examples jars listed above are added to the project classpath manually.

 



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <artifactId>scala-test</artifactId>
    <groupId>com.scala.mytest</groupId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.7</java.version>
        <cdh.version>cdh5.8.0</cdh.version>

        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.binary.version>2.10</scala.binary.version>
        <scala.version>2.10.4</scala.version>
    </properties>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- MySQL driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.24</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.7</version>
        </dependency>
        <!-- test -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>

        <!-- kafka -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.9.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-streams</artifactId>
            <version>0.10.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.9.0.1</version>
            <exclusions>
                <exclusion>
                    <groupId>com.sun.jdmk</groupId>
                    <artifactId>jmxtools</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.sun.jmx</groupId>
                    <artifactId>jmxri</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.jms</groupId>
                    <artifactId>jms</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- hadoop -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0</version>
        </dependency>

        <!-- scala -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.4.4</version>
        </dependency>
        <!-- logging -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.20</version>
        </dependency>

        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.7.0</version>
        </dependency>
        <dependency>
            <groupId>commons-collections</groupId>
            <artifactId>commons-collections</artifactId>
            <version>3.1</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.3</version>
        </dependency>
        <dependency>
            <groupId>net.sf.ezmorph</groupId>
            <artifactId>ezmorph</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.4</version>
            <classifier>jdk15</classifier>
        </dependency>
    </dependencies>
</project>


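If you would rather let Maven resolve Spark instead of importing the assembly jar by hand, dependencies along these lines should be equivalent. This is a sketch, assuming the Scala 2.10 build of Spark 1.6.2 to match the jars listed above:

<!-- Sketch: resolve Spark through Maven instead of the manually imported jars -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.10</artifactId>
    <version>1.6.2</version>
    <!-- provided: the cluster supplies Spark at runtime -->
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.6.2</version>
    <scope>provided</scope>
</dependency>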
 

 

III. Create the Scala class for the job:

SparkSqlMysqlDatasource.scala
 
package sql

import java.util.Properties

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * To submit the job in production:
  * spark-submit --class sql.SparkSqlMysqlDatasource --master yarn-cluster --executor-memory 2G --num-executors 2 --driver-memory 1g --executor-cores 1  /data1/e_heyutao/sparktest/sparkEnn.jar
  *
  */
object SparkSqlMysqlDatasource {
  // Database connection settings
  lazy val url = "jdbc:mysql://your_ip:3306/my_test"
  lazy val username = "root"
  lazy val password = "secret_password"

  def main(args: Array[String]) {
    // For a local test run, use this master instead:
    // val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("local[2]").set("spark.app.id", "sql")
    val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("yarn-cluster").set("spark.app.id", "sqlTest")
    // Serialization settings (Kryo)
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sparkConf.set("spark.kryoserializer.buffer", "256m")
    sparkConf.set("spark.kryoserializer.buffer.max", "2046m")
    sparkConf.set("spark.akka.frameSize", "500")
    sparkConf.set("spark.rpc.askTimeout", "30")
    // Create the SparkContext
    val sc = new SparkContext(sparkConf)
    // Create the SQLContext
    val sqlContext = new SQLContext(sc)

    // Import implicits so Spark SQL built-in functions and DataFrame syntax are available
    import sqlContext.implicits._

    // Build the JDBC connection string
    val uri = url + "?user=" + username + "&password=" + password + "&useUnicode=true&characterEncoding=UTF-8"
    val prop = new Properties()
    // Note: when running on a cluster this property is required; without it the job fails with a "MySQL driver not found" error
    prop.put("driver", "com.mysql.jdbc.Driver")
    // Load the MySQL tables as DataFrames
    val df_test1: DataFrame = sqlContext.read.jdbc(uri, "user_t", prop)
    val df_test2: DataFrame = sqlContext.read.jdbc(uri, "t_user2", prop)

    // Pull the needed columns out of the DataFrame
    df_test2.select("id", "name", "age").collect()
      .foreach(row => {
        println("id  " + row(0) + " ,name  " + row(1) + ", age  " + row(2))
      })
    // Register df_test1 as a temporary table
    df_test1.registerTempTable("temp_table")

    val total_sql = "select * from temp_table "
    val total_df: DataFrame = sqlContext.sql(total_sql)
    
    // Write the result back to MySQL
    val properties = new Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "secret_password")
    total_df.write.mode("append").jdbc("jdbc:mysql://your_ip:3306/my_test?useUnicode=true&characterEncoding=UTF-8", "t_result", properties)

    /**
      * Note: the DataFrameWriter source shows exactly which save modes are accepted:
    def mode(saveMode: String): DataFrameWriter = {
          this.mode = saveMode.toLowerCase match {
          case "overwrite" => SaveMode.Overwrite
          case "append" => SaveMode.Append
          case "ignore" => SaveMode.Ignore
          case "error" | "default" => SaveMode.ErrorIfExists
          case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " +
            "Accepted modes are 'overwrite', 'append', 'ignore', 'error'.")
    }
      */

    // Group by name and compute the average age
    total_df.groupBy("name").avg("age").collect().foreach(x => {
      println("name " + x(0))
      println("age " + x(1))
    })

  }
}

Result: the first four lines come from the select on t_user2; the last two come from the groupBy on user_t, where the average age for zhangs is (25 + 252) / 2 = 138.5.

id  12 ,name  cassie, age  25
id  11 ,name  zhangs, age  25
id  23 ,name  zhangs, age  34
id  22 ,name  tom, age  23
name zhangs
age    138.5
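A side note on the read path: sqlContext.read.jdbc(uri, table, prop) pulls each table through a single JDBC connection, which is fine for these tiny test tables. For larger tables, DataFrameReader.jdbc also has an overload that splits the read across partitions by a numeric column. A minimal sketch, reusing the uri and prop values from the code above (the bounds and partition count here are illustrative assumptions, not values from this article):

    // Sketch: partitioned JDBC read for larger tables.
    // Spark issues one query per partition, each covering a slice of the id range.
    val df_partitioned: DataFrame = sqlContext.read.jdbc(
      uri,        // same JDBC URL with credentials as above
      "t_user2",  // table to read
      "id",       // numeric column used to split the read
      0L,         // lower bound of id (assumed)
      100L,       // upper bound of id (assumed)
      4,          // number of partitions / parallel queries
      prop)       // connection properties, including the driver class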

 

IV. Check the t_result table: the rows just read from MySQL have been inserted into it.
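You can also verify this from Spark itself by reading the table back. A small sketch, using the same uri and prop values as the job above:

    // Sketch: read t_result back and print it to confirm the append succeeded.
    val df_check: DataFrame = sqlContext.read.jdbc(uri, "t_result", prop)
    println("t_result row count: " + df_check.count())
    df_check.show()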

 


 

 

Everything above has been tested and works; adapt it to your needs. If you have any questions, please leave a comment. Thanks!

 

 
