Spark: Connecting to MySQL

Original post: http://www.cnblogs.com/xuejianbest/p/10285010.html

Load the JDBC driver jar when launching spark-shell or spark-submit:

--jars /mysql-connector-java-*-bin.jar
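
For example (the jar path and version below are placeholders; point --jars at the connector jar on your machine):

spark-shell --jars /path/to/mysql-connector-java-5.1.47-bin.jar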

Reading from and writing to the database with Spark's JDBC support:

// Read from the database
import java.util.Properties
val prop = {
    val p = new Properties()
    p.put("driver", "com.mysql.jdbc.Driver")
    p.put("url", "jdbc:mysql://10.18.2.3:3306/dbname?" +
      "useUnicode=true&characterEncoding=utf8&rewriteBatchedStatements=true")
    p.put("user", "username")
    p.put("password", "****")
    p
}
// dbtable may be a table name...
prop.put("dbtable", "tablename")
// ...or a subquery with an alias (this second put overwrites the first; use one or the other)
prop.put("dbtable", "(select * from tablename) tablename_")
// JavaConversions lets options() accept a java.util.Properties as a Scala Map
import scala.collection.JavaConversions._
val df = spark.read.format("jdbc").options(prop).load()
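
Equivalently, DataFrameReader has a jdbc shorthand that takes the Properties object directly; a minimal sketch using the prop built above:

val df = spark.read.jdbc(prop.getProperty("url"), "tablename", prop)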

// Write to the database
import org.apache.spark.sql.SaveMode
df.write
    .mode(SaveMode.Append)
    .jdbc(prop.getProperty("url"), "tablename", prop)

rewriteBatchedStatements=true (default: false) takes effect when writing to the database. Spark turns each row into a single INSERT statement; with this option enabled, the MySQL driver rewrites such a batch into one multi-row INSERT, which is equivalent to batching by hand with JDBC's .addBatch().
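
For comparison, here is the same batching done by hand through the JDBC API; a minimal sketch reusing the prop defined above, with placeholder table and column names. With rewriteBatchedStatements=true in the URL, the Connector/J driver rewrites the queued statements into one multi-row INSERT:

import java.sql.DriverManager

val conn = DriverManager.getConnection(
  prop.getProperty("url"), prop.getProperty("user"), prop.getProperty("password"))
val ps = conn.prepareStatement("insert into tablename (col) values (?)")
for (v <- Seq("a", "b", "c")) {
  ps.setString(1, v)
  ps.addBatch() // queue one row
}
// sent roughly as: insert into tablename (col) values ('a'),('b'),('c')
ps.executeBatch()
ps.close()
conn.close()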


To operate on the database directly through the JDBC API (instead of Spark's interface):

import java.sql.DriverManager
import java.util.Properties

object JDBCUtil extends Serializable {
  def apply(prop: Properties) = {
    new JDBCUtil(prop)
  }
}

class JDBCUtil private (prop: Properties) extends Serializable {
  // load the JDBC driver class
  Class.forName(prop.getProperty("driver"))

  val url = prop.getProperty("url")
  private[this] val user = prop.getProperty("user")
  private[this] val password = prop.getProperty("password")

  def getConnect() = {
    DriverManager.getConnection(url, user, password)
  }

  // return a copy so callers cannot mutate the shared config
  def getJDBCProp() = {
    val prop_copy = new Properties()
    prop_copy.putAll(prop)
    prop_copy
  }
}

// import org.apache.spark.SparkFiles
object DBUtil extends Serializable {
  private[this] val jdbc_conf = {
    val p = new Properties()
    p.put("driver", "com.mysql.jdbc.Driver")
    p.put("url", "jdbc:mysql://10.18.2.3:3306/dbname?" +
      "useUnicode=true&characterEncoding=utf8&rewriteBatchedStatements=true")
    p.put("user", "username")
    p.put("password", "****")
    p
  }
  //private[this] val jdbc_file = SparkFiles.get("route_analysis.properties")
  private[this] val jdbcUtil = JDBCUtil(jdbc_conf)
  val url = jdbcUtil.url
  def conn = jdbcUtil.getConnect
  def prop = jdbcUtil.getJDBCProp
  import scala.collection.JavaConversions._
  def options: scala.collection.mutable.Map[String, String] = prop
}
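
With DBUtil in place, the Spark read shown earlier can reuse this shared configuration; a sketch with a placeholder table name:

val df = spark.read
  .format("jdbc")
  .options(DBUtil.options + ("dbtable" -> "tablename"))
  .load()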

// Usage:
import org.apache.spark.sql.DataFrame

def update(df: DataFrame) {
  val table = "tablename"
  df.foreachPartition { part =>
    // one connection per partition, opened on the executor
    val conn = DBUtil.conn
    part.foreach { r =>
      val col = r.getAs[String]("col")
      // look up the existing record
      val sql = s"""select id, col from ${table} where col = '${col}' """
      val stmt_query = conn.createStatement()
      val res = stmt_query.executeQuery(sql)
      val stmt_new = conn.createStatement()
      // if the result set is empty, insert a new record
      if (!res.first) {
        val sql_insert = s"""insert into ${table} (col) values ('${col}') """
        stmt_new.execute(sql_insert)
      } else {
        // a record exists: update fields as needed (e.g. an ex_station column)
        val id = res.getLong("id")
        val sql_update = s"""update ${table} set col = '${col}' where id = ${id} """
        stmt_new.executeUpdate(sql_update)
      }
      res.close()
      stmt_query.close()
      stmt_new.close()
    }
    conn.close()
  }
  println(s"update of table ${table} done...")
}
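
A hypothetical end-to-end call, reading the table through Spark and then running the row-by-row upsert above:

val df = spark.read.jdbc(DBUtil.url, "tablename", DBUtil.prop)
update(df)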
