Reading and Writing MySQL Data with Spark

1. Add the Maven dependencies


<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.1.1</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>
</dependencies>
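
Note: mysql-connector-java 5.1.x matches MySQL 5.x servers. If your server is MySQL 8.x, you would typically swap in the Connector/J 8 artifact instead (the version below is illustrative; pick the one matching your server):

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.16</version>
</dependency>

With Connector/J 8.x the driver class is com.mysql.cj.jdbc.Driver rather than com.mysql.jdbc.Driver, and the JDBC URL usually needs an explicit time zone, e.g. jdbc:mysql://hadoop102:3306/gmall?serverTimezone=UTC.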

2. Reading data from MySQL

import java.sql.DriverManager

import org.apache.spark.rdd.JdbcRDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * @author fczheng 
  *
  */
object Spark31_RDD_MySQL {
    def main(args: Array[String]): Unit = {
        
        // TODO 1. Create the Spark configuration object
        val conf: SparkConf = new SparkConf().setAppName("sparkmysql").setMaster("local[*]")
        
        
        // TODO 2. Create the SparkContext (the connection to the Spark environment)
        val sc: SparkContext = new SparkContext(conf)
    
        // TODO 3. Read data from MySQL
        val driver = "com.mysql.jdbc.Driver"
        val url = "jdbc:mysql://hadoop102:3306/gmall"
        val userName = "root"
        val passWd = "000000"
        
        // JdbcRDD requires exactly two ? placeholders; they receive the partition bounds
        val sql = "select * from base_category2 where id >= ? and id <= ?"
    
        // Arguments: SparkContext, connection factory, SQL, lower bound,
        // upper bound, number of partitions, row mapper
        val jdbc: JdbcRDD[Unit] = new JdbcRDD(
            sc,
            () => {
                Class.forName(driver)
                DriverManager.getConnection(url, userName, passWd)
            },
            sql,
            1,      // lower bound, bound to the first ?
            100,    // upper bound, bound to the second ?
            3,      // number of partitions
            (rs) => {
                // Printing inside the row mapper runs on the executors,
                // so the element type of the resulting RDD is Unit
                println(rs.getInt(1) + "," + rs.getString(2) + "," + rs.getInt(3))
            }
        )
        jdbc.collect() // action that triggers the read (and the printing above)
        
        // TODO 4. Stop the SparkContext
        sc.stop()
        
    }
}
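
The mapper above returns Unit and prints on the executors, which is fine in local mode but awkward on a cluster. A minimal sketch of the more reusable pattern, assuming the same table and bounds: map each row to a tuple and print on the driver after collect() (rowsRDD is an illustrative name):

        val rowsRDD: JdbcRDD[(Int, String, Int)] = new JdbcRDD(
            sc,
            () => {
                Class.forName(driver)
                DriverManager.getConnection(url, userName, passWd)
            },
            sql,
            1,
            100,
            3,
            rs => (rs.getInt(1), rs.getString(2), rs.getInt(3)) // return data instead of printing
        )
        rowsRDD.collect().foreach(println) // rows are printed on the driver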

3. Writing data to MySQL

import java.sql.{DriverManager, PreparedStatement}

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * @author fczheng 
  *
  */
object Spark32_RDD_MySQL {
    def main(args: Array[String]): Unit = {
        
        // TODO 1. Create the Spark configuration object
        val conf: SparkConf = new SparkConf().setAppName("sparkmysql").setMaster("local[*]")
        
        
        // TODO 2. Create the SparkContext (the connection to the Spark environment)
        val sc: SparkContext = new SparkContext(conf)
    
        // TODO 3. Write data to MySQL
        val driver = "com.mysql.jdbc.Driver"
        val url = "jdbc:mysql://hadoop102:3306/test"
        val userName = "root"
        val passWd = "000000"
        
        
        val dataRDD: RDD[(Int, String, Int)] = sc.makeRDD(List((7,"zhangsan1",10),(8,"lisi1",20),(9,"wangwu1",30)))
        
        /*
        // Version 1: works, but is inefficient because a new connection is created for every record.
        dataRDD.foreach{
            case (id,name,age) =>{
                Class.forName(driver)
                val conn = DriverManager.getConnection(url,userName,passWd)
                val statement: PreparedStatement = conn.prepareStatement("insert into user(id,name,age) values(?,?,?)")
                statement.setInt(1,id)
                statement.setString(2,name)
                statement.setInt(3,age)
                statement.executeUpdate()
                statement.close()
                conn.close()
            }
        }*/
    
        /*
        // Version 2: creating a single connection on the driver does not work,
        // because conn and statement are not serializable and cannot be shipped
        // to the executors (Spark throws "Task not serializable").
        Class.forName(driver)
        val conn = DriverManager.getConnection(url,userName,passWd)
        val statement: PreparedStatement = conn.prepareStatement("insert into user(id,name,age) values(?,?,?)")
       
        dataRDD.foreach{
            case(id,name,age) =>{
                statement.setInt(1,id)
                statement.setString(2,name)
                statement.setInt(3,age)
                statement.executeUpdate()
                
            }
        }
    
        statement.close()
        conn.close()
        */
    
        
        
        // Version 3: foreachPartition creates one connection per partition,
        // amortizing the connection cost without serializing any JDBC objects.
        dataRDD.foreachPartition(datas => {
            Class.forName(driver)
            val conn = DriverManager.getConnection(url,userName,passWd)
            val statement: PreparedStatement = conn.prepareStatement("insert into user(id,name,age) values(?,?,?)")
            
            datas.foreach{
                case(id,name,age) =>{
                    statement.setInt(1,id)
                    statement.setString(2,name)
                    statement.setInt(3,age)
                    statement.executeUpdate()
        
                }
            }
    
            statement.close()
            conn.close()
        })
        
        // TODO 4. Stop the SparkContext
        sc.stop()
        
    }
}
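
A further refinement of the foreachPartition version is to batch the inserts so each partition issues one round trip to the database instead of one executeUpdate per record. A minimal sketch using standard JDBC batching (same table, columns, and connection settings as above):

        dataRDD.foreachPartition(datas => {
            Class.forName(driver)
            val conn = DriverManager.getConnection(url, userName, passWd)
            conn.setAutoCommit(false) // commit once per partition instead of per row
            val statement: PreparedStatement = conn.prepareStatement("insert into user(id,name,age) values(?,?,?)")

            datas.foreach { case (id, name, age) =>
                statement.setInt(1, id)
                statement.setString(2, name)
                statement.setInt(3, age)
                statement.addBatch() // queue the insert instead of executing it immediately
            }

            statement.executeBatch() // send all queued inserts together
            conn.commit()
            statement.close()
            conn.close()
        })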
