将 Spark RDD 数据写入 MySQL

数据(每行 4 个字段,字段之间以制表符 \t 分隔,最后一个字段为省份)
www.xzdream.cn    1    2    江西
www.xzdream.cn    3    4    广东
www.xzdream.cn    1    2    西藏
www.xzdream.cn    3    4    浙江
将日志文件 put 到 HDFS(例如:hdfs dfs -put 2020-5-11.log /spark/logs)
package com.xzdream.spark


import java.sql.DriverManager


import org.apache.spark.{SparkConf, SparkContext}


/**
  * Log App: counts log lines per province and writes the province names to MySQL.
  *
  * Input lines are split on tab characters. A line with exactly 4 fields contributes
  * its last field (the province) as the key; any other line is counted under the
  * placeholder key "-". The per-key counts are sorted in descending order and each
  * key is inserted into the `py(name)` table.
  *
  * Usage: SparkContextApp <input-path>
  */
object SparkContextApp {

  /**
    * Entry point.
    *
    * @param args args(0) is the path of the log file or directory to read, e.g.
    *             file:///Users/hadoop/scala/spark_demo1/src/main/logs/2020-5-11.log
    */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.isEmpty) {
      System.err.println("Usage: SparkContextApp <input-path>")
      sys.exit(1)
    }
    val filePath = args(0)

    // NOTE(review): credentials are hard-coded in the JDBC URL; consider moving them
    // to configuration before using this outside a demo environment.
    val jdbcUrl = "jdbc:mysql://127.0.0.1:3306/hive_db?user=root&password=123456&useUnicode=true&characterEncoding=UTF-8"
    val insertSql = "insert into py(name) values(?)"

    val sparkConf = new SparkConf().setAppName("LogApp")
    // Values set explicitly on SparkConf take precedence over spark-submit flags, so an
    // unconditional setMaster("local[2]") would silently override `--master yarn`.
    // Fall back to local mode only when no master has been supplied.
    sparkConf.setIfMissing("spark.master", "local[2]")

    val sc = new SparkContext(sparkConf)
    try {
      val lines = sc.textFile(filePath)
      //    lines.take(3).foreach(println)

      /*
      // Read every line and sum the traffic per domain
      lines.map(x => {
        val splits = x.split("\t")
        val length = splits.length
        if(length == 4){
          val domain = splits(0)
          var traffic = 0L
          try{
            traffic = splits(1).toLong
          }catch {
            case e:Exception => 0L
          }

          (domain,traffic)
        }else{
          ("-",0L)
        }

      }).reduceByKey(_+_).collect.foreach(println)
       */

      // Visit count per province, highest first. The top-10 cut (`take(10)`) is left
      // disabled as in the original, so every province is written out.
      val res = lines.map { line =>
        val fields = line.split("\t")
        // Use a String placeholder (not the Char '-') so the key type stays String
        // instead of widening to Any.
        val province = if (fields.length == 4) fields(3) else "-"
        (province, 1)
      }.reduceByKey(_ + _).sortBy(_._2, ascending = false)
      //      .take(10)

      // Save to the database: one connection and one batch per partition.
      //    Class.forName("com.mysql.jdbc.Driver")
      res.foreachPartition { rows =>
        // Skip empty partitions so no connection is opened for nothing.
        if (rows.nonEmpty) {
          val conn = DriverManager.getConnection(jdbcUrl)
          try {
            // Commit the whole partition as a single transaction.
            conn.setAutoCommit(false)
            val pstmt = conn.prepareStatement(insertSql)
            try {
              rows.foreach { case (name, _) =>
                pstmt.setString(1, name)
                pstmt.addBatch()
              }
              pstmt.executeBatch()
              conn.commit()
            } finally {
              pstmt.close()
            }
          } finally {
            // Always release the connection, even when the batch fails.
            conn.close()
          }
        }
      }
    } finally {
      // Stop the context even when the job throws.
      sc.stop()
    }
  }
}

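<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.xzdream.spark</groupId>
  <artifactId>spark_demo1</artifactId>
  <version>1.0</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.11.8</scala.version>
  </properties>


  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>


  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>


  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>


    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.4.0</version>
    </dependency>


    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.27</version>
    </dependency>
  </dependencies>


  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <arg>-target:jvm-1.5</arg>
          </args>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>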
提交任务
./spark-submit --master yarn --class com.xzdream.spark.SparkContextApp /Users/hadoop/scala/spark_demo1/target/spark_demo1-1.0.jar hdfs:///spark/logs

Spark RDD 数据到 MySQL_第1张图片

你可能感兴趣的:(大数据入门实践,大数据环境准备,spark,rdd,mysql)