Spark Streaming: consuming Kafka and writing to MySQL (code example)
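The full Maven pom.xml comes first: it pins Scala 2.11.8 and Spark 2.2.0 and pulls in the Kafka client, the Spark Streaming Kafka connector, and the MySQL JDBC driver.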



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com._51doit</groupId>
    <artifactId>big</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.version>2.11.8</scala.version>
        <spark.version>2.2.0</spark.version>
        <hadoop.version>2.8.1</hadoop.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.2.0</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.48</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.2.0</version>
        </dependency>

        <!-- Kafka 0.8 direct-stream connector, matching the KafkaUtils.createDirectStream
             + StringDecoder API used in the code below; its version must match the Spark version -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <version>2.2.0</version>
        </dependency>

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
            </plugin>
            <!-- Scala compiler plugin -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

 
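Before running the job, a few test lines can be pushed into the first topic using the kafka-clients dependency declared above. The broker address doit01:9092 and the topic name come from the streaming code below; the object name and sample messages in this sketch are only illustrative.

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

// Minimal test producer (illustrative): sends a few space-separated lines to the "first" topic
object TestProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "doit01:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    Seq("hello spark", "hello kafka", "hello mysql").foreach { line =>
      producer.send(new ProducerRecord[String, String]("first", line))
    }
    producer.close()
  }
}

The streaming job itself: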

import java.sql.{Connection, DriverManager, PreparedStatement}

import kafka.serializer.StringDecoder
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * @author HANGGE 2020/3/10 21:58
  * Consumes data from Kafka, maintains running word counts, and writes them to MySQL.
  */
object Demo1 {
  // Silence Spark's verbose logging and set the HDFS user for the checkpoint directory
  Logger.getLogger("org").setLevel(Level.ERROR)
  System.setProperty("HADOOP_USER_NAME", "root")

  def main(args: Array[String]): Unit = {
    // State update function: adds this batch's counts to the running total for each word
    val updateFunc = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.foldLeft(0)(_ + _)
      val previousCount = state.getOrElse(0)
      Some(currentCount + previousCount)
    }

    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("demo")
    val ss = new StreamingContext(conf, Seconds(10))
    // updateStateByKey needs a checkpoint directory to persist state between batches
    ss.checkpoint("hdfs://doit01:9000/kafka")

    val topic = Set("first")
    // 0.8 direct-stream API: the broker list is passed as metadata.broker.list
    val kafkaParams = Map[String, String]("metadata.broker.list" -> "doit01:9092")
    val messages: InputDStream[(String, String)] = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ss, kafkaParams, topic)
    print("---------:" + messages)
    // Each Kafka record arrives as a (key, value) pair; only the message body is needed
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    //    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
    //    wordCounts.print()

    val stateDstream = words.map(x => (x, 1)).updateStateByKey[Int](updateFunc)
   // stateDstream.print()
    stateDstream.foreachRDD(rdd => {
      // Writes one partition's (word, count) records to MySQL over a single JDBC connection
      def func(records: Iterator[(String, Int)]): Unit = {
        var conn: Connection = null
        var stmt: PreparedStatement = null
        try {
          Class.forName("com.mysql.jdbc.Driver")
          val url = "jdbc:mysql://localhost:3306/doit14"
          val user = "root"
          val password = "123456" // replace with your own MySQL password
          conn = DriverManager.getConnection(url, user, password)
          // Prepare the statement once and reuse it for every record in the partition
          val sql = "insert into province(province) values (?)"
          stmt = conn.prepareStatement(sql)
          records.foreach(p => {
            stmt.setString(1, p._1)
            stmt.executeUpdate()
          })
        } catch {
          case e: Exception => e.printStackTrace()
        } finally {
          if (stmt != null) {
            stmt.close()
          }
          if (conn != null) {
            conn.close()
          }
        }
      }
      val repartitionedRDD = rdd.repartition(3)
      repartitionedRDD.foreachPartition(func)
    })
    ss.start()
    ss.awaitTermination()
  }
}
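The insert statement above assumes a table named province with a single string column already exists in the doit14 database. A one-off helper along these lines can create it; the VARCHAR length is an arbitrary choice, and the connection details are the same ones used in the job.

import java.sql.DriverManager

// One-off helper (illustrative): creates the table the streaming job writes to
object CreateProvinceTable {
  def main(args: Array[String]): Unit = {
    val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/doit14", "root", "123456")
    try {
      val stmt = conn.createStatement()
      stmt.executeUpdate("create table if not exists province (province varchar(255))")
      stmt.close()
    } finally {
      conn.close()
    }
  }
}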
