Two ways to consume data from Kafka into MySQL with Spark Streaming

Method 1: write per partition inside foreachPartition

package demo01

import java.sql.{Connection, DriverManager, PreparedStatement}
import java.text.SimpleDateFormat

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.SparkConf
object demo02 {

  val driver = "com.mysql.jdbc.Driver"
  val url = "jdbc:mysql://192.168.100.201/rng_comment"
  val username = "root"
  val password = "123456"
  /**
   * 1.5.1. Query users whose Weibo VIP rank is 5 and write them to the vip_rank table in MySQL
   * 1.5.2. Query records with more than 10 comment likes and write them to the like_status table in MySQL
   * 1.5.3. Count the comments for each of the four days 2018/10/20, 2018/10/21, 2018/10/22 and 2018/10/23, and write the counts to the count_conmment table in MySQL
   */
  def main(args: Array[String]): Unit = {

    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("hw3")
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    ssc.sparkContext.setLogLevel("WARN")

    // Set up the Kafka consumer parameters
    val kafkaParams: Map[String, Object] = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "node01:9092,node02:9092,node03:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "SparkKafka77777",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      // false disables auto-commit: offsets are committed by Spark to the checkpoint, or maintained manually (see the offset-commit sketch after this listing)
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    // Set the topic and create the direct stream
    val topics = Array("rng_comment")
    val recordDStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](ssc,
      LocationStrategies.PreferConsistent, // location strategy recommended by the source code: spreads Kafka partitions evenly across Spark executors
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)) // consumer strategy, likewise the recommended choice
    // Split each record's value on tabs; cache() because the three jobs below reuse this stream
    val resultDStream: DStream[Array[String]] = recordDStream.map(_.value()).map(_.split("\t")).cache()
    // 1. Write users whose Weibo VIP rank is 5 to the vip_rank table in MySQL
    resultDStream.filter(_(9) == "5").foreachRDD {
      rdd: RDD[Array[String]] => {
        rdd.foreachPartition {
          iter: Iterator[Array[String]] => {
            Class.forName(driver)
            val connection: Connection = DriverManager.getConnection(url, username, password)
            var sql = "insert into vip_rank values (?,?,?,?,?,?,?,?,?,?,?)"
            iter.foreach {
              line: Array[String] => {
                val statement: PreparedStatement = connection.prepareStatement(sql)
                statement.setInt(1, line(0).toInt);
                statement.setInt(2, line(1).toInt);
                statement.setString(3, line(2));
                statement.setString(4, line(3));
                statement.setString(5, line(4));
                statement.setString(6, line(5));
                statement.setString(7, line(6));
                statement.setString(8, line(7));
                statement.setString(9, line(8));
                statement.setInt(10, line(9).toInt);
                statement.setString(11, line(10));
                statement.executeUpdate()
                statement.close()
              }
            }
            connection.close()
          }
        }
      }
    }

    // 2. Write records with more than 10 comment likes to the like_status table in MySQL
    resultDStream.filter(_(5).toInt > 10).foreachRDD {
      rdd: RDD[Array[String]] => {
        rdd.foreachPartition {
          iter: Iterator[Array[String]] => {
            Class.forName(driver)
            val connection: Connection = DriverManager.getConnection(url, username, password)
            var sql = "insert into like_status values (?,?,?,?,?,?,?,?,?,?,?)"
            iter.foreach {
              line: Array[String] => {
                val statement: PreparedStatement = connection.prepareStatement(sql)
                statement.setInt(1, line(0).toInt);
                statement.setInt(2, line(1).toInt);
                statement.setString(3, line(2));
                statement.setString(4, line(3));
                statement.setString(5, line(4));
                statement.setString(6, line(5));
                statement.setString(7, line(6));
                statement.setString(8, line(7));
                statement.setString(9, line(8));
                statement.setInt(10, line(9).toInt);
                statement.setString(11, line(10));
                statement.executeUpdate()
                statement.close()
              }
            }
            connection.close()
          }
        }
      }
    }

    // dateFormat1 parses the raw "yyyy/MM/dd HH:mm" comment time; dateFormat2 re-formats it as a plain date
    val dateFormat1 = new SimpleDateFormat("yyyy/MM/dd HH:mm")
    val dateFormat2 = new SimpleDateFormat("yyyy/MM/dd")

    // 3. Count the comments for each of the four days 2018/10/20 through 2018/10/23 and write the counts to the count_conmment table in MySQL
    val value: DStream[Array[String]] = resultDStream.filter {
      date: Array[String] => {
        val day: String = dateFormat2.format(dateFormat1.parse(date(2)))
        day == "2018/10/20" || day == "2018/10/21" || day == "2018/10/22" || day == "2018/10/23"
      }
    }
    value.foreachRDD {
      rdd: RDD[Array[String]] => {
        // Group by day, count the comments per day, then write each (day, count) pair
        rdd.groupBy(x => dateFormat2.format(dateFormat1.parse(x(2)))).map(x => x._1 -> x._2.size).foreachPartition {
          iter: Iterator[(String, Int)] => {
            Class.forName(driver)
            val connection: Connection = DriverManager.getConnection(url, username, password)
            var sql = "insert into count_conmment values (?,?)"
            iter.foreach {
              line: (String, Int) => {
                val statement: PreparedStatement = connection.prepareStatement(sql)
                statement.setString(1, line._1);
                statement.setInt(2, line._2.toInt);
                statement.executeUpdate()
                statement.close()
              }
            }
            connection.close()
          }
        }
      }
    }
    ssc.start()
    ssc.awaitTermination()
    ssc.stop()
  }
}
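Since enable.auto.commit is set to false above, nothing in this listing actually commits offsets back to Kafka. A minimal sketch of a manual commit, assuming the same recordDStream as in the listing (HasOffsetRanges and CanCommitOffsets are part of spark-streaming-kafka-0-10):

    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges, OffsetRange}

    // Place inside main(), after the processing logic and before ssc.start()
    recordDStream.foreachRDD { rdd =>
      // The underlying KafkaRDD carries the offset ranges consumed in this batch
      val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      // Commit asynchronously once the batch has been processed
      recordDStream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }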



Method 2: write per record through a helper method

package Job

import java.sql.{Connection, DriverManager}

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object KafkaToMysql {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setMaster("local").setAppName("KafkaToMysql")
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(3))


    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node01:9092,node02:9092,node03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "SparkKafkaDemo",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )


    // 1. Read the data from Kafka; kafkaDatas holds the incoming records
    val kafkaDatas: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      // location strategy
      LocationStrategies.PreferConsistent,
      // consumer strategy
      ConsumerStrategies.Subscribe[String, String](Array("18BD12"), kafkaParams)
    )


    // Debugging aid: print the raw records
    /* kafkaDatas.foreachRDD(rdd => {
      rdd.foreach(println)
    }) */

    // 2. Keep only the users whose VIP rank is 5
    val fiveDatas: DStream[ConsumerRecord[String, String]] = kafkaDatas.filter(a => {

      //ConsumerRecord(
      // topic = 18BD12,
      // partition = 1,
      // offset = 1302,
      // CreateTime = 1587636354790,
      // checksum = 1060145747,
      // serialized key size = -1,
      // serialized value size = 169,
      // key = null,
      // value = 55	0	2018/10/20 21:08	赵哒哒哒人:轻敌了?抱歉。我没有S9了。RNG加油吧。或许……LPL加油?       	0	0		1838923260	赵哒哒哒人	0	1540040880)

      //line  =>  55	0	2018/10/20 21:08	赵哒哒哒人:轻敌了?抱歉。我没有S9了。RNG加油吧。或许……LPL加油?       	0	0		1838923260	赵哒哒哒人	0	1540040880
      val line: String = a.value()
      // Split out the VIP-rank field (index 9) and keep only rank-5 records
      line.split("\t")(9) == "5"
    })


    // Write every rank-5 record to MySQL
    // Note: intoMysql opens a new connection per record; see the per-partition sketch after this listing
    fiveDatas.foreachRDD(rdd => {
      rdd.foreach(message => {
        // message.value() is a rank-5 record; hand it to the insert method
        intoMysql(message.value())
      })
    })





    ssc.start()
    ssc.awaitTermination()

  }



  // Write a single record to MySQL
  def intoMysql(line: String): Unit = {
    // line = 55	0	2018/10/20 21:08	赵哒哒哒人:轻敌了?抱歉。我没有S9了。RNG加油吧。或许……LPL加油?       	0	0		1838923260	赵哒哒哒人	0	1540040880
    val datas = line.split("\t")

    // Obtain a database connection
    val connection = DriverManager.getConnection("jdbc:mysql://node02:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
    val sql = "insert into vip_rank (indexx,child_comment,comment_time,content,da_v,like_status,pic,user_id,user_name,vip_rank,stamp) " +
      "values (?,?,?,?,?,?,?,?,?,?,?)"

    val ps = connection.prepareStatement(sql)

    // Bind the 11 tab-separated fields as statement parameters
    ps.setString(1,datas(0))
    ps.setString(2,datas(1))
    ps.setString(3,datas(2))
    ps.setString(4,datas(3))
    ps.setString(5,datas(4))
    ps.setString(6,datas(5))
    ps.setString(7,datas(6))
    ps.setString(8,datas(7))
    ps.setString(9,datas(8))
    ps.setString(10,datas(9))
    ps.setString(11,datas(10))

    ps.executeUpdate()
    // Release JDBC resources; without this, every record leaks a connection
    ps.close()
    connection.close()
  }



}
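Method 2 opens and closes a MySQL connection for every single record, which quickly becomes the bottleneck. A minimal per-partition variant, sketched under the assumption of the same fiveDatas stream and vip_rank schema as above (an alternative, not what the original listing runs):

    // One connection and one PreparedStatement per partition, reused for all records in it
    fiveDatas.foreachRDD(rdd => {
      rdd.foreachPartition(iter => {
        val connection = DriverManager.getConnection(
          "jdbc:mysql://node02:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
        val sql = "insert into vip_rank (indexx,child_comment,comment_time,content,da_v," +
          "like_status,pic,user_id,user_name,vip_rank,stamp) values (?,?,?,?,?,?,?,?,?,?,?)"
        val ps = connection.prepareStatement(sql)
        iter.foreach(message => {
          val datas = message.value().split("\t")
          // Bind the 11 fields positionally, as intoMysql does
          for (i <- 0 until 11) ps.setString(i + 1, datas(i))
          ps.executeUpdate()
        })
        ps.close()
        connection.close()
      })
    })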
