Method 1
package demo01
import java.sql.{Connection, DriverManager, PreparedStatement}
import java.text.SimpleDateFormat
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
object demo02 {
val driver = "com.mysql.jdbc.Driver"
val url = "jdbc:mysql://192.168.100.201/rng_comment"
val username = "root"
val password = "123456"
/**
 * 1.5.1  Find users whose Weibo VIP rank is 5 and write them to the vip_rank table in MySQL.
 * 1.5.2  Find records with more than 10 likes and write them to the like_status table in MySQL.
 * 1.5.3  Count the comments on each of the four days 2018/10/20, 2018/10/21, 2018/10/22
 *        and 2018/10/23, and write the results to the count_conmment table in MySQL.
 */
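// A minimal sketch of the three target tables. The 11 column names are taken from
// Method 2's insert statement below; the types are inferred from the setInt/setString
// calls, and the count_conmment column names are illustrative guesses:
//   CREATE TABLE vip_rank (
//     indexx INT, child_comment INT, comment_time VARCHAR(64), content TEXT,
//     da_v VARCHAR(32), like_status VARCHAR(32), pic VARCHAR(255),
//     user_id VARCHAR(64), user_name VARCHAR(64), vip_rank INT, stamp VARCHAR(32)
//   );
//   CREATE TABLE like_status (...);   -- same 11 columns as vip_rank
//   CREATE TABLE count_conmment (comment_date VARCHAR(16), comment_count INT);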
def main(args: Array[String]): Unit = {
val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("hw3")
val ssc = new StreamingContext(sparkConf, Seconds(3))
ssc.sparkContext.setLogLevel("WARN")
// Set up the Kafka consumer parameters
val kafkaParams: Map[String, Object] = Map[String, Object](
ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "node01:9092,node02:9092,node03:9092",
ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
ConsumerConfig.GROUP_ID_CONFIG -> "SparkKafka77777",
ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
// false disables Kafka auto-commit; offsets are then committed by Spark to the checkpoint
// or maintained manually by the programmer (see the offset-commit sketch after this listing)
ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
)
// Set the topic to subscribe to
val topics = Array("rng_comment")
val recordDStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](ssc,
  LocationStrategies.PreferConsistent, // location strategy; recommended by the source code, spreads Kafka partitions evenly across Spark executors
  ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)) // consumer strategy; recommended by the source code
val resultDStream: DStream[Array[String]] = recordDStream.map(_.value()).map(_.split("\t")).cache()
// 1. Find users whose VIP rank is 5 and write them to the vip_rank table in MySQL
resultDStream.filter(_(9) == "5").foreachRDD { rdd: RDD[Array[String]] =>
  rdd.foreachPartition { iter: Iterator[Array[String]] =>
    Class.forName(driver)
    val connection: Connection = DriverManager.getConnection(url, username, password)
    // Prepare the statement once per partition and reuse it for every row
    val sql = "insert into vip_rank values (?,?,?,?,?,?,?,?,?,?,?)"
    val statement: PreparedStatement = connection.prepareStatement(sql)
    iter.foreach { line: Array[String] =>
      statement.setInt(1, line(0).toInt)
      statement.setInt(2, line(1).toInt)
      statement.setString(3, line(2))
      statement.setString(4, line(3))
      statement.setString(5, line(4))
      statement.setString(6, line(5))
      statement.setString(7, line(6))
      statement.setString(8, line(7))
      statement.setString(9, line(8))
      statement.setInt(10, line(9).toInt)
      statement.setString(11, line(10))
      statement.executeUpdate()
    }
    statement.close()
    connection.close()
  }
}
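// Note: a plain INSERT writes duplicate rows if a batch is replayed after a failure;
// with a primary key on the table, MySQL's INSERT ... ON DUPLICATE KEY UPDATE
// would make these writes idempotent.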
// 2. Find records with more than 10 likes and write them to the like_status table in MySQL
resultDStream.filter(_(5).toInt > 10).foreachRDD { rdd: RDD[Array[String]] =>
  rdd.foreachPartition { iter: Iterator[Array[String]] =>
    Class.forName(driver)
    val connection: Connection = DriverManager.getConnection(url, username, password)
    // Same pattern as above: one connection and one prepared statement per partition
    val sql = "insert into like_status values (?,?,?,?,?,?,?,?,?,?,?)"
    val statement: PreparedStatement = connection.prepareStatement(sql)
    iter.foreach { line: Array[String] =>
      statement.setInt(1, line(0).toInt)
      statement.setInt(2, line(1).toInt)
      statement.setString(3, line(2))
      statement.setString(4, line(3))
      statement.setString(5, line(4))
      statement.setString(6, line(5))
      statement.setString(7, line(6))
      statement.setString(8, line(7))
      statement.setString(9, line(8))
      statement.setInt(10, line(9).toInt)
      statement.setString(11, line(10))
      statement.executeUpdate()
    }
    statement.close()
    connection.close()
  }
}
val dateFormat1 = new SimpleDateFormat("yyyy/MM/dd HH:mm")
val dateFormat2 = new SimpleDateFormat("yyyy/MM/dd")
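// Note: SimpleDateFormat is not thread-safe; this works here because each Spark task
// deserializes its own copy of the formatters from the closure.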
// 3. Count the comments on each of 2018/10/20, 2018/10/21, 2018/10/22 and 2018/10/23,
//    and write the counts to the count_conmment table in MySQL
val targetDays = Set("2018/10/20", "2018/10/21", "2018/10/22", "2018/10/23")
val value: DStream[Array[String]] = resultDStream.filter { fields: Array[String] =>
  // Normalize "yyyy/MM/dd HH:mm" timestamps to "yyyy/MM/dd" and keep the four target days
  targetDays.contains(dateFormat2.format(dateFormat1.parse(fields(2))))
}
value.foreachRDD { rdd: RDD[Array[String]] =>
  // Group by day, count the comments per day, then write each (day, count) pair
  rdd.groupBy(x => dateFormat2.format(dateFormat1.parse(x(2)))).map(x => x._1 -> x._2.size).foreachPartition {
    iter: Iterator[(String, Int)] =>
      Class.forName(driver)
      val connection: Connection = DriverManager.getConnection(url, username, password)
      val sql = "insert into count_conmment values (?,?)"
      val statement: PreparedStatement = connection.prepareStatement(sql)
      iter.foreach { line: (String, Int) =>
        statement.setString(1, line._1)
        statement.setInt(2, line._2)
        statement.executeUpdate()
      }
      statement.close()
      connection.close()
  }
}
ssc.start()
ssc.awaitTermination()
ssc.stop()
}
}
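Since enable.auto.commit is false above, offsets must be committed explicitly unless you rely on checkpointing. Below is a minimal sketch of manual offset commits, assuming the CanCommitOffsets / HasOffsetRanges API of spark-streaming-kafka-0-10; it would sit alongside the processing in main:

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

recordDStream.foreachRDD { rdd =>
  // Capture this batch's offset ranges, then commit them back to Kafka asynchronously
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  recordDStream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}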
Method 2
package Job
import java.sql.{Connection, DriverManager}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
object KafkaToMysql {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local").setAppName("KafkaToMysql")
val sc = new SparkContext(conf)
sc.setLogLevel("WARN")
val ssc = new StreamingContext(sc, Seconds(3))
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "node01:9092,node02:9092,node03:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "SparkKafkaDemo",
"auto.offset.reset" -> "latest",
"enable.auto.commit" -> (false: java.lang.Boolean)
)
// 1. Read the data from Kafka
// KafkaDatas holds the records read from the topic
val KafkaDatas: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
  // the StreamingContext
  ssc,
  // location strategy
  LocationStrategies.PreferConsistent,
  // consumer strategy
  ConsumerStrategies.Subscribe[String, String](Array("18BD12"), kafkaParams)
)
// Debug aid: print every raw record
/* KafkaDatas.foreachRDD(rdd => {
  rdd.foreach(println)
}) */
// 2. Filter out users whose VIP rank is 5
val fiveDatas: DStream[ConsumerRecord[String, String]] = KafkaDatas.filter(a => {
  // Each element is a ConsumerRecord, e.g.:
  // ConsumerRecord(
  //   topic = 18BD12,
  //   partition = 1,
  //   offset = 1302,
  //   CreateTime = 1587636354790,
  //   checksum = 1060145747,
  //   serialized key size = -1,
  //   serialized value size = 169,
  //   key = null,
  //   value = 55 0 2018/10/20 21:08 赵哒哒哒人:轻敌了?抱歉。我没有S9了。RNG加油吧。或许……LPL加油? 0 0 1838923260 赵哒哒哒人 0 1540040880)
  // a.value() is the tab-separated line shown above
  val line: String = a.value()
  // Take the VIP-rank field (index 9) and keep only rank-5 records
  line.split("\t")(9) == "5"
})
// Write the rank-5 records to MySQL
fiveDatas.foreachRDD(rdd => {
  rdd.foreach(message => {
    // message.value() is one rank-5 record;
    // pass it to the insert method
    IntoMysql(message.value())
  })
})
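// Note: IntoMysql opens a new connection for every record, which is expensive;
// see the foreachPartition sketch after this listing for a per-partition variant.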
ssc.start()
ssc.awaitTermination()
}
// Write one record to MySQL
def IntoMysql(line: String): Unit = {
  // e.g. 55 0 2018/10/20 21:08 赵哒哒哒人:轻敌了?抱歉。我没有S9了。RNG加油吧。或许……LPL加油? 0 0 1838923260 赵哒哒哒人 0 1540040880
val datas = line.split("\t")
// Obtain a JDBC connection
val connection = DriverManager.getConnection("jdbc:mysql://node02:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
val sql = "insert into vip_rank (indexx,child_comment,comment_time,content,da_v,like_status,pic,user_id,user_name,vip_rank,stamp) " +
  "values (?,?,?,?,?,?,?,?,?,?,?)"
val ps = connection.prepareStatement(sql)
// Bind the 11 column values
ps.setString(1,datas(0))
ps.setString(2,datas(1))
ps.setString(3,datas(2))
ps.setString(4,datas(3))
ps.setString(5,datas(4))
ps.setString(6,datas(5))
ps.setString(7,datas(6))
ps.setString(8,datas(7))
ps.setString(9,datas(8))
ps.setString(10,datas(9))
ps.setString(11,datas(10))
ps.executeUpdate()
// Close the statement and connection so each call doesn't leak resources
ps.close()
connection.close()
}
}
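Method 2 opens one JDBC connection per record. A minimal sketch of a per-partition variant, reusing Method 2's connection string and vip_rank insert (all values bound as strings, as above):

fiveDatas.foreachRDD { rdd =>
  rdd.foreachPartition { records =>
    // One connection and one prepared statement per partition
    val connection = DriverManager.getConnection("jdbc:mysql://node02:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
    val ps = connection.prepareStatement(
      "insert into vip_rank (indexx,child_comment,comment_time,content,da_v,like_status,pic,user_id,user_name,vip_rank,stamp) values (?,?,?,?,?,?,?,?,?,?,?)")
    records.foreach { record =>
      val datas = record.value().split("\t")
      (1 to 11).foreach(i => ps.setString(i, datas(i - 1)))
      ps.executeUpdate()
    }
    ps.close()
    connection.close()
  }
}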