Spark Streaming with Spark SQL: a worked example
The code is as follows:
package com.dt.spark.streaming

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.streaming.{StreamingContext, Duration}

/**
 * Spark Streaming + Spark SQL example: join a streaming click log with an
 * item dimension table loaded from MySQL and print the top ten clicked
 * items in each window.
 */
object LogAnalyzerStreamingSQL {
  // Window and slide durations (not defined in the original listing; example values)
  val WINDOW_LENGTH = new Duration(60 * 1000)
  val SLIDE_INTERVAL = new Duration(10 * 1000)

  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("LogAnalyzerStreamingSQL").setMaster("local[4]")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Load the itemInfo table from MySQL
    val itemInfoDF = sqlContext.read.format("jdbc").options(Map(
      "url" -> "jdbc:mysql://spark-master:3306/spark",
      "driver" -> "com.mysql.jdbc.Driver",
      "dbtable" -> "iteminfo",
      "user" -> "root",
      "password" -> "vincent"
    )).load()
    itemInfoDF.registerTempTable("itemInfo")

    val streamingContext = new StreamingContext(sc, SLIDE_INTERVAL)
    val logLinesDStream = streamingContext.textFileStream("D:/logs_incoming")
    val accessLogsDStream = logLinesDStream.map(AccessLog.parseLogLine).cache()
    val windowDStream = accessLogsDStream.window(WINDOW_LENGTH, SLIDE_INTERVAL)

    windowDStream.foreachRDD(accessLogs => {
      if (accessLogs.isEmpty()) {
        println("No logs received in this time interval")
      } else {
        // Register this window's access logs as a temp table and join it with itemInfo
        accessLogs.toDF().registerTempTable("accessLogs")
        val sqlStr = "SELECT a.itemid,a.itemname,b.cnt FROM itemInfo a JOIN " +
          " (SELECT itemId,COUNT(*) cnt FROM accessLogs GROUP BY itemId) b " +
          " ON (a.itemid=b.itemId) ORDER BY cnt DESC LIMIT 10 "
        val topTenClickItemLast10Minus = sqlContext.sql(sqlStr)
        // The window's top ten could be persisted (e.g. to HDFS as Parquet); here it is just printed
        topTenClickItemLast10Minus.show()
      }
    })

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}

// A parsed click-log record in the userId,itemId,clickTime format
case class AccessLog(userId: String, itemId: String, clickTime: String)

object AccessLog {
  def parseLogLine(log: String): AccessLog = {
    val logInfo = log.split(",")
    if (logInfo.length == 3) {
      AccessLog(logInfo(0), logInfo(1), logInfo(2))
    } else {
      AccessLog("0", "0", "0")
    }
  }
}
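For readers who prefer the DataFrame API over SQL strings, the top-ten query inside foreachRDD could also be written roughly as follows. This is only a sketch: it assumes the same itemInfoDF and the window's accessLogs RDD from the listing above, and would replace the registerTempTable/sql calls.

import org.apache.spark.sql.functions.desc

// Count clicks per item in the current window
val clickCounts = accessLogs.toDF()
  .groupBy("itemId")
  .count()
  .withColumnRenamed("count", "cnt")

// Join with the item dimension table and keep the ten most-clicked items
val topTen = itemInfoDF
  .join(clickCounts, itemInfoDF("itemid") === clickCounts("itemId"))
  .select(itemInfoDF("itemid"), itemInfoDF("itemname"), clickCounts("cnt"))
  .orderBy(desc("cnt"))
  .limit(10)

topTen.show()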
The contents of the MySQL table are as follows:
mysql> select * from spark.iteminfo;
+--------+----------+
| itemid | itemname |
+--------+----------+
| 001    | phone    |
| 002    | computer |
| 003    | TV       |
+--------+----------+
3 rows in set (0.00 sec)
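Before starting the streaming job it can be worth confirming, for example from spark-shell, that this table is reachable over JDBC. A minimal check, reusing the same connection settings as in the code above (adjust host, user, and password to your environment):

// Same JDBC options as in the streaming job
val itemInfoDF = sqlContext.read.format("jdbc").options(Map(
  "url" -> "jdbc:mysql://spark-master:3306/spark",
  "driver" -> "com.mysql.jdbc.Driver",
  "dbtable" -> "iteminfo",
  "user" -> "root",
  "password" -> "vincent"
)).load()
itemInfoDF.show()   // should print the three rows listed above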
Create the directory logs_incoming on the D: drive.
Run the Spark Streaming program.
Create a new file with the following contents:
0001,001,2016-05-04 22:10:20
0002,001,2016-05-04 22:10:21
0003,001,2016-05-04 22:10:22
0004,002,2016-05-04 22:10:23
0005,002,2016-05-04 22:10:24
0006,001,2016-05-04 22:10:25
0007,002,2016-05-04 22:10:26
0008,001,2016-05-04 22:10:27
0009,003,2016-05-04 22:10:28
0010,003,2016-05-04 22:10:29
0011,001,2016-05-04 22:10:30
0012,003,2016-05-04 22:10:31
0013,003,2016-05-04 22:10:32
Save the file into the logs_incoming directory and watch the output of the Spark program:
+------+--------+---+
|itemid|itemname|cnt|
+------+--------+---+
|   001|   phone|  6|
|   003|      TV|  4|
|   002|computer|  3|
+------+--------+---+
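A note on how files are picked up: textFileStream only processes files that appear in the monitored directory after the stream has started, and each file should show up atomically, so it is safest to write the file elsewhere and then move it in. A minimal sketch for producing such a test file from Scala (the paths and file name here are just examples):

import java.nio.file.{Files, Paths, StandardCopyOption}

// Lines in the userId,itemId,clickTime format parsed by AccessLog.parseLogLine
val lines = Seq(
  "0001,001,2016-05-04 22:10:20",
  "0002,001,2016-05-04 22:10:21",
  "0009,003,2016-05-04 22:10:28"
)

// Write to a temporary location first, then move atomically into the watched directory
val tmp = Paths.get("D:/tmp/clicks.log")
Files.createDirectories(tmp.getParent)
Files.write(tmp, lines.mkString("\n").getBytes("UTF-8"))
Files.move(tmp, Paths.get("D:/logs_incoming/clicks.log"), StandardCopyOption.ATOMIC_MOVE)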
Writing data to MySQL with Spark SQL
1. Write the Spark SQL code in IDEA
package cn.cheng.sql

import java.util.Properties
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, SaveMode, SparkSession}

/**
 * Spark SQL example: read a text file, build a Student DataFrame,
 * sort it by age, and write the result to a MySQL table via JDBC.
 */
// Case class representing one student record
case class Student(id: Int, name: String, age: Int)

object SparkSqlToMysql {
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkSession object
    val spark: SparkSession = SparkSession.builder()
      .appName("SparkSqlToMysql")
      .getOrCreate()
    // 2. Read the data
    val data: RDD[String] = spark.sparkContext.textFile(args(0))
    // 3. Split each line
    val arrRDD: RDD[Array[String]] = data.map(_.split(" "))
    // 4. Map each array to a Student
    val studentRDD: RDD[Student] = arrRDD.map(x => Student(x(0).toInt, x(1), x(2).toInt))
    // Import implicit conversions
    import spark.implicits._
    // 5. Convert the RDD to a DataFrame
    val studentDF: DataFrame = studentRDD.toDF()
    // 6. Register the DataFrame as a table
    studentDF.createOrReplaceTempView("student")
    // 7. Query the student table, sorting by age in descending order
    val resultDF: DataFrame = spark.sql("select * from student order by age desc")
    // 8. Save the result to a MySQL table
    // Create a Properties object holding the MySQL user name and password
    val prop = new Properties()
    prop.setProperty("user", "root")
    prop.setProperty("password", "123456")
    resultDF.write.jdbc("jdbc:mysql://192.168.200.150:3306/spark", "student", prop)
    // When writing to MySQL you can set the save mode: overwrite, append, ignore,
    // or error (the default, which fails if the table already exists)
    //resultDF.write.mode(SaveMode.Overwrite).jdbc("jdbc:mysql://192.168.200.150:3306/spark","student",prop)
    spark.stop()
  }
}
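Note that the SparkSession builder above deliberately omits .master(...), so the master URL comes from spark-submit in step 2. For a quick local test the session could instead be built with an explicit master (local[*] here is just an example):

val spark: SparkSession = SparkSession.builder()
  .appName("SparkSqlToMysql")
  .master("local[*]")   // only for local testing; drop this when submitting to a cluster
  .getOrCreate()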
2. Package the code as a jar and submit it to the Spark cluster
spark-submit \
--class cn.cheng.sql.SparkSqlToMysql \
--master spark://node-1:7077 \
--executor-memory 1g \
--total-executor-cores 2 \
--jars /opt/bigdata/hive/lib/mysql-connector-java-5.1.35.jar \
--driver-class-path /opt/bigdata/hive/lib/mysql-connector-java-5.1.35.jar \
/root/original-spark-2.0.2.jar /person.txt
Check the data in the MySQL table.
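Besides querying from the mysql client, one quick way to check the written data is to read the table back over JDBC from the same Spark application (a sketch reusing the url and prop from the code above):

val written: DataFrame = spark.read.jdbc("jdbc:mysql://192.168.200.150:3306/spark", "student", prop)
written.show()   // prints the rows of the student table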
Author: Running_Tiger
Source: CSDN
Original post: https://blog.csdn.net/qq_41455420/article/details/79515674
Copyright notice: this is the blogger's original article; please include a link to the original when reposting.