1) Updating a broadcast variable in Structured Streaming
import java.text.SimpleDateFormat
import java.util.Date

import org.apache.spark.sql.streaming.StreamingQueryListener

// enSparkSession is the project's session provider; broadcastWrapper is an instance
// of the BroadcastWrapper class defined below. Both come from elsewhere in the project.
val enSpark = enSparkSession.session()
// sdf is assumed to be a full timestamp format, so that substring(14, 16) yields the minute field.
val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")

enSpark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {}

  // onQueryProgress fires on the driver after every micro-batch; refresh the broadcast
  // whenever the current minute is a multiple of 5.
  override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = {
    val mins = sdf.format(new Date()).substring(14, 16).toInt
    if (mins % 5 == 0 && broadcastWrapper.rulebroadcast != null) {
      broadcastWrapper.update(enSpark.sparkContext, blocking = true)
    }
  }

  override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {}
})
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast

import scala.collection.mutable.ArrayBuffer

/**
 * Wrapper around the rule broadcast variable so it can be refreshed at runtime.
 */
class BroadcastWrapper extends Serializable {

  // Tuple layout (an Int plus seven string columns) follows what JdbcUtil.getRuleBroadcast returns.
  var rulebroadcast: Broadcast[ArrayBuffer[(Int, String, String, String, String, String, String, String)]] = _

  /**
   * Re-read the rules and rebuild the broadcast, unpersisting the old instance first.
   * @param sc       spark context
   * @param blocking whether unpersist should block until the old blocks are removed (defaults to false)
   */
  def update(sc: SparkContext, blocking: Boolean = false): Broadcast[ArrayBuffer[(Int, String, String, String, String, String, String, String)]] = {
    if (rulebroadcast != null) {
      rulebroadcast.unpersist(blocking)
      synchronized {
        rulebroadcast = sc.broadcast(new JdbcUtil().getRuleBroadcast)
      }
    }
    rulebroadcast
  }

  /**
   * Lazily initialize the broadcast instance (double-checked locking).
   * @param sc spark context
   * @return the current broadcast instance
   */
  def getInstance(sc: SparkContext): Broadcast[ArrayBuffer[(Int, String, String, String, String, String, String, String)]] = {
    if (rulebroadcast == null) {
      synchronized {
        if (rulebroadcast == null) {
          rulebroadcast = sc.broadcast(new JdbcUtil().getRuleBroadcast)
        }
      }
    }
    rulebroadcast
  }
}
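The listener only refreshes the wrapper; the query side has to pick up the new broadcast on each micro-batch. Below is a minimal usage sketch for that side. The rate source, the object name BroadcastWrapperUsage, and the foreachBatch body are illustrative assumptions and not part of the original code; the point is that foreachBatch runs on the driver once per batch, so getInstance always hands back the broadcast most recently installed by update.

import org.apache.spark.sql.{DataFrame, SparkSession}

object BroadcastWrapperUsage {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("broadcast-refresh-demo").getOrCreate()

    // Hypothetical shared instance of the BroadcastWrapper class defined above.
    val broadcastWrapper = new BroadcastWrapper

    // Any streaming source works; the rate source just keeps the demo self-contained.
    val input = spark.readStream.format("rate").option("rowsPerSecond", "5").load()

    val query = input.writeStream
      .foreachBatch { (batchDf: DataFrame, batchId: Long) =>
        // Runs on the driver once per micro-batch, so it always sees the latest broadcast.
        val rules = broadcastWrapper.getInstance(spark.sparkContext)
        println(s"batch $batchId sees ${rules.value.size} rules")
        batchDf.show(5, truncate = false)
      }
      .start()

    query.awaitTermination()
  }
}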
2) Updating a broadcast variable in Spark Streaming
import java.text.SimpleDateFormat

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Record shapes assumed from how the fields are used below.
case class Mro(int_id: String, rsrp: String)
case class CellJoinMro(int_id: String, rsrp: String, timeStamps: String)

def sparkStreaming(): Unit = {
  // Create a local StreamingContext with a 15-second batch interval.
  // Running locally needs at least two cores so the socket receiver does not starve the processing tasks.
  val conf = new SparkConf().setMaster("local[*]").setAppName("NetworkWordCount")
  val ssc = new StreamingContext(conf, Seconds(15))

  // Create a DStream that connects to ipAddr:19999 (ipAddr is defined elsewhere).
  val lines = ssc.socketTextStream(ipAddr, 19999)
  val mro = lines.map(row => {
    val fields = row.split(",")
    Mro(fields(0), fields(1))
  })

  val cellJoinMro = mro.transform(rdd => {
    // transform's closure runs on the driver once per batch, so this is a safe place to refresh
    // the broadcast. The condition below is always true, i.e. this demo refreshes on every batch;
    // see the throttled variant sketched after this function.
    if (1 < 3) {
      println("Refreshing broadcast... " + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new java.util.Date()))
      BroadcastWrapper.update(ssc.sparkContext)
    }
    val broadcastCellRes = BroadcastWrapper.getInstance(ssc.sparkContext)
    rdd.map(mroRecord => {
      val int_id: String = mroRecord.int_id
      val rsrp: String = mroRecord.rsrp
      // Look up the cell's timestamps in the broadcast map and join them with commas.
      val timeStamps: String = String.join(",", broadcastCellRes.value.get(int_id).get)
      CellJoinMro(int_id, rsrp, timeStamps)
    })
  })

  cellJoinMro.print()
  ssc.start()             // Start the computation
  ssc.awaitTermination()  // Wait for the computation to terminate
}
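The always-true 1 < 3 check above forces a refresh on every batch, which is handy for a demo but wasteful against a real source. A small guard like the one below can throttle it; the object name BroadcastRefreshGuard and the 5-minute interval are assumptions for illustration.

object BroadcastRefreshGuard {
  // Hypothetical throttle: allow a refresh only after refreshIntervalMs has elapsed.
  private val refreshIntervalMs = 5 * 60 * 1000L
  @volatile private var lastRefresh = 0L

  def shouldRefresh(now: Long = System.currentTimeMillis()): Boolean = synchronized {
    if (now - lastRefresh >= refreshIntervalMs) {
      lastRefresh = now
      true
    } else {
      false
    }
  }
}

Inside transform, replacing the always-true condition with if (BroadcastRefreshGuard.shouldRefresh()) BroadcastWrapper.update(ssc.sparkContext) keeps the refresh to roughly once every five minutes.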
import org.apache.spark.SparkContext
import org.apache.spark.broadcast.Broadcast

object BroadcastWrapper {

  @volatile private var instance: Broadcast[Map[String, java.util.List[String]]] = null
  private val baseDir = "/user/my/streaming/test/"

  // Pick the lexicographically latest file under baseDir and parse it into a map.
  def loadData(): Map[String, java.util.List[String]] = {
    val files = HdfsUtil.getFiles(baseDir)
    var latest: String = null
    for (key <- files.keySet) {
      if (latest == null || latest.compareTo(key) <= 0) latest = key
    }
    val filePath = baseDir + latest
    HdfsUtil.getFileContent(filePath)
  }

  def update(sc: SparkContext, blocking: Boolean = false): Unit = synchronized {
    if (instance != null)
      instance.unpersist(blocking)
    instance = sc.broadcast(loadData())
  }

  def getInstance(sc: SparkContext): Broadcast[Map[String, java.util.List[String]]] = {
    if (instance == null) {
      synchronized {
        if (instance == null) {
          instance = sc.broadcast(loadData())
        }
      }
    }
    instance
  }
}
import java.io.{BufferedReader, InputStreamReader}
import java.text.SimpleDateFormat

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.collection.mutable

object HdfsUtil {

  // Timestamps are deliberately truncated to the start of the day.
  private val sdf = new SimpleDateFormat("yyyy-MM-dd 00:00:00")

  /** List the plain files under `path`: key = file name, value = full path. */
  def getFiles(path: String): mutable.Map[String, String] = {
    val fileItems = new mutable.LinkedHashMap[String, String]
    val fs = FileSystem.get(new Configuration())
    val files = fs.listStatus(new Path(path))
    for (file <- files if file.isFile) {
      // getPath.getName already returns the bare file name, so no extra splitting is needed.
      val name = file.getPath.getName
      fileItems.put(name, file.getPath.toString)
    }
    fs.close()
    fileItems
  }

  /** Parse a file of "int_id,...,epochMillis" lines into a map of int_id -> formatted times. */
  def getFileContent(filePath: String): Map[String, java.util.List[String]] = {
    val map = new mutable.LinkedHashMap[String, java.util.List[String]]
    val fs = FileSystem.get(new Configuration())
    val path = new Path(filePath)
    if (fs.exists(path)) {
      val bufferedReader = new BufferedReader(new InputStreamReader(fs.open(path)))
      var line: String = bufferedReader.readLine()
      while (line != null) {
        val fields: Array[String] = line.split(",")
        val int_id: String = fields(0)
        val date = new java.util.Date(java.lang.Long.valueOf(fields(2)))
        val time = sdf.format(date)
        println(line + " (" + time + ")")
        if (!map.contains(int_id))
          map.put(int_id, new java.util.ArrayList[String])
        map(int_id).add(time)
        line = bufferedReader.readLine()
      }
      bufferedReader.close()
      map.toMap
    } else {
      throw new RuntimeException("the file does not exist: " + filePath)
    }
  }
}
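For reference, getFileContent expects comma-separated lines whose first field is the cell int_id and whose third field is an epoch-millisecond timestamp. The snippet below walks through one made-up line the same way the parser does; the sample values are invented for illustration.

object HdfsFileFormatDemo {
  def main(args: Array[String]): Unit = {
    val sdf = new java.text.SimpleDateFormat("yyyy-MM-dd 00:00:00")
    // Invented sample line: field 0 = int_id, field 1 unused by the parser, field 2 = epoch millis.
    val line = "10001,ignored,1554912000000"
    val fields = line.split(",")
    val time = sdf.format(new java.util.Date(java.lang.Long.valueOf(fields(2))))
    // Prints something like: int_id=10001 -> 2019-04-11 00:00:00 (exact day depends on the JVM time zone)
    println(s"int_id=${fields(0)} -> $time")
  }
}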