Industrial IoT data is published over the MQTT protocol to emqtt (EMQ); the EMQ data is fed into Kafka, and Spark Streaming consumes the Kafka topic and joins it against the reference tables persisted in Oracle to compute the result.
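For context, here is a minimal sketch of the device-side publisher, assuming EMQ listens on tcp://emq-host:1883 and that an EMQ-to-Kafka bridge forwards the MQTT topic gps/alert into the Kafka topic pocGPS01 consumed further down. The broker address, MQTT topic name and sample values are placeholders; only the JSON field names are taken from the CrashAlert case class used by the streaming job (Eclipse Paho MQTT client).
import org.eclipse.paho.client.mqttv3.{MqttClient, MqttMessage}
import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence

object AlertPublisherSketch {
  def main(args: Array[String]): Unit = {
    // Assumed EMQ broker address; replace with the real host
    val client = new MqttClient("tcp://emq-host:1883", MqttClient.generateClientId(), new MemoryPersistence())
    client.connect()
    // One proximity alert; field names mirror the CrashAlert case class, values are made up.
    // alertTime must be in a format the job's Gson (default date format) can parse.
    val payload =
      """{"num1":"EQ001","num2":"EQ002","tp":1,"status":1,
        |"jd1":114.0579,"wd1":22.5431,"jd2":114.0581,"wd2":22.5433,
        |"alertTime":"Jul 1, 2019 10:15:30 AM"}""".stripMargin
    client.publish("gps/alert", new MqttMessage(payload.getBytes("UTF-8")))
    client.disconnect()
  }
}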
package streamTest
import java.util.concurrent.Future
import java.util.{Date, Properties}
import com.google.gson.Gson
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.{Durations, Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
object KafkaStreamTest {
  def main(args: Array[String]): Unit = {
    // Oracle connection for the reference tables
    val property = new Properties()
    val url = "jdbc:oracle:thin:@//xxx:1634/GPS"
    property.put("user", "GPS2")
    property.put("password", "123456")

    // Build one SparkSession and derive the StreamingContext from its SparkContext,
    // so only a single SparkContext exists in the driver JVM.
    val sparkSession = SparkSession.builder().appName("kafkaStreamTest").enableHiveSupport().getOrCreate()
    val ssc = new StreamingContext(sparkSession.sparkContext, Durations.seconds(1))
    // countByWindow (used below) requires checkpointing; adjust this path for your environment
    ssc.checkpoint("/tmp/kafkaStreamTest-checkpoint")

    // Reference data: equipment/shift table joined with the driver table, cached for reuse in every batch
    val rfrunDF = sparkSession.read.jdbc(url, "t_rfrun", property)
    val driverDF = sparkSession.read.jdbc(url, "t_driver", property)
    val msgDF = rfrunDF.join(driverDF, rfrunDF("WORKNUM") === driverDF("WORKNUM"), "inner")
      .select(rfrunDF("equipnum"), rfrunDF("worknum"), rfrunDF("worktime"), rfrunDF("unworktime"),
        driverDF("name"), driverDF("remark2"))
    msgDF.persist()
    // Kafka source: direct stream on topic pocGPS01; auto-commit is disabled, offsets are not committed back
    val topics = Array("pocGPS01")
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "xxx:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
    )
    // Parse each JSON message into a CrashAlert and compute the distance (km) between the two GPS points
    val receiveStream = kafkaStream
      .filter(record => record.value() != null && record.value().nonEmpty)
      .map(record => handleJson2CaseClass(record.value()))
      .mapPartitions(iter => {
        iter.map(x => {
          val distance = getDistance(x.jd1, x.wd1, x.jd2, x.wd2)
          (x.num1, x.num2, distance, x.alertTime)
        })
      })

    // Oracle DATE columns come back as timestamps, so model alertTime as TimestampType
    // to allow direct comparison with worktime/unworktime in the filter below.
    val crashSchema = StructType(List(
      StructField("num1", StringType, false),
      StructField("num2", StringType, false),
      StructField("distance", DoubleType, false),
      StructField("alertTime", TimestampType, false)))

    // For every micro-batch: keep pairs closer than 0.04 km (40 m), attach the driver on shift
    // at the alert time, and emit the enriched rows.
    val resultWorker = receiveStream.transform(rdd => {
      val crash = rdd.map(x => Row(x._1, x._2, x._3, new java.sql.Timestamp(x._4.getTime)))
      val crashDF = sparkSession.createDataFrame(crash, crashSchema)
      val result = crashDF.filter("distance <= 0.04")
        .join(msgDF, crashDF("num1") === msgDF("equipnum"), "left_outer")
        .filter("alertTime <= unworktime and alertTime >= worktime")
        .select(msgDF("worknum"), msgDF("name"), msgDF("remark2"), msgDF("worktime"), msgDF("unworktime"), crashDF("alertTime"))
        .rdd
      result
    })
    // Sliding count of near-miss alerts: 3-minute window, evaluated every 30 seconds
    val streamAlertWindowRdd1 = receiveStream.filter(_._3 < 0.04).countByWindow(Seconds(180), Seconds(30))

    // Broadcast a lazily-initialised Kafka producer wrapper so each executor creates its producer only once
    val kafkaProducer: Broadcast[KafkaSink[String, String]] = {
      val kafkaProducerConfig = {
        val p = new Properties()
        p.setProperty("bootstrap.servers", "100.69.149.210:9092")
        p.setProperty("key.serializer", classOf[StringSerializer].getName)
        p.setProperty("value.serializer", classOf[StringSerializer].getName)
        p.setProperty("acks", "0")
        p.setProperty("buffer.memory", "102400")
        p.setProperty("batch.size", "1000")
        p
      }
      ssc.sparkContext.broadcast(KafkaSink[String, String](kafkaProducerConfig))
    }
    // Push the enriched alert rows to Kafka
    resultWorker.foreachRDD(rdd => {
      if (!rdd.isEmpty) {
        rdd.foreach(record => {
          kafkaProducer.value.send("ssc_test_1", record.toString())
          // do something else
        })
      }
    })

    // Push the windowed alert counts to Kafka
    streamAlertWindowRdd1.foreachRDD(rdd => {
      if (!rdd.isEmpty) {
        rdd.foreach(record => {
          kafkaProducer.value.send("ssc_test_window_1", "alert count in the last 3 minutes: " + record.toString())
          // do something else
        })
      }
    })

    ssc.start()
    ssc.awaitTermination()
  }
  // One proximity alert as received from Kafka: equipment ids, type/status flags,
  // the two GPS positions and the alert time
  case class CrashAlert(num1: String, num2: String, tp: Int, status: Int,
                        jd1: Double, wd1: Double, jd2: Double, wd2: Double, alertTime: Date)

  // Deserialise a JSON message into a CrashAlert
  def handleJson2CaseClass(jsonStr: String): CrashAlert = {
    val gson = new Gson()
    gson.fromJson(jsonStr, classOf[CrashAlert])
  }

  // Haversine great-circle distance in kilometres between two GPS points
  // (the formula treats jd* as latitudes and wd* as longitudes)
  def getDistance(jd1: Double, wd1: Double, jd2: Double, wd2: Double): Double = {
    if (jd1 != 0 && wd1 != 0 && jd2 != 0 && wd2 != 0) {
      val R = 6378.137 // Earth radius in km
      val radLat1 = jd1 * Math.PI / 180
      val radLat2 = jd2 * Math.PI / 180
      val a = radLat1 - radLat2
      val b = wd1 * Math.PI / 180 - wd2 * Math.PI / 180
      val s = 2 * Math.asin(Math.sqrt(Math.pow(Math.sin(a / 2), 2) +
        Math.cos(radLat1) * Math.cos(radLat2) * Math.pow(Math.sin(b / 2), 2)))
      s * R
    } else {
      0.0
    }
  }
  class KafkaSink[K, V](createProducer: () => KafkaProducer[K, V]) extends Serializable {
    /* This is the key idea that allows us to work around running into
       NotSerializableExceptions. */
    lazy val producer = createProducer()

    def send(topic: String, key: K, value: V): Future[RecordMetadata] =
      producer.send(new ProducerRecord[K, V](topic, key, value))

    def send(topic: String, value: V): Future[RecordMetadata] =
      producer.send(new ProducerRecord[K, V](topic, value))
  }

  object KafkaSink {
    import scala.collection.JavaConversions._

    def apply[K, V](config: Map[String, Object]): KafkaSink[K, V] = {
      val createProducerFunc = () => {
        val producer = new KafkaProducer[K, V](config)
        sys.addShutdownHook {
          // Ensure that, on executor JVM shutdown, the Kafka producer sends
          // any buffered messages to Kafka before shutting down.
          producer.close()
        }
        producer
      }
      new KafkaSink(createProducerFunc)
    }

    def apply[K, V](config: java.util.Properties): KafkaSink[K, V] = apply(config.toMap)
  }
}
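A quick way to eyeball what the job writes out is a small standalone consumer on the result topics. This is only a verification sketch: it reuses the broker address from the producer configuration above, the group id is an arbitrary name chosen here, and it polls with the older poll(long) API available in the Kafka client pulled in by spark-streaming-kafka-0-10.
import java.util.{Collections, Properties}
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.serialization.StringDeserializer
import scala.collection.JavaConversions._

object AlertOutputCheck {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "100.69.149.210:9092") // same broker the KafkaSink writes to
    props.put("group.id", "alert-output-check")           // arbitrary group id for this check
    props.put("key.deserializer", classOf[StringDeserializer].getName)
    props.put("value.deserializer", classOf[StringDeserializer].getName)
    props.put("auto.offset.reset", "earliest")

    val consumer = new KafkaConsumer[String, String](props)
    consumer.subscribe(Collections.singletonList("ssc_test_1")) // or "ssc_test_window_1"
    while (true) {
      for (record <- consumer.poll(1000)) {
        println(s"${record.topic()} -> ${record.value()}")
      }
    }
  }
}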
Maven dependencies (pom.xml):
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.cmft</groupId>
  <artifactId>testSpark</artifactId>
  <version>1.0-SNAPSHOT</version>

  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
    <repository>
      <id>cmhk.mirror</id>
      <name>cmhk mirror.</name>
      <url>xxx</url>
    </repository>
    <repository>
      <id>nexus-cmft</id>
      <name>cmft repository</name>
      <url>http:/xxx/</url>
    </repository>
  </repositories>
  <pluginRepositories>
    <pluginRepository>
      <id>nexus-cmft</id>
      <name>cmft repository</name>
      <url>xxx</url>
    </pluginRepository>
  </pluginRepositories>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <spark.version>2.3.2</spark.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0-cdh5.12.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.2.0-cdh5.12.1</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.28</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>${spark.version}</version>
      <exclusions>
        <exclusion>
          <groupId>org.tachyonproject</groupId>
          <artifactId>tachyon-client</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.curator</groupId>
          <artifactId>curator-recipes</artifactId>
        </exclusion>
        <exclusion>
          <groupId>commons-codec</groupId>
          <artifactId>commons-codec</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>net.sf.json-lib</groupId>
      <artifactId>json-lib</artifactId>
      <version>2.3</version>
      <classifier>jdk15</classifier>
    </dependency>
    <dependency>
      <groupId>org.json4s</groupId>
      <artifactId>json4s-core_2.10</artifactId>
      <version>3.2.10</version>
    </dependency>
    <dependency>
      <groupId>org.json4s</groupId>
      <artifactId>json4s-jackson_2.10</artifactId>
      <version>3.2.10</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <version>2.15.2</version>
        <configuration>
          <recompileMode>modified-only</recompileMode>
        </configuration>
        <executions>
          <execution>
            <id>main-scalac</id>
            <phase>process-resources</phase>
            <goals>
              <goal>add-source</goal>
              <goal>compile</goal>
            </goals>
          </execution>
          <execution>
            <id>scala-test-compile</id>
            <phase>process-test-resources</phase>
            <goals>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.6.0</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <executions>
          <execution>
            <phase>compile</phase>
            <goals>
              <goal>compile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
    <directory>target</directory>
    <outputDirectory>target/classes</outputDirectory>
    <testOutputDirectory>target/test-classes</testOutputDirectory>
    <sourceDirectory>src</sourceDirectory>
  </build>
</project>