大数据学习之路86-使用广播变量结合Spark SQL实现IP地理位置匹配

import org.apache.log4j.{Level, Logger}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
  * 使用广播变量结合Spark SQL实现IP地理位置匹配
  * Created by zhangjingcun on 2018/9/30 8:21.
  */
object SQLIPLocation {
  val rulesFilePath = "D:\\data\\ip.txt"
  val accessFilePath = "D:\\data\\access.log"

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val spark = SparkSession.builder().appName("SQLIPLocation").master("local[*]").getOrCreate()

    //1:读取IP规则资源库
    val ipRulesLines: Dataset[String] = spark.read.textFile(rulesFilePath)
    //2:整理IP规则
    import  spark.implicits._
    //117.93.244.0|117.93.255.255|1969091584|1969094655|亚洲|中国|江苏|盐城||电信|320900|China|CN|120.139998|33.377631
    val ipRules: Dataset[(Long, Long, String)] = ipRulesLines.map(line => {
      val fields = line.split("[|]")
      val 

你可能感兴趣的:(大数据生态圈从入门到精通)