Computing the region an IP address belongs to

package utils

import java.sql.DriverManager

object UtilsDemo {
  /**
    * Convert a dotted-decimal IP string into its Long (decimal) value.
    * @param ip IP address such as "1.2.3.4"
    * @return the IP address as a Long
    */
  def ip2Long(ip: String): Long = {
    val fragments = ip.split("[.]")
    var ipNum = 0L
    for (i <- 0 until fragments.length) {
      ipNum = fragments(i).toLong | ipNum << 8L
    }
    ipNum
  }
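  // Quick check: ip2Long("1.2.3.4") returns 16909060, i.e. 0x01020304, so each octet
  // lands in its own byte of the result, with the first octet in the most significant position.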
  /**
    * Binary search: find the rule whose [start, end] range contains the given IP.
    * The rules array must be sorted by start IP.
    * @param lines array of (startIp, endIp, province) rules, sorted by startIp
    * @param ip the IP to look up, as a Long
    * @return index of the matching rule, or -1 if no rule contains the IP
    */
  def binarySearch(lines: Array[(Long, Long, String)], ip: Long): Int = {
    // lower bound of the search range
    var low = 0
    // upper bound of the search range
    var high = lines.length - 1
    while (low <= high) {
      val middle = (low + high) / 2
      // hit: the IP falls inside this rule's [start, end] range
      if ((ip >= lines(middle)._1) && (ip <= lines(middle)._2))
        return middle
      if (ip < lines(middle)._1)
        high = middle - 1
      else {
        low = middle + 1
      }
    }
    // no rule contains this IP
    -1
  }

  /**
    * Write one partition of (province, count) pairs into MySQL.
    */
  def data2Mysql(part: Iterator[(String, Int)]): Unit = {
    // open one JDBC connection per partition; serverTimezone and useSSL=false avoid
    // timezone/SSL warnings from newer MySQL Connector/J versions
    val conn = DriverManager.getConnection(
      "jdbc:mysql://localhost:3306/bigdata?serverTimezone=Asia/Shanghai&useSSL=false",
      "root", "123456")
    val st = conn.prepareStatement("insert into access_log values(?,?)")

    part.foreach(data => {
      st.setString(1, data._1)
      st.setInt(2, data._2)
      st.executeUpdate()
    })
    // close the statement first, then the connection
    st.close()
    conn.close()
  }

}
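
The helpers above can be sanity-checked without a cluster. The sketch below is not part of the original post: the two rules and the province labels are made-up values, only meant to show that a looked-up IP lands in the expected range.

package utils

object LookupCheck {
  def main(args: Array[String]): Unit = {
    // hand-made rules, sorted by start IP as binarySearch requires (values are illustrative only)
    val rules = Array(
      (UtilsDemo.ip2Long("1.0.1.0"), UtilsDemo.ip2Long("1.0.3.255"), "provinceA"),
      (UtilsDemo.ip2Long("1.0.8.0"), UtilsDemo.ip2Long("1.0.15.255"), "provinceB"))
    val ipNum = UtilsDemo.ip2Long("1.0.2.20")                   // 16777748, inside the first range
    val index = UtilsDemo.binarySearch(rules, ipNum)
    println(if (index != -1) rules(index)._3 else "not found")  // prints provinceA
  }
}

The Spark driver below wires these helpers together: it broadcasts the IP rules and maps every IP in the access log to a province.
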
package IP

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import utils.UtilsDemo

/**
  * Look up, for each IP address in the access log, the region it belongs to.
  */
object IpTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("IpTest").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // read the IP-rule file (args(0)): each |-separated line carries a start IP, an end IP (both decimal) and a province
    val lines = sc.textFile(args(0))
    val access_log = lines.map(line => {
      val fields = line.split("[|]")
      val startNum = fields(2).toLong
      val endNum = fields(3).toLong
      val province = fields(6)
      (startNum, endNum, province)
    })
    // collect the rules to the driver ...
    val access = access_log.collect()
    // ... and broadcast them so every executor holds a single read-only copy
    val broadcast: Broadcast[Array[(Long, Long, String)]] = sc.broadcast(access)

    // read the second data set: the access log (args(1))
    val lines2 = sc.textFile(args(1))
    val ipToProvince = lines2.map(line => {
      val fields = line.split("[|]")
      val ip = fields(1)
      // convert the IP string into its Long value
      val ipNum = UtilsDemo.ip2Long(ip)
      val rules: Array[(Long, Long, String)] = broadcast.value
      // binary-search the broadcast rules for the matching range
      val index: Int = UtilsDemo.binarySearch(rules, ipNum)

      // map the matched rule back to its province ("未知省份" means "unknown province")
      var province = "未知省份"
      if (index != -1) {
        province = rules(index)._3
      }
      (province, 1)
    })
    val reduced = ipToProvince.reduceByKey(_+_)
    // sort by count in descending order
    val result = reduced.sortBy(_._2, false)
  //  result.collect().foreach(println(_))

    // write each partition to MySQL: one database connection per partition, not per record
    result.foreachPartition(part => {
      UtilsDemo.data2Mysql(part)
    })
    // release Spark resources
    sc.stop()
  }
}
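
As a closing aside (not part of the original code), data2Mysql can be made a bit more robust by sending the inserts as one JDBC batch and releasing the statement and connection in a finally block. A minimal sketch, assuming the same access_log table and credentials as above:

package utils

import java.sql.DriverManager

object UtilsDemoBatch {
  def data2MysqlBatched(part: Iterator[(String, Int)]): Unit = {
    val conn = DriverManager.getConnection(
      "jdbc:mysql://localhost:3306/bigdata?serverTimezone=Asia/Shanghai&useSSL=false",
      "root", "123456")
    val st = conn.prepareStatement("insert into access_log values(?,?)")
    try {
      // queue every row of the partition and send them in a single round trip
      part.foreach { case (province, count) =>
        st.setString(1, province)
        st.setInt(2, count)
        st.addBatch()
      }
      st.executeBatch()
    } finally {
      // release resources even if an insert fails
      st.close()
      conn.close()
    }
  }
}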

 
