统计IP地址对应的省份,并把结果存入到mysql(示例)

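ip.txt(IP 规则文件;原文此处为截图,图片已缺失):每行以 | 分隔,代码把第 3、4 个字段当作该 IP 段的起始、结束数值(Long),把第 7 个字段当作归属地(省份);二分查找要求各行按起始数值升序排列。
access.log(访问日志;原文此处为截图,图片已缺失):每行以 | 分隔,代码取第 2 个字段作为访问者 IP。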

import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

/**
 * Record whose fields mirror, in name and order, the columns of the
 * `iptable` insert statement used by the job in this file
 * (`ip, ipLong, ipLongMin, ipLongMax, city`).
 *
 * NOTE(review): nothing in the visible code instantiates this class; the job
 * passes plain tuples around instead.
 *
 * @param ip        address text (presumably dotted-decimal IPv4 - confirm)
 * @param ipLong    numeric form of the address
 * @param ipLongMin presumably the lower bound of the matched address range
 * @param ipLongMax presumably the upper bound of the matched address range
 * @param city      location label associated with the range
 */
case class ip(
  ip: String,
  ipLong: Long,
  ipLongMin: Long,
  ipLongMax: Long,
  city: String
)

/**
 * Spark job: reads visitor IPs from the access log, resolves each one against
 * the IP range table, stores every resolved row in the MySQL table `iptable`,
 * and finally prints how many hits each location received.
 *
 * Differences from the first version of this example:
 *  - An explicit `main` replaces `extends App`. Spark's documentation advises
 *    against `scala.App` because its body runs through delayed initialisation,
 *    so fields referenced from closures may be uninitialised on executors.
 *  - The JDBC connection is opened inside `foreachPartition`, i.e. where the
 *    rows are actually processed, with one connection and one prepared
 *    statement per partition, both closed in `finally`. Previously a statement
 *    was created per record and only the last one was closed.
 *  - IPs that match no range (`binarySearch` returns -1) are skipped instead of
 *    failing with an IndexOutOfBoundsException.
 *  - The resolved RDD is cached because two actions consume it.
 *  - The range table is shipped to tasks as a broadcast variable.
 *  - The SparkContext is always stopped, and a failed insert reports its cause.
 */
object ip {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Everything the closures need is held in local vals so it is captured by
    // value rather than read from fields of this object.
    val accessLogPath = "F:\\BigData\\ip\\access.log"
    val ipRulesPath = "F:\\BigData\\ip\\ip.txt"
    // NOTE(review): connection settings and credentials are hard-coded as in the
    // original example; move them to configuration before any real use.
    val jdbcUrl = "jdbc:mysql://localhost:3306/ip"
    val jdbcUser = "root"
    val jdbcPassword = "1234"
    val insertSql = "insert into iptable(ip,ipLong,ipLongMin,ipLongMax,city) values(?,?,?,?,?)"

    val sparkConf = new SparkConf().setAppName("ip").setMaster("local[1]")
    val sc = new SparkContext(sparkConf)

    try {
      // (ip text, ip as Long) for every log line; the IP is the 2nd '|'-separated field.
      val ipLongs = sc.textFile(accessLogPath)
        .map(_.split("[|]"))
        .map(fields => fields(1))
        .map(address => (address, MyUtils.ip2Long(address)))

      // The range table is read once on the driver and broadcast to the tasks.
      val rules = sc.broadcast(MyUtils.readData(ipRulesPath))

      // (ip, ipLong, rangeMin, rangeMax, location); rows without a matching range are dropped.
      val resolved = ipLongs.flatMap { case (address, addressLong) =>
        val table = rules.value
        val index = MyUtils.binarySearch(table, addressLong)
        if (index < 0) Iterator.empty
        else {
          val fields = table(index).split("[|]")
          Iterator.single((address, addressLong, fields(2).toLong, fields(3).toLong, fields(6)))
        }
      }.cache() // consumed by both the insert and the count below

      // Best-effort insert, as before: a failure is reported and the count still runs.
      try {
        resolved.foreachPartition { rows =>
          if (rows.hasNext) { // do not open a connection for an empty partition
            val conn: Connection = DriverManager.getConnection(jdbcUrl, jdbcUser, jdbcPassword)
            try {
              val ps: PreparedStatement = conn.prepareStatement(insertSql)
              try {
                rows.foreach { case (address, addressLong, rangeMin, rangeMax, location) =>
                  ps.setString(1, address)
                  ps.setLong(2, addressLong)
                  ps.setLong(3, rangeMin)
                  ps.setLong(4, rangeMax)
                  ps.setString(5, location)
                  ps.executeUpdate()
                }
              } finally {
                ps.close()
              }
            } finally {
              conn.close()
            }
          }
        }
      } catch {
        case scala.util.control.NonFatal(e) =>
          println(s"Mysql Exception: ${e.getMessage}")
          e.printStackTrace()
      }

      // Number of hits per location.
      resolved.map(row => (row._5, 1)).reduceByKey(_ + _).foreach(println)
    } finally {
      sc.stop()
    }
  }
}


/** Helpers for converting IPs to numbers and looking them up in a range table. */
object MyUtils {

  /**
   * Converts a dot-separated address into a single number by treating each
   * fragment as the next lower 8 bits: `"1.2.3.4"` becomes `16909060`.
   *
   * @param ip dot-separated numeric fragments, e.g. `"192.168.0.1"`
   * @return the fragments folded left to right as `(acc << 8) | fragment`
   * @throws NumberFormatException if a fragment is not a valid number
   */
  def ip2Long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => (acc << 8) | fragment.toLong)

  /**
   * Binary-searches the range table for the row whose range contains `ip`.
   *
   * Each row is a `|`-separated line whose 3rd and 4th fields (indices 2 and 3)
   * are the inclusive lower and upper bound of a range. The search is only
   * meaningful if the rows are sorted ascending by range and do not overlap;
   * this is not checked here.
   *
   * @param lines the range table, one row per element
   * @param ip    the numeric address to look up
   * @return the index of the matching row, or -1 if no row contains `ip`
   */
  def binarySearch(lines: ArrayBuffer[String], ip: Long): Int = {
    @scala.annotation.tailrec
    def search(low: Int, high: Int): Int =
      if (low > high) -1
      else {
        // Overflow-safe midpoint; the row is split once per probe instead of three times.
        val middle = low + (high - low) / 2
        val fields = lines(middle).split("\\|")
        if (ip < fields(2).toLong) search(low, middle - 1)
        else if (ip > fields(3).toLong) search(middle + 1, high)
        else middle
      }

    search(0, lines.length - 1)
  }

  /**
   * Reads a whole text file into memory, one element per line.
   *
   * The file is decoded with the JVM's default charset (none is passed to the
   * reader). The reader is always closed, including when reading fails.
   *
   * @param path path of the file to read
   * @return the file's lines in order, without line terminators
   */
  def readData(path: String): ArrayBuffer[String] = {
    val reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)))
    try {
      val lines = new ArrayBuffer[String]()
      // readLine() returns null at end of stream.
      Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach(lines += _)
      lines
    } finally {
      reader.close()
    }
  }

}


转载于:https://www.cnblogs.com/drl-blogs/p/11086819.html

你可能感兴趣的:(统计IP地址对应的省份,并把结果存入到mysql(示例))