题目:分析每个用户每次会话的行为轨迹(半小时内算一次会话,并按时间顺序给序号)

题目描述
分析每个用户每次会话的行为轨迹(半小时内算一次会话,并按时间顺序给序号)

import java.text.SimpleDateFormat
import java.util.UUID


object Test01 {
     

  case class User(var uid: String, var id: String, var time: Long, var link: String)

  def main(args: Array[String]): Unit = {
     
   
    val list = List[(String, String, String)](
      ("1001", "2020-09-10 10:21:21", "home.html"),
      ("1001", "2020-09-10 10:28:10", "good_list.html"),
      ("1001", "2020-09-10 10:35:05", "good_detail.html"),
      ("1001", "2020-09-10 10:42:55", "cart.html"),
      ("1001", "2020-09-10 11:35:21", "home.html"),
      ("1001", "2020-09-10 11:36:10", "cart.html"),
      ("1001", "2020-09-10 11:38:12", "trade.html"),
      ("1001", "2020-09-10 11:40:00", "payment.html"),
      ("1002", "2020-09-10 09:40:00", "home.html"),
      ("1002", "2020-09-10 09:41:00", "mine.html"),
      ("1002", "2020-09-10 09:42:00", "favor.html"),
      ("1003", "2020-09-10 13:10:00", "home.html"),
      ("1003", "2020-09-10 13:15:00", "search.html")
    )
    // 处理时间,封装对象
    val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val res = list.map {
     
      case (id, time, link) => User(UUID.randomUUID().toString, id, format.parse(time).getTime / 1000, link)
    }
      //分组
      .groupBy(_.id).map(_._2)
      //此时数据为
      //List(User(b53,1003,1599714600,home.html), User(f41,1003,1599714900,search.html))
      //List(User(2b2,1002,1599702000,home.html), User(b77,1002,1599702060,mine.html), User(303,1002,1599702120,favor.html))
      .flatMap(x => {
     
        val slidingList = x.sliding(2)
        //两两滑窗,再判断时间
        slidingList.foreach(y => {
     
          //此时数据为
          //List(User(aa0,1001,1599704481,home.html), User(297,1001,1599704890,good_list.html))
          //List(User(97,1001,1599704890,good_list.html), User(874,1001,1599705305,good_detail.html))
          //List(User(874,1001,1599705305,good_detail.html), User(540,1001,1599705775,cart.html))
          //List(User(540,1001,1599705775,cart.html), User(e7d,1001,1599708921,home.html))
          val head = y.head
          val last = y.last

          //如果前后时间不相差超过三十分钟,就把前面的uid赋值给后面的
          //因为User是个对象,所以当后面的值改变时,下一条和他一个地址的头也会改变
          //List(User(aa0,1001,1599704481,home.html), User(297,1001,1599704890,good_list.html))
          //List(User(297,1001,1599704890,good_list.html), User(874,1001,1599705305,good_detail.html))
          //结果如下
          //List(User(aa0,1001,1599704481,home.html), User(aa0,1001,1599704890,good_list.html))
          //List(User(aa0,1001,1599704890,good_list.html), User(874,1001,1599705305,good_detail.html))

          if (last.time - head.time <= 60 * 30) last.uid = head.uid
        })
        x
      })
      //此时数据为
      //User(e8ed4922-6be0-4ef7-84f3-ed653f267fd9,1003,1599714600,home.html)
      //User(e8ed4922-6be0-4ef7-84f3-ed653f267fd9,1003,1599714900,search.html)
      //User(bdb8a4ee-b360-45fd-9431-04585807a510,1002,1599702000,home.html)
      //User(bdb8a4ee-b360-45fd-9431-04585807a510,1002,1599702060,mine.html)
      //User(bdb8a4ee-b360-45fd-9431-04585807a510,1002,1599702120,favor.html)
      //再按照uid分组,与下标拉链
      .groupBy(_.uid)
      .map(z => {
     
        val zipList = z._2.zipWithIndex
        //下标加一
        zipList.map {
     
          case (user, index) => (user, index + 1)
        }
      })
      //List((User(bcd,1003,1599714600,home.html),1), (User(bcd,1003,1599714900,search.html),2))
      //List((User(e12,1001,1599708921,home.html),1), (User(e12,1001,1599708970,cart.html),2), (User(e12,1001,1599709092,trade.html),3), (User(e12,1001,1599709200,payment.html),4))
      //再按照id和时间排序,最后拉平得到结果
      .toList.sortWith((a, b) => {
     
      if (a.head._1.id > b.head._1.id) false
      else if (a.head._1.id < b.head._1.id) true
      else {
     
        if (a.head._1.time > b.head._1.time) false
        else true
      }
    }).flatten

    res.foreach(println(_))
    //(User(d76e2807-5540-4ef6-98e2-648ac0cf69af,1001,1599704481,home.html),1)
    //(User(d76e2807-5540-4ef6-98e2-648ac0cf69af,1001,1599704890,good_list.html),2)
    //(User(d76e2807-5540-4ef6-98e2-648ac0cf69af,1001,1599705305,good_detail.html),3)
    //(User(d76e2807-5540-4ef6-98e2-648ac0cf69af,1001,1599705775,cart.html),4)
    //(User(9ce84306-2ffb-4c3a-a7a0-e9dc2b0310f6,1001,1599708921,home.html),1)
    //(User(9ce84306-2ffb-4c3a-a7a0-e9dc2b0310f6,1001,1599708970,cart.html),2)
    //(User(9ce84306-2ffb-4c3a-a7a0-e9dc2b0310f6,1001,1599709092,trade.html),3)
    //(User(9ce84306-2ffb-4c3a-a7a0-e9dc2b0310f6,1001,1599709200,payment.html),4)
    //(User(0c997a09-63b9-4407-9756-f1319f6e4e4a,1002,1599702000,home.html),1)
    //(User(0c997a09-63b9-4407-9756-f1319f6e4e4a,1002,1599702060,mine.html),2)
    //(User(0c997a09-63b9-4407-9756-f1319f6e4e4a,1002,1599702120,favor.html),3)
    //(User(d8aade40-50f7-4eba-bd58-c11100470977,1003,1599714600,home.html),1)
    //(User(d8aade40-50f7-4eba-bd58-c11100470977,1003,1599714900,search.html),2)
  }
}

你可能感兴趣的:(学习笔记,scala,spark)