scala详细笔记(七)scala集合练习题 [函数练习题][scala案例][scala练习]

0 经典WordCount案例

统计文本文件中单词出现的次数

import scala.io.Source
import scala.reflect.io.File
/**
 * Classic word-count example: counts how often each word occurs in a text file.
 *
 * @author 多易教育 - 行哥
 */
object WordCount {

  /**
   * Reads the local file `d://word.txt`, counts the occurrences of every
   * whitespace-separated word and prints one `(word,count)` tuple per distinct word.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("d://word.txt")
    val counts: Map[String, Int] =
      try {
        source.getLines()
          // Split on runs of whitespace; a plain " " split would turn repeated
          // blanks into empty strings that are then counted as words.
          .flatMap(_.split("\\s+"))
          // A line that starts with whitespace still yields one leading empty token.
          .filter(_.nonEmpty)
          // Materialise before the source is closed: the line iterator is lazy.
          .toList
          // Group equal words together; the size of each group is the word's count.
          .groupBy(identity)
          .map { case (word, occurrences) => (word, occurrences.size) }
      } finally {
        // Release the file handle even if reading fails.
        source.close()
      }
    counts.foreach(println)
  }

}

1 平均温度案例

// Temperature readings as (city, temperature) pairs, one array per measurement round.
val d1 = Array(("beijing", 28.1), ("shanghai", 28.7), ("guangzhou", 32.0), ("shenzhen", 33.1))
val d2 = Array(("beijing", 27.3), ("shanghai", 30.1), ("guangzhou", 33.3))
val d3 = Array(("beijing", 28.2), ("shanghai", 29.1), ("guangzhou", 32.0), ("shenzhen", 32.1))

// All readings concatenated into a single array.
val data: Array[(String, Double)] = d1 ++ d2 ++ d3

// Readings grouped by city name.
val map: Map[String, Array[(String, Double)]] = data.groupBy(_._1)

// Approach 1: fold the temperatures into a running sum starting at 0, then divide
// by the number of readings. A strict `map` is used rather than `mapValues`, whose
// result is a lazy view (and a `MapView`, not a `Map`, from Scala 2.13 on).
val res: Map[String, Double] = map.map { case (city, readings) =>
  (city, readings.foldLeft(0d)(_ + _._2) / readings.length)
}

// Approach 2: reduce the (city, temperature) tuples pairwise, accumulating the
// temperature in the second slot; the first slot is a throw-away placeholder.
map.map { case (city, readings) =>
  val total = readings.reduce((x, y) => ("", x._2 + y._2))
  (city, total._2 / readings.length)
}.foreach(println)

// Approach 3: project the temperatures once, then use `sum` and `size`.
data.groupBy(_._1).map { case (city, readings) =>
  val temps = readings.map(_._2)
  (city, temps.sum / temps.size)
}.foreach(println)

2 共同好友案例

数据如下:每个字母代表一个人,冒号前面是用户,冒号后面是该用户的好友列表(用逗号分隔)。统计任意两个人之间的共同好友

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

/**
 * Prints the common friends of every pair of people listed in `data/friends.txt`.
 *
 * Each input line is expected to look like `person:friend1,friend2,...`.
 * Pairs that share no friend are not printed.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val bs = Source.fromFile("data/friends.txt")
  // Each person together with the array of that person's friends.
  val arr: Array[(String, Array[String])] =
    try {
      bs.getLines()
        .map(_.split(":", -1))
        // Skip blank or malformed lines that have no "person:friends" separator
        // instead of failing on a missing index.
        .collect { case parts if parts.length >= 2 =>
          // A trailing comma would otherwise produce an empty "friend".
          (parts(0), parts(1).split(",", -1).filter(_.nonEmpty))
        }
        // Materialise before the source is closed: the line iterator is lazy.
        .toArray
    } finally {
      bs.close()
    }

  // Visit every unordered pair (i < j) once and print the non-empty intersections.
  for {
    i <- arr.indices
    j <- (i + 1) until arr.length
    common = arr(i)._2.intersect(arr(j)._2).toList
    if common.nonEmpty
  } println((arr(i)._1 + "和" + arr(j)._1 + "的好友有: ", common))
}

3 SQL join语法案例

数据如下:

user.txt

uid , name , age , friend

u001,hls,22,fengjie
u002,wangwu,31,lisi
u003,zhangyanru,22,tananpengyou
u004,laocao,26,fengyi
u005,mengqi,12,nvmengqi
u006,haolei,38,sb
u007,wanghongjing,24,wife
u009,wanghongjing,24,wife

orders.txt

oid,uid,cost

order011,u001,300
order012,u002,200
order023,u006,100
order056,u007,300
order066,u003,500
order055,u004,300
order021,u005,300
order014,u001,100
order025,u005,300
order046,u007,30
order067,u003,340
order098,u008,310

/**
 * Joins every order in `data/join/orders.txt` with its user from
 * `data/join/user.txt` and prints the joined rows sorted by user id.
 *
 * Each printed row is `(uid, name, age, friend, oid)`. An order whose user id
 * has no matching user is still printed, with the string "null" in the four
 * user columns. Only the order id and user id are read from an order line.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  // Load the users into a lookup table keyed by user id.
  val bs1 = Source.fromFile("data/join/user.txt")
  val map: Map[String, (String, String, String, String)] =
    try {
      bs1.getLines()
        .map(_.split(",", -1))
        // Ignore blank or truncated lines rather than failing on a missing column.
        .collect { case f if f.length >= 4 => (f(0), (f(0), f(1), f(2), f(3))) }
        .toMap
    } finally {
      bs1.close()
    }

  // Load the orders as (uid, (oid, uid)) pairs, keeping the file order.
  val bs2 = Source.fromFile("data/join/orders.txt")
  val list2: List[(String, (String, String))] =
    try {
      bs2.getLines()
        .map(_.split(",", -1))
        .collect { case f if f.length >= 2 => (f(1), (f(0), f(1))) }
        .toList
    } finally {
      bs2.close()
    }

  // For every order, look up the user and append the order id to the user columns.
  val r = list2.map { case (uid, (oid, _)) =>
    val (id, name, age, friend) = map.getOrElse(uid, ("null", "null", "null", "null"))
    (id, name, age, friend, oid)
  }
  // Print the result ordered by the user id column.
  r.sortBy(_._1).foreach(println)
}

 

4 pvuv案例

数据

site1,user1,2018-03-01 02:12:22
site1,user2,2018-03-05 04:12:22
site1,user2,2018-03-05 04:13:22
site1,user2,2018-03-05 04:14:22
site1,user2,2018-03-05 04:15:22
site4,user7,
site1,user2,2018-03-05 05:15:22
site1,user2,2018-03-05 08:15:22
site1,user3,2018-03-05 04:15:22
site1,user4,2018-03-05 05:15:22
site1,user3,2018-03-07 11:12:22
site1,user3,2018-03-08 11:12:22
site2,user4,2018-03-07 15:12:22
site3,user5,2018-03-07 08:12:22
site3,user6,2018-03-05 08:12:22
site1,user1,2018-03-08 11:12:22
site1,,2018-03-08 11:12:22
site2,user2,2018-03-07 15:12:22
site3,user5,2018-03-07 08:12:22
site3,user5,2018-03-07 18:12:22
site3,user6,2018-03-05 08:12:22
site4,user7,2018-03-03 10:12:22
site2,,2018-03-08 11:12:22
site3,user5,2018-03-07 08:12:22
site3,user6,2018-03-05 08:12:22
site4,user5,2018-03-03 10:12:22
site4,user7,2018-02-20 11:12:22

/**
 * Computes page views (pv) and unique visitors (uv) per page and day from
 * `data/pvuv/pvuv.txt` and prints one `(page, day, pv, uv)` tuple per group.
 *
 * Each input line is expected to look like `page,user,yyyy-MM-dd HH:mm:ss`.
 * Lines with a missing or empty field, or with a timestamp that has no
 * time-of-day part, are skipped.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val source = Source.fromFile("data/pvuv/pvuv.txt")
  // One (page, user, day, hour) tuple per valid line.
  val visits: List[(String, String, String, String)] =
    try {
      source.getLines()
        .map(_.split(",", -1))
        // Require all three columns; a `length >= 0` check is always true and would
        // let short lines through to fail on the index access below.
        .filter(fields => fields.length >= 3 && !fields.exists(_.isEmpty))
        // Split the timestamp into its date and time-of-day parts.
        .map(fields => (fields, fields(2).split("\\s")))
        .collect { case (fields, Array(day, clock, _*)) =>
          // The hour is the part of the clock text before the first ':'.
          (fields(0), fields(1), day, clock.takeWhile(_ != ':'))
        }
        // Materialise before the source is closed: the line iterator is lazy.
        .toList
    } finally {
      source.close()
    }

  // Group by (page, day); pv is the group size, uv the number of distinct users.
  val res = visits.groupBy(v => (v._1, v._3)).map { case ((page, day), hits) =>
    val pv: Int = hits.size
    val uv: Int = hits.map(_._2).distinct.size
    (page, day, pv, uv)
  }
  res.foreach(println)
}

5 Line线段重叠案例

数据如下(每行是一条线段的起点和终点,用逗号分隔):统计线段在每个点上重叠的次数,并按照重叠次数从高到低排序输出

1,4
2,5
4,6
2,4
3,6
4,6
1,5

/**
 * Counts, for every integer point, how many segments from `data/line.txt`
 * cover it, and prints `(point,count)` tuples from the highest count to the lowest.
 *
 * Each input line is expected to look like `start,end`; a segment covers every
 * integer from `start` to `end` inclusive. Points with the same count are
 * printed in ascending point order so the output is deterministic.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val bs = Source.fromFile("data/line.txt")
  // Every point covered by every segment; a point appears once per covering segment.
  val points: List[Int] =
    try {
      bs.getLines()
        // Skip blank lines, which would otherwise fail the integer conversion.
        .filter(_.trim.nonEmpty)
        .map(_.split(","))
        // Expand each segment into the inclusive range of points it passes through.
        .flatMap(arr => arr(0).trim.toInt to arr(1).trim.toInt)
        // Materialise before the source is closed: the line iterator is lazy.
        .toList
    } finally {
      bs.close()
    }

  // Group equal points together; the size of each group is the overlap count.
  val mp: Map[Int, Int] = points.groupBy(identity).map { case (point, hits) => (point, hits.size) }
  // Highest count first; ties are broken by the point itself.
  val sorted: List[(Int, Int)] = mp.toList.sortBy { case (point, count) => (-count, point) }
  sorted.foreach(println)
}

 

你可能感兴趣的:(Scala笔记)