Scala之集合常用函数

1.基本属性和常用操作

object TestList {

  /**
    * Shows the basic properties and everyday operations of an immutable List:
    * length, traversal, string rendering and membership testing.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val numbers = List(1, 2, 3, 4, 5, 6)

    // Number of elements: `length` and `size` report the same value
    println(numbers.length)
    println(numbers.size)

    // Traverse the collection, printing one element per line
    numbers.foreach(n => println(n))

    // Render as a string: default form, then joined with a custom separator
    println(numbers)
    println(numbers.mkString("#"))

    // Membership test
    println(numbers.contains(1))
  }
}

//打印结果
//6
//6
//1
//2
//3
//4
//5
//6
//List(1, 2, 3, 4, 5, 6)
//1#2#3#4#5#6
//true

2. 衍生集合

(1)获取集合的头head

(2)获取集合的尾(不是头就是尾)tail

(3)集合最后一个数据 last

(4)集合初始数据(不包含最后一个)init

(5)反转 reverse

(6)取前(后)n个元素 take(takeRight)

(7)去掉前(后)n个元素 drop(dropRight)

(8)并集 union

(9)交集 intersect

(10)差集 diff

(11)拉链 zip

(12)滑窗 sliding

object CollectionFunction {

  /**
    * Walks through the "derived collection" operations of an immutable List
    * (drop, distinct, head/last/init/tail, reverse, sliding, slice, set-like
    * operations, zip/unzip) and prints the result of each one.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val nums = List(10, 30, 20, 90, 100, 55, 100)

    // Drop the first three elements -> List(90, 100, 55, 100)
    println(nums.drop(3))

    // Drop the last two elements -> List(10, 30, 20, 90, 100)
    println(nums.dropRight(2))

    // Remove duplicates -> List(10, 30, 20, 90, 100, 55)
    println(nums.distinct)

    // First element -> 10
    println(nums.head)
    // Last element -> 100
    println(nums.last)
    // Everything except the last element
    println(nums.init)

    // Emptiness check -> false
    println(nums.isEmpty)

    // Reversed copy
    println(nums.reverse)

    // Sliding window of size 5 that advances 2 positions per step:
    //   ArrayBuffer(10, 30, 20, 90, 100)
    //   ArrayBuffer(20, 90, 100, 55, 100)
    nums.sliding(5, 2).foreach(window => println(window.toBuffer))

    // Sub-list from index 2 (inclusive) until index 5 (exclusive)
    println(nums.slice(2, 5))
    // Everything except the first element
    println(nums.tail)
    // The last three elements
    println(nums.takeRight(3))

    // Set-like operations
    // Intersection: elements present in both lists -> List(100)
    println(nums.intersect(List(100, 200, 300)))

    // Difference: each element of the argument removes one occurrence,
    // so only a single 100 is taken away -> List(30, 90, 55, 100)
    println("===================")
    println(nums)
    println(nums.diff(List(10, 20, 100, 200)))

    // Union: plain concatenation, duplicates are kept (same result as `union`)
    //   -> List(10, 30, 20, 90, 100, 55, 100, 100, 200, 300)
    println(nums ++ List(100, 200, 300))

    // Zip: pair up elements position by position
    println("拉链=============================")
    println(nums)
    val labels = List("zhangsan", "lisi", "wangwu", "aa", "bb", "cc", "dd")
    val paired = nums.zip(labels)
    println(paired)
    // When the lengths differ, the unmatched elements are discarded
    //   -> List((10,aa), (30,bb))
    println(nums.zip(List("aa", "bb")))

    // Unzip: split a list of pairs back into a pair of lists
    val unzipped: (List[Int], List[String]) = paired.unzip
    println(unzipped)
  }
}

//打印结果
//List(90, 100, 55, 100)
//List(10, 30, 20, 90, 100)
//List(10, 30, 20, 90, 100, 55)
//10
//100
//List(10, 30, 20, 90, 100, 55)
//false
//List(100, 55, 100, 90, 20, 30, 10)
//ArrayBuffer(10, 30, 20, 90, 100)
//ArrayBuffer(20, 90, 100, 55, 100)
//List(20, 90, 100)
//List(30, 20, 90, 100, 55, 100)
//List(100, 55, 100)
//List(100)
//===================
//List(10, 30, 20, 90, 100, 55, 100)
//List(30, 90, 55, 100)
//List(10, 30, 20, 90, 100, 55, 100, 100, 200, 300)
//拉链=============================
//List(10, 30, 20, 90, 100, 55, 100)
//List((10,zhangsan), (30,lisi), (20,wangwu), (90,aa), (100,bb), (55,cc), (100,dd))
//List((10,aa), (30,bb))
//(List(10, 30, 20, 90, 100, 55, 100),List(zhangsan, lisi, wangwu, aa, bb, cc, dd))

3.集合计算初级函数

1)说明

​ (1)求和

​ (2)求乘积

​ (3)最大值

​ (4)最小值

​ (5)排序

2)示例

object ListFunction {

  /**
    * Demonstrates the elementary aggregate functions of a List:
    * sum, product, max, min and the different ways of sorting.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val values = List(1, 5, -3, 4, 2, -7, 6)

    // (1) Sum of all elements
    println(values.sum)

    // (2) Product of all elements
    println(values.product)

    // (3) Largest element
    println(values.max)

    // (4) Smallest element
    println(values.min)

    // (5) Sorting
    // (5.1) Ascending by the element itself
    println(values.sortBy(identity))

    // (5.2) Ascending by absolute value
    println(values.sortBy(_.abs))

    // (5.3) Ascending, using an explicit "comes before" comparison
    println(values.sortWith(_ < _))

    // (5.4) Descending, using an explicit "comes before" comparison
    println(values.sortWith(_ > _))
  }
}
//打印结果
//8
//5040
//6
//-7
//List(-7, -3, 1, 2, 4, 5, 6)
//List(1, 2, -3, 4, 5, 6, -7)
//List(-7, -3, 1, 2, 4, 5, 6)
//List(6, 5, 4, 2, 1, -3, -7)

4. 集合计算高级函数

object CollectionAdvanceFunction {

  /**
    * Demonstrates the higher-order collection functions and prints each result:
    *  (1) filter
    *  (2) map (transform)
    *  (3) flatten
    *  (4) flatMap - equivalent to a map followed by a flatten
    *  (5) groupBy
    *  (6) reduce / reduceRight
    *  (7) fold / foldRight
    * plus foreach and the three sorting functions (sorted, sortBy, sortWith).
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // filter - applied to every element; keeps those for which the predicate is true
    val numbers = List(10, 20, 3, 5, 8, 10, 33)
    println(numbers.filter(_ % 2 == 0))
    // prints: List(10, 20, 8, 10)

    // foreach - runs a side effect for every element (two equivalent spellings)
    numbers.foreach(x => println(x))
    numbers.foreach(println(_))

    // groupBy - applied to every element; use case: many-to-one
    val people = List(
      ("zhangsan", 20, "beijing"),
      ("lisi", 20, "shenzhen"),
      ("wangwu", 20, "shenzhen"),
      ("zhaoliu", 20, "shenzhen"),
      ("tianqi", 20, "beijing"),
      ("wangermazi", 20, "shanghai")
    )
    val byCity: Map[String, List[(String, Int, String)]] = people.groupBy(_._3)
    println(byCity)
    // prints a Map keyed by city, e.g.
    //   shenzhen -> List((lisi,20,shenzhen), (wangwu,20,shenzhen), (zhaoliu,20,shenzhen))
    //   shanghai -> List((wangermazi,20,shanghai))
    //   beijing  -> List((zhangsan,20,beijing), (tianqi,20,beijing))
    // NOTE(review): the order in which the entries are printed may vary.

    // flatMap = map + flatten; use case: one-to-many
    val sentences = List("hello word", "hello python")
    // map step: one list of words per sentence
    //   List(List(hello, word), List(hello, python))
    val wordsPerSentence: List[List[String]] = sentences.map(_.split(" ").toList)
    // flatten step: merge the inner lists into one
    println(wordsPerSentence.flatten)
    // prints: List(hello, word, hello, python)
    println(sentences.flatMap(line => line.split(" ")))
    // prints: List(hello, word, hello, python)

    // map - applied to every element; use case: one-to-one
    val terms = List("hello", "world", "scala", "python")
    println(terms.map(_.length))
    // prints: List(5, 5, 5, 6)

    // flatten - removes one level of nesting
    val nested = List(
      List(1, 2, 3, 4),
      List(5, 6, 7),
      List(8, 9, 10)
    )
    val flat: List[Int] = nested.flatten
    println(flat)
    // prints: List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

    // Two levels of nesting need two flatten calls
    val deeplyNested = List(
      List(List(1, 2), List(3, 4)),
      List(List(5, 6), List(7, 8)),
      List(List(9, 10), List(11, 12))
    )
    println(deeplyNested.flatten.flatten)
    // prints: List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

    // sorted - ascending by the elements themselves; reverse it for descending
    val unsorted = List(2, 5, 1, 10, 3, 20)
    println(unsorted.sorted.reverse)
    // prints: List(20, 10, 5, 3, 2, 1)

    // sortBy - the function returns the sort key for each element (ascending)
    val ages = List(("zhangsan", 20), ("lisi", 10), ("wangwu", 15), ("zhaoliu", 30))
    println(ages.sortBy(_._2)) // sort by age
    // prints: List((lisi,10), (wangwu,15), (zhangsan,20), (zhaoliu,30))

    // sortWith - the function compares two elements at a time
    // ascending
    println(unsorted.sortWith(_ < _))
    // prints: List(1, 2, 3, 5, 10, 20)
    // descending
    println(unsorted.sortWith(_ > _))
    // prints: List(20, 10, 5, 3, 2, 1)

    // reduce
    //   agg:  the result accumulated so far
    //   curr: the element being folded in
    val reducedLeft = unsorted.reduce((agg, curr) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    // For List(2, 5, 1, 10, 3, 20):
    //   step 1: agg=2   curr=5   -> 7
    //   step 2: agg=7   curr=1   -> 8
    //   step 3: agg=8   curr=10  -> 18
    //   step 4: agg=18  curr=3   -> 21
    //   step 5: agg=21  curr=20  -> 41
    println(reducedLeft)
    // prints: 41

    // reduceRight - starts from the last element; note the (curr, agg) parameter order
    val reducedRight = unsorted.reduceRight((curr, agg) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    // trace: agg=20 curr=3, agg=23 curr=10, agg=33 curr=1, agg=34 curr=5, agg=39 curr=2
    println(reducedRight)
    // prints: 41
    println("=" * 40)

    // fold - like reduce, but agg starts at the value given in the first parameter list
    val folded = unsorted.fold(10)((agg, curr) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    // trace: agg=10 curr=2, agg=12 curr=5, agg=17 curr=1,
    //        agg=18 curr=10, agg=28 curr=3, agg=31 curr=20
    println(folded)
    // prints: 51

    // foldRight - starts from the last element; note the (curr, agg) parameter order
    val foldedRight = unsorted.foldRight(100)((curr, agg) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    // trace: agg=100 curr=20, agg=120 curr=3, agg=123 curr=10,
    //        agg=133 curr=1, agg=134 curr=5, agg=139 curr=2
    println(foldedRight)
    // prints: 141
  }
}

5.普通wordcount案例

import scala.io.Source

object WordCountTest {

  /**
    * Basic word count: reads the text file `d:/wordcount.txt` (UTF-8), splits every
    * line into words, groups identical words and prints one `(word,count)` pair
    * per distinct word.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // 1. Read the file. The source is closed in `finally` so the file handle is
    //    released even if reading fails; `toList` materialises every line before
    //    the close happens.
    val source = Source.fromFile("d:/wordcount.txt", "utf-8")
    val data: List[String] =
      try source.getLines().toList
      finally source.close()
    // Example content:
    //   List(
    //     "hello python hello hadoop",
    //     "hello scala hello java",
    //     "java and scala and hadoop",
    //     "flume hadoop kafka hbase",
    //     "spark scala hadoop kafka"
    //   )

    // 2. Split and flatten. Splitting on runs of whitespace and dropping empty
    //    tokens keeps blank lines and repeated spaces from being counted as a word.
    val words = data.flatMap(line => line.split("\\s+").filter(_.nonEmpty))
    // List(hello, python, hello, hadoop, hello, scala, hello, java, java, and, scala, and,
    //      hadoop, flume, hadoop, kafka, hbase, spark, scala, hadoop, kafka)

    // 3. Group identical words together
    val grouped = words.groupBy(identity)
    // Map(java -> List(java, java), kafka -> List(kafka, kafka),
    //     hadoop -> List(hadoop, hadoop, hadoop, hadoop), spark -> List(spark), ...)

    // 4. Count the occurrences of each word and print the pairs
    grouped
      .map { case (word, occurrences) => (word, occurrences.size) }
      .foreach(println(_))
  }
}

//打印结果
//(java,2)
//(kafka,2)
//(hadoop,4)
//(spark,1)
//(scala,3)
//(python,1)
//(flume,1)
//(hello,4)
//(hbase,1)
//(and,2)

6.高级wordcount案例

object WordCountTest2 {

  /**
    * Word count over pre-aggregated input: every sentence comes with the number
    * of times it occurred, so each word contributes that number to its total.
    * Prints one `(word,total)` pair per distinct word.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    val weightedSentences = List(
      ("Hello Scala Spark World", 4),
      ("Hello Scala Spark", 3),
      ("Hello Scala", 2),
      ("Hello", 1)
    )

    // 1. Split and flatten: pair every word with the count of its sentence
    //    List((Hello,4), (Scala,4), (Spark,4), (World,4), (Hello,3), (Scala,3),
    //         (Spark,3), (Hello,2), (Scala,2), (Hello,1))
    val weightedWords = weightedSentences.flatMap { case (sentence, times) =>
      sentence.split(" ").toList.map(word => (word, times))
    }

    // 2. Group by word
    //    Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
    //    Scala -> List((Scala,4), (Scala,3), (Scala,2))
    //    Spark -> List((Spark,4), (Spark,3))
    //    World -> List((World,4))
    val byWord = weightedWords.groupBy(_._1)

    // 3. Sum the counts of each group, e.g. Hello: 4 + 3 + 2 + 1 = 10.
    //    The same total could be produced with
    //    pairs.reduce((agg, curr) => (agg._1, agg._2 + curr._2)).
    val totals = byWord.map { case (word, pairs) =>
      (word, pairs.map(_._2).sum)
    }

    // 4. Show the result: Hello -> 10, Scala -> 9, Spark -> 7, World -> 4
    totals.foreach(println(_))
  }
}

你可能感兴趣的:(大数据学习,scala)