/** Demonstrates the basic attributes and everyday operations of a `List`. */
object TestList {

  /**
   * Prints the element count (via both `length` and `size`), every element,
   * the default string form, a `#`-joined string and a membership check
   * for a fixed list of six integers.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val numbers: List[Int] = List(1, 2, 3, 4, 5, 6)

    // Element count: `length` and `size` report the same value.
    println(numbers.length)
    println(numbers.size)

    // Traversal: one element per output line.
    numbers.foreach(n => println(n))

    // String rendering: default toString first, then a custom separator.
    println(numbers)
    println(numbers.mkString("#"))

    // Membership test.
    println(numbers.contains(1))
  }
}
//打印结果
//6
//6
//1
//2
//3
//4
//5
//6
//List(1, 2, 3, 4, 5, 6)
//1#2#3#4#5#6
//true
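(1)获取集合的头(第一个元素)head
(2)获取集合的尾(除第一个元素以外的所有元素)tail
(3)获取集合的最后一个元素 last
(4)获取集合的初始部分(不包含最后一个元素)init
(5)反转 reverse
(6)取前(后)n个元素 take(takeRight)
(7)去掉前(后)n个元素 drop(dropRight)
(8)并集 union(Scala 2.13 起已弃用,建议使用 ++ 或 concat)
(9)交集 intersect
(10)差集 diff
(11)拉链 zip
(12)滑窗 sliding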
/**
 * Demonstrates operations that derive new collections from a `List`:
 * dropping, de-duplicating, slicing, sliding windows, set-like operations
 * (intersection, difference, union) and zipping/unzipping.
 */
object CollectionFunction {

  /**
   * Applies each operation to a fixed sample list and prints the result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val list = List[Int](10, 30, 20, 90, 100, 55, 100)

    // Drop the first 3 elements => List(90, 100, 55, 100)
    println(list.drop(3))

    // Drop the last 2 elements => List(10, 30, 20, 90, 100)
    println(list.dropRight(2))

    // Remove duplicate values => List(10, 30, 20, 90, 100, 55)
    println(list.distinct)

    // First element => 10
    println(list.head)

    // Last element => 100
    println(list.last)

    // Every element except the last one
    println(list.init)

    // Emptiness check
    println(list.isEmpty)

    // Reversed copy
    println(list.reverse)

    // Sliding windows of length 5, advancing 2 elements per step:
    //   ArrayBuffer(10, 30, 20, 90, 100)
    //   ArrayBuffer(20, 90, 100, 55, 100)
    list.sliding(5, 2).foreach(window => println(window.toBuffer))

    // Sub-list: from index 2 (inclusive) until index 5 (exclusive)
    println(list.slice(2, 5))

    // Every element except the first one
    println(list.tail)

    // The last 3 elements
    println(list.takeRight(3))

    // --- Set-like operations ---

    // Intersection: the elements both lists have in common => List(100)
    println(list.intersect(List(100, 200, 300)))

    // Difference: each element of the argument removes at most one occurrence,
    // so only one of the two 100s is removed => List(30, 90, 55, 100)
    println("===================")
    println(list)
    println(list.diff(List(10, 20, 100, 200)))

    // Union: plain concatenation, duplicates are kept
    //   => List(10, 30, 20, 90, 100, 55, 100, 100, 200, 300)
    // `++` is used because `union` is deprecated since Scala 2.13; the result is the same.
    println(list ++ List(100, 200, 300))

    // --- Zip / unzip ---
    println("拉链=============================")
    println(list)

    // Equal lengths: elements are paired position by position
    //   => List((10,zhangsan), (30,lisi), (20,wangwu), (90,aa), (100,bb), (55,cc), (100,dd))
    val zippedWithNames = list.zip(List("zhangsan", "lisi", "wangwu", "aa", "bb", "cc", "dd"))
    println(zippedWithNames)

    // Unequal lengths: elements without a partner are discarded => List((10,aa), (30,bb))
    println(list.zip(List("aa", "bb")))

    // Unzip: split the list of pairs back into a pair of lists
    //   => (List(10, 30, 20, 90, 100, 55, 100),List(zhangsan, lisi, wangwu, aa, bb, cc, dd))
    val unzipped: (List[Int], List[String]) = zippedWithNames.unzip
    println(unzipped)
  }
}
//打印结果
//List(90, 100, 55, 100)
//List(10, 30, 20, 90, 100)
//List(10, 30, 20, 90, 100, 55)
//10
//100
//List(10, 30, 20, 90, 100, 55)
//false
//List(100, 55, 100, 90, 20, 30, 10)
//ArrayBuffer(10, 30, 20, 90, 100)
//ArrayBuffer(20, 90, 100, 55, 100)
//List(20, 90, 100)
//List(30, 20, 90, 100, 55, 100)
//List(100, 55, 100)
//List(100)
//===================
//List(10, 30, 20, 90, 100, 55, 100)
//List(30, 90, 55, 100)
//List(10, 30, 20, 90, 100, 55, 100, 100, 200, 300)
//拉链=============================
//List(10, 30, 20, 90, 100, 55, 100)
//List((10,zhangsan), (30,lisi), (20,wangwu), (90,aa), (100,bb), (55,cc), (100,dd))
//List((10,aa), (30,bb))
//(List(10, 30, 20, 90, 100, 55, 100),List(zhangsan, lisi, wangwu, aa, bb, cc, dd))
1)说明
(1)求和 sum
(2)求乘积 product
(3)最大值 max
(4)最小值 min
(5)排序 sortBy / sortWith
2)示例
/** Demonstrates the simple aggregate and sorting functions available on a `List[Int]`. */
object ListFunction {

  /**
   * Prints the sum, product, maximum and minimum of a fixed list, followed by
   * the list sorted four ways: natural order, by absolute value, ascending
   * and descending.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val numbers: List[Int] = List(1, 5, -3, 4, 2, -7, 6)

    // (1)-(4) Aggregates, printed in order: sum, product, max, min.
    val total = numbers.sum
    val multiplied = numbers.product
    val largest = numbers.max
    val smallest = numbers.min
    println(total)
    println(multiplied)
    println(largest)
    println(smallest)

    // (5) Sorting
    // (5.1) By the element itself (ascending).
    println(numbers.sortBy(identity))
    // (5.2) By the element's absolute value.
    println(numbers.sortBy(_.abs))
    // (5.3) Ascending, using an explicit "comes before" predicate.
    println(numbers.sortWith(_ < _))
    // (5.4) Descending, using an explicit "comes before" predicate.
    println(numbers.sortWith(_ > _))
  }
}
//打印结果
//8
//5040
//6
//-7
//List(-7, -3, 1, 2, 4, 5, 6)
//List(1, 2, -3, 4, 5, 6, -7)
//List(-7, -3, 1, 2, 4, 5, 6)
//List(6, 5, 4, 2, 1, -3, -7)
/**
 * Demonstrates the higher-order collection functions:
 *  1. filter
 *  2. map (transform)
 *  3. flatten
 *  4. flatMap (equivalent to `map` followed by `flatten`)
 *  5. groupBy
 *  6. reduce / reduceRight
 *  7. fold / foldRight
 *
 * plus `foreach`, `sorted`, `sortBy` and `sortWith`.
 */
object CollectionAdvanceFunction {

  /**
   * Runs every demonstration against fixed sample data and prints the results.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // filter - applied to each element; keeps those for which the function returns true
    val list = List[Int](10, 20, 3, 5, 8, 10, 33)
    // Long form: list.filter(x => x % 2 == 0)
    println(list.filter(_ % 2 == 0))
    // Output: List(10, 20, 8, 10)

    // foreach - runs a side effect for each element (two equivalent spellings)
    list.foreach(x => println(x))
    list.foreach(println(_))

    // groupBy - applied to each element; the function returns the grouping key
    // Use case: many-to-one
    val people = List[(String, Int, String)](
      ("zhangsan", 20, "beijing"),
      ("lisi", 20, "shenzhen"),
      ("wangwu", 20, "shenzhen"),
      ("zhaoliu", 20, "shenzhen"),
      ("tianqi", 20, "beijing"),
      ("wangermazi", 20, "shanghai")
    )
    val peopleByCity: Map[String, List[(String, Int, String)]] = people.groupBy(x => x._3)
    println(peopleByCity)
    // Output: Map(shenzhen -> List((lisi,20,shenzhen), (wangwu,20,shenzhen), (zhaoliu,20,shenzhen)), shanghai -> List((wangermazi,20,shanghai)), beijing -> List((zhangsan,20,beijing), (tianqi,20,beijing)))

    // flatMap - applied to each element; equals map + flatten
    // Use case: one-to-many
    val sentences = List[String]("hello word", "hello python")
    // Step 1 (map): each sentence becomes an array of words, giving a List[Array[String]]
    val splitSentences = sentences.map(x => x.split(" "))
    // Step 2 (flatten): the nested arrays are merged into one List[String]
    val flattenedWords = splitSentences.flatten
    println(flattenedWords)
    // Output: List(hello, word, hello, python)
    // The same result in a single step:
    val flatMappedWords = sentences.flatMap(x => x.split(" "))
    println(flatMappedWords)
    // Output: List(hello, word, hello, python)

    // map - applied to each element
    // Use case: one-to-one
    val languages = List("hello", "world", "scala", "python")
    val lengths = languages.map(x => x.length)
    println(lengths)
    // Output: List(5, 5, 5, 6)

    // flatten - removes one level of nesting
    // Use case: one-to-many
    val nested = List[List[Int]](
      List[Int](1, 2, 3, 4),
      List[Int](5, 6, 7),
      List[Int](8, 9, 10)
    )
    val flat: List[Int] = nested.flatten
    println(flat)
    // Output: List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

    // Each flatten call removes exactly one level, so two levels need two calls
    val doublyNested = List[List[List[Int]]](
      List[List[Int]](List[Int](1, 2), List[Int](3, 4)),
      List[List[Int]](List[Int](5, 6), List[Int](7, 8)),
      List[List[Int]](List[Int](9, 10), List[Int](11, 12))
    )
    val fullyFlat = doublyNested.flatten.flatten
    println(fullyFlat)
    // Output: List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

    // sorted - sorts by the elements themselves, ascending; reverse gives descending
    val numbers = List[Int](2, 5, 1, 10, 3, 20)
    val sortedDescending = numbers.sorted.reverse
    println(sortedDescending)
    // Output: List(20, 10, 5, 3, 2, 1)

    // sortBy - applied to each element; the function returns the sort key (ascending)
    val nameAndAge = List[(String, Int)](("zhangsan", 20), ("lisi", 10), ("wangwu", 15), ("zhaoliu", 30))
    val sortedByAge = nameAndAge.sortBy(x => x._2) // sort by age
    println(sortedByAge)
    // Output: List((lisi,10), (wangwu,15), (zhangsan,20), (zhaoliu,30))

    // sortWith - compares two elements at a time
    // Ascending
    val ascending = numbers.sortWith((x, y) => x < y)
    println(ascending)
    // Output: List(1, 2, 3, 5, 10, 20)
    // Descending
    val descending = numbers.sortWith((x, y) => x > y)
    println(descending)
    // Output: List(20, 10, 5, 3, 2, 1)

    // reduce
    // agg:  the result accumulated so far
    // curr: the element being combined in this step
    val reduced = numbers.reduce((agg, curr) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    // With numbers = List(2, 5, 1, 10, 3, 20):
    // step 1: agg=2  curr=5  agg+curr=7
    // step 2: agg=7  curr=1  agg+curr=8
    // step 3: agg=8  curr=10 agg+curr=18
    // step 4: agg=18 curr=3  agg+curr=21
    // step 5: agg=21 curr=20 agg+curr=41
    println(reduced)
    // Output: agg=2 curr=5
    //         agg=7 curr=1
    //         agg=8 curr=10
    //         agg=18 curr=3
    //         agg=21 curr=20
    //         41

    // reduceRight - combines from the right end; note the swapped parameter order (curr, agg)
    val reducedFromRight = numbers.reduceRight((curr, agg) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    println(reducedFromRight)
    println("=" * 40)
    // Output: agg=20 curr=3
    //         agg=23 curr=10
    //         agg=33 curr=1
    //         agg=34 curr=5
    //         agg=39 curr=2
    //         41

    // fold - like reduce, but agg starts from the value in the first parameter list
    val folded = numbers.fold(10)((agg, curr) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    println(folded)
    // Output: agg=10 curr=2
    //         agg=12 curr=5
    //         agg=17 curr=1
    //         agg=18 curr=10
    //         agg=28 curr=3
    //         agg=31 curr=20
    //         51

    // foldRight - like reduceRight, but agg starts from the given initial value
    val foldedFromRight = numbers.foldRight(100)((curr, agg) => {
      println(s"agg=${agg} curr=${curr}")
      agg + curr
    })
    println(foldedFromRight)
    // Output: agg=100 curr=20
    //         agg=120 curr=3
    //         agg=123 curr=10
    //         agg=133 curr=1
    //         agg=134 curr=5
    //         agg=139 curr=2
    //         141
  }
}
import scala.io.Source
/** Basic word count: reads a text file and prints how many times each word occurs. */
object WordCountTest {

  /**
   * Reads `d:/wordcount.txt` (UTF-8), splits every line on single spaces and
   * prints one `(word,count)` pair per distinct word.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Read the file. `toList` loads every line before the `finally` runs,
    //    so the underlying file handle can be closed safely right away.
    val source = Source.fromFile("d:/wordcount.txt", "utf-8")
    val data: List[String] =
      try source.getLines().toList
      finally source.close()
    // Sample content:
    // List(
    //   "hello python hello hadoop",
    //   "hello scala hello java",
    //   "java and scala and hadoop",
    //   "flume hadoop kafka hbase",
    //   "spark scala hadoop kafka"
    // )

    // 2. Split and flatten. Blank lines and consecutive spaces yield empty
    //    tokens, which are dropped so that "" is never counted as a word.
    val words = data.flatMap(line => line.split(" ")).filter(_.nonEmpty)
    // List(hello, python, hello, hadoop, hello, scala, hello, java, java, and, scala, and, hadoop, flume, hadoop, kafka, hbase, spark, scala, hadoop, kafka)

    // 3. Group identical words together.
    val grouped = words.groupBy(x => x)
    // Map(java -> List(java, java), kafka -> List(kafka, kafka), hadoop -> List(hadoop, hadoop, hadoop, hadoop), spark -> List(spark), scala -> List(scala, scala, scala), python -> List(python), flume -> List(flume), hello -> List(hello, hello, hello, hello), hbase -> List(hbase), and -> List(and, and))

    // 4. Count the occurrences in each group and print the pairs.
    grouped
      .map { case (word, occurrences) => (word, occurrences.size) }
      .foreach(println(_))
  }
}
//打印结果
//(java,2)
//(kafka,2)
//(hadoop,4)
//(spark,1)
//(scala,3)
//(python,1)
//(flume,1)
//(hello,4)
//(hbase,1)
//(and,2)
6.高级wordcount案例
/** Advanced word count: every input line carries a pre-computed repeat count. */
object WordCountTest2 {

  /**
   * Expands each `(line, count)` pair into `(word, count)` pairs, groups them
   * by word, sums the counts within each group and prints the `(word,total)` pairs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val weightedLines = List(("Hello Scala Spark World", 4), ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))

    // 1. Split and flatten: attach the line's count to each of its words.
    //    ("Hello Scala Spark World", 4) => (Hello,4), (Scala,4), (Spark,4), (World,4)
    val weightedWords = weightedLines.flatMap { case (line, weight) =>
      line.split(" ").map(word => (word, weight))
    }
    // List((Hello,4),(Scala,4),(Spark,4),(World,4),(Hello,3),(Scala,3),(Spark,3),(Hello,2),(Scala,2),(Hello,1))

    // 2. Group the pairs by word.
    //    Hello -> List((Hello,4),(Hello,3),(Hello,2),(Hello,1))
    //    Scala -> List((Scala,4),(Scala,3),(Scala,2))
    //    Spark -> List((Spark,4),(Spark,3))
    //    World -> List((World,4))
    val pairsByWord = weightedWords.groupBy { case (word, _) => word }

    // 3. Sum the counts of every group.
    //    An equivalent formulation reduces the pairs directly:
    //      pairs.reduce((agg, curr) => (agg._1, agg._2 + curr._2))
    //    e.g. (Hello,4),(Hello,3) => (Hello,7); then (Hello,2) => (Hello,9); then (Hello,1) => (Hello,10)
    val totals = pairsByWord.map { case (word, pairs) =>
      val counts = pairs.map { case (_, weight) => weight } // e.g. List(4, 3, 2, 1)
      (word, counts.sum)
    }

    // 4. Show the result: Hello->10, Scala->9, Spark->7, World->4
    totals.foreach(println(_))
  }
}