有一段时间没有使用 Scala 了, 今天我们回顾一下 Scala 常用的函数算子, 并通过一些练习加深理解和印象. 这些函数对于理解 Spark 的相关算子非常有用, 建议自己动手练习.
所有的练习代码, 都可以在 https://github.com/SeanYanxml/arsenal/tree/master/arsenal-scala/quick-scala/quick-scala 上找到.
foreach()
override def foreach[U](f : scala.Function1[A, U]) : scala.Unit = { /* compiled code */ }
遍历数组&集合.
scala> val lst0 = List(1,2,3,4,5,6)
lst0: List[Int] = List(1, 2, 3, 4, 5, 6)
scala> lst0.foreach(print(_))
123456
map()
final override def map[B, That](f : scala.Function1[A, B])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
用于遍历数组、Map集合、List集合、Tuple元组内的每一个元素. 传入的是一个函数, 该函数会作用于每一个元素, 并返回由函数结果组成的新集合.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst1 = lst0.map(_*10)
lst1: List[Int] = List(10, 70, 90, 80, 0, 30, 50, 40, 60, 20)
foreach()
&map()
foreach() 与 map() 方法的区别在于: foreach() 无返回值 (返回 Unit), 而 map() 有返回值 (返回由函数结果组成的新集合).
scala> val lst0 = List(1,2,3,4,5)
lst0: List[Int] = List(1, 2, 3, 4, 5)
scala> val lst1 = lst0.foreach(_*10)
lst1: Unit = ()
# 无输出
scala> lst1
scala> val lst2 = lst0.map(_*10)
lst2: List[Int] = List(10, 20, 30, 40, 50)
# 含有输出
scala> lst2
res2: List[Int] = List(10, 20, 30, 40, 50)
map().flatten
/flatMap()
def flatten[B](implicit asTraversable : scala.Function1[A, scala.collection.GenTraversableOnce[B]]) : CC[B] = { /* compiled code */ }
final override def flatMap[B, That](f : scala.Function1[A, scala.collection.GenTraversableOnce[B]])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
先进行map()
函数操作, 再将其进行压平.
scala> val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
lines: List[String] = List(hello tom hello jerry, hello jerry, hello kitty)
# 获得里面是字符串数组的List对象
scala> val linesSplit = lines.map(_.split(" "))
linesSplit: List[Array[String]] = List(Array(hello, tom, hello, jerry), Array(hello, jerry), Array(hello, kitty))
# 将所有的数组都压平,压在一个List内部
scala> val linesSplitFlat = lines.map(_.split(" ")).flatten
linesSplitFlat: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty)
# 使用flatMap
scala> val linesSplitFlat2 = lines.flatMap(_.split(" "))
linesSplitFlat2: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty)
filter()
def filter(p : scala.Function1[A, scala.Boolean]) : Repr = { /* compiled code */ }
过滤, 过滤出数组或集合
内满足筛选条件的数据.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val list3 = lst0.filter(_>5)
list3: List[Int] = List(7, 9, 8, 6)
sorted
/sortBy()
/sortWith()
- def sorted[B >: A](implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }
- def sortBy[B](f : scala.Function1[A, B])(implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }
- def sortWith(lt : scala.Function2[A, A, scala.Boolean]) : Repr = { /* compiled code */ }
三个函数都可以用于排序. 其中 sorted 是按元素的默认顺序进行简单排序, sortBy 可以指定以某一个属性进行排序, sortWith 可以指定排序的比较函数.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst3_1 = lst0.sorted
lst3_1: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
scala> val lst3_2 = lst0.sortBy(x =>x)
lst3_2: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
scala> val lst3_2 = lst0.sortBy(x => (-x))
lst3_2: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
# 传入一个compare()函数
scala> val lst3_3 = lst0.sortWith((x,y) => (x>y))
lst3_3: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
reverse
override def reverse : scala.collection.immutable.List[A] = { /* compiled code */ }
反转.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst4 = lst0.reverse
lst4: List[Int] = List(2, 6, 4, 5, 3, 0, 8, 9, 7, 1)
grouped()
/groupBy()
- def grouped(size : scala.Int) : scala.collection.Iterator[Repr] = { /* compiled code */ }
- def groupBy[K](f : scala.Function1[A, K]) : scala.collection.immutable.Map[K, Repr] = { /* compiled code */ }
grouped() 按指定的大小将元素依次分组, 返回的是一个 Iterator, 其中每个元素是一个 List (可通过 toList 转换为 List 的 List);
groupBy() 指定分类的函数, 返回的是一个 Map, 其 key 为分类函数的结果, value 为属于该分类的元素组成的 List.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
#grouped()
scala> val lst5 = lst0.grouped(4)
lst5: Iterator[List[Int]] = non-empty iterator
scala> lst5.toList
res1: List[List[Int]] = List(List(1, 7, 9, 8), List(0, 3, 5, 4), List(6, 2))
# groupBy()
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2,3,2,3)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2, 3, 2, 3)
scala> val lst8 = lst0.groupBy(x => (x))
lst8: scala.collection.immutable.Map[Int,List[Int]] = Map(0 -> List(0), 5 -> List(5), 1 -> List(1), 6 -> List(6), 9 -> List(9), 2 -> List(2, 2), 7 -> List(7), 3 -> List(3, 3, 3), 8 -> List(8), 4 -> List(4))
scala> val lst8 = lst0.groupBy(x => (x)).toList
lst8: List[(Int, List[Int])] = List((0,List(0)), (5,List(5)), (1,List(1)), (6,List(6)), (9,List(9)), (2,List(2, 2)), (7,List(7)), (3,List(3, 3, 3)), (8,List(8)), (4,List(4)))
scala> val lst8 = lst0.groupBy(x => (x%2==1)).toList
lst8: List[(Boolean, List[Int])] = List((false,List(8, 0, 4, 6, 2, 2)), (true,List(1, 7, 9, 3, 5, 3, 3)))
reduce()
def reduce[A1 >: A](op : scala.Function2[A1, A1, A1]) : A1 = { /* compiled code */ }
归约(化简): 使用一个二元操作将集合中的所有元素合并为一个值. 配合 .par 可以进行并行化计算.
scala> val array = Array(1,2,3,4,5,6,7)
array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7)
scala> val sum = array.reduce(_+_)
sum: Int = 28
scala> array.par.reduce(_+_)
res3: Int = 28
fold()
def fold[U >: T](z : U)(op : scala.Function2[U, U, U]) : U = { /* compiled code */ }
scala> val array = Array(1,2,3,4,5,6,7)
array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7)
scala> array.fold(10)(_+_)
res4: Int = 38
scala> array.par.fold(10)(_+_)
res5: Int = 98
scala> array.par.fold(0)(_+_)
res6: Int = 28
aggregate()
def aggregate[B](z : => B)(seqop : scala.Function2[B, A, B], combop : scala.Function2[B, B, B]) : B = { /* compiled code */ }
聚合函数.
scala> val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
arr: List[List[Int]] = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
scala> arr.aggregate(0)(_+_.sum, _+_)
res7: Int = 20
intersect
/union
/diff
- def intersect[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ }
- override def union[B >: A, That](that : scala.collection.GenSeq[B])(implicit bf : scala.collection.generic.CanBuildFrom[Repr, B, That]) : That = { /* compiled code */ }
- def diff[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ }
scala> val l1 = List(5,6,4,7)
l1: List[Int] = List(5, 6, 4, 7)
scala> val l2 = List(1,2,3,4)
l2: List[Int] = List(1, 2, 3, 4)
# 并集 (注意: List 的 union 只是将两个集合拼接, 不会去重, 所以结果中 4 出现了两次)
scala> val unionL1 = l1.union(l2)
unionL1: List[Int] = List(5, 6, 4, 7, 1, 2, 3, 4)
# 交集
scala> val insercetionL1 = l1.intersect(l2)
insercetionL1: List[Int] = List(4)
# 差集
scala> val differenceL1 = l1.diff(l2)
differenceL1: List[Int] = List(5, 6, 7)
split()
/**
 * Splits this string around matches of the given regular expression.
 * Delegates to the two-argument overload with a limit of 0.
 *
 * @param regex the delimiting regular expression
 * @return the array of strings produced by {@code split(regex, 0)}
 */
public String[] split(String regex) {
return split(regex, 0);
}
分割字符串, 返回一个字符串数组.
scala> val str1 = "a b c d e"
str1: String = a b c d e
scala> val strArray1 = str1.split(" ")
strArray1: Array[String] = Array(a, b, c, d, e)
scala> strArray1
res4: Array[String] = Array(a, b, c, d, e)
// Sample data shared by the examples in this article.
// Integers used by the map/filter/sorted/reverse/grouped examples.
val lst0 = List(1,7,9,8,0,3,5,4,6,2)
// Sentences used by the split/flatten/flatMap examples.
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
// Numbers used by the reduce/fold examples.
val array = Array(1,2,3,4,5,6,7)
// Nested lists used by the aggregate example.
val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
// Two lists used by the union/intersect/diff examples.
val l1 = List(5,6,4,7)
val l2 = List(1,2,3,4)
package com.yanxml.quick_scala.basic.train
import org.junit.Test
/**
 * Quick drills for the basic Scala collection operators.
 *
 * Date: 2019-04-24
 */
class QuickBasicTrain {

  /**
   * Runs every drill in sequence: mapping, filtering, sorting, grouping,
   * a word count, reduce/fold/aggregate and the set-like list operations.
   *
   * The intermediate results are bound to named locals only so that each
   * operator's result can be inspected (e.g. in a debugger); nothing is
   * asserted. The only observable effect is the printing of the array
   * elements in the reduce section.
   */
  @Test
  def train(): Unit = {
    // Base list used by the first group of drills.
    val lst0 = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
    // New list with every element multiplied by 10.
    val lst1 = lst0.map(_ * 10)
    // New list holding only the even elements.
    val lst2 = lst0.filter(_ % 2 == 0)
    // Three ways to sort: natural order, by key, and with a custom comparison.
    val lst3_1 = lst0.sorted
    val lst3_2 = lst0.sortBy(x => x)
    val lst3_3 = lst0.sortWith((x, y) => x > y)
    // Reversed order.
    val lst4 = lst0.reverse
    // Chunks of 4 elements; the result type is Iterator[List[Int]].
    val lst5 = lst0.grouped(4)
    // Materialise the iterator into a List of chunks.
    val lst6 = lst0.grouped(4).toList
    // Flatten the chunks back into a single List.
    val lst7 = lst0.grouped(4).toList.flatten
    // Partition into a Map keyed by "is odd".
    val lst8 = lst0.groupBy(x => x % 2 == 1)

    // ---- Word count ----
    val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
    // Split on spaces, then flatten; flatMap does both steps at once.
    val linesSplitFlat = lines.map(_.split(" ")).flatten
    val linesSplitFlat2 = lines.flatMap(_.split(" "))
    // Pair every word with a count of 1.
    val linesCountOfOne = linesSplitFlat.map((_, 1))
    // Map[word, List[(word, 1)]]
    val linesGroupBy = linesCountOfOne.groupBy(_._1)
    // Wrong: linesGroupBy.map(_._1, _._2.size) -- every underscore stands for a
    // separate parameter, so this is not a one-argument function over the pair.
    val linesCount = linesGroupBy.map(t => (t._1, t._2.size))
    // A Map has no ordering of its own; convert to a List before sorting by count.
    val linesCountSorted = linesCount.toList.sortBy(_._2)
    // The whole word count as a single expression, counting by group size.
    val linesCountAllOne = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(_._1)
      .map(t => (t._1, t._2.size))
    // Same result, summing the 1s instead of taking the size. A strict map is
    // used rather than mapValues, which yields a lazy view in 2.12 and is
    // deprecated in 2.13.
    val linesCountAllOne2 = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(_._1)
      .map { case (word, pairs) => word -> pairs.foldLeft(0)(_ + _._2) }

    // ---- reduce ----
    val array = Array(1, 2, 3, 4, 5, 6, 7)
    // foreach, not map: printing is a side effect and map would only build a
    // throw-away Array[Unit]. Prints the elements, then each element plus one,
    // then the elements again.
    array.foreach(println)
    array.foreach(x => println(x + 1))
    array.foreach(println)
    // On a sequential collection reduce works left to right:
    // ((((((1+2)+3)+4)+5)+6)+7)
    val sum = array.reduce(_ + _)
    // Same reduction through the parallel-collection interface.
    val parSum = array.par.reduce(_ + _)

    // ---- fold: reduce with an initial value, passed in a curried parameter list ----
    val foldSeq = array.fold(10)(_ + _)
    // 10 is not a neutral element for +, so the parallel result depends on how
    // the array is partitioned and generally differs from the sequential one.
    val foldPar = array.par.fold(10)(_ + _)
    // With the neutral element 0 the parallel result equals the plain sum.
    val foldParZero = array.par.fold(0)(_ + _)

    // ---- aggregate: aggregate(<initial value>)(<seqop>, <combop>) ----
    val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
    // seqop adds each inner list's sum to the accumulator; combop merges partial sums.
    val aggSum = arr.aggregate(0)(_ + _.sum, _ + _)

    // ---- union / intersect / diff ----
    val l1 = List(5, 6, 4, 7)
    val l2 = List(1, 2, 3, 4)
    // Union: on List this is plain concatenation, duplicates are kept.
    val unionL1 = l1.union(l2)
    // Intersection.
    val intersectionL1 = l1.intersect(l2)
    // Difference: elements of l1 that are not in l2.
    val differenceL1 = l1.diff(l2)
  }
}
虽然有些函数的定义还不能像 Java 的定义那样完全看懂, 但先记录于此, 以后多看几次加深理解.
此外, 有许多的函数暂没有列举出来. 后续再进行补充.
[1]. Scala 中的foreach和map方法比较