map operates on every element of the RDD;
mapPartitions operates on each partition as a whole (its function receives an Iterator over that partition);
mapPartitionsWithIndex: in addition, it hands you each partition's index together with the values in that partition. See the source:
def mapPartitionsWithIndex[U](f : scala.Function2[scala.Int, scala.Iterator[T], scala.Iterator[U]], preservesPartitioning : scala.Boolean = { /* compiled code */ })(implicit evidence$7 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
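A minimal sketch of the difference, as one might try it in the spark-shell (the 2-partition split and the helper name func are assumptions for illustration):

val rdd = sc.parallelize(List(1, 2, 3, 4, 5, 6), 2)   // 2 partitions: (1,2,3) and (4,5,6)

// map: the function is called once per element
rdd.map(_ * 10).collect()                              // Array(10, 20, 30, 40, 50, 60)

// mapPartitions: the function is called once per partition and receives an Iterator[Int]
rdd.mapPartitions(it => it.map(_ * 10)).collect()      // same result, only 2 invocations

// mapPartitionsWithIndex: the function also receives the partition index
def func(index: Int, it: Iterator[Int]): Iterator[String] =
  it.map(x => s"partition $index -> $x")
rdd.mapPartitionsWithIndex(func).collect()
// Array(partition 0 -> 1, partition 0 -> 2, partition 0 -> 3,
//       partition 1 -> 4, partition 1 -> 5, partition 1 -> 6)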
aggregate: this is an action. The first parameter is the initial (zero) value; the second parameter list takes two functions, each with two arguments: the first (seqOp) merges the elements within each partition, the second (combOp) then merges the per-partition results, and a single value is returned.
In other words: the first argument (the zero value) is used as the starting value (it also takes part in the final merge); the second argument is a function doing the local, per-partition computation; the third is a function that combines the results of those local computations.
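A minimal sketch of aggregate in the spark-shell (assuming the same 2-partition rdd as above):

val rdd = sc.parallelize(List(1, 2, 3, 4, 5, 6), 2)

// zeroValue = 0; seqOp sums inside each partition, combOp sums the partial sums
rdd.aggregate(0)(_ + _, _ + _)            // 21

// the zero value is applied once per partition and once more in the final merge:
// (10+1+2+3) + (10+4+5+6) + 10 = 51
rdd.aggregate(10)(_ + _, _ + _)           // 51

// seqOp and combOp need not be the same: max per partition, then sum the maxima
rdd.aggregate(0)(math.max(_, _), _ + _)   // max(1,2,3) + max(4,5,6) = 9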
package org.apache.spark.rdd
abstract class RDD[T](@scala.transient private var _sc : org.apache.spark.SparkContext, private var deps : scala.Seq[org.apache.spark.Dependency[_]])(implicit evidence$1 : scala.reflect.ClassTag[T]) extends scala.AnyRef with scala.Serializable with org.apache.spark.Logging {
def this(oneParent : org.apache.spark.rdd.RDD[_])(implicit evidence$2 : scala.reflect.ClassTag[T]) = { /* compiled code */ }
private[spark] def conf : org.apache.spark.SparkConf = { /* compiled code */ }
@org.apache.spark.annotation.DeveloperApi
def compute(split : org.apache.spark.Partition, context : org.apache.spark.TaskContext) : scala.Iterator[T]
protected def getPartitions : scala.Array[org.apache.spark.Partition]
protected def getDependencies : scala.Seq[org.apache.spark.Dependency[_]] = { /* compiled code */ }
protected def getPreferredLocations(split : org.apache.spark.Partition) : scala.Seq[scala.Predef.String] = { /* compiled code */ }
@scala.transient
val partitioner : scala.Option[org.apache.spark.Partitioner] = { /* compiled code */ }
def sparkContext : org.apache.spark.SparkContext = { /* compiled code */ }
val id : scala.Int = { /* compiled code */ }
@scala.transient
var name : scala.Predef.String = { /* compiled code */ }
def setName(_name : scala.Predef.String) : RDD.this.type = { /* compiled code */ }
def persist(newLevel : org.apache.spark.storage.StorageLevel) : RDD.this.type = { /* compiled code */ }
def persist() : RDD.this.type = { /* compiled code */ }
def cache() : RDD.this.type = { /* compiled code */ }
def unpersist(blocking : scala.Boolean = { /* compiled code */ }) : RDD.this.type = { /* compiled code */ }
def getStorageLevel : org.apache.spark.storage.StorageLevel = { /* compiled code */ }
final def dependencies : scala.Seq[org.apache.spark.Dependency[_]] = { /* compiled code */ }
final def partitions : scala.Array[org.apache.spark.Partition] = { /* compiled code */ }
final def preferredLocations(split : org.apache.spark.Partition) : scala.Seq[scala.Predef.String] = { /* compiled code */ }
final def iterator(split : org.apache.spark.Partition, context : org.apache.spark.TaskContext) : scala.Iterator[T] = { /* compiled code */ }
private[spark] def getNarrowAncestors : scala.Seq[org.apache.spark.rdd.RDD[_]] = { /* compiled code */ }
private[spark] def computeOrReadCheckpoint(split : org.apache.spark.Partition, context : org.apache.spark.TaskContext) : scala.Iterator[T] = { /* compiled code */ }
private[spark] def withScope[U](body : => U) : U = { /* compiled code */ }
def map[U](f : scala.Function1[T, U])(implicit evidence$3 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
def flatMap[U](f : scala.Function1[T, scala.TraversableOnce[U]])(implicit evidence$4 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
def filter(f : scala.Function1[T, scala.Boolean]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def distinct(numPartitions : scala.Int)(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def distinct() : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def repartition(numPartitions : scala.Int)(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def coalesce(numPartitions : scala.Int, shuffle : scala.Boolean = { /* compiled code */ })(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def sample(withReplacement : scala.Boolean, fraction : scala.Double, seed : scala.Long = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def randomSplit(weights : scala.Array[scala.Double], seed : scala.Long = { /* compiled code */ }) : scala.Array[org.apache.spark.rdd.RDD[T]] = { /* compiled code */ }
private[spark] def randomSampleWithRange(lb : scala.Double, ub : scala.Double, seed : scala.Long) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def takeSample(withReplacement : scala.Boolean, num : scala.Int, seed : scala.Long = { /* compiled code */ }) : scala.Array[T] = { /* compiled code */ }
def union(other : org.apache.spark.rdd.RDD[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def ++(other : org.apache.spark.rdd.RDD[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def sortBy[K](f : scala.Function1[T, K], ascending : scala.Boolean = { /* compiled code */ }, numPartitions : scala.Int = { /* compiled code */ })(implicit ord : scala.Ordering[K], ctag : scala.reflect.ClassTag[K]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def intersection(other : org.apache.spark.rdd.RDD[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def intersection(other : org.apache.spark.rdd.RDD[T], partitioner : org.apache.spark.Partitioner)(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def intersection(other : org.apache.spark.rdd.RDD[T], numPartitions : scala.Int) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def glom() : org.apache.spark.rdd.RDD[scala.Array[T]] = { /* compiled code */ }
def cartesian[U](other : org.apache.spark.rdd.RDD[U])(implicit evidence$5 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[scala.Tuple2[T, U]] = { /* compiled code */ }
def groupBy[K](f : scala.Function1[T, K])(implicit kt : scala.reflect.ClassTag[K]) : org.apache.spark.rdd.RDD[scala.Tuple2[K, scala.Iterable[T]]] = { /* compiled code */ }
def groupBy[K](f : scala.Function1[T, K], numPartitions : scala.Int)(implicit kt : scala.reflect.ClassTag[K]) : org.apache.spark.rdd.RDD[scala.Tuple2[K, scala.Iterable[T]]] = { /* compiled code */ }
def groupBy[K](f : scala.Function1[T, K], p : org.apache.spark.Partitioner)(implicit kt : scala.reflect.ClassTag[K], ord : scala.Ordering[K] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[scala.Tuple2[K, scala.Iterable[T]]] = { /* compiled code */ }
def pipe(command : scala.Predef.String) : org.apache.spark.rdd.RDD[scala.Predef.String] = { /* compiled code */ }
def pipe(command : scala.Predef.String, env : scala.collection.Map[scala.Predef.String, scala.Predef.String]) : org.apache.spark.rdd.RDD[scala.Predef.String] = { /* compiled code */ }
def pipe(command : scala.Seq[scala.Predef.String], env : scala.collection.Map[scala.Predef.String, scala.Predef.String] = { /* compiled code */ }, printPipeContext : scala.Function1[scala.Function1[scala.Predef.String, scala.Unit], scala.Unit] = { /* compiled code */ }, printRDDElement : scala.Function2[T, scala.Function1[scala.Predef.String, scala.Unit], scala.Unit] = { /* compiled code */ }, separateWorkingDir : scala.Boolean = { /* compiled code */ }) : org.apache.spark.rdd.RDD[scala.Predef.String] = { /* compiled code */ }
def mapPartitions[U](f : scala.Function1[scala.Iterator[T], scala.Iterator[U]], preservesPartitioning : scala.Boolean = { /* compiled code */ })(implicit evidence$6 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
def mapPartitionsWithIndex[U](f : scala.Function2[scala.Int, scala.Iterator[T], scala.Iterator[U]], preservesPartitioning : scala.Boolean = { /* compiled code */ })(implicit evidence$7 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
@scala.deprecated("use TaskContext.get", "1.2.0")
@org.apache.spark.annotation.DeveloperApi
def mapPartitionsWithContext[U](f : scala.Function2[org.apache.spark.TaskContext, scala.Iterator[T], scala.Iterator[U]], preservesPartitioning : scala.Boolean = { /* compiled code */ })(implicit evidence$8 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
@scala.deprecated("use mapPartitionsWithIndex", "0.7.0")
def mapPartitionsWithSplit[U](f : scala.Function2[scala.Int, scala.Iterator[T], scala.Iterator[U]], preservesPartitioning : scala.Boolean = { /* compiled code */ })(implicit evidence$9 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
@scala.deprecated("use mapPartitionsWithIndex", "1.0.0")
def mapWith[A, U](constructA : scala.Function1[scala.Int, A], preservesPartitioning : scala.Boolean = { /* compiled code */ })(f : scala.Function2[T, A, U])(implicit evidence$10 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
@scala.deprecated("use mapPartitionsWithIndex and flatMap", "1.0.0")
def flatMapWith[A, U](constructA : scala.Function1[scala.Int, A], preservesPartitioning : scala.Boolean = { /* compiled code */ })(f : scala.Function2[T, A, scala.Seq[U]])(implicit evidence$11 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
@scala.deprecated("use mapPartitionsWithIndex and foreach", "1.0.0")
def foreachWith[A](constructA : scala.Function1[scala.Int, A])(f : scala.Function2[T, A, scala.Unit]) : scala.Unit = { /* compiled code */ }
@scala.deprecated("use mapPartitionsWithIndex and filter", "1.0.0")
def filterWith[A](constructA : scala.Function1[scala.Int, A])(p : scala.Function2[T, A, scala.Boolean]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def zip[U](other : org.apache.spark.rdd.RDD[U])(implicit evidence$12 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[scala.Tuple2[T, U]] = { /* compiled code */ }
def zipPartitions[B, V](rdd2 : org.apache.spark.rdd.RDD[B], preservesPartitioning : scala.Boolean)(f : scala.Function2[scala.Iterator[T], scala.Iterator[B], scala.Iterator[V]])(implicit evidence$13 : scala.reflect.ClassTag[B], evidence$14 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def zipPartitions[B, V](rdd2 : org.apache.spark.rdd.RDD[B])(f : scala.Function2[scala.Iterator[T], scala.Iterator[B], scala.Iterator[V]])(implicit evidence$15 : scala.reflect.ClassTag[B], evidence$16 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def zipPartitions[B, C, V](rdd2 : org.apache.spark.rdd.RDD[B], rdd3 : org.apache.spark.rdd.RDD[C], preservesPartitioning : scala.Boolean)(f : scala.Function3[scala.Iterator[T], scala.Iterator[B], scala.Iterator[C], scala.Iterator[V]])(implicit evidence$17 : scala.reflect.ClassTag[B], evidence$18 : scala.reflect.ClassTag[C], evidence$19 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def zipPartitions[B, C, V](rdd2 : org.apache.spark.rdd.RDD[B], rdd3 : org.apache.spark.rdd.RDD[C])(f : scala.Function3[scala.Iterator[T], scala.Iterator[B], scala.Iterator[C], scala.Iterator[V]])(implicit evidence$20 : scala.reflect.ClassTag[B], evidence$21 : scala.reflect.ClassTag[C], evidence$22 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def zipPartitions[B, C, D, V](rdd2 : org.apache.spark.rdd.RDD[B], rdd3 : org.apache.spark.rdd.RDD[C], rdd4 : org.apache.spark.rdd.RDD[D], preservesPartitioning : scala.Boolean)(f : scala.Function4[scala.Iterator[T], scala.Iterator[B], scala.Iterator[C], scala.Iterator[D], scala.Iterator[V]])(implicit evidence$23 : scala.reflect.ClassTag[B], evidence$24 : scala.reflect.ClassTag[C], evidence$25 : scala.reflect.ClassTag[D], evidence$26 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def zipPartitions[B, C, D, V](rdd2 : org.apache.spark.rdd.RDD[B], rdd3 : org.apache.spark.rdd.RDD[C], rdd4 : org.apache.spark.rdd.RDD[D])(f : scala.Function4[scala.Iterator[T], scala.Iterator[B], scala.Iterator[C], scala.Iterator[D], scala.Iterator[V]])(implicit evidence$27 : scala.reflect.ClassTag[B], evidence$28 : scala.reflect.ClassTag[C], evidence$29 : scala.reflect.ClassTag[D], evidence$30 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.RDD[V] = { /* compiled code */ }
def foreach(f : scala.Function1[T, scala.Unit]) : scala.Unit = { /* compiled code */ }
def foreachPartition(f : scala.Function1[scala.Iterator[T], scala.Unit]) : scala.Unit = { /* compiled code */ }
def collect() : scala.Array[T] = { /* compiled code */ }
def toLocalIterator : scala.Iterator[T] = { /* compiled code */ }
@scala.deprecated("use collect", "1.0.0")
def toArray() : scala.Array[T] = { /* compiled code */ }
def collect[U](f : scala.PartialFunction[T, U])(implicit evidence$31 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
def subtract(other : org.apache.spark.rdd.RDD[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def subtract(other : org.apache.spark.rdd.RDD[T], numPartitions : scala.Int) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def subtract(other : org.apache.spark.rdd.RDD[T], p : org.apache.spark.Partitioner)(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
def reduce(f : scala.Function2[T, T, T]) : T = { /* compiled code */ }
def treeReduce(f : scala.Function2[T, T, T], depth : scala.Int = { /* compiled code */ }) : T = { /* compiled code */ }
def fold(zeroValue : T)(op : scala.Function2[T, T, T]) : T = { /* compiled code */ }
def aggregate[U](zeroValue : U)(seqOp : scala.Function2[U, T, U], combOp : scala.Function2[U, U, U])(implicit evidence$32 : scala.reflect.ClassTag[U]) : U = { /* compiled code */ }
def treeAggregate[U](zeroValue : U)(seqOp : scala.Function2[U, T, U], combOp : scala.Function2[U, U, U], depth : scala.Int = { /* compiled code */ })(implicit evidence$33 : scala.reflect.ClassTag[U]) : U = { /* compiled code */ }
def count() : scala.Long = { /* compiled code */ }
@org.apache.spark.annotation.Experimental
def countApprox(timeout : scala.Long, confidence : scala.Double = { /* compiled code */ }) : org.apache.spark.partial.PartialResult[org.apache.spark.partial.BoundedDouble] = { /* compiled code */ }
def countByValue()(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : scala.collection.Map[T, scala.Long] = { /* compiled code */ }
@org.apache.spark.annotation.Experimental
def countByValueApprox(timeout : scala.Long, confidence : scala.Double = { /* compiled code */ })(implicit ord : scala.Ordering[T] = { /* compiled code */ }) : org.apache.spark.partial.PartialResult[scala.collection.Map[T, org.apache.spark.partial.BoundedDouble]] = { /* compiled code */ }
@org.apache.spark.annotation.Experimental
def countApproxDistinct(p : scala.Int, sp : scala.Int) : scala.Long = { /* compiled code */ }
def countApproxDistinct(relativeSD : scala.Double = { /* compiled code */ }) : scala.Long = { /* compiled code */ }
def zipWithIndex() : org.apache.spark.rdd.RDD[scala.Tuple2[T, scala.Long]] = { /* compiled code */ }
def zipWithUniqueId() : org.apache.spark.rdd.RDD[scala.Tuple2[T, scala.Long]] = { /* compiled code */ }
def take(num : scala.Int) : scala.Array[T] = { /* compiled code */ }
def first() : T = { /* compiled code */ }
def top(num : scala.Int)(implicit ord : scala.Ordering[T]) : scala.Array[T] = { /* compiled code */ }
def takeOrdered(num : scala.Int)(implicit ord : scala.Ordering[T]) : scala.Array[T] = { /* compiled code */ }
def max()(implicit ord : scala.Ordering[T]) : T = { /* compiled code */ }
def min()(implicit ord : scala.Ordering[T]) : T = { /* compiled code */ }
def isEmpty() : scala.Boolean = { /* compiled code */ }
def saveAsTextFile(path : scala.Predef.String) : scala.Unit = { /* compiled code */ }
def saveAsTextFile(path : scala.Predef.String, codec : scala.Predef.Class[_ <: org.apache.hadoop.io.compress.CompressionCodec]) : scala.Unit = { /* compiled code */ }
def saveAsObjectFile(path : scala.Predef.String) : scala.Unit = { /* compiled code */ }
def keyBy[K](f : scala.Function1[T, K]) : org.apache.spark.rdd.RDD[scala.Tuple2[K, T]] = { /* compiled code */ }
private[spark] def collectPartitions() : scala.Array[scala.Array[T]] = { /* compiled code */ }
def checkpoint() : scala.Unit = { /* compiled code */ }
def localCheckpoint() : RDD.this.type = { /* compiled code */ }
def isCheckpointed : scala.Boolean = { /* compiled code */ }
private[spark] def isCheckpointedAndMaterialized : scala.Boolean = { /* compiled code */ }
private[rdd] def isLocallyCheckpointed : scala.Boolean = { /* compiled code */ }
def getCheckpointFile : scala.Option[scala.Predef.String] = { /* compiled code */ }
@scala.transient
private[spark] val creationSite : org.apache.spark.util.CallSite = { /* compiled code */ }
@scala.transient
private[spark] val scope : scala.Option[org.apache.spark.rdd.RDDOperationScope] = { /* compiled code */ }
private[spark] def getCreationSite : scala.Predef.String = { /* compiled code */ }
private[spark] def elementClassTag : scala.reflect.ClassTag[T] = { /* compiled code */ }
private[spark] var checkpointData : scala.Option[org.apache.spark.rdd.RDDCheckpointData[T]] = { /* compiled code */ }
protected[spark] def firstParent[U](implicit evidence$34 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
protected[spark] def parent[U](j : scala.Int)(implicit evidence$35 : scala.reflect.ClassTag[U]) : org.apache.spark.rdd.RDD[U] = { /* compiled code */ }
def context : org.apache.spark.SparkContext = { /* compiled code */ }
private[spark] def retag(cls : scala.Predef.Class[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
private[spark] def retag(implicit classTag : scala.reflect.ClassTag[T]) : org.apache.spark.rdd.RDD[T] = { /* compiled code */ }
private[spark] def doCheckpoint() : scala.Unit = { /* compiled code */ }
private[spark] def markCheckpointed() : scala.Unit = { /* compiled code */ }
protected def clearDependencies() : scala.Unit = { /* compiled code */ }
def toDebugString : scala.Predef.String = { /* compiled code */ }
override def toString() : scala.Predef.String = { /* compiled code */ }
def toJavaRDD() : org.apache.spark.api.java.JavaRDD[T] = { /* compiled code */ }
}
object RDD extends scala.AnyRef with scala.Serializable {
implicit def rddToPairRDDFunctions[K, V](rdd : org.apache.spark.rdd.RDD[scala.Tuple2[K, V]])(implicit kt : scala.reflect.ClassTag[K], vt : scala.reflect.ClassTag[V], ord : scala.Ordering[K] = { /* compiled code */ }) : org.apache.spark.rdd.PairRDDFunctions[K, V] = { /* compiled code */ }
implicit def rddToAsyncRDDActions[T](rdd : org.apache.spark.rdd.RDD[T])(implicit evidence$36 : scala.reflect.ClassTag[T]) : org.apache.spark.rdd.AsyncRDDActions[T] = { /* compiled code */ }
implicit def rddToSequenceFileRDDFunctions[K, V](rdd : org.apache.spark.rdd.RDD[scala.Tuple2[K, V]])(implicit kt : scala.reflect.ClassTag[K], vt : scala.reflect.ClassTag[V], keyWritableFactory : org.apache.spark.WritableFactory[K], valueWritableFactory : org.apache.spark.WritableFactory[V]) : org.apache.spark.rdd.SequenceFileRDDFunctions[K, V] = { /* compiled code */ }
implicit def rddToOrderedRDDFunctions[K, V](rdd : org.apache.spark.rdd.RDD[scala.Tuple2[K, V]])(implicit evidence$37 : scala.Ordering[K], evidence$38 : scala.reflect.ClassTag[K], evidence$39 : scala.reflect.ClassTag[V]) : org.apache.spark.rdd.OrderedRDDFunctions[K, V, scala.Tuple2[K, V]] = { /* compiled code */ }
implicit def doubleRDDToDoubleRDDFunctions(rdd : org.apache.spark.rdd.RDD[scala.Double]) : org.apache.spark.rdd.DoubleRDDFunctions = { /* compiled code */ }
implicit def numericRDDToDoubleRDDFunctions[T](rdd : org.apache.spark.rdd.RDD[T])(implicit num : scala.Numeric[T]) : org.apache.spark.rdd.DoubleRDDFunctions = { /* compiled code */ }
}