Task not serializable

# Task not serializable 问题出现
查阅 Google 后发现: 不能把 rdd 的 map 中使用的函数定义在一个不可序列化的 class 里, 否则闭包会捕获该类实例, 导致 Task not serializable
网址:
http://stackoverflow.com/questions/29295838/org-apache-spark-sparkexception-task-not-serializable
我的办法是: 将此类中 map(func) 里用到的 func 移到伴生对象中, 并在调用处用 `Test1.someFunc` 加以限定(伴生对象的成员在类内部并不会自动进入作用域)

看例子:

package mangocool

import org.apache.spark.{SparkConf, SparkContext}

// Entry point of the BROKEN example: running this throws
// org.apache.spark.SparkException: Task not serializable (see Test1 below).
object TaskNotSerializationTest {
  def main(args: Array[String]) {
    new Test1().runJob
  }
}

// Holds a single shared SparkContext for the example.
// Keeping it in an object means the driver-side context itself is never
// captured by RDD closures.
object Spark1 {
  val conf = new SparkConf().setMaster("local").setAppName("TaskNotSerializationTest")
  val ctx = new SparkContext(conf)
}

// BROKEN: someFunc is an instance method, so the eta-expansion
// `someFunc(_)` closes over `this`. Spark must serialize the closure to
// ship it to executors, and since Test1 does not extend Serializable the
// job fails with "Task not serializable".
class Test1 {
  val rddList = Spark1.ctx.parallelize(List(1,2,3))

  def runJob() =  {
    // The closure here captures the enclosing Test1 instance — this is the bug.
    val after = rddList.map(someFunc(_))
    after.collect().map(println(_))
  }

  def someFunc(a:Int) = a+1

}

改为

package mangocool

import org.apache.spark.{SparkConf, SparkContext}

// Entry point of the FIXED example: constructs the job wrapper and runs it.
object TaskNotSerializationTest {
  def main(args: Array[String]) {
    val job = new Test1()
    job.runJob()
  }
}

// Single shared SparkContext for the whole example, running in local mode.
object Spark1 {
  private val appName = "TaskNotSerializationTest"
  val conf = new SparkConf().setAppName(appName).setMaster("local")
  val ctx = new SparkContext(conf)
}

object Test1 {
  /** Increment helper. Lives in the companion object so that Spark closures
    * referencing it do not drag the (non-serializable) Test1 instance along. */
  def someFunc(a: Int): Int = 1 + a
}
class Test1 {
  // RDD built from the shared SparkContext held by the Spark1 object.
  val rddList = Spark1.ctx.parallelize(List(1, 2, 3))

  /** Runs the job. The mapped function comes from the companion object, so
    * the shipped closure references Test1.MODULE$ instead of capturing
    * `this` — avoiding the Task-not-serializable error.
    */
  def runJob() = {
    // FIX: companion-object members are NOT automatically in scope inside the
    // class body, so the unqualified `someFunc(_)` of the original would not
    // compile; it must be qualified (or imported via `import Test1._`).
    val after = rddList.map(Test1.someFunc(_))
    after.collect().map(println(_))
  }

}

你可能感兴趣的:(spark&sparkr)