Spark: writing output to multiple directories


import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat

/**
  * Multi-directory output: route each record to an output file named after its key.
  */
class RDDMultipleTextOutputFormat extends MultipleTextOutputFormat[Any, Any] {

  // Choose the output file for this record: use the record's key as the file name
  override def generateFileNameForKeyValue(key: Any, value: Any, name: String): String =
    key.asInstanceOf[String]

  // Drop the key when writing, so only the value appears in the output file
  override def generateActualKey(key: Any, value: Any): Any =
    NullWritable.get()

}
 
  
// Usage
 
  
modeltags
  .partitionBy(new HashPartitioner(20)) // repartition by the new key first, otherwise records with the same key can be written from multiple tasks and end up garbled or lost
  .saveAsHadoopFile(outputPath, classOf[String], classOf[String], classOf[RDDMultipleTextOutputFormat])
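A minimal end-to-end sketch of the same idea, assuming a local SparkContext and a small in-memory RDD of (tag, line) string pairs; the object name, sample data, and output path are illustrative only, not part of the original code:

import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

object MultiDirOutputDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("MultiDirOutputDemo").setMaster("local[2]"))

    // Sample (key, value) pairs; each distinct key becomes its own output file
    val modeltags = sc.parallelize(Seq(
      ("sports", "user1 likes football"),
      ("music",  "user2 likes jazz"),
      ("sports", "user3 likes tennis")
    ))

    val outputPath = "/tmp/multi-dir-output" // hypothetical path

    modeltags
      .partitionBy(new HashPartitioner(20)) // keep all records of a key in one partition so only one task writes that key's file
      .saveAsHadoopFile(outputPath, classOf[String], classOf[String], classOf[RDDMultipleTextOutputFormat])

    sc.stop()
  }
}

After running this, outputPath contains one file per key (e.g. "sports", "music"), each holding only the values, since generateActualKey returns NullWritable.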

