Spark UDF, UDAF

UDF

def filterTdWithOp(operator: String): Boolean = {
  // Match any of the three spellings of "China Unicom" in the operator field
  val x = ".*中国联通.*"
  val y = ".*CHN-UNICOM.*"
  val z = ".*China Unicom.*"
  operator != null && operator.matches(x + "|" + y + "|" + z)
}
sqlContext.udf.register("filterTdWithOp", filterTdWithOp _)
td.registerTempTable("td")
val tdOp = sqlContext.sql("select *,filterTdWithOp(operator) as fiOp from td").filter("fiOp = true")
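
Alternatively, the same function can be wrapped with functions.udf and applied directly through the DataFrame API instead of going through SQL. A minimal sketch, assuming the td DataFrame and filterTdWithOp defined above (the val names here are only for illustration):

import org.apache.spark.sql.functions.udf

// Wrap the Scala function as a UserDefinedFunction that can be applied to Columns
val filterTdWithOpUdf = udf(filterTdWithOp _)
// Keep only the rows whose operator matches China Unicom
val tdOpDf = td.filter(filterTdWithOpUdf(td("operator")))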

UDAF

It is best to consult the official documentation for the details.

import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

object MaxPoint extends Aggregator[Input, Input, Input] {
  // Initial value: the "no point seen yet" element
  override def zero: Input = Input(0, 0, 0, "")

  // Combine two values to produce a new value. For performance, the function may modify `b`
  // and return it instead of constructing a new object
  override def reduce(b: Input, a: Input): Input =
    if (b.pointNum > a.pointNum) b else a

  // Merge two intermediate values
  override def merge(b: Input, a: Input): Input =
    if (b.pointNum > a.pointNum) b else a

  // Output the final result
  override def finish(reduction: Input): Input = reduction

  // Required since Spark 2.0: encoders for the intermediate buffer type and the output type
  override def bufferEncoder: Encoder[Input] = Encoders.product[Input]
  override def outputEncoder: Encoder[Input] = Encoders.product[Input]
}

Spark 2.0 and later add two more methods that must be overridden, bufferEncoder and outputEncoder, which specify the Encoders for the intermediate buffer type and the result type; they are included in the example above.
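
For completeness, a minimal usage sketch follows. The Input case class is not shown in the original post, so the field names other than pointNum are assumptions, and spark is assumed to be an existing SparkSession:

import spark.implicits._

// Hypothetical Input definition; only pointNum is referenced by MaxPoint
case class Input(id: Int, ts: Int, pointNum: Int, name: String)

// Typed usage on a Dataset[Input]: toColumn turns the Aggregator into a TypedColumn
val ds = spark.createDataset(Seq(
  Input(1, 0, 3, "a"),
  Input(2, 0, 7, "b")
))
val maxPoint = ds.select(MaxPoint.toColumn).first()   // Input(2,0,7,"b")

In Spark 3.0 and later the same Aggregator can also be registered for SQL use via functions.udaf.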
