Spark custom accumulators and custom events

1. A simple accumulator can be implemented whose only job is to keep the last string added to it.


import org.apache.spark.util.AccumulatorV2;

// A minimal accumulator that keeps only the last string added to it.
public class MyAccumulator extends AccumulatorV2<String, String> {

  private String str = "";

  public MyAccumulator() {
  }

  public MyAccumulator(String str) {
    this.str = str;
  }

  @Override
  public boolean isZero() {
    // The original compared str to itself, which is always true;
    // the zero state (the state after reset()) is the empty string.
    return str.isEmpty();
  }

  @Override
  public AccumulatorV2<String, String> copy() {
    return new MyAccumulator(str);
  }

  @Override
  public void reset() {
    str = "";
  }

  @Override
  public void add(String str) {
    // Overwrite rather than accumulate: only the last value survives.
    this.str = str;
  }

  @Override
  public void merge(AccumulatorV2<String, String> other) {
    str = other.value();
  }

  @Override
  public String value() {
    return str;
  }
}

Note the read/write split when using it: the accumulator's value can only be read on the driver side, while executors can only write to it via add(). A usage sketch follows below.
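To make that split concrete, here is a minimal usage sketch in Scala (the SparkSession setup and the name "lastString" are illustrative, not from the original code):

import org.apache.spark.sql.SparkSession

object MyAccumulatorDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("acc-demo").getOrCreate()
    val sc = spark.sparkContext

    // Register the custom accumulator under a name so it appears in the UI.
    val acc = new MyAccumulator()
    sc.register(acc, "lastString")

    // Executors write to the accumulator via add(); with several partitions,
    // which value ends up being "last" depends on task completion order.
    sc.parallelize(Seq("a", "b", "c")).foreach(s => acc.add(s))

    // The merged value may only be read back here, on the driver.
    println(acc.value)

    spark.stop()
  }
}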

2. A simple custom event and its listener can also be implemented.

import java.util.concurrent.{ExecutorService, Executors}

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler._

// A custom event carrying an arbitrary description string.
case class MyEvent(description: String) extends SparkListenerEvent

class MyEventSparkAppListener extends SparkListener with Logging {

  val pool: ExecutorService = Executors.newCachedThreadPool()

  override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit = {
    val appId = applicationStart.appId
    logInfo("*************************************************** " + appId.getOrElse("unknown"))
  }

  override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
    logInfo("************************ app end time ************************ " + applicationEnd.time)
  }

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    logInfo("************************ received SparkListenerEvent ************************ " + event.toString)
    event match {
      case mevent: MyEvent =>
        logInfo("************************ received MyEvent ************************ " + mevent.toString)
        // Hand long-running business logic, e.g. process(mevent.description),
        // off to the thread pool so the listener bus is not blocked.
        for (_ <- 1 to 10) {
          pool.execute(new Runnable {
            def run(): Unit = {
              logInfo("thread name: " + Thread.currentThread().getName)
              try {
                Thread.sleep(1000)
              } catch {
                case ex: InterruptedException => ex.printStackTrace()
              }
            }
          })
        }
      case _ => // ignore all other event types
    }
  }
}
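To wire this up, the listener is registered on the SparkContext and the custom event is posted to the listener bus. Note that SparkContext.listenerBus is private[spark], so plain user code cannot call post() on it directly; a common workaround, sketched below under that assumption, is a tiny bridge object compiled into the org.apache.spark package (ListenerBusBridge is a hypothetical name, not a Spark API):

// File 1: the bridge, declared in org.apache.spark so it can reach
// the private[spark] listener bus.
package org.apache.spark

import org.apache.spark.scheduler.SparkListenerEvent

object ListenerBusBridge {
  // Forward an arbitrary event; registered listeners receive it in onOtherEvent.
  def post(sc: SparkContext, event: SparkListenerEvent): Unit =
    sc.listenerBus.post(event)
}

// File 2: driver-side usage (the SparkSession setup is illustrative).
val spark = SparkSession.builder().master("local[*]").appName("event-demo").getOrCreate()
val sc = spark.sparkContext

// Register the custom listener, then fire the custom event;
// it arrives in onOtherEvent above.
sc.addSparkListener(new MyEventSparkAppListener)
org.apache.spark.ListenerBusBridge.post(sc, MyEvent("hello from driver"))

Alternatively, the listener can be attached without any code by setting spark.extraListeners to the listener's fully qualified class name in the Spark configuration; the listener needs a zero-argument constructor for that to work.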

 
