Greedy Step Averaging: A parameter-free stochastic optimization method

Fregata

https://github.com/TalkingData/Fregata

Combining Features for Logistic Regression
  • data including age, gender, marital status ...
  • data including internet behavior ...
  • data including phone brand ...
  • data including apps installed ...
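To make "combining" concrete: heterogeneous user attributes are encoded into one feature vector that logistic regression consumes. A hedged sketch (the hashing scheme here is illustrative, not Fregata's):

import breeze.linalg.SparseVector

// hash each categorical attribute into a fixed-size feature space
val dim = 1 << 20
def h(feature: String): Int = (feature.hashCode & Int.MaxValue) % dim

val user = Seq("age=25-34", "gender=male", "brand=xiaomi", "app=wechat")
val x = SparseVector.zeros[Double](dim)
user.foreach(f => x(h(f)) = 1.0)   // x is the combined feature vector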

The modules that do the actual computation

Logistic regression derived from approximation formula
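The equation images were lost in extraction. Reconstructing from the calculate code further below (a hedged reading of the implementation, not checked against the paper): with $p_1 = \sigma(w^\top x + b)$, $p_0 = 1 - p_1$ and threshold $\tau = 0.95$, the greedy step for a positive example ($y = 1$) is

\eta^{\mathrm{greedy}} = \frac{p_1 - \tau}{\left[\,\tau\,(1 - p_0 e^{p_0} - p_1 e^{p_1}) + p_1\,(1 - e^{p_0})\,\right]\,\|x\|^2}

and, symmetrically, with $p_0 - \tau$ in the numerator and $p_0\,(1 - e^{p_1})$ in the bracket for $y = 0$.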
Averaging Scheme
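The images here showed the averaging scheme; what the code below implements is a running mean of all greedy steps seen so far. With $\lambda_t = t/(t+1)$,

\bar{\eta}_t = \lambda_t\,\bar{\eta}_{t-1} + (1 - \lambda_t)\,\eta^{\mathrm{greedy}}_t = \frac{1}{t+1}\sum_{s=0}^{t} \eta^{\mathrm{greedy}}_s

so single noisy greedy steps are damped and the effective learning rate stabilizes as $t$ grows.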
The public entry point; the actual local trainer is constructed here and handed to SparkTrainer as a parameter:
//fregata.spark.model.classification
class LogisticRegressionModel(val model:LLogisticRegressionModel) extends ClassificationModel

object LogisticRegression {

  /**
    * Train a logistic regression model with Fregata's GSA optimizer on Spark.
    *
    * @param data          training data as (feature vector, label) pairs
    * @param localEpochNum number of local epochs run within each partition
    * @param epochNum      number of global passes, each ending in weight averaging
    * @return the trained LogisticRegressionModel
    */
  def run(data:RDD[(Vector,Num)],
          localEpochNum:Int = 1 ,
          epochNum:Int = 1) = {
    //class LogisticRegression extends ModelTrainer      
    val trainer = new LLogisticRegression
    new SparkTrainer(trainer)
      .run(data,epochNum,localEpochNum)
    
    //def buildModel(ps:ParameterServer) = new LogisticRegressionModel(ps.get(0))
    new LogisticRegressionModel(trainer.buildModel(trainer.ps))
  }
}
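A hedged usage sketch: it assumes a live SparkContext sc and that the fregata package object provides the Vector, Num and asNum aliases the code above uses.

import breeze.linalg.DenseVector
import fregata._
import fregata.spark.model.classification.LogisticRegression

// toy two-point training set; real data would come from HDFS / LibSVM files
val data = sc.parallelize(Seq(
  (DenseVector(1.0, 0.0, 2.0): Vector, asNum(1.0)),
  (DenseVector(0.0, 1.0, 0.5): Vector, asNum(0.0))
))
val model = LogisticRegression.run(data, localEpochNum = 2, epochNum = 3)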


//fregata.spark.model.SparkTrainer
class SparkTrainer(trainer:ModelTrainer) {

  def run(data:RDD[(Vector,Num)],epochNum:Int,localEpochNum:Int) {
    (0 until epochNum).foreach{
      i =>
        run(data,localEpochNum)
    }
  }

  def run(data:RDD[(Vector,Num)],localEpochNum:Int) {
    val _trainer = this.trainer
    val br_opt = data.sparkContext.broadcast(_trainer)
    val pn = data.partitions.length
    
    
    //  def mapPartitions[U: ClassTag](f: Iterator[T] => Iterator[U],preservesPartitioning: Boolean = false): RDD[U] = withScope {
    //  val cleanedF = sc.clean(f)
    //  new MapPartitionsRDD(
    //  this,
    //  (context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(iter),
    //  preservesPartitioning)
    //  }
    
    // Each partition's contents are passed to the input function f as an Iterator[T];
    // f returns an Iterator[U]. The final RDD is the union of f's results over all partitions.
    
    val ws = data.mapPartitions{
      it =>
        val local_opt = br_opt.value
        
        // the trainer's actual run executes here, on this partition's local data
        local_opt.run(it.toIterable,localEpochNum)
        Iterator( local_opt.ps.get )
    }.treeReduce{
      (a,b) =>
        a.zip(b).map{
          case (w1,w2) => w1 + w2
        }
    }
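    // treeReduce has summed the weights of all partition-local models;
    // dividing each coordinate by pn below yields their average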
    ws.foreach{
      w =>
      val values = w match {
        case w : DenseVector => w.data
        case w : SparseVector => w.data
      }
      var i = 0
      while( i < values.length ) {
        values(i) /= pn
        i += 1
      }
    }
    trainer.ps.set(ws)
  }
}
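Stripped of Spark, the reduce-then-divide above is an element-wise mean of the per-partition weight arrays. A minimal sketch with made-up numbers:

val perPartition = Seq(Array(0.2, 0.4), Array(0.6, 0.8))   // weights from 2 partitions
val summed = perPartition.reduce((a, b) => a.zip(b).map { case (u, v) => u + v })
val averaged = summed.map(_ / perPartition.length)          // Array(0.4, 0.6)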

Parameters are fetched and updated through the parameter server:
class LogisticGradient(ps:ParameterServer) extends Gradient {
  val thres = 0.95
  val update = Array(0.0)
  var stepSize = 0.0
  var i = 0.0
  
  // override calculate to compute the gradient update coefficient
  def calculate(x:Vector,label:Num) : Array[Num] = {
    var weight = ps.get
    if( weight == null ) {
      // lazily allocate the weights: 1 × (n+1), the extra column holds the bias
      ps.init(1,x.length + 1)
      weight = ps.get
    }
    val lambda = i / ( i + 1 )
    i += 1
    val margin = VectorUtil.wxpb(weight(0),x,1.0)
    val p1 = 1.0 / ( 1.0 + math.exp( - margin ) )
    val p0 = 1 - p1
    val b1 = math.exp(p1)
    val b0 = math.exp(p0)
    val x2 = math.pow(norm(x),2.0)
    // compute greedy step size
    val greedyStep = if( label == 1 ) {
      (p1 - thres) / ( thres * (1 - p0 * b0 - p1 * b1) + p1 * (1 - b0) ) / x2
    }else{
      (p0 - thres) / ( thres * (1 - p0 * b0 - p1 * b1 ) + p0 * (1 - b1)) / x2
    }
    // compute averaged step size
    stepSize = lambda * stepSize + (1 - lambda) * greedyStep
    update(0) = 2 * ( p1 - label ) * stepSize
    update
  }
}
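A toy check of the greedy step with concrete numbers (a standalone sketch reusing the formulas from calculate): for an untrained model the margin is 0, so p1 = p0 = 0.5; take x = (1, 2) and label 1.

val thres = 0.95
val p1 = 1.0 / (1.0 + math.exp(-0.0))       // 0.5 for zero margin
val p0 = 1 - p1
val (b0, b1) = (math.exp(p0), math.exp(p1)) // both e^0.5 ≈ 1.6487
val x2 = 1.0 * 1.0 + 2.0 * 2.0              // ||x||^2 = 5
val step = (p1 - thres) / (thres * (1 - p0 * b0 - p1 * b1) + p1 * (1 - b0)) / x2
// ≈ 0.096: numerator and bracket are both negative, so the step comes out positive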

//LogisticRegression.scala
class LogisticRegressionModel(val weights:Vector) extends ClassificationModel{

  var threshold = 0.5
  def setThreshold(t:Double) : this.type = {
    this.threshold = t
    this
  }
  def classPredict(x: Vector): (Num, Num) = {
    val margin = VectorUtil.wxpb(weights,x,1.0)
    val p = 1.0 / ( 1.0 + math.exp( - margin ) )
    val c = if( p > threshold ) 1.0 else 0.0
    (asNum(p),asNum(c))
  }
}
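A hedged scoring sketch; it assumes wxpb(w, x, 1.0) computes w(0..n−1)·x + w(n), consistent with the bias column that ps.init(1, x.length + 1) allocates above.

import breeze.linalg.DenseVector

val w: Vector = DenseVector(0.5, -0.25, 0.1)   // two feature weights + bias
val x: Vector = DenseVector(1.0, 2.0)
val scorer = new LogisticRegressionModel(w).setThreshold(0.3)
val (p, c) = scorer.classPredict(x)            // p = σ(0.5 − 0.5 + 0.1) ≈ 0.525, so c = 1.0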

class LogisticRegression extends ModelTrainer {
  override type M = LogisticRegressionModel
  val ps = newPs    // the parameter server
  val gradient = new LogisticGradient(ps)   // gradient computation bound to ps
  def buildModel(ps:ParameterServer) = new LogisticRegressionModel(ps.get(0))
  def run( data:Iterable[(Vector,Num)] ) = {
  
    // note: the gradient and ps are handed to the optimizer via the Target case class
    val target = Target(gradient,ps)
    new AdaptiveSGD()
      .minimize(target)
      .run(data)
    new LogisticRegressionModel(ps.get(0))
  }
}

//ModelTrainer.scala  core
trait ModelTrainer extends Serializable{

  type M

  def newPs = ParameterServer.create   // create the parameter server
  def ps : ParameterServer
  def buildModel(ps:ParameterServer) : M


    //http://stackoverflow.com/questions/38289353/using-scala-trait-as-a-callback-interface
    // callback syntactic sugar: invoked with the model after each epoch
  def run(data:Iterable[(Vector,Num)],epochNum:Int ,callback : (M,Int) => Unit = null) : M = {
    var model : Any = null
    (0 until epochNum).foreach{
      i =>
        model = run(data)
        if( callback != null ) callback(model.asInstanceOf[M],i)
    }
    model.asInstanceOf[M]
  }

  def run(data:Iterable[(Vector,Num)]) : M

}
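A hedged usage of the epoch callback, with the local trainer and an in-memory dataset samples assumed in scope:

val trainer = new LogisticRegression            // the local trainer defined above
val model = trainer.run(samples, 5, (m, epoch) =>
  println(s"epoch $epoch done")                 // e.g. evaluate m on held-out data here
)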


Target is a case class bundling the Gradient and the ParameterServer:
case class Target(gradient: fregata.optimize.Gradient, ps: fregata.param.ParameterServer)


trait Gradient extends Serializable{
  def calculate(x:Vector,label:Num) : Array[Num]
}

The SGD optimization method
//AdaptiveSGD.scala

class AdaptiveSGD extends StochasticGradientDescent {

  // fixed outer step of 1: the averaged greedy step is already folded
  // into the values LogisticGradient.calculate returns
  override def stepSize(i:Int,x:Vector) = asNum(1d)

}

//StochasticGradientDescent.scala
class StochasticGradientDescent extends Minimizer {

  private var eta = asNum(.1)
  def setStepSize(eta:Num) : this.type = {
    this.eta = eta
    this
  }
  protected def stepSize(itN:Int,x:Vector) = eta

  def run(data:TraversableOnce[(Vector,Num)]) = {
    var i = 0
    data.foreach{
      case (x,label) =>
        val gradients = target.gradient.calculate(x,label)
        val step = stepSize(i,x)
        val delta = gradients.map( v => asNum( v * step ) )
        target.ps.adjust(delta,x)
        i += 1
    }
  }
}
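Wiring the pieces together locally, as LogisticRegression.run does above — a minimal sketch, assuming the fregata aliases (Vector, Num, asNum) used throughout:

import breeze.linalg.DenseVector

val ps = ParameterServer.create
val target = Target(new LogisticGradient(ps), ps)
new AdaptiveSGD()
  .minimize(target)
  .run(Seq((DenseVector(1.0, 2.0): Vector, asNum(1.0))))
// ps.get(0) now has 3 entries: two feature weights plus the bias slot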

//Minimizer.scala
trait Minimizer extends Optimizer {
  private[this] var _target : Target = _
  def minimize(target: Target) : this.type = {
    this._target = target
    this
  }
  def target = _target
}

//Optimizer.scala
trait Optimizer extends Serializable {
  def run(data:TraversableOnce[(Vector,Num)])
}
object ParameterServer {
  def create : ParameterServer = new LocalParameterServer
}

trait ParameterServer extends Serializable {
  def init(rows:Int,cols:Int)
  def adjust(delta:Array[Num],x:Vector)
  def get : Array[Vector]
  def set(ps:Array[Vector])
}

class LocalParameterServer extends ParameterServer {

  private[this] var ps : Array[DenseVector] = null
  private[this] var values : Array[Array[Num]] = null

  def init(rows:Int,cols:Int) = {
    values = Array.fill(rows)( Array.ofDim[Num](cols) )
    ps = values.map( new DenseVector(_) )
  }
  
  //Array(Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
  //Array(DenseVector(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))

  def set(ps:Array[Vector]) = this.ps = ps.map( _.toDenseVector )
  def adjust(delta:Array[Num],x:Vector) = {
    var k = 0
    while( k < delta.length ) {
      val d = delta(k)
      VectorUtil.forV(x,(i,xi) =>{
        values(k)(i) -= d * xi
      })
      k += 1
    }
  }
  def get : Array[Vector] = ps.asInstanceOf[Array[Vector]]
}
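What adjust does on concrete numbers, as a sketch using the classes above:

import breeze.linalg.DenseVector

val ps = new LocalParameterServer
ps.init(1, 3)                                       // one row of 3 zeroed weights
ps.adjust(Array(asNum(0.5)), DenseVector(1.0, 2.0, 0.0))
// row 0 becomes (-0.5, -1.0, 0.0): each coordinate moves by -delta * x(i)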
