Code for Chapter 15 of 《Spark MLlib 机器学习》 (Spark MLlib Machine Learning)
1. Neural network class
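Before the full source below, here is the call pattern the class is designed for. This is only a minimal sketch: it assumes a live SparkContext `sc` and an RDD[(BDM[Double], BDM[Double])] of (label, features) pairs named `train_d`, and it simply mirrors the usage shown in the example code of section 4.

// opts = Array(batch size, number of epochs, validation fraction)
val opts = Array(100.0, 50.0, 0.0)
val model = new NeuralNet()
  .setSize(Array(5, 7, 1))            // layer widths: 5 inputs, 7 hidden units, 1 output
  .setLayer(3)                        // number of layers, must match size.length
  .setActivation_function("tanh_opt") // hidden-layer activation: "sigm" or "tanh_opt"
  .setOutput_function("sigm")         // output unit: "sigm" or "linear"
  .NNtrain(train_d, opts)             // returns a NeuralNetModel
val forecast = model.predict(train_d) // RDD of (label, features, prediction, error)
println(s"mse = ${model.Loss(forecast)}")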
package NN import org.apache.spark._ import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg._ import breeze.linalg.{ Matrix => BM, CSCMatrix => BSM, DenseMatrix => BDM, Vector => BV, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy, svd => brzSvd } import breeze.numerics.{ exp => Bexp, tanh => Btanh } import scala.collection.mutable.ArrayBuffer import java.util.Random import scala.math._ /** * label:目标矩阵 * nna:神经网络每层节点的输出值,a(0),a(1),a(2) * error:输出层与目标值的误差矩阵 */ case class NNLabel(label: BDM[Double], nna: ArrayBuffer[BDM[Double]], error: BDM[Double]) extends Serializable /** * 配置参数 */ case class NNConfig( size: Array[Int], layer: Int, activation_function: String, learningRate: Double, momentum: Double, scaling_learningRate: Double, weightPenaltyL2: Double, nonSparsityPenalty: Double, sparsityTarget: Double, inputZeroMaskedFraction: Double, dropoutFraction: Double, testing: Double, output_function: String) extends Serializable /** * NN(neural network) */ class NeuralNet( private var size: Array[Int], private var layer: Int, private var activation_function: String, private var learningRate: Double, private var momentum: Double, private var scaling_learningRate: Double, private var weightPenaltyL2: Double, private var nonSparsityPenalty: Double, private var sparsityTarget: Double, private var inputZeroMaskedFraction: Double, private var dropoutFraction: Double, private var testing: Double, private var output_function: String, private var initW: Array[BDM[Double]]) extends Serializable with Logging { // var size=Array(5, 10, 7, 1) // var layer=4 // var activation_function="tanh_opt" // var learningRate=2.0 // var momentum=0.5 // var scaling_learningRate=1.0 // var weightPenaltyL2=0.0 // var nonSparsityPenalty=0.0 // var sparsityTarget=0.05 // var inputZeroMaskedFraction=0.0 // var dropoutFraction=0.0 // var testing=0.0 // var output_function="sigm" /** * size = architecture; * n = numel(nn.size); * activation_function = sigm 隐含层函数Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh). * learningRate = 2; 学习率learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs. * momentum = 0.5; Momentum * scaling_learningRate = 1; Scaling factor for the learning rate (each epoch) * weightPenaltyL2 = 0; 正则化L2 regularization * nonSparsityPenalty = 0; 权重稀疏度惩罚值on sparsity penalty * sparsityTarget = 0.05; Sparsity target * inputZeroMaskedFraction = 0; 加入noise,Used for Denoising AutoEncoders * dropoutFraction = 0; 每一次mini-batch样本输入训练时,随机扔掉x%的隐含层节点Dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf) * testing = 0; Internal variable. nntest sets this to one. * output = 'sigm'; 输出函数output unit 'sigm' (=logistic), 'softmax' and 'linear' * */ def this() = this(NeuralNet.Architecture, 3, NeuralNet.Activation_Function, 2.0, 0.5, 1.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, NeuralNet.Output, Array(BDM.zeros[Double](1, 1))) /** 设置神经网络结构. Default: [10, 5, 1]. */ def setSize(size: Array[Int]): this.type = { this.size = size this } /** 设置神经网络层数据. Default: 3. */ def setLayer(layer: Int): this.type = { this.layer = layer this } /** 设置隐含层函数. Default: sigm. */ def setActivation_function(activation_function: String): this.type = { this.activation_function = activation_function this } /** 设置学习率因子. Default: 2. */ def setLearningRate(learningRate: Double): this.type = { this.learningRate = learningRate this } /** 设置Momentum. Default: 0.5. 
*/ def setMomentum(momentum: Double): this.type = { this.momentum = momentum this } /** 设置scaling_learningRate. Default: 1. */ def setScaling_learningRate(scaling_learningRate: Double): this.type = { this.scaling_learningRate = scaling_learningRate this } /** 设置正则化L2因子. Default: 0. */ def setWeightPenaltyL2(weightPenaltyL2: Double): this.type = { this.weightPenaltyL2 = weightPenaltyL2 this } /** 设置权重稀疏度惩罚因子. Default: 0. */ def setNonSparsityPenalty(nonSparsityPenalty: Double): this.type = { this.nonSparsityPenalty = nonSparsityPenalty this } /** 设置权重稀疏度目标值. Default: 0.05. */ def setSparsityTarget(sparsityTarget: Double): this.type = { this.sparsityTarget = sparsityTarget this } /** 设置权重加入噪声因子. Default: 0. */ def setInputZeroMaskedFraction(inputZeroMaskedFraction: Double): this.type = { this.inputZeroMaskedFraction = inputZeroMaskedFraction this } /** 设置权重Dropout因子. Default: 0. */ def setDropoutFraction(dropoutFraction: Double): this.type = { this.dropoutFraction = dropoutFraction this } /** 设置testing. Default: 0. */ def setTesting(testing: Double): this.type = { this.testing = testing this } /** 设置输出函数. Default: linear. */ def setOutput_function(output_function: String): this.type = { this.output_function = output_function this } /** 设置初始权重. Default: 0. */ def setInitW(initW: Array[BDM[Double]]): this.type = { this.initW = initW this } /** * 运行神经网络算法. */ def NNtrain(train_d: RDD[(BDM[Double], BDM[Double])], opts: Array[Double]): NeuralNetModel = { val sc = train_d.sparkContext var initStartTime = System.currentTimeMillis() var initEndTime = System.currentTimeMillis() // 参数配置 广播配置 var nnconfig = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate, weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, testing, output_function) // 初始化权重 var nn_W = NeuralNet.InitialWeight(size) if (!((initW.length == 1) && (initW(0) == (BDM.zeros[Double](1, 1))))) { for (i <- 0 to initW.length - 1) { nn_W(i) = initW(i) } } var nn_vW = NeuralNet.InitialWeightV(size) // val tmpw = nn_W(0) // for (i <- 0 to tmpw.rows - 1) { // for (j <- 0 to tmpw.cols - 1) { // print(tmpw(i, j) + "\t") // } // println() // } // 初始化每层的平均激活度nn.p // average activations (for use with sparsity) var nn_p = NeuralNet.InitialActiveP(size) // 样本数据划分:训练数据、交叉检验数据 val validation = opts(2) val splitW1 = Array(1.0 - validation, validation) val train_split1 = train_d.randomSplit(splitW1, System.nanoTime()) val train_t = train_split1(0) val train_v = train_split1(1) // m:训练样本的数量 val m = train_t.count // batchsize是做batch gradient时候的大小 // 计算batch的数量 val batchsize = opts(0).toInt val numepochs = opts(1).toInt val numbatches = (m / batchsize).toInt var L = Array.fill(numepochs * numbatches.toInt)(0.0) var n = 0 var loss_train_e = Array.fill(numepochs)(0.0) var loss_val_e = Array.fill(numepochs)(0.0) // numepochs是循环的次数 for (i <- 1 to numepochs) { initStartTime = System.currentTimeMillis() val splitW2 = Array.fill(numbatches)(1.0 / numbatches) // 根据分组权重,随机划分每组样本数据 val bc_config = sc.broadcast(nnconfig) for (l <- 1 to numbatches) { // 权重 val bc_nn_W = sc.broadcast(nn_W) val bc_nn_vW = sc.broadcast(nn_vW) // println(i + "\t" + l) // println("W1") // val tmpw0 = bc_nn_W.value(0) // for (i <- 0 to tmpw0.rows - 1) { // for (j <- 0 to tmpw0.cols - 1) { // print(tmpw0(i, j) + "\t") // } // println() // } // println("W2") // val tmpw1 = bc_nn_W.value(1) // for (i <- 0 to tmpw1.rows - 1) { // for (j <- 0 to tmpw1.cols - 1) { // print(tmpw1(i, j) + "\t") // } // println() // } // 
println("W3") // val tmpw2 = bc_nn_W.value(2) // for (i <- 0 to tmpw2.rows - 1) { // for (j <- 0 to tmpw2.cols - 1) { // print(tmpw2(i, j) + "\t") // } // println() // } // 样本划分 val train_split2 = train_t.randomSplit(splitW2, System.nanoTime()) val batch_xy1 = train_split2(l - 1) // val train_split3 = train_t.filter { f => (f._1 >= batchsize * (l - 1) + 1) && (f._1 <= batchsize * (l)) } // val batch_xy1 = train_split3.map(f => (f._2, f._3)) // Add noise to input (for use in denoising autoencoder) // 加入noise,这是denoising autoencoder需要使用到的部分 // 这部分请参见《Extracting and Composing Robust Features with Denoising Autoencoders》这篇论文 // 具体加入的方法就是把训练样例中的一些数据调整变为0,inputZeroMaskedFraction表示了调整的比例 //val randNoise = NeuralNet.RandMatrix(batch_x.numRows.toInt, batch_x.numCols.toInt, inputZeroMaskedFraction) val batch_xy2 = if (bc_config.value.inputZeroMaskedFraction != 0) { NeuralNet.AddNoise(batch_xy1, bc_config.value.inputZeroMaskedFraction) } else batch_xy1 // val tmpxy = batch_xy2.map(f => (f._1.toArray,f._2.toArray)).toArray.map {f => ((new ArrayBuffer() ++ f._1) ++ f._2).toArray} // for (i <- 0 to tmpxy.length - 1) { // for (j <- 0 to tmpxy(i).length - 1) { // print(tmpxy(i)(j) + "\t") // } // println() // } // NNff是进行前向传播 // nn = nnff(nn, batch_x, batch_y); val train_nnff = NeuralNet.NNff(batch_xy2, bc_config, bc_nn_W) // val tmpa0 = train_nnff.map(f => f._1.nna(0)).take(20) // println("tmpa0") // for (i <- 0 to 10) { // for (j <- 0 to tmpa0(i).cols - 1) { // print(tmpa0(i)(0, j) + "\t") // } // println() // } // val tmpa1 = train_nnff.map(f => f._1.nna(1)).take(20) // println("tmpa1") // for (i <- 0 to 10) { // for (j <- 0 to tmpa1(i).cols - 1) { // print(tmpa1(i)(0, j) + "\t") // } // println() // } // val tmpa2 = train_nnff.map(f => f._1.nna(2)).take(20) // println("tmpa2") // for (i <- 0 to 10) { // for (j <- 0 to tmpa2(i).cols - 1) { // print(tmpa2(i)(0, j) + "\t") // } // println() // } // sparsity计算,计算每层节点的平均稀疏度 nn_p = NeuralNet.ActiveP(train_nnff, bc_config, nn_p) val bc_nn_p = sc.broadcast(nn_p) // NNbp是后向传播 // nn = nnbp(nn); val train_nnbp = NeuralNet.NNbp(train_nnff, bc_config, bc_nn_W, bc_nn_p) // val tmpd0 = rdd5.map(f => f._2(2)).take(20) // println("tmpd0") // for (i <- 0 to 10) { // for (j <- 0 to tmpd0(i).cols - 1) { // print(tmpd0(i)(0, j) + "\t") // } // println() // } // val tmpd1 = rdd5.map(f => f._2(1)).take(20) // println("tmpd1") // for (i <- 0 to 10) { // for (j <- 0 to tmpd1(i).cols - 1) { // print(tmpd1(i)(0, j) + "\t") // } // println() // } // val tmpdw0 = rdd5.map(f => f._3(0)).take(20) // println("tmpdw0") // for (i <- 0 to 10) { // for (j <- 0 to tmpdw0(i).cols - 1) { // print(tmpdw0(i)(0, j) + "\t") // } // println() // } // val tmpdw1 = rdd5.map(f => f._3(1)).take(20) // println("tmpdw1") // for (i <- 0 to 10) { // for (j <- 0 to tmpdw1(i).cols - 1) { // print(tmpdw1(i)(0, j) + "\t") // } // println() // } // nn = NNapplygrads(nn) returns an neural network structure with updated // weights and biases // 更新权重参数:w=w-α*[dw + λw] val train_nnapplygrads = NeuralNet.NNapplygrads(train_nnbp, bc_config, bc_nn_W, bc_nn_vW) nn_W = train_nnapplygrads(0) nn_vW = train_nnapplygrads(1) // val tmpw2 = train_nnapplygrads(0)(0) // for (i <- 0 to tmpw2.rows - 1) { // for (j <- 0 to tmpw2.cols - 1) { // print(tmpw2(i, j) + "\t") // } // println() // } // val tmpw3 = train_nnapplygrads(0)(1) // for (i <- 0 to tmpw3.rows - 1) { // for (j <- 0 to tmpw3.cols - 1) { // print(tmpw3(i, j) + "\t") // } // println() // } // error and loss // 输出误差计算 val loss1 = train_nnff.map(f => f._1.error) val 
(loss2, counte) = loss1.treeAggregate((0.0, 0L))( seqOp = (c, v) => { // c: (e, count), v: (m) val e1 = c._1 val e2 = (v :* v).sum val esum = e1 + e2 (esum, c._2 + 1) }, combOp = (c1, c2) => { // c: (e, count) val e1 = c1._1 val e2 = c2._1 val esum = e1 + e2 (esum, c1._2 + c2._2) }) val Loss = loss2 / counte.toDouble L(n) = Loss * 0.5 n = n + 1 } // 计算本次迭代的训练误差及交叉检验误差 // Full-batch train mse val evalconfig = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate, weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, 1.0, output_function) loss_train_e(i - 1) = NeuralNet.NNeval(train_t, sc.broadcast(evalconfig), sc.broadcast(nn_W)) if (validation > 0) loss_val_e(i - 1) = NeuralNet.NNeval(train_v, sc.broadcast(evalconfig), sc.broadcast(nn_W)) // 更新学习因子 // nn.learningRate = nn.learningRate * nn.scaling_learningRate; nnconfig = NNConfig(size, layer, activation_function, nnconfig.learningRate * nnconfig.scaling_learningRate, momentum, scaling_learningRate, weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, testing, output_function) initEndTime = System.currentTimeMillis() // 打印输出结果 printf("epoch: numepochs = %d , Took = %d seconds; Full-batch train mse = %f, val mse = %f.\n", i, scala.math.ceil((initEndTime - initStartTime).toDouble / 1000).toLong, loss_train_e(i - 1), loss_val_e(i - 1)) } val configok = NNConfig(size, layer, activation_function, learningRate, momentum, scaling_learningRate, weightPenaltyL2, nonSparsityPenalty, sparsityTarget, inputZeroMaskedFraction, dropoutFraction, 1.0, output_function) new NeuralNetModel(configok, nn_W) } } /** * NN(neural network) */ object NeuralNet extends Serializable { // Initialization mode names val Activation_Function = "sigm" val Output = "linear" val Architecture = Array(10, 5, 1) /** * 增加随机噪声 * 若随机值>=Fraction,值不变,否则改为0 */ def AddNoise(rdd: RDD[(BDM[Double], BDM[Double])], Fraction: Double): RDD[(BDM[Double], BDM[Double])] = { val addNoise = rdd.map { f => val features = f._2 val a = BDM.rand[Double](features.rows, features.cols) val a1 = a :>= Fraction val d1 = a1.data.map { f => if (f == true) 1.0 else 0.0 } val a2 = new BDM(features.rows, features.cols, d1) val features2 = features :* a2 (f._1, features2) } addNoise } /** * 初始化权重 * 初始化为一个很小的、接近零的随机值 */ def InitialWeight(size: Array[Int]): Array[BDM[Double]] = { // 初始化权重参数 // weights and weight momentum // nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1))); // nn.vW{i - 1} = zeros(size(nn.W{i - 1})); val n = size.length val nn_W = ArrayBuffer[BDM[Double]]() for (i <- 1 to n - 1) { val d1 = BDM.rand(size(i), size(i - 1) + 1) d1 :-= 0.5 val f1 = 2 * 4 * sqrt(6.0 / (size(i) + size(i - 1))) val d2 = d1 :* f1 //val d3 = new DenseMatrix(d2.rows, d2.cols, d2.data, d2.isTranspose) //val d4 = Matrices.dense(d2.rows, d2.cols, d2.data) nn_W += d2 } nn_W.toArray } /** * 初始化权重vW * 初始化为0 */ def InitialWeightV(size: Array[Int]): Array[BDM[Double]] = { // 初始化权重参数 // weights and weight momentum // nn.vW{i - 1} = zeros(size(nn.W{i - 1})); val n = size.length val nn_vW = ArrayBuffer[BDM[Double]]() for (i <- 1 to n - 1) { val d1 = BDM.zeros[Double](size(i), size(i - 1) + 1) nn_vW += d1 } nn_vW.toArray } /** * 初始每一层的平均激活度 * 初始化为0 */ def InitialActiveP(size: Array[Int]): Array[BDM[Double]] = { // 初始每一层的平均激活度 // average activations (for use with sparsity) // nn.p{i} = zeros(1, nn.size(i)); val n = size.length val nn_p = ArrayBuffer[BDM[Double]]() 
nn_p += BDM.zeros[Double](1, 1) for (i <- 1 to n - 1) { val d1 = BDM.zeros[Double](1, size(i)) nn_p += d1 } nn_p.toArray } /** * 随机让网络某些隐含层节点的权重不工作 * 若随机值>=Fraction,矩阵值不变,否则改为0 */ def DropoutWeight(matrix: BDM[Double], Fraction: Double): Array[BDM[Double]] = { val aa = BDM.rand[Double](matrix.rows, matrix.cols) val aa1 = aa :> Fraction val d1 = aa1.data.map { f => if (f == true) 1.0 else 0.0 } val aa2 = new BDM(matrix.rows: Int, matrix.cols: Int, d1: Array[Double]) val matrix2 = matrix :* aa2 Array(aa2, matrix2) } /** * sigm激活函数 * X = 1./(1+exp(-P)); */ def sigm(matrix: BDM[Double]): BDM[Double] = { val s1 = 1.0 / (Bexp(matrix * (-1.0)) + 1.0) s1 } /** * tanh激活函数 * f=1.7159*tanh(2/3.*A); */ def tanh_opt(matrix: BDM[Double]): BDM[Double] = { val s1 = Btanh(matrix * (2.0 / 3.0)) * 1.7159 s1 } /** * nnff是进行前向传播 * 计算神经网络中的每个节点的输出值; */ def NNff( batch_xy2: RDD[(BDM[Double], BDM[Double])], bc_config: org.apache.spark.broadcast.Broadcast[NNConfig], bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): RDD[(NNLabel, Array[BDM[Double]])] = { // 第1层:a(1)=[1 x] // 增加偏置项b val train_data1 = batch_xy2.map { f => val lable = f._1 val features = f._2 val nna = ArrayBuffer[BDM[Double]]() val Bm1 = new BDM(features.rows, 1, Array.fill(features.rows * 1)(1.0)) val features2 = BDM.horzcat(Bm1, features) val error = BDM.zeros[Double](lable.rows, lable.cols) nna += features2 NNLabel(lable, nna, error) } // println("bc_size " + bc_config.value.size(0) + bc_config.value.size(1) + bc_config.value.size(2)) // println("bc_layer " + bc_config.value.layer) // println("bc_activation_function " + bc_config.value.activation_function) // println("bc_output_function " + bc_config.value.output_function) // // println("tmpw0 ") // val tmpw0 = bc_nn_W.value(0) // for (i <- 0 to tmpw0.rows - 1) { // for (j <- 0 to tmpw0.cols - 1) { // print(tmpw0(i, j) + "\t") // } // println() // } // feedforward pass // 第2至n-1层计算,a(i)=f(a(i-1)*w(i-1)') //val tmp1 = train_data1.map(f => f.nna(0).data).take(1)(0) //val tmp2 = new BDM(1, tmp1.length, tmp1) //val nn_a = ArrayBuffer[BDM[Double]]() //nn_a += tmp2 val train_data2 = train_data1.map { f => val nn_a = f.nna val dropOutMask = ArrayBuffer[BDM[Double]]() dropOutMask += new BDM[Double](1, 1, Array(0.0)) for (j <- 1 to bc_config.value.layer - 2) { // 计算每层输出 // Calculate the unit's outputs (including the bias term) // nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}') // nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}'); val A1 = nn_a(j - 1) val W1 = bc_nn_W.value(j - 1) val aw1 = A1 * W1.t val nnai1 = bc_config.value.activation_function match { case "sigm" => val aw2 = NeuralNet.sigm(aw1) aw2 case "tanh_opt" => val aw2 = NeuralNet.tanh_opt(aw1) //val aw2 = Btanh(aw1 * (2.0 / 3.0)) * 1.7159 aw2 } // dropout计算 // Dropout是指在模型训练时随机让网络某些隐含层节点的权重不工作,不工作的那些节点可以暂时认为不是网络结构的一部分 // 但是它的权重得保留下来(只是暂时不更新而已),因为下次样本输入时它可能又得工作了 // 参照 http://www.cnblogs.com/tornadomeet/p/3258122.html val dropoutai = if (bc_config.value.dropoutFraction > 0) { if (bc_config.value.testing == 1) { val nnai2 = nnai1 * (1.0 - bc_config.value.dropoutFraction) Array(new BDM[Double](1, 1, Array(0.0)), nnai2) } else { NeuralNet.DropoutWeight(nnai1, bc_config.value.dropoutFraction) } } else { val nnai2 = nnai1 Array(new BDM[Double](1, 1, Array(0.0)), nnai2) } val nnai2 = dropoutai(1) dropOutMask += dropoutai(0) // Add the bias term // 增加偏置项b // nn.a{i} = [ones(m,1) nn.a{i}]; val Bm1 = BDM.ones[Double](nnai2.rows, 1) val nnai3 = BDM.horzcat(Bm1, nnai2) nn_a += nnai3 } (NNLabel(f.label, nn_a, f.error), dropOutMask.toArray) } // 
输出层计算 val train_data3 = train_data2.map { f => val nn_a = f._1.nna // nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}'); // nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; val An1 = nn_a(bc_config.value.layer - 2) val Wn1 = bc_nn_W.value(bc_config.value.layer - 2) val awn1 = An1 * Wn1.t val nnan1 = bc_config.value.output_function match { case "sigm" => val awn2 = NeuralNet.sigm(awn1) //val awn2 = 1.0 / (Bexp(awn1 * (-1.0)) + 1.0) awn2 case "linear" => val awn2 = awn1 awn2 } nn_a += nnan1 (NNLabel(f._1.label, nn_a, f._1.error), f._2) } // error and loss // 输出误差计算 // nn.e = y - nn.a{n}; // val nn_e = batch_y - nnan val train_data4 = train_data3.map { f => val batch_y = f._1.label val nnan = f._1.nna(bc_config.value.layer - 1) val error = (batch_y - nnan) (NNLabel(f._1.label, f._1.nna, error), f._2) } train_data4 } /** * sparsity计算,网络稀疏度 * 计算每个节点的平均值 */ def ActiveP( train_nnff: RDD[(NNLabel, Array[BDM[Double]])], bc_config: org.apache.spark.broadcast.Broadcast[NNConfig], nn_p_old: Array[BDM[Double]]): Array[BDM[Double]] = { val nn_p = ArrayBuffer[BDM[Double]]() nn_p += BDM.zeros[Double](1, 1) // calculate running exponential activations for use with sparsity // sparsity计算,计算sparsity,nonSparsityPenalty 是对没达到sparsitytarget的参数的惩罚系数 for (i <- 1 to bc_config.value.layer - 1) { val pi1 = train_nnff.map(f => f._1.nna(i)) val initpi = BDM.zeros[Double](1, bc_config.value.size(i)) val (piSum, miniBatchSize) = pi1.treeAggregate((initpi, 0L))( seqOp = (c, v) => { // c: (nnasum, count), v: (nna) val nna1 = c._1 val nna2 = v val nnasum = nna1 + nna2 (nnasum, c._2 + 1) }, combOp = (c1, c2) => { // c: (nnasum, count) val nna1 = c1._1 val nna2 = c2._1 val nnasum = nna1 + nna2 (nnasum, c1._2 + c2._2) }) val piAvg = piSum / miniBatchSize.toDouble val oldpi = nn_p_old(i) val newpi = (piAvg * 0.01) + (oldpi * 0.09) nn_p += newpi } nn_p.toArray } /** * NNbp是后向传播 * 计算权重的平均偏导数 */ def NNbp( train_nnff: RDD[(NNLabel, Array[BDM[Double]])], bc_config: org.apache.spark.broadcast.Broadcast[NNConfig], bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]], bc_nn_p: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Array[BDM[Double]] = { // 第n层偏导数:d(n)=-(y-a(n))*f'(z),sigmoid函数f'(z)表达式:f'(z)=f(z)*[1-f(z)] // sigm: d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n})); // {'softmax','linear'}: d{n} = - nn.e; val train_data5 = train_nnff.map { f => val nn_a = f._1.nna val error = f._1.error val dn = ArrayBuffer[BDM[Double]]() val nndn = bc_config.value.output_function match { case "sigm" => val fz = nn_a(bc_config.value.layer - 1) (error * (-1.0)) :* (fz :* (1.0 - fz)) case "linear" => error * (-1.0) } dn += nndn (f._1, f._2, dn) } // 第n-1至第2层导数:d(n)=-(w(n)*d(n+1))*f'(z) val train_data6 = train_data5.map { f => // 假设 f(z) 是sigmoid函数 f(z)=1/[1+e^(-z)],f'(z)表达式,f'(z)=f(z)*[1-f(z)] // 假设 f(z) tanh f(z)=1.7159*tanh(2/3.*A) ,f'(z)表达式,f'(z)=1.7159 * 2/3 * (1 - 1/(1.7159)^2 * f(z).^2) // train_data5.map(f => f._1.nna).take(1) // train_data5.map(f => f._3).take(1) // train_data5.map(f => f._2).take(1) // val di = ArrayBuffer(BDM((0.011181628780251586))) // val nn_a = ArrayBuffer[BDM[Double]]() // val a1 = BDM((1.0, 0.312605257000000, 0.848582961000000, 0.999014768000000, 0.278330771000000, 0.462701179000000)) // val a2 = BDM((1.0, 0.838091550300577, 0.996782915917104, 0.118033012437165, 0.312605257000000, 0.848582961000000, 0.999014768000000, 0.278330771000000, 0.462701179000000, 0.278330771000000, 0.462701179000000)) // val a3 = BDM((1.0, 0.312605257000000, 0.848582961000000, 0.999014768000000, 0.278330771000000, 0.462701179000000, 
0.278330771000000, 0.462701179000000)) // val a4 = BDM((0.9826605123949446)) // nn_a += a1 // nn_a += a2 // nn_a += a3 // nn_a += a4 // val dropout = Array(BDM.zeros[Double](1,1), BDM.zeros[Double](1,1), BDM.zeros[Double](1,1)) val nn_a = f._1.nna val di = f._3 val dropout = f._2 for (i <- (bc_config.value.layer - 2) to 1 by -1) { // f'(z)表达式 val nnd_act = bc_config.value.activation_function match { case "sigm" => val d_act = nn_a(i) :* (1.0 - nn_a(i)) d_act case "tanh_opt" => val fz2 = (1.0 - ((nn_a(i) :* nn_a(i)) * (1.0 / (1.7159 * 1.7159)))) val d_act = fz2 * (1.7159 * (2.0 / 3.0)) d_act } // 稀疏度惩罚误差计算:-(t/p)+(1-t)/(1-p) // sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))]; val sparsityError = if (bc_config.value.nonSparsityPenalty > 0) { val nn_pi1 = bc_nn_p.value(i) val nn_pi2 = (bc_config.value.sparsityTarget / nn_pi1) * (-1.0) + (1.0 - bc_config.value.sparsityTarget) / (1.0 - nn_pi1) val Bm1 = new BDM(nn_pi2.rows, 1, Array.fill(nn_pi2.rows * 1)(1.0)) val sparsity = BDM.horzcat(Bm1, nn_pi2 * bc_config.value.nonSparsityPenalty) sparsity } else { val nn_pi1 = bc_nn_p.value(i) val sparsity = BDM.zeros[Double](nn_pi1.rows, nn_pi1.cols + 1) sparsity } // 导数:d(n)=-( w(n)*d(n+1)+ sparsityError )*f'(z) // d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act; val W1 = bc_nn_W.value(i) val nndi1 = if (i + 1 == bc_config.value.layer - 1) { //in this case in d{n} there is not the bias term to be removed val di1 = di(bc_config.value.layer - 2 - i) val di2 = (di1 * W1 + sparsityError) :* nnd_act di2 } else { // in this case in d{i} the bias term has to be removed val di1 = di(bc_config.value.layer - 2 - i)(::, 1 to -1) val di2 = (di1 * W1 + sparsityError) :* nnd_act di2 } // dropoutFraction val nndi2 = if (bc_config.value.dropoutFraction > 0) { val dropouti1 = dropout(i) val Bm1 = new BDM(nndi1.rows: Int, 1: Int, Array.fill(nndi1.rows * 1)(1.0)) val dropouti2 = BDM.horzcat(Bm1, dropouti1) nndi1 :* dropouti2 } else nndi1 di += nndi2 } di += BDM.zeros(1, 1) // 计算最终需要的偏导数值:dw(n)=(1/m)∑d(n+1)*a(n) // nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1); val dw = ArrayBuffer[BDM[Double]]() for (i <- 0 to bc_config.value.layer - 2) { val nndW = if (i + 1 == bc_config.value.layer - 1) { (di(bc_config.value.layer - 2 - i).t) * nn_a(i) } else { (di(bc_config.value.layer - 2 - i)(::, 1 to -1)).t * nn_a(i) } dw += nndW } (f._1, di, dw) } val train_data7 = train_data6.map(f => f._3) // Sample a subset (fraction miniBatchFraction) of the total data // compute and sum up the subgradients on this subset (this is one map-reduce) val initgrad = ArrayBuffer[BDM[Double]]() for (i <- 0 to bc_config.value.layer - 2) { val init1 = if (i + 1 == bc_config.value.layer - 1) { BDM.zeros[Double](bc_config.value.size(i + 1), bc_config.value.size(i) + 1) } else { BDM.zeros[Double](bc_config.value.size(i + 1), bc_config.value.size(i) + 1) } initgrad += init1 } val (gradientSum, miniBatchSize) = train_data7.treeAggregate((initgrad, 0L))( seqOp = (c, v) => { // c: (grad, count), v: (grad) val grad1 = c._1 val grad2 = v val sumgrad = ArrayBuffer[BDM[Double]]() for (i <- 0 to bc_config.value.layer - 2) { val Bm1 = grad1(i) val Bm2 = grad2(i) val Bmsum = Bm1 + Bm2 sumgrad += Bmsum } (sumgrad, c._2 + 1) }, combOp = (c1, c2) => { // c: (grad, count) val grad1 = c1._1 val grad2 = c2._1 val sumgrad = ArrayBuffer[BDM[Double]]() for (i <- 0 to bc_config.value.layer - 2) { val Bm1 = grad1(i) val Bm2 = grad2(i) val Bmsum = Bm1 + Bm2 sumgrad += Bmsum } 
(sumgrad, c1._2 + c2._2)
      })
    // compute the average gradient over the mini-batch
    val gradientAvg = ArrayBuffer[BDM[Double]]()
    for (i <- 0 to bc_config.value.layer - 2) {
      val Bm1 = gradientSum(i)
      val Bmavg = Bm1 :/ miniBatchSize.toDouble
      gradientAvg += Bmavg
    }
    gradientAvg.toArray
  }

  /**
   * NNapplygrads performs the weight update.
   */
  def NNapplygrads(
    train_nnbp: Array[BDM[Double]],
    bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
    bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]],
    bc_nn_vW: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Array[Array[BDM[Double]]] = {
    // nnapplygrads returns a neural network structure with updated
    // weights and biases
    // weight update: w = w - alpha * [dw + lambda * w]
    val W_a = ArrayBuffer[BDM[Double]]()
    val vW_a = ArrayBuffer[BDM[Double]]()
    for (i <- 0 to bc_config.value.layer - 2) {
      val nndwi = if (bc_config.value.weightPenaltyL2 > 0) {
        val dwi = train_nnbp(i)
        val zeros = BDM.zeros[Double](dwi.rows, 1)
        // L2 penalty term lambda * w (bias column excluded)
        val l2 = BDM.horzcat(zeros, bc_nn_W.value(i)(::, 1 to -1))
        val dwi2 = dwi + (l2 * bc_config.value.weightPenaltyL2)
        dwi2
      } else {
        val dwi = train_nnbp(i)
        dwi
      }
      val nndwi2 = nndwi :* bc_config.value.learningRate
      val nndwi3 = if (bc_config.value.momentum > 0) {
        val vwi = bc_nn_vW.value(i)
        val dw3 = nndwi2 + (vwi * bc_config.value.momentum)
        dw3
      } else {
        nndwi2
      }
      // nn.W{i} = nn.W{i} - dW;
      W_a += (bc_nn_W.value(i) - nndwi3)
      // nn.vW{i} = nn.momentum*nn.vW{i} + dW;
      val nnvwi1 = if (bc_config.value.momentum > 0) {
        val vwi = bc_nn_vW.value(i)
        val vw3 = nndwi2 + (vwi * bc_config.value.momentum)
        vw3
      } else {
        bc_nn_vW.value(i)
      }
      vW_a += nnvwi1
    }
    Array(W_a.toArray, vW_a.toArray)
  }

  /**
   * NNeval runs forward propagation and computes the output error
   * (the output of every node, followed by the mean squared error).
   */
  def NNeval(
    batch_xy: RDD[(BDM[Double], BDM[Double])],
    bc_config: org.apache.spark.broadcast.Broadcast[NNConfig],
    bc_nn_W: org.apache.spark.broadcast.Broadcast[Array[BDM[Double]]]): Double = {
    // NNff performs forward propagation
    // nn = nnff(nn, batch_x, batch_y);
    val train_nnff = NeuralNet.NNff(batch_xy, bc_config, bc_nn_W)
    // error and loss of the output layer
    val loss1 = train_nnff.map(f => f._1.error)
    val (loss2, counte) = loss1.treeAggregate((0.0, 0L))(
      seqOp = (c, v) => {
        // c: (e, count), v: (m)
        val e1 = c._1
        val e2 = (v :* v).sum
        val esum = e1 + e2
        (esum, c._2 + 1)
      },
      combOp = (c1, c2) => {
        // c: (e, count)
        val e1 = c1._1
        val e2 = c2._1
        val esum = e1 + e2
        (esum, c1._2 + c2._2)
      })
    val Loss = loss2 / counte.toDouble
    Loss * 0.5
  }
}
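NNapplygrads above applies the update w = w - alpha * (dw + lambda * w), optionally adding a momentum term that is also stored back into vW for the next batch. The snippet below is a minimal, Spark-free sketch of the same rule on plain Breeze matrices; the object name, the 2x3 shape and the hyperparameter values are made up for illustration, and the real code additionally excludes the bias column from the L2 term.

import breeze.linalg.{ DenseMatrix => BDM }

object ApplyGradsSketch {
  def main(args: Array[String]): Unit = {
    val learningRate = 2.0
    val momentum = 0.5
    val weightPenaltyL2 = 0.01

    var w  = BDM.rand[Double](2, 3)   // current weights (bias column included)
    var vW = BDM.zeros[Double](2, 3)  // momentum buffer
    val dw = BDM.rand[Double](2, 3)   // averaged gradient, as produced by NNbp

    // dW = alpha * (dw + lambda * w), plus momentum * previous step
    val step = ((dw + (w * weightPenaltyL2)) * learningRate) + (vW * momentum)
    w = w - step   // nn.W{i} = nn.W{i} - dW
    vW = step      // nn.vW{i} is reused as the momentum term on the next call
    println(w)
  }
}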
2. ANN model

package NN

import breeze.linalg.{ Matrix => BM, CSCMatrix => BSM, DenseMatrix => BDM, Vector => BV, DenseVector => BDV, SparseVector => BSV }
import org.apache.spark.rdd.RDD

/**
 * label: target matrix
 * features: feature matrix
 * predict_label: prediction matrix
 * error: error matrix
 */
case class PredictNNLabel(label: BDM[Double], features: BDM[Double], predict_label: BDM[Double], error: BDM[Double]) extends Serializable

/**
 * NN (neural network) model
 */
class NeuralNetModel(
  val config: NNConfig,
  val weights: Array[BDM[Double]]) extends Serializable {

  /**
   * Returns the prediction results
   * in the form (label, features, predict_label, error).
   */
  def predict(dataMatrix: RDD[(BDM[Double], BDM[Double])]): RDD[PredictNNLabel] = {
    val sc = dataMatrix.sparkContext
    val bc_nn_W = sc.broadcast(weights)
    val bc_config = sc.broadcast(config)
    // NNff performs forward propagation
    // nn = nnff(nn, batch_x, batch_y);
    val train_nnff = NeuralNet.NNff(dataMatrix, bc_config, bc_nn_W)
    val predict = train_nnff.map { f =>
      val label = f._1.label
      val error = f._1.error
      val nnan = f._1.nna(bc_config.value.layer - 1)
      val nna1 = f._1.nna(0)(::, 1 to -1)
      PredictNNLabel(label, nna1, nnan, error)
    }
    predict
  }

  /**
   * Computes the output error
   * (mean squared error over all samples).
   */
  def Loss(predict: RDD[PredictNNLabel]): Double = {
    val predict1 = predict.map(f => f.error)
    // error and loss of the output layer
    val loss1 = predict1
    val (loss2, counte) = loss1.treeAggregate((0.0, 0L))(
      seqOp = (c, v) => {
        // c: (e, count), v: (m)
        val e1 = c._1
        val e2 = (v :* v).sum
        val esum = e1 + e2
        (esum, c._2 + 1)
      },
      combOp = (c1, c2) => {
        // c: (e, count)
        val e1 = c1._1
        val e2 = c2._1
        val esum = e1 + e2
        (esum, c1._2 + c2._2)
      })
    val Loss = loss2 / counte.toDouble
    Loss * 0.5
  }
}
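Loss above returns half of the mean per-sample sum of squared errors. As a sanity check, the same quantity can be computed locally on a handful of Breeze error matrices; the object name and the error values below are invented for illustration.

import breeze.linalg.{ DenseMatrix => BDM }

object LossSketch {
  def main(args: Array[String]): Unit = {
    // one 1 x k error matrix per sample (here k = 1)
    val errors = Seq(
      new BDM(1, 1, Array(0.10)),
      new BDM(1, 1, Array(-0.05)),
      new BDM(1, 1, Array(0.20)))
    // Loss = 0.5 * (1 / n) * sum over samples of sum(e :* e)
    val sumSq = errors.map(e => (e :* e).sum).sum
    val loss = 0.5 * sumSq / errors.length
    println(loss) // 0.5 * (0.01 + 0.0025 + 0.04) / 3 = 0.00875
  }
}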
3. Test function code

package util

import java.util.Random
import breeze.linalg.{ Matrix => BM, CSCMatrix => BSM, DenseMatrix => BDM, Vector => BV, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy, svd => brzSvd }
import breeze.numerics.{ exp => Bexp, cos => Bcos, tanh => Btanh }
import scala.math.Pi

object RandSampleData extends Serializable {
  // Rosenbrock: sum(100 * (x(i+1) - x(i)^2)^2 + (x(i) - 1)^2)
  // Rastrigin:  sum(x(i)^2 - 10 * cos(2 * Pi * x(i)) + 10)
  // Sphere:     sum(x(i)^2)
  /**
   * Test functions: Rosenbrock, Rastrigin, Sphere.
   * Generates n2-dimensional random data and computes Y with the chosen test function.
   * n1: rows, n2: columns, b1: lower bound, b2: upper bound, function: test function name.
   */
  def RandM(
    n1: Int,
    n2: Int,
    b1: Double,
    b2: Double,
    function: String): BDM[Double] = {
    // val n1 = 2
    // val n2 = 3
    // val b1 = -30
    // val b2 = 30
    val bdm1 = BDM.rand(n1, n2) * (b2 - b1).toDouble + b1.toDouble
    val bdm_y = function match {
      case "rosenbrock" =>
        val xi0 = bdm1(::, 0 to (bdm1.cols - 2))
        val xi1 = bdm1(::, 1 to (bdm1.cols - 1))
        val xi2 = (xi0 :* xi0)
        val m1 = ((xi1 - xi2) :* (xi1 - xi2)) * 100.0 + ((xi0 - 1.0) :* (xi0 - 1.0))
        val m2 = m1 * BDM.ones[Double](m1.cols, 1)
        m2
      case "rastrigin" =>
        val xi0 = bdm1
        val xi2 = (xi0 :* xi0)
        val sicos = Bcos(xi0 * 2.0 * Pi) * 10.0
        val m1 = xi2 - sicos + 10.0
        val m2 = m1 * BDM.ones[Double](m1.cols, 1)
        m2
      case "sphere" =>
        val xi0 = bdm1
        val xi2 = (xi0 :* xi0)
        val m1 = xi2
        val m2 = m1 * BDM.ones[Double](m1.cols, 1)
        m2
    }
    val randm = BDM.horzcat(bdm_y, bdm1)
    randm
  }
}

4. Example code
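In the example below, every RDD element is a pair of Breeze matrices, a 1x1 label and a 1xn feature row, and opts packs the batch size, the number of epochs and the validation fraction, in the order NNtrain reads them (opts(0), opts(1), opts(2)). A one-record sketch of that layout, with arbitrary values, usable from spark-shell or any Scala REPL with Breeze on the classpath:

import breeze.linalg.{ DenseMatrix => BDM }

// one training record: (label, features), both as single-row Breeze matrices
val label    = new BDM(1, 1, Array(0.42))
val features = new BDM(1, 5, Array(0.1, 0.2, 0.3, 0.4, 0.5))
val record: (BDM[Double], BDM[Double]) = (label, features)
// opts = Array(batch size, number of epochs, validation fraction)
val opts = Array(100.0, 50.0, 0.0)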
package tests import org.apache.log4j.{ Level, Logger } import org.apache.spark.{ SparkConf, SparkContext } import org.apache.spark.storage.StorageLevel import org.apache.spark.mllib.util.MLUtils import org.apache.spark.mllib.linalg.{ Vector, Vectors } import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.mllib.regression.LabeledPoint import breeze.linalg.{ Matrix => BM, CSCMatrix => BSM, DenseMatrix => BDM, Vector => BV, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy, svd => brzSvd, max => Bmax, min => Bmin, sum => Bsum } import scala.collection.mutable.ArrayBuffer import NN.NeuralNet import util.RandSampleData object Test_example_NN { def main(args: Array[String]) { //1 构建Spark对象 val conf = new SparkConf().setAppName("NNtest") val sc = new SparkContext(conf) //*****************************例1(基于经典优化算法测试函数随机生成样本)*****************************// //2 随机生成测试数据 // 随机数生成 Logger.getRootLogger.setLevel(Level.WARN) val sample_n1 = 1000 val sample_n2 = 5 val randsamp1 = RandSampleData.RandM(sample_n1, sample_n2, -10, 10, "sphere") // 归一化[0 1] val normmax = Bmax(randsamp1(::, breeze.linalg.*)) val normmin = Bmin(randsamp1(::, breeze.linalg.*)) val norm1 = randsamp1 - (BDM.ones[Double](randsamp1.rows, 1)) * normmin val norm2 = norm1 :/ ((BDM.ones[Double](norm1.rows, 1)) * (normmax - normmin)) // 转换样本train_d val randsamp2 = ArrayBuffer[BDM[Double]]() for (i <- 0 to sample_n1 - 1) { val mi = norm2(i, ::) val mi1 = mi.inner val mi2 = mi1.toArray val mi3 = new BDM(1, mi2.length, mi2) randsamp2 += mi3 } val randsamp3 = sc.parallelize(randsamp2, 10) sc.setCheckpointDir("hdfs://192.168.180.79:9000/user/huangmeiling/checkpoint") randsamp3.checkpoint() val train_d = randsamp3.map(f => (new BDM(1, 1, f(::, 0).data), f(::, 1 to -1))) //3 设置训练参数,建立模型 // opts:迭代步长,迭代次数,交叉验证比例 val opts = Array(100.0, 50.0, 0.0) train_d.cache val numExamples = train_d.count() println(s"numExamples = $numExamples.") val NNmodel = new NeuralNet(). setSize(Array(5, 7, 1)). setLayer(3). setActivation_function("tanh_opt"). setLearningRate(2.0). setScaling_learningRate(1.0). setWeightPenaltyL2(0.0). setNonSparsityPenalty(0.0). setSparsityTarget(0.05). setInputZeroMaskedFraction(0.0). setDropoutFraction(0.0). setOutput_function("sigm"). 
NNtrain(train_d, opts)

    // 4 Test the model
    val NNforecast = NNmodel.predict(train_d)
    val NNerror = NNmodel.Loss(NNforecast)
    println(s"NNerror = $NNerror.")
    val printf1 = NNforecast.map(f => (f.label.data(0), f.predict_label.data(0))).take(20)
    println("预测结果——实际值:预测值:误差")
    for (i <- 0 until printf1.length)
      println(printf1(i)._1 + "\t" + printf1(i)._2 + "\t" + (printf1(i)._2 - printf1(i)._1))
    println("权重W{1}")
    val tmpw0 = NNmodel.weights(0)
    for (i <- 0 to tmpw0.rows - 1) {
      for (j <- 0 to tmpw0.cols - 1) {
        print(tmpw0(i, j) + "\t")
      }
      println()
    }
    println("权重W{2}")
    val tmpw1 = NNmodel.weights(1)
    for (i <- 0 to tmpw1.rows - 1) {
      for (j <- 0 to tmpw1.cols - 1) {
        print(tmpw1(i, j) + "\t")
      }
      println()
    }
    // val tmpxy = train_d.map(f => (f._1.toArray, f._2.toArray)).toArray.map { f => ((new ArrayBuffer() ++ f._1) ++ f._2).toArray }
    // for (i <- 0 to tmpxy.length - 1) {
    //   for (j <- 0 to tmpxy(i).length - 1) {
    //     print(tmpxy(i)(j) + "\t")
    //   }
    //   println()
    // }

    //***************************** Example 2 (fixed samples generated from the classic Sphere Model test function) *****************************//
    // // 2 Read the sample data
    // Logger.getRootLogger.setLevel(Level.WARN)
    // val data_path = "hdfs://192.168.180.79:9000/user/huangmeiling/deeplearn/data1"
    // val examples = sc.textFile(data_path).cache()
    // val train_d1 = examples.map { line =>
    //   val f1 = line.split("\t")
    //   val f = f1.map(f => f.toDouble)
    //   val id = f(0)
    //   val y = Array(f(1))
    //   val x = f.slice(2, f.length)
    //   (id, new BDM(1, y.length, y), new BDM(1, x.length, x))
    // }
    // val train_d = train_d1
    // val opts = Array(100.0, 20.0, 0.0)
    // // 3 Set training parameters and build the model
    // val NNmodel = new NeuralNet().
    //   setSize(Array(5, 7, 1)).
    //   setLayer(3).
    //   setActivation_function("tanh_opt").
    //   setLearningRate(2.0).
    //   setScaling_learningRate(1.0).
    //   setWeightPenaltyL2(0.0).
    //   setNonSparsityPenalty(0.0).
    //   setSparsityTarget(0.0).
    //   setOutput_function("sigm").
    //   NNtrain(train_d, opts)
    //
    // // 4 Test the model
    // val NNforecast = NNmodel.predict(train_d.map(f => (f._2, f._3)))
    // val NNerror = NNmodel.Loss(NNforecast)
    // println(s"NNerror = $NNerror.")
    // val printf1 = NNforecast.map(f => (f.label.data(0), f.predict_label.data(0))).take(200)
    // println("预测结果——实际值:预测值:误差")
    // for (i <- 0 until printf1.length)
    //   println(printf1(i)._1 + "\t" + printf1(i)._2 + "\t" + (printf1(i)._2 - printf1(i)._1))
    // println("权重W{1}")
    // val tmpw0 = NNmodel.weights(0)
    // for (i <- 0 to tmpw0.rows - 1) {
    //   for (j <- 0 to tmpw0.cols - 1) {
    //     print(tmpw0(i, j) + "\t")
    //   }
    //   println()
    // }
    // println("权重W{2}")
    // val tmpw1 = NNmodel.weights(1)
    // for (i <- 0 to tmpw1.rows - 1) {
    //   for (j <- 0 to tmpw1.cols - 1) {
    //     print(tmpw1(i, j) + "\t")
    //   }
    //   println()
    // }

    //***************************** Example 3 (reading Spark MLlib data) *****************************//
    // Read sample data and convert: [y1,[x1 x2 x10]] => ([y1 y2],[x1 x2...x10])
    // val data_path = "file:/home/jb-huangmeiling/data/sample_linear_regression_data.txt"
    // val examples = MLUtils.loadLibSVMFile(sc, data_path).cache()
    // val train_d1 = examples.map { f =>
    //   LabeledPoint(f.label, Vectors.dense(f.features.toArray))
    // }
    // val opts = Array(100.0, 100.0, 0.0)
    // val train_d = train_d1.map(f => (BDM((f.label, f.label * 0.5 + 2.0)), BDM(f.features.toArray)))
    // val numExamples = train_d.count()
    // println(s"numExamples = $numExamples.")
  }
}
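For quick experiments without a cluster, the pieces above can be combined into a condensed local-mode variant of example 1. This is only a sketch under stated assumptions: the object name, local master, sample count and opts values are illustrative, and it skips the [0, 1] normalization step that example 1 performs before training.

import org.apache.spark.{ SparkConf, SparkContext }
import breeze.linalg.{ DenseMatrix => BDM }
import NN.NeuralNet
import util.RandSampleData

object LocalNNExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("NNlocal").setMaster("local[2]"))
    // generate 200 samples of the 5-dimensional Sphere function: column 0 is y, columns 1..5 are x
    val raw = RandSampleData.RandM(200, 5, -10, 10, "sphere")
    val rows = (0 until raw.rows).map { i =>
      val r = raw(i, ::).inner.toArray
      (new BDM(1, 1, Array(r(0))), new BDM(1, r.length - 1, r.drop(1)))
    }
    val train_d = sc.parallelize(rows)
    val model = new NeuralNet()
      .setSize(Array(5, 7, 1))
      .setLayer(3)
      .setActivation_function("tanh_opt")
      .setOutput_function("sigm")
      .NNtrain(train_d, Array(50.0, 10.0, 0.0)) // batch size, epochs, validation fraction
    println(s"train mse = ${model.Loss(model.predict(train_d))}")
    sc.stop()
  }
}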
Code and data are available on a Baidu netdisk: http://pan.baidu.com/s/1c1J8ZN6