MLlib之LR算法源码学习


/**

 * :: DeveloperApi ::

 * GeneralizedLinearModel (GLM) represents a model trained using

 * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and

 * an intercept.

 *

 * @param weights Weights computed for every feature.

 * @param intercept Intercept computed for this model.

 */

@DeveloperApi
abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
  extends Serializable {

  /**
   * Predict the result given a data point and the weights learned.
   * Concrete models implement this to turn features, weights and intercept into a prediction.
   *
   * @param dataMatrix Row vector containing the features for this data point
   * @param weightMatrix Column vector containing the weights of the model
   * @param intercept Intercept of the model.
   * @return the prediction for this data point
   */
  protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double

  /**
   * Predict values for the given data set using the model trained.
   *
   * @param testData RDD representing data points to be predicted
   * @return RDD[Double] where each entry contains the corresponding prediction
   */
  def predict(testData: RDD[Vector]): RDD[Double] = {
    // A small optimization to avoid serializing the entire model. Only the weightsMatrix
    // and intercept is needed, so both are copied into locals before the closure is built.
    val localWeights = weights
    val bcWeights = testData.context.broadcast(localWeights)
    val localIntercept = intercept
    testData.mapPartitions { iter =>
      // The broadcast value is a read-only shared variable (comparable to Hadoop's
      // DistributedCache); it is read once per partition rather than once per record.
      val w = bcWeights.value
      iter.map(v => predictPoint(v, w, localIntercept))
    }
  }

  /**
   * Predict values for a single data point using the model trained.
   *
   * @param testData array representing a single data point
   * @return Double prediction from the trained model
   */
  def predict(testData: Vector): Double = {
    predictPoint(testData, weights, intercept)
  }
}

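// Predicts the value of new data points from the weights learned on the training set
// (a linear regression model outputs a continuous value, not a class label).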

/**

 * Regression model trained using LinearRegression.

 *

 * @param weights Weights computed for every feature.

 * @param intercept Intercept computed for this model.

 */

class LinearRegressionModel (
    override val weights: Vector,
    override val intercept: Double)
  extends GeneralizedLinearModel(weights, intercept) with RegressionModel with Serializable {

  /**
   * Linear prediction for one data point: the dot product of the weight vector and the
   * feature vector, plus the intercept.
   * For v1 = [a1, b1] and v2 = [a2, b2] the dot product is a1 * a2 + b1 * b2.
   *
   * @param dataMatrix features of the data point
   * @param weightMatrix weights of the model
   * @param intercept intercept of the model
   * @return weightMatrix . dataMatrix + intercept
   */
  override protected def predictPoint(
      dataMatrix: Vector,
      weightMatrix: Vector,
      intercept: Double): Double = {
    val brzWeights = weightMatrix.toBreeze
    val brzFeatures = dataMatrix.toBreeze
    brzWeights.dot(brzFeatures) + intercept
  }
}

import org.apache.spark.mllib.linalg.{Vectors, Vector}

import org.apache.spark.mllib.util.NumericParser

import org.apache.spark.SparkException



/**

 * Class that represents the features and labels of a data point.

 *

 * @param label Label for this data point.

 * @param features List of features for this data point.

 */

case class LabeledPoint(label: Double, features: Vector) {
  /** Renders the point as "(label,features)". */
  override def toString: String = "(%s,%s)".format(label, features)
}



/**

 * Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].

 */

object LabeledPoint {
  /**
   * Parses a string resulted from `LabeledPoint#toString` into
   * an [[org.apache.spark.mllib.regression.LabeledPoint]].
   *
   * Input starting with "(" is handed to `NumericParser`; any other input is treated as the
   * dense format used before v1.0, i.e. a label, a comma, then space-separated feature values.
   *
   * @param s the string to parse
   * @return the parsed labeled point
   */
  def parse(s: String): LabeledPoint = {
    if (!s.startsWith("(")) {
      // dense format used before v1.0
      val fields = s.split(',')
      val label = java.lang.Double.parseDouble(fields(0))
      val values = fields(1).trim().split(' ').map(java.lang.Double.parseDouble)
      LabeledPoint(label, Vectors.dense(values))
    } else {
      NumericParser.parse(s) match {
        case Seq(label: Double, numeric: Any) =>
          LabeledPoint(label, Vectors.parseNumeric(numeric))
        case other =>
          // s"..." is Scala string interpolation
          throw new SparkException(s"Cannot parse $other.")
      }
    }
  }
}

/**

 * :: DeveloperApi ::

 * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).

 * This class should be extended with an Optimizer to create a new GLM.

 */

@DeveloperApi
abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
  extends Logging with Serializable {

  /** Checks run against the input before training; training fails if any of them is false. */
  protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()

  /** The optimizer to solve the problem. */
  def optimizer: Optimizer

  /** Whether to add intercept (default: false). */
  protected var addIntercept: Boolean = false

  /** Whether to run the validators on the input before training (default: true). */
  protected var validateData: Boolean = true

  /**
   * Whether to perform feature scaling before model training to reduce the condition numbers
   * which can significantly help the optimizer converging faster. The scaling correction will be
   * translated back to resulting model weights, so it's transparent to users.
   * Note: This technique is used in both libsvm and glmnet packages. Default false.
   */
  private var useFeatureScaling = false

  /**
   * Set if the algorithm should use feature scaling to improve the convergence during optimization.
   */
  private[mllib] def setFeatureScaling(useFeatureScaling: Boolean): this.type = {
    this.useFeatureScaling = useFeatureScaling
    this
  }

  /**
   * Create a model given the weights and intercept
   */
  protected def createModel(weights: Vector, intercept: Double): M

  /**
   * Set if the algorithm should add an intercept. Default false.
   * We set the default to false because adding the intercept will cause memory allocation.
   */
  def setIntercept(addIntercept: Boolean): this.type = {
    this.addIntercept = addIntercept
    this
  }

  /**
   * Set if the algorithm should validate data before training. Default true.
   */
  def setValidateData(validateData: Boolean): this.type = {
    this.validateData = validateData
    this
  }

  /**
   * Run the algorithm with the configured parameters on an input
   * RDD of LabeledPoint entries. The initial weights are an all-zero vector whose size is
   * taken from the first element of the input.
   */
  def run(input: RDD[LabeledPoint]): M = {
    val numFeatures: Int = input.first().features.size
    // new Array[Double](n) is zero-filled, so training starts from the zero vector
    val initialWeights = Vectors.dense(new Array[Double](numFeatures))
    run(input, initialWeights)
  }

  /**
   * Run the algorithm with the configured parameters on an input RDD
   * of LabeledPoint entries starting from the initial weights provided.
   *
   * @throws SparkException if validation is enabled and any validator rejects the input
   */
  def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {

    // Check the data properties before running the optimizer
    if (validateData && !validators.forall(func => func(input))) {
      throw new SparkException("Input validation failed.")
    }

    /**
     * Scaling columns to unit variance as a heuristic to reduce the condition number:
     *
     * During the optimization process, the convergence (rate) depends on the condition number of
     * the training dataset. Scaling the variables often reduces this condition number
     * heuristically, thus improving the convergence rate. Without reducing the condition number,
     * some training datasets mixing the columns with different scales may not be able to converge.
     *
     * GLMNET and LIBSVM packages perform the scaling to reduce the condition number, and return
     * the weights in the original scale.
     * See page 9 in http://cran.r-project.org/web/packages/glmnet/glmnet.pdf
     *
     * Here, if useFeatureScaling is enabled, we will standardize the training features by dividing
     * the variance of each column (without subtracting the mean), and train the model in the
     * scaled space. Then we transform the coefficients from the scaled space to the original scale
     * as GLMNET and LIBSVM do.
     *
     * Currently, it's only enabled in LogisticRegressionWithLBFGS
     */
    // None means "no scaling"; an Option replaces the former null sentinel.
    val scaler =
      if (useFeatureScaling) Some((new StandardScaler).fit(input.map(x => x.features))) else None

    // Per-point feature transformation: the fitted scaler when scaling is on, identity otherwise.
    val scaleFeatures: Vector => Vector = scaler match {
      case Some(model) => (features: Vector) => model.transform(features)
      case None => (features: Vector) => features
    }

    // Append an extra variable consisting of all 1.0's for the intercept (this is the bias
    // term, not a penalty/regularization term). Its weight is read back from the last slot below.
    val data = if (addIntercept) {
      input.map(labeledPoint =>
        (labeledPoint.label, appendBias(scaleFeatures(labeledPoint.features))))
    } else {
      input.map(labeledPoint => (labeledPoint.label, scaleFeatures(labeledPoint.features)))
    }

    val initialWeightsWithIntercept =
      if (addIntercept) appendBias(initialWeights) else initialWeights

    // The actual training: everything above only prepares the optimizer's input.
    val weightsWithIntercept = optimizer.optimize(data, initialWeightsWithIntercept)

    // With an intercept, the last component is the intercept and the rest are feature weights.
    val intercept = if (addIntercept) weightsWithIntercept(weightsWithIntercept.size - 1) else 0.0
    val trainedWeights =
      if (addIntercept) {
        Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1))
      } else {
        weightsWithIntercept
      }

    /**
     * The weights and intercept are trained in the scaled space; we're converting them back to
     * the original scale.
     *
     * Math shows that if we only perform standardization without subtracting means, the intercept
     * will not be changed. w_i = w_i' / v_i where w_i' is the coefficient in the scaled space, w_i
     * is the coefficient in the original space, and v_i is the variance of the column i.
     */
    val weights = scaler match {
      case Some(model) => model.transform(trainedWeights)
      case None => trainedWeights
    }

    createModel(weights, intercept)
  }
}

LinearRegressionWithSGD类主要接收外部数据集、算法参数等输入进行训练，得到一个线性回归模型LinearRegressionModel

　接收的输入参数包括：

　　　　input:输入数据集合，标签label为double类型的实数值（回归目标值，不限于1.0和0.0），feature为double类型

　　　　numIterations:迭代次数，默认为100

　　　　stepSize:迭代步伐大小，默认为1.0

　　　　miniBatchFraction:每次迭代参与计算的样本比例，默认为1.0

　　　　initialWeights:weight向量初始值，默认为0向量

/**

 * Train a linear regression model with no regularization using Stochastic Gradient Descent.

 * This solves the least squares regression formulation

 *              f(weights) = 1/n ||A weights-y||^2

 * (which is the mean squared error).

 * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with

 * its corresponding right hand side label y.

 * See also the documentation for the precise formulation.

 */

class LinearRegressionWithSGD private[mllib] (
    private var stepSize: Double,
    private var numIterations: Int,
    private var miniBatchFraction: Double)
  extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable {

  // Least-squares loss paired with the plain (unregularized) updater.
  private val gradient = new LeastSquaresGradient()
  private val updater = new SimpleUpdater()

  /** Gradient descent optimizer configured from the constructor arguments. */
  override val optimizer = {
    val sgd = new GradientDescent(gradient, updater)
    sgd.setStepSize(stepSize)
    sgd.setNumIterations(numIterations)
    sgd.setMiniBatchFraction(miniBatchFraction)
    sgd
  }

  /**
   * Construct a LinearRegression object with default parameters: {stepSize: 1.0,
   * numIterations: 100, miniBatchFraction: 1.0}.
   */
  def this() = this(1.0, 100, 1.0)

  /** Wraps the trained weights and intercept in a LinearRegressionModel. */
  override protected[mllib] def createModel(
      weights: Vector,
      intercept: Double): LinearRegressionModel =
    new LinearRegressionModel(weights, intercept)
}



/**

 * Top-level methods for calling LinearRegression.

 */

object LinearRegressionWithSGD {

  /**
   * Train a Linear Regression model given an RDD of (label, features) pairs. We run a fixed number
   * of iterations of gradient descent using the specified step size. Each iteration uses
   * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used
   * in gradient descent are initialized using the initial weights provided.
   *
   * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
   *              matrix A as well as the corresponding right hand side label y
   * @param numIterations Number of iterations of gradient descent to run.
   * @param stepSize Step size to be used for each iteration of gradient descent.
   * @param miniBatchFraction Fraction of data to be used per iteration.
   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
   *        the number of features in the data.
   * @return a LinearRegressionModel built from the trained weights and intercept.
   */
  def train(
      input: RDD[LabeledPoint],
      numIterations: Int,
      stepSize: Double,
      miniBatchFraction: Double,
      initialWeights: Vector): LinearRegressionModel = {
    // Note the argument order: the constructor takes stepSize before numIterations.
    val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
    algorithm.run(input, initialWeights)
  }

  /**
   * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
   * of iterations of gradient descent using the specified step size. Each iteration uses
   * `miniBatchFraction` fraction of the data to calculate a stochastic gradient.
   *
   * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
   *              matrix A as well as the corresponding right hand side label y
   * @param numIterations Number of iterations of gradient descent to run.
   * @param stepSize Step size to be used for each iteration of gradient descent.
   * @param miniBatchFraction Fraction of data to be used per iteration.
   * @return a LinearRegressionModel built from the trained weights and intercept.
   */
  def train(
      input: RDD[LabeledPoint],
      numIterations: Int,
      stepSize: Double,
      miniBatchFraction: Double): LinearRegressionModel = {
    val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
    algorithm.run(input)
  }

  /**
   * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
   * of iterations of gradient descent using the specified step size. We use the entire data set to
   * compute the true gradient in each iteration.
   *
   * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
   *              matrix A as well as the corresponding right hand side label y
   * @param numIterations Number of iterations of gradient descent to run.
   * @param stepSize Step size to be used for each iteration of Gradient Descent.
   * @return a LinearRegressionModel which has the weights and offset from training.
   */
  def train(
      input: RDD[LabeledPoint],
      numIterations: Int,
      stepSize: Double): LinearRegressionModel =
    train(input, numIterations, stepSize, 1.0)

  /**
   * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number
   * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
   * compute the true gradient in each iteration.
   *
   * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
   *              matrix A as well as the corresponding right hand side label y
   * @param numIterations Number of iterations of gradient descent to run.
   * @return a LinearRegressionModel which has the weights and offset from training.
   */
  def train(
      input: RDD[LabeledPoint],
      numIterations: Int): LinearRegressionModel =
    train(input, numIterations, 1.0, 1.0)
}

（Gradient类：对损失函数求导、计算梯度，例如对最小二乘损失求导；计算出的梯度供梯度下降算法使用）
/**

 * :: DeveloperApi ::

 * Class used to compute the gradient for a loss function, given a single data point.

 */

@DeveloperApi
abstract class Gradient extends Serializable {

  /**
   * Computes the gradient and the loss for the features of a single data point.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @return (gradient: Vector, loss: Double)
   */
  def compute(
      data: Vector,
      label: Double,
      weights: Vector): (Vector, Double)

  /**
   * Computes the gradient and the loss for the features of a single data point, adding the
   * gradient to a caller-provided vector to avoid creating new objects, and returns the loss.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @param cumGradient the computed gradient will be added to this vector
   * @return loss
   */
  def compute(
      data: Vector,
      label: Double,
      weights: Vector,
      cumGradient: Vector): Double
}



/**

 * :: DeveloperApi ::

 * Compute gradient and loss for a logistic loss function, as used in binary classification.

 * See also the documentation for the precise formulation.

 */

@DeveloperApi
class LogisticGradient extends Gradient {

  /**
   * Computes log(1 + exp(x)) without overflowing for large positive x.
   *
   * math.exp(x) becomes Infinity once x exceeds roughly 709, which would make the naive
   * log1p(exp(x)) infinite. For x > 0 the identity log(1 + exp(x)) = x + log(1 + exp(-x))
   * is used instead, so the exponential is only ever taken of a non-positive number.
   */
  private def log1pExp(x: Double): Double =
    if (x > 0) x + math.log1p(math.exp(-x)) else math.log1p(math.exp(x))

  /**
   * Logistic loss for one point given margin = -(data . weights).
   * Labels greater than 0 are treated as the positive class, everything else as negative.
   */
  private def logisticLoss(margin: Double, label: Double): Double =
    if (label > 0) log1pExp(margin) else log1pExp(margin) - margin

  /**
   * Computes the gradient and the logistic loss for a single data point.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @return (gradient, loss); the gradient is a scaled copy of `data`
   */
  override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
    val margin = -1.0 * dot(data, weights)
    // 1 / (1 + exp(margin)) is the predicted probability of the positive class
    val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
    val gradient = data.copy
    scal(gradientMultiplier, gradient)

    (gradient, logisticLoss(margin, label))
  }

  /**
   * Computes the logistic loss for a single data point and adds its gradient to `cumGradient`.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @param cumGradient the computed gradient will be added to this vector
   * @return loss
   */
  override def compute(
      data: Vector,
      label: Double,
      weights: Vector,
      cumGradient: Vector): Double = {
    val margin = -1.0 * dot(data, weights)
    val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
    axpy(gradientMultiplier, data, cumGradient)
    logisticLoss(margin, label)
  }
}



/**

 * :: DeveloperApi ::

 * Compute gradient and loss for a Least-squared loss function, as used in linear regression.

 * This is correct for the averaged least squares loss function (mean squared error)

 *              L = 1/n ||A weights-y||^2

 * See also the documentation for the precise formulation.

 */

@DeveloperApi
class LeastSquaresGradient extends Gradient {

  /**
   * Computes the gradient and the squared-error loss for a single data point.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @return (2 * residual * data, residual squared) with residual = data . weights - label
   */
  override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
    val residual = dot(data, weights) - label
    // Work on a copy so the caller's feature vector is left untouched.
    val scaledFeatures = data.copy
    scal(2.0 * residual, scaledFeatures)
    (scaledFeatures, residual * residual)
  }

  /**
   * Computes the squared-error loss for a single data point and adds its gradient
   * to `cumGradient`.
   *
   * @param data features for one data point
   * @param label label for this data point
   * @param weights weights/coefficients corresponding to features
   * @param cumGradient the computed gradient will be added to this vector
   * @return loss
   */
  override def compute(
      data: Vector,
      label: Double,
      weights: Vector,
      cumGradient: Vector): Double = {
    val residual = dot(data, weights) - label
    axpy(2.0 * residual, data, cumGradient)
    residual * residual
  }
}



/**

 * :: DeveloperApi ::

 * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification.

 * See also the documentation for the precise formulation.

 * NOTE: This assumes that the labels are {0,1}

 */

@DeveloperApi
class HingeGradient extends Gradient {

  /**
   * Computes the gradient and the hinge loss for a single data point.
   *
   * @param data features for one data point
   * @param label label for this data point, expected to be 0 or 1
   * @param weights weights/coefficients corresponding to features
   * @return (gradient, loss); an empty sparse gradient and 0.0 loss when the margin is met
   */
  override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
    val dotProduct = dot(data, weights)
    // Our loss function with {0, 1} labels is max(0, 1 - (2y - 1) (f_w(x)))
    // Therefore the gradient is -(2y - 1)*x
    val labelScaled = 2 * label - 1.0
    val scaledMargin = labelScaled * dotProduct
    if (scaledMargin < 1.0) {
      val scaledFeatures = data.copy
      scal(-labelScaled, scaledFeatures)
      (scaledFeatures, 1.0 - scaledMargin)
    } else {
      (Vectors.sparse(weights.size, Array.empty, Array.empty), 0.0)
    }
  }

  /**
   * Computes the hinge loss for a single data point and adds its gradient to `cumGradient`.
   * `cumGradient` is left untouched when the margin is met.
   *
   * @param data features for one data point
   * @param label label for this data point, expected to be 0 or 1
   * @param weights weights/coefficients corresponding to features
   * @param cumGradient the computed gradient will be added to this vector
   * @return loss
   */
  override def compute(
      data: Vector,
      label: Double,
      weights: Vector,
      cumGradient: Vector): Double = {
    val dotProduct = dot(data, weights)
    // Our loss function with {0, 1} labels is max(0, 1 - (2y - 1) (f_w(x)))
    // Therefore the gradient is -(2y - 1)*x
    val labelScaled = 2 * label - 1.0
    val scaledMargin = labelScaled * dotProduct
    if (scaledMargin < 1.0) {
      axpy(-labelScaled, data, cumGradient)
      1.0 - scaledMargin
    } else {
      0.0
    }
  }
}

Updater类负责weight的迭代更新计算，包含了SimpleUpdater、L1Updater、SquaredL2Updater

/**

 * :: DeveloperApi ::

 * Class used to perform steps (weight update) using Gradient Descent methods.

 *

 * For general minimization problems, or for regularized problems of the form

 *         min  L(w) + regParam * R(w),

 * the compute function performs the actual update step, when given some

 * (e.g. stochastic) gradient direction for the loss L(w),

 * and a desired step-size (learning rate).

 *

 * The updater is responsible to also perform the update coming from the

 * regularization term R(w) (if any regularization is used).

 */

@DeveloperApi
abstract class Updater extends Serializable {

  /**
   * Computes updated weights from the gradient, step size, iteration number and
   * regularization parameter. Also returns the regularization value regParam * R(w)
   * computed using the *updated* weights.
   *
   * @param weightsOld - Column matrix of size dx1 where d is the number of features.
   * @param gradient - Column matrix of size dx1 where d is the number of features.
   * @param stepSize - step size across iterations
   * @param iter - Iteration number
   * @param regParam - Regularization parameter
   *
   * @return A tuple of 2 elements. The first element is a column matrix containing updated weights,
   *         and the second element is the regularization value computed using updated weights.
   */
  def compute(weightsOld: Vector,
              gradient: Vector,
              stepSize: Double,
              iter: Int,
              regParam: Double): (Vector, Double)
}



/**

 * :: DeveloperApi ::

 * A simple updater for gradient descent *without* any regularization.

 * Uses a step-size decreasing with the square root of the number of iterations.

 */

@DeveloperApi
class SimpleUpdater extends Updater {

  /**
   * Takes one plain gradient step: w' = w - (stepSize / sqrt(iter)) * gradient.
   * `regParam` is not used, and the returned regularization value is always 0.
   */
  override def compute(
      weightsOld: Vector,
      gradient: Vector,
      stepSize: Double,
      iter: Int,
      regParam: Double): (Vector, Double) = {
    val stepForIter = stepSize / math.sqrt(iter)
    // Dense copy of the old weights, updated in place by the axpy call below.
    val updated: BV[Double] = weightsOld.toBreeze.toDenseVector
    brzAxpy(-stepForIter, gradient.toBreeze, updated)
    val noRegularization = 0.0
    (Vectors.fromBreeze(updated), noRegularization)
  }
}



/**

 * :: DeveloperApi ::

 * Updater for L1 regularized problems.

 *          R(w) = ||w||_1

 * Uses a step-size decreasing with the square root of the number of iterations.



 * Instead of subgradient of the regularizer, the proximal operator for the

 * L1 regularization is applied after the gradient step. This is known to

 * result in better sparsity of the intermediate solution.

 *

 * The corresponding proximal operator for the L1 norm is the soft-thresholding

 * function. That is, each weight component is shrunk towards 0 by shrinkageVal.

 *

 * If w >  shrinkageVal, set weight component to w-shrinkageVal.

 * If w < -shrinkageVal, set weight component to w+shrinkageVal.

 * If -shrinkageVal < w < shrinkageVal, set weight component to 0.

 *

 * Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal)

 */

@DeveloperApi
class L1Updater extends Updater {

  /**
   * Takes a gradient step with step size stepSize / sqrt(iter), then soft-thresholds every
   * weight component by regParam * (that step size).
   *
   * @return the updated weights and regParam * ||w'||_1 computed on the updated weights
   */
  override def compute(
      weightsOld: Vector,
      gradient: Vector,
      stepSize: Double,
      iter: Int,
      regParam: Double): (Vector, Double) = {
    val stepForIter = stepSize / math.sqrt(iter)

    // Take gradient step
    val updated: BV[Double] = weightsOld.toBreeze.toDenseVector
    brzAxpy(-stepForIter, gradient.toBreeze, updated)

    // Apply proximal operator (soft thresholding)
    val shrinkageVal = regParam * stepForIter
    val size = updated.length
    var idx = 0
    while (idx < size) {
      val component = updated(idx)
      updated(idx) = signum(component) * max(0.0, abs(component) - shrinkageVal)
      idx += 1
    }

    val regValue = brzNorm(updated, 1.0) * regParam
    (Vectors.fromBreeze(updated), regValue)
  }
}



/**

 * :: DeveloperApi ::

 * Updater for L2 regularized problems.

 *          R(w) = 1/2 ||w||^2

 * Uses a step-size decreasing with the square root of the number of iterations.

 */

@DeveloperApi
class SquaredL2Updater extends Updater {

  /**
   * Takes one step that combines the loss gradient with the gradient of the L2 regularizer.
   *
   * @return the updated weights and 0.5 * regParam * ||w'||^2 computed on the updated weights
   */
  override def compute(
      weightsOld: Vector,
      gradient: Vector,
      stepSize: Double,
      iter: Int,
      regParam: Double): (Vector, Double) = {
    // add up both updates from the gradient of the loss (= step) as well as
    // the gradient of the regularizer (= regParam * weightsOld)
    // w' = w - thisIterStepSize * (gradient + regParam * w)
    // w' = (1 - thisIterStepSize * regParam) * w - thisIterStepSize * gradient
    val stepForIter = stepSize / math.sqrt(iter)
    val updated: BV[Double] = weightsOld.toBreeze.toDenseVector
    // First shrink the old weights, then subtract the scaled loss gradient.
    updated :*= (1.0 - stepForIter * regParam)
    brzAxpy(-stepForIter, gradient.toBreeze, updated)

    val norm = brzNorm(updated, 2.0)
    val regValue = 0.5 * regParam * norm * norm
    (Vectors.fromBreeze(updated), regValue)
  }
}

/**

 * :: DeveloperApi ::

 * Trait for optimization problem solvers.

 */

@DeveloperApi
trait Optimizer extends Serializable {

  /**
   * Solve the provided convex optimization problem.
   *
   * @param data training data as (label, features) pairs
   * @param initialWeights weights the optimization starts from
   * @return the solution weight vector
   */
  def optimize(
      data: RDD[(Double, Vector)],
      initialWeights: Vector): Vector
}

GradientDescent（梯度下降算法）

/**

 * Class used to solve an optimization problem using Gradient Descent.

 * @param gradient Gradient function to be used.

 * @param updater Updater to be used to update weights after every iteration.

 */

class GradientDescent private[mllib] (private var gradient: Gradient, private var updater: Updater)
  extends Optimizer with Logging {

  // Tunable settings; each has a fluent setter below.
  private var stepSize: Double = 1.0
  private var numIterations: Int = 100
  private var regParam: Double = 0.0
  private var miniBatchFraction: Double = 1.0

  /**
   * Set the initial step size of SGD for the first step. Default 1.0.
   * In subsequent steps, the step size will decrease with stepSize/sqrt(t)
   */
  def setStepSize(step: Double): this.type = {
    stepSize = step
    this
  }

  /**
   * :: Experimental ::
   * Set fraction of data to be used for each SGD iteration.
   * Default 1.0 (corresponding to deterministic/classical gradient descent)
   */
  @Experimental
  def setMiniBatchFraction(fraction: Double): this.type = {
    miniBatchFraction = fraction
    this
  }

  /**
   * Set the number of iterations for SGD. Default 100.
   */
  def setNumIterations(iters: Int): this.type = {
    numIterations = iters
    this
  }

  /**
   * Set the regularization parameter. Default 0.0.
   */
  def setRegParam(regParam: Double): this.type = {
    // `this.` is required here: the parameter shadows the field of the same name.
    this.regParam = regParam
    this
  }

  /**
   * Set the gradient function (of the loss function of one single data example)
   * to be used for SGD.
   */
  def setGradient(gradient: Gradient): this.type = {
    this.gradient = gradient
    this
  }

  /**
   * Set the updater function to actually perform a gradient step in a given direction.
   * The updater is responsible to perform the update from the regularization term as well,
   * and therefore determines what kind or regularization is used, if any.
   */
  def setUpdater(updater: Updater): this.type = {
    this.updater = updater
    this
  }

  /**
   * :: DeveloperApi ::
   * Runs gradient descent on the given training data.
   * @param data training data
   * @param initialWeights initial weights
   * @return solution vector
   */
  @DeveloperApi
  def optimize(data: RDD[(Double, Vector)], initialWeights: Vector): Vector = {
    // runMiniBatchSGD returns (weights, loss history); only the weights are returned here.
    val result = GradientDescent.runMiniBatchSGD(
      data,
      gradient,
      updater,
      stepSize,
      numIterations,
      regParam,
      miniBatchFraction,
      initialWeights)
    result._1
  }

}



/**

 * :: DeveloperApi ::

 * Top-level method to run gradient descent. */

@DeveloperApi
object GradientDescent extends Logging {
  /**
   * Run stochastic gradient descent (SGD) in parallel using mini batches.
   * In each iteration, we sample a subset (fraction miniBatchFraction) of the total data
   * in order to compute a gradient estimate.
   * Sampling, and averaging the subgradients over this subset is performed using one standard
   * spark map-reduce in each iteration.
   *
   * The gradient and loss sums are averaged over the number of points actually sampled in the
   * iteration. An iteration whose sample turns out empty is skipped: the weights are left
   * unchanged and no loss is recorded, so the returned loss history may hold fewer than
   * `numIterations` entries.
   *
   * @param data - Input data for SGD. RDD of the set of data examples, each of
   *               the form (label, [feature values]).
   * @param gradient - Gradient object (used to compute the gradient of the loss function of
   *                   one single data example)
   * @param updater - Updater function to actually perform a gradient step in a given direction.
   * @param stepSize - initial step size for the first step
   * @param numIterations - number of iterations that SGD should be run.
   * @param regParam - regularization parameter
   * @param miniBatchFraction - fraction of the input data set that should be used for
   *                            one iteration of SGD. Default value 1.0.
   * @param initialWeights - weights the descent starts from; returned unchanged if there is
   *                         no data
   *
   * @return A tuple containing two elements. The first element is a column matrix containing
   *         weights for every feature, and the second element is an array containing the
   *         stochastic loss computed for every iteration.
   */
  def runMiniBatchSGD(
      data: RDD[(Double, Vector)],
      gradient: Gradient,
      updater: Updater,
      stepSize: Double,
      numIterations: Int,
      regParam: Double,
      miniBatchFraction: Double,
      initialWeights: Vector): (Vector, Array[Double]) = {

    val stochasticLossHistory = new ArrayBuffer[Double](numIterations)

    val numExamples = data.count()

    if (numExamples == 0) {
      // if no data, return initial weights to avoid NaNs
      logInfo("GradientDescent.runMiniBatchSGD returning initial weights, no data found")
      (initialWeights, stochasticLossHistory.toArray)
    } else {
      // Initialize weights as a column vector
      var weights = Vectors.dense(initialWeights.toArray)
      val n = weights.size

      /**
       * For the first iteration, the regVal will be initialized as sum of weight squares
       * if it's L2 updater; for L1 updater, the same logic is followed.
       * (A zero gradient and a zero step size are passed, so only the regularization value
       * of the initial weights is taken from the result.)
       */
      var regVal = updater.compute(
        weights, Vectors.dense(new Array[Double](weights.size)), 0, 1, regParam)._2

      for (i <- 1 to numIterations) {
        val bcWeights = data.context.broadcast(weights)
        // Sample a subset (fraction miniBatchFraction) of the total data
        // compute and sum up the subgradients on this subset (this is one map-reduce).
        // The third accumulator component counts the points that were actually sampled.
        val (gradientSum, lossSum, miniBatchSize) = data.sample(false, miniBatchFraction, 42 + i)
          .treeAggregate((BDV.zeros[Double](n), 0.0, 0L))(
            seqOp = (c, v) => (c, v) match { case ((grad, loss, count), (label, features)) =>
              // NOTE(review): relies on Vectors.fromBreeze(grad) sharing storage with grad so
              // that compute accumulates into grad in place - confirm against Vectors.fromBreeze.
              val l = gradient.compute(features, label, bcWeights.value, Vectors.fromBreeze(grad))
              (grad, loss + l, count + 1)
            },
            combOp = (c1, c2) => (c1, c2) match {
              case ((grad1, loss1, count1), (grad2, loss2, count2)) =>
                (grad1 += grad2, loss1 + loss2, count1 + count2)
            })

        if (miniBatchSize > 0) {
          /**
           * NOTE(Xinghao): lossSum is computed using the weights from the previous iteration
           * and regVal is the regularization value computed in the previous iteration as well.
           */
          stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
          val update = updater.compute(
            weights, Vectors.fromBreeze(gradientSum / miniBatchSize.toDouble), stepSize, i,
            regParam)
          weights = update._1
          regVal = update._2
        } else {
          // Nothing was sampled, so there is no gradient estimate to step along.
          logInfo(s"GradientDescent.runMiniBatchSGD iteration $i skipped, sampled mini-batch " +
            "is empty")
        }
      }

      logInfo("GradientDescent.runMiniBatchSGD finished. Last 10 stochastic losses %s".format(
        stochasticLossHistory.takeRight(10).mkString(", ")))

      (weights, stochasticLossHistory.toArray)
    }
  }
}

你可能感兴趣的:(lib)

Python数据分析与可视化实战指南 William数据分析 python python 数据
在数据驱动的时代，Python因其简洁的语法、强大的库生态系统以及活跃的社区，成为了数据分析与可视化的首选语言。本文将通过一个详细的案例，带领大家学习如何使用Python进行数据分析，并通过可视化来直观呈现分析结果。一、环境准备1.1安装必要库在开始数据分析和可视化之前，我们需要安装一些常用的库。主要包括pandas、numpy、matplotlib和seaborn等。这些库分别用于数据处理、数学
linux sdl windows.h,Windows下的SDL安装奔跑吧linux内核 linux sdl windows.h
首先你要下载并安装SDL开发包。如果装在C盘下，路径为C:\SDL1.2.5如果在WINDOWS下。你可以按以下步骤：1.打开VC++，点击"Tools",Options2,点击directories选项3.选择"Includefiles"增加一个新的路径。"C:\SDL1.2.5\include"4，现在选择"Libaryfiles“增加"C:\SDL1.2.5\lib"现在你可以开始编写你的第
libyuv之linux编译 jaronho Linux linux 运维服务器
文章目录一、下载源码二、编译源码三、注意事项1、银河麒麟系统（aarch64）（1）解决armv8-a+dotprod+i8mm指令集支持问题（2）解决armv9-a+sve2指令集支持问题一、下载源码到GitHub网站下载https://github.com/lemenkov/libyuv源码，或者直接用git克隆到本地，如：git clone https://github.com/lemenko
nosql数据库技术与应用知识点皆过客，揽星河 NoSQL nosql 数据库大数据数据分析数据结构非关系型数据库
Nosql知识回顾大数据处理流程数据采集(flume、爬虫、传感器)数据存储(本门课程NoSQL所处的阶段)Hdfs、MongoDB、HBase等数据清洗(入仓)Hive等数据处理、分析(Spark、Flink等)数据可视化数据挖掘、机器学习应用(Python、SparkMLlib等)大数据时代存储的挑战(三高)高并发(同一时间很多人访问)高扩展(要求随时根据需求扩展存储)高效率(要求读写速度快)
Python开发常用的三方模块如下：换个网名有点难 python 开发语言
Python是一门功能强大的编程语言，拥有丰富的第三方库，这些库为开发者提供了极大的便利。以下是100个常用的Python库，涵盖了多个领域：1、NumPy，用于科学计算的基础库。2、Pandas，提供数据结构和数据分析工具。3、Matplotlib，一个绘图库。4、Scikit-learn，机器学习库。5、SciPy，用于数学、科学和工程的库。6、TensorFlow，由Google开发的开源机
SpringCloudAlibaba—Sentinel(限流) 菜鸟爪哇
前言：自己在学习过程的记录，借鉴别人文章，记录自己实现的步骤。借鉴文章：https://blog.csdn.net/u014494148/article/details/105484410Sentinel介绍Sentinel诞生于阿里巴巴，其主要目标是流量控制和服务熔断。Sentinel是通过限制并发线程的数量（即信号隔离）来减少不稳定资源的影响，而不是使用线程池，省去了线程切换的性能开销。当资源
Python实现TIFF 文件转换为 PNG 和 JPG 格式 sand&wich python 开发语言
在日常的图像处理工作中，可能会遇到需要将TIFF格式的图像转换为其他格式的情况，例如PNG和JPG。下面，本文将介绍如何使用Python和GDAL库实现这一功能。准备工作在开始之前，请确保已经安装了必要的库：GDAL（GeospatialDataAbstractionLibrary）可以使用以下命令安装GDAL：pipinstallgdal代码实现以下是一个将TIFF文件转换为PNG文件的示例代码
GenVisR 基因组数据可视化实战(三) 11的雾
3.genCov画每个突变位点附件的coverage，跟igv有点相似。这个操作起来很复杂，但是图还是挺有用的。可以考虑。由于我的referencegenomebuild是hg38BiocManager::install(c("TxDb.Hsapiens.UCSC.hg38.knownGene","BSgenome.Hsapiens.UCSC.hg38"))library(TxDb.Hsapien
【Python】已解决：ModuleNotFoundError: No module named ‘PIL’ 屿小夏 python 开发语言
文章目录一、分析问题背景二、可能出错的原因三、错误代码示例四、正确代码示例五、注意事项已解决：ModuleNotFoundError:Nomodulenamed‘PIL’一、分析问题背景当你在Python环境中尝试导入PIL（PythonImagingLibrary）模块时，可能会遇到“ModuleNotFoundError:Nomodulenamed‘PIL’”的错误。这通常发生在尝试使用PIL
Ubuntu18.04 Docker部署Kinship(Django)项目过程 Dante617
1Docker的安装https://blog.csdn.net/weixin_41735055/article/details/1003551792下载镜像dockerpullprogramize/python3.6.8-dlib下载的镜像里包含python3.6.8和dlib19.17.03启动镜像dockerrun-it--namekinship-p7777:80-p3307:3306-p55
sublime个人设置 bawangtianzun sublime text 编辑器
如何拥有jiangly蒋老师同款编译器(sublimec++配置竞赛向）_哔哩哔哩_bilibiliSublimeText4的安装教程（新手竞赛向）-知乎(zhihu.com)创建文件自动保存为c++打开SublimeText软件。转到"Tools"（工具）>"Developer"（开发者）>"NewPlugin"（新建插件）。在打开的新文件中，粘贴以下代码：importsublimeimport
python画图|同时输出二维和三维图西猫雷婶 python 开发语言
前面已经学习了如何输出二维图和三维图，部分文章详见下述链接：python画图|极坐标下的3Dsurface-CSDN博客python画图|垂线标记系列_如何用pyplot画垂直x轴的线-CSDN博客有时候也需要同时输出二维和三维图，因此有必要学习一下。【1】官网教程首先我们打开官网教程，链接如下。https://matplotlib.org/stable/gallery/mplot3d/mixed
解决SDK Manager 中没有 Support Library 木鱼wzh
1、直接修改SDK-MANAGER打开sdk-manager—->Tools—->options然后点击packages—->showobsoletepackages即可在最下面的Extras目录下找到推荐两个自己使用的镜像服务器：mirrors.neusoft.edu.cn端口80mirrors.dormforce.net端口802、去官网下载SupportLibrar点击这里进入官网进入百度云
python编写直方图和饼图 2301_80421078 python 开发语言
1.直方图#直方图的绘制#语法格式：plt.hist(x,bins),其中x:数据集；bins:统计数据的分布区间importmatplotlib.pyplotaspltimportpandasaspd#导入文件excel=pd.read_excel('成绩.xlsx')#print(excel)#避免乱码plt.rcParams['font.sans-serif']=['SimHei']x=ex
pythonpandas函数详解_Python pandas常用函数详解 Senvn
本文研究的主要是pandas常用函数，具体介绍如下。1import语句importpandasaspdimportnumpyasnpimportmatplotlib.pyplotaspltimportdatetimeimportre2文件读取df=pd.read_csv(path='file.csv')参数：header=None用默认列名，0，1，2，3...names=['A','B','C'
python画出分子化学空间分布（UMAP） Sakaiay python
利用umap画出分子化学空间分布图安装pipinstallumap-learn下面是用一个数据集举的例子importtorchimportumapimportpandasaspdimportnumpyasnpimportmatplotlib.pyplotaspltimportseabornassnsfromsklearn.manifoldimportTSNEfromrdkit.Chemimport
Sentinel 眼泪落在琴弦 springcloud java java
Sentinel（服务熔断降级限流）1.引入spring-cloud-starter-alibaba-sentinel2.下载sentinel服务器3.配置application地址信息4.在控制台调整参数【默认所有流控设置保存在内存中，重启失效】5.想实时监控需每个微服务导入actuator，并配置application暴露所有端口6.自定义sentinel流控返回数据7.配置sentinel类
talib的python库安装 jesonwz python 开发语言
talib的python库安装反正用清华源装不上发现talib的指标好多，想着用用，结果在python里装不上，清华源里提示找不到。也难怪，这个库上网查了一下，最新一次更新是在2013年，太老了。废话不说，上我的解决办法。解决方法步骤（靠谱的）思路：既然在线装不上，就用离线的1.下载对应python版本的talib的whl版本安装文件，链接在这：https://blog.csdn.net/FL16
Makefile问答之 04 优化异常与警告设置捕鲸叉 Linux使用 Linux系统编程 Makefile linux
Makefile怎样指定优化选项，包括编译和链接优化，常用的选项有哪些？在Makefile中，你可以通过设置编译器和链接器的选项来指定优化选项。优化选项可以分为编译优化和链接优化，以下是如何在Makefile中指定这些选项，以及一些常用的选项。示例Makefile#编译器CC=gcc#编译选项CFLAGS=-Wall-O2#链接选项LDFLAGS=-O2#需要链接的库LDLIBS=#目标文件TAR
TA-Lib Python 库 Windows 64位安装包黄桥壮Quinn
TA-LibPython库Windows64位安装包TA.rar项目地址:https://gitcode.com/open-source-toolkit/3ff39简介本仓库提供了一个适用于Windows64位系统的TA-LibPython库安装包。TA-Lib是一个广泛用于金融技术分析的库，支持多种技术指标的计算。资源文件文件名TA-Lib-0.4.29-cp312-win-amd64.whl描
python绘制等高线和等值面初步 bcbobo21cn 图形学和3D python 开发语言 Matplotlib NumPy 等高线
importmatplotlib.pyplotaspltimportnumpyasnpx=np.linspace(-5,6,210)y=np.linspace(-5,6,210)x,y=np.meshgrid(x,y)z=(1-x/2+x**5+y**4)*np.exp(-x**2-y**2)plt.contour(x,y,z,levels=9,colors='black')plt.show()i
python模块TA_Lib文件whl下载地址汇总国产bug零零柒 whl python linux windows
序号项目名称下载地址1TA_Lib-0.4.28-cp312-cp312-win32.whl.zip点我下载2TA_Lib-0.4.28-cp38-cp38-win32.whl.zip点我下载3TA_Lib-0.4.28-cp312-cp312-win_amd64.whl.zip点我下载4TA_Lib-0.4.28-cp38-cp38-win_amd64.whl.zip点我下载5TA_Lib-0.
py获取系统缩放比例 xu-jssy Python自动化脚本 python 前端
fromwin32.libimportwin32conimportwin32api,win32gui,win32printscaling=1.0defget_real_resolution():"""获取真实的分辨率"""hDC=win32gui.GetDC(0)wide=win32print.GetDeviceCaps(hDC,win32con.DESKTOPHORZRES)high=win32
python抓取网页内容401应该用哪个库_python3使用requests模块爬取页面内容入门坂田月半
python的爬虫相关模块有很多，除了requests模块，再如urllib和pycurl以及tornado等。相比而言，requests模块是相对简单易上手的。通过本文，大家可以迅速学会使用python的requests模块爬取页面内容。1.Requests唯一的一个非转基因的PythonHTTP库，人类可以安全享用。官网：http://cn.python-requests.org/zh_CN/
python的request请求401_Python模拟HTTPS请求返回HTTP 401 unauthorized错误 weixin_39599372
Python模拟HTTPS请求返回HTTP401unauthorized错误开始是使用的httplib模块，代码如下：header={"Content-type":"application/json","Accept":"*/*"}params={‘source‘:‘en‘,‘target‘:‘es‘,‘text‘:match.group(1)}data=urllib.urlencode(para
python使用MD5 18.程序员哈希算法算法
一、要使用Python进行MD5加密，可以使用Python标准库中的hashlib模块。二、案例importhashlibstring="Hello,World!"#要进行加密的字符串hash_object=hashlib.md5(string.encode())#将字符串编码并进行MD5加密hash_hex=hash_object.hexdigest()#获取加密后的十六进制字符串print(h
【Python爬虫】百度百科词条内容 PokiFighting 数据处理 python 爬虫开发语言
词条内容我这里随便选取了一个链接，用的是FBI的词条importurllib.requestimporturllib.parsefromlxmlimportetreedefquery(url):headers={'user-agent':'Mozilla/5.0(WindowsNT6.1;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/80.
Humanize 项目教程尤嫒冰
Humanize项目教程humanizeAJSlibraryforaddinga“humantouch”todata.项目地址:https://gitcode.com/gh_mirrors/humani/humanize项目介绍Humanize是一个开源项目，旨在将机器生成的文本转换为更加自然、人性化的文本。该项目通过先进的算法和自然语言处理技术，使得AI生成的内容更加贴近人类的表达方式，从而提高
python下载pandas库镜像_下载pandas库 weixin_39791152
背景交代：在下载matplotlib库时，我已经将pip的下载源手动更改为清华的镜像，所以，如果有小伙伴在下载库遇到问题，如timeout，请先将下载源改为国内镜像，具体操作见我的另一篇文章：今天的主题是安装pandas库~首先，按田字格+R，打开cmd，输入：pipinstallpandas嗯，不出所料地报错了……主要原因：pip._vendor.urllib3.exceptions.ReadT
锋哥写一套前后端分离Python权限系统基于Django5+DRF+Vue3.2+Element Plus+Jwt 视频教程，帅呆了~~ java1234_小锋 Python 权限系统 django权限系统 python web权限系统 django DRF VUE权限 python
大家好，我是java1234_小锋老师，最近写了一套【前后端分离Python权限系统基于Django5+DRF+Vue3.2+ElementPlus+Jwt】视频教程，持续更新中，计划月底更新完，感谢支持。视频在线地址：打造前后端分离Python权限系统基于Django5+DRF+Vue3.2+ElementPlus+Jwt视频教程（火爆连载更新中..）_哔哩哔哩_bilibili项目介绍本课程采
插入表主键冲突做更新 a-john
有以下场景：用户下了一个订单，订单内的内容较多，且来自多表，首次下单的时候，内容可能会不全（部分内容不是必须，出现有些表根本就没有该订单的值）。在以后更改订单时，有些内容会更改，有些内容会新增。问题：如果在sql语句中执行update操作，在没有数据的表中会出错。如果在逻辑代码中先做查询，查询结果有做更新，没有做插入，这样会将代码复杂化。解决： mysql中提供了一个sql语
Android xml资源文件中@、@android:type、@*、？、@+含义和区别 Cb123456 @+@?@*
一.@代表引用资源 1.引用自定义资源。格式：@[package:]type/name android：text="@string/hello" 2.引用系统资源。格式：@android:type/name android:textColor="@android:color/opaque_red"
数据结构的基本介绍天子之骄数据结构散列表树、图线性结构价格标签
数据结构的基本介绍数据结构就是数据的组织形式，用一种提前设计好的框架去存取数据，以便更方便，高效的对数据进行增删查改。正确选择合适的数据结构，对软件程序的高效执行的影响作用不亚于算法的设计。此外，在计算机系统中数据结构的作用也是非同小可。例如常常在编程语言中听到的栈，堆等，就是经典的数据结构。经典的数据结构大致如下：一：线性数据结构 (1)：列表 a
通过二维码开放平台的API快速生成二维码一炮送你回车库 api
现在很多网站都有通过扫二维码用手机连接的功能，联图网(http://www.liantu.com/pingtai/)的二维码开放平台开放了一个生成二维码图片的Api,挺方便使用的。闲着无聊，写了个前台快速生成二维码的方法。 html代码如下:(二维码将生成在这div下) ? 1 &nbs
ImageIO读取一张图片改变大小 3213213333332132 java IO image BufferedImage
package com.demo; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import javax.imageio.ImageIO; /** * @Description 读取一张图片改变大小 * @author FuJianyon
myeclipse集成svn（一针见血） 7454103 eclipse SVN MyEclipse
&n
装箱与拆箱----autoboxing和unboxing darkranger J2SE
4.2　自动装箱和拆箱基本数据(Primitive)类型的自动装箱(autoboxing)、拆箱(unboxing)是自J2SE 5.0开始提供的功能。虽然为您打包基本数据类型提供了方便，但提供方便的同时表示隐藏了细节，建议在能够区分基本数据类型与对象的差别时再使用。 4.2.1　autoboxing和unboxing 在Java中，所有要处理的东西几乎都是对象(Object)
ajax传统的方式制作ajax aijuans Ajax
//这是前台的代码 <%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+
只用jre的eclipse是怎么编译java源文件的？ avords java eclipse jdk tomcat
eclipse只需要jre就可以运行开发java程序了，也能自动编译java源代码，但是jre不是java的运行环境么，难道jre中也带有编译工具？还是eclipse自己实现的？谁能给解释一下呢问题补充：假设系统中没有安装jdk or jre，只在eclipse的目录中有一个jre，那么eclipse会采用该jre，问题是eclipse照样可以编译java源文件，为什么呢？ &nb
前端模块化 bee1314 模块化
背景：前端JavaScript模块化，其实已经不是什么新鲜事了。但是很多的项目还没有真正的使用起来，还处于刀耕火种的野蛮生长阶段。 JavaScript一直缺乏有效的包管理机制，造成了大量的全局变量，大量的方法冲突。我们多么渴望有天能像Java（import），Python (import)，Ruby(require)那样写代码。在没有包管理机制的年代，我们是怎么避免所
处理百万级以上的数据处理 bijian1013 oracle sql 数据库大数据查询
一.处理百万级以上的数据提高查询速度的方法： 1.应尽量避免在 where 子句中使用!=或<>操作符，否则引擎将放弃使用索引而进行全表扫描。 2.对查询进行优化，应尽量避免全表扫描，首先应考虑在 where 及 o
mac 卸载 java 1.7 或更高版本征客丶 java OS
卸载 java 1.7 或更高 sudo rm -rf /Library/Internet\ Plug-Ins/JavaAppletPlugin.plugin 成功执行此命令后，还可以执行 java 与 javac 命令 sudo rm -rf /Library/PreferencePanes/JavaControlPanel.prefPane 成功执行此命令后，还可以执行 java
【Spark六十一】Spark Streaming结合Flume、Kafka进行日志分析 bit1129 Stream
第一步，Flume和Kafka对接，Flume抓取日志，写到Kafka中第二步，Spark Streaming读取Kafka中的数据，进行实时分析本文首先使用Kafka自带的消息处理（脚本）来获取消息，走通Flume和Kafka的对接 1. Flume配置 1. 下载Flume和Kafka集成的插件，下载地址：https://github.com/beyondj2ee/f
Erlang vs TNSDL bookjovi erlang
TNSDL是Nokia内部用于开发电信交换软件的私有语言，是在SDL语言的基础上加以修改而成，TNSDL需翻译成C语言得以编译执行，TNSDL语言中实现了异步并行的特点，当然要完整实现异步并行还需要运行时动态库的支持，异步并行类似于Erlang的process（轻量级进程），TNSDL中则称之为hand，Erlang是基于vm(beam)开发，
非常希望有一个预防疲劳的java软件, 预防过劳死和眼睛疲劳,大家一起努力搞一个 ljy325 企业应用
　非常希望有一个预防疲劳的java软件，我看新闻和网站，国防科技大学的科学家累死了，太疲劳，老是加班，不休息，经常吃药，吃药根本就没用，根本原因是疲劳过度。我以前做java,那会公司垃圾，老想赶快学习到东西跳槽离开，搞得超负荷，不明理。深圳做软件开发经常累死人，总有不明理的人，有个软件提醒限制很好，可以挽救很多人的生命。相关新闻：（1）IT行业成五大疾病重灾区：过劳死平均37.9岁
读《研磨设计模式》-代码笔记-原型模式 bylijinnan java 设计模式
声明：本文只为方便我个人查阅和理解，详细的分析以及源代码请移步原作者的博客http://chjavach.iteye.com/ /** * Effective Java 建议使用copy constructor or copy factory来代替clone()方法： * 1.public Product copy(Product p){} * 2.publi
配置管理---svn工具之权限配置 chenyu19891124 SVN
今天花了大半天的功夫，终于弄懂svn权限配置。下面是今天收获的战绩。安装完svn后就是在svn中建立版本库，比如我本地的是版本库路径是C:\Repositories\pepos。pepos是我的版本库。在pepos的目录结构 pepos component webapps 在conf里面的auth里赋予的权限配置为 [groups]
浅谈程序员的数学修养 comsci 设计模式编程算法面试招聘
浅谈程序员的数学修养
批量执行 bulk collect与forall用法 daizj oracle sql bulk collect forall
BULK COLLECT 子句会批量检索结果，即一次性将结果集绑定到一个集合变量中，并从SQL引擎发送到PL/SQL引擎。通常可以在SELECT INTO、 FETCH INTO以及RETURNING INTO子句中使用BULK COLLECT。本文将逐一描述BULK COLLECT在这几种情形下的用法。有关FORALL语句的用法请参考：批量SQL之 F
Linux下使用rsync最快速删除海量文件的方法 dongwei_6688 OS
1、先安装rsync：yum install rsync 2、建立一个空的文件夹：mkdir /tmp/test 3、用rsync删除目标目录：rsync --delete-before -a -H -v --progress --stats /tmp/test/ log/这样我们要删除的log目录就会被清空了，删除的速度会非常快。rsync实际上用的是替换原理，处理数十万个文件也是秒删。
Yii CModel中rules验证规格 dcj3sjt126com rules yii validate
Yii cValidator主要用法分析： yii验证rulesit 分类： Yii yii的rules验证 cValidator主要属性 attributes ,builtInValidators,enableClientValidation,message,on,safe,skipOnError
基于vagrant的redis主从实验 dcj3sjt126com vagrant
平台: Mac 工具: Vagrant 系统: Centos6.5 实验目的: Redis主从实现思路制作一个基于Centos6.5, 已经安装好redis的box, 添加一个脚本配置从机, 然后作为后面主机从机的基础box 制作Centos6.5+redis的box mkdir vagrant_redis cd vagrant_
Memcached(二)、Centos安装Memcached服务器 frank1234 centos memcached
一、安装gcc rpm和yum安装memcached服务器连接没有找到，所以我使用的是make的方式安装，由于make依赖于gcc，所以要先安装gcc 开始安装，命令如下，[color=red][b]顺序一定不能出错[/b][/color]：建议可以先切换到root用户，不然可能会遇到权限问题：su root 输入密码...... rpm -ivh kernel-head
Remove Duplicates from Sorted List hcx2013 remove
Given a sorted linked list, delete all duplicates such that each element appear only once. For example,Given 1->1->2, return 1->2.Given 1->1->2->3->3, return&
Spring4新特性——JSR310日期时间API的支持 jinnianshilongnian spring4
Spring4新特性——泛型限定式依赖注入 Spring4新特性——核心容器的其他改进 Spring4新特性——Web开发的增强 Spring4新特性——集成Bean Validation 1.1(JSR-349)到SpringMVC Spring4新特性——Groovy Bean定义DSL Spring4新特性——更好的Java泛型操作API Spring4新
浅谈enum与单例设计模式 247687009 java 单例
在JDK1.5之前的单例实现方式有两种(懒汉式和饿汉式并无设计上的区别故看做一种)，两者同是私有构造器，导出静态成员变量，以便调用者访问。第一种 package singleton; public class Singleton { //导出全局成员 public final static Singleton INSTANCE = new S
使用switch条件语句需要注意的几点 openwrt c break switch
1. 当满足条件的case中没有break，程序将依次执行其后的每种条件（包括default）直到遇到break跳出 int main() { int n = 1; switch(n) { case 1: printf("--1--\n"); default: printf("defa
配置Spring Mybatis JUnit测试环境的应用上下文 schnell18 spring mybatis JUnit
Spring-test模块中的应用上下文和web及spring boot的有很大差异。主要试下来差异有：单元测试的app context不支持从外部properties文件注入属性 @Value注解不能解析带通配符的路径字符串解决第一个问题可以配置一个PropertyPlaceholderConfigurer的bean。第二个问题的具体实例是：
Java 定时任务总结一 tuoni java spring timer quartz timertask
Java定时任务总结一.从技术上分类大概分为以下三种方式： 1.Java自带的java.util.Timer类，这个类允许你调度一个java.util.TimerTask任务; 说明： java.util.Timer定时器，实际上是个线程，定时执行TimerTask类 &
一种防止用户生成内容站点出现商业广告以及非法有害等垃圾信息的方法 yangshangchuan rank 相似度计算文本相似度词袋模型余弦相似度
本文描述了一种在ITEYE博客频道上面出现的新型的商业广告形式及其应对方法，对于其他的用户生成内容站点类型也具有同样的适用性。最近在ITEYE博客频道上面出现了一种新型的商业广告形式，方法如下： 1、注册多个账号（一般10个以上）。 2、从多个账号中选择一个账号，发表1-2篇博文