依据距离计算分类的思想,适用于各维。
具体代码如下:
package sk.mlib; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; import scala.Tuple2; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.classification.SVMModel; import org.apache.spark.mllib.classification.SVMWithSGD; import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.mllib.util.MLUtils; public class SVMWithSGDDemo { public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaSVMWithSGDExample"); SparkContext sc = new SparkContext(conf); // $example on$ String path = "/tmp/svmdata.txt"; JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD(); // Split initial RDD into two... [60% training data, 40% testing data]. JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L); training.cache(); JavaRDD<LabeledPoint> test = data.subtract(training); // Run training algorithm to build the model. int numIterations = 100; final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations); // Clear the default threshold. model.clearThreshold(); // Compute raw scores on the test set. JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map( new Function<LabeledPoint, Tuple2<Object, Object>>() { public Tuple2<Object, Object> call(LabeledPoint p) { Double score = model.predict(p.features()); return new Tuple2<Object, Object>(score, p.label()); } } ); // Get evaluation metrics. BinaryClassificationMetrics metrics =new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels)); double auROC = metrics.areaUnderROC(); System.out.println("Area under ROC = " + auROC); // Save and load model model.save(sc, "/tmp/javaSVMWithSGDModel"); SVMModel sameModel = SVMModel.load(sc, "/tmp/javaSVMWithSGDModel"); //应用模型分类 System.out.println("Prediction of (-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 ):"+sameModel.predict(Vectors.dense(-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 ))); sc.stop(); } } /* 执行结果: Area under ROC = 0.9017094017094017 Prediction of (-0.857554,0.555556,1,1,0.555556,0.333333,1,0.777778,0.333333,-1 ):3.238535993736797 */输入的数据集:标签 特征向量1:特征向量值1 特征向量2:特征向量值2 ... 特征向量n:特征向量值n
0 1:-0.860107 2:-0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.859671 2:-0.111111 3:-0.333333 4:-0.333333 5:-0.111111 6:0.333333 7:1 8:-0.555556 9:-0.777778 10:-1 0 1:-0.857807 2:-0.555556 3:-1 4:-1 5:-1 6:-0.777778 7:-0.777778 8:-0.555556 9:-1 10:-1 0 1:-0.85768 2:0.111111 3:0.555556 4:0.555556 5:-1 6:-0.555556 7:-0.333333 8:-0.555556 9:0.333333 10:-1 0 1:-0.857569 2:-0.333333 3:-1 4:-1 5:-0.555556 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.857554 2:0.555556 3:1 4:1 5:0.555556 6:0.333333 7:1 8:0.777778 9:0.333333 10:-1 0 1:-0.857408 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:1 8:-0.555556 9:-1 10:-1 0 1:-0.857339 2:-0.777778 3:-1 4:-0.777778 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.855171 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-1 9:-1 10:-0.111111 0 1:-0.855171 2:-0.333333 3:-0.777778 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.854841 2:-1 3:-1 4:-1 5:-1 6:-1 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.854709 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.853868 2:-0.111111 3:-0.555556 4:-0.555556 5:-0.555556 6:-0.777778 7:-0.555556 8:-0.333333 9:-0.333333 10:-1 0 1:-0.85354 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-0.555556 8:-0.555556 9:-1 10:-1 1 1:-0.853454 2:0.555556 3:0.333333 4:-0.111111 5:1 6:0.333333 7:0.777778 8:-0.111111 9:-0.111111 10:-0.333333 1 1:-0.852997 2:0.333333 3:-0.333333 4:0.111111 5:-0.333333 6:0.111111 7:-1 8:-0.333333 9:-0.555556 10:-1 0 1:-0.852842 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.852671 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.852543 2:1 3:0.333333 4:0.333333 5:0.111111 6:-0.333333 7:1 8:-0.333333 9:-1 10:-0.777778 0 1:-0.852536 2:0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.851958 2:0.333333 3:-0.555556 4:-0.777778 5:1 6:-0.111111 7:1 8:-0.111111 9:-0.333333 10:-0.333333 1 1:-0.851957 2:1 3:-0.111111 4:-0.111111 5:-0.555556 6:0.111111 7:0.333333 8:0.333333 9:1 10:-1 0 1:-0.85163 2:-0.555556 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.851217 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.850295 2:-0.111111 3:-0.777778 4:-0.555556 5:-0.333333 6:-0.777778 7:0.333333 8:-0.555556 9:0.111111 10:-1 0 1:-0.850198 2:-0.555556 3:-0.777778 4:-1 5:-1 6:-1 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.850107 2:-0.111111 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.850038 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.849517 2:-1 3:-1 4:-0.555556 5:-1 6:-0.777778 7:-1 8:-1 9:-1 10:-1 0 1:-0.849517 2:-0.555556 3:-1 4:-1 5:-1 6:-1 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.849393 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.849331 2:1 3:0.333333 4:0.333333 5:-0.555556 6:0.555556 7:-0.111111 8:0.333333 9:-0.333333 10:-0.555556 0 1:-0.848968 2:-0.777778 3:-1 4:-1 5:-0.777778 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 0 1:-0.848891 2:-0.555556 3:-1 4:-0.777778 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.848267 2:-0.777778 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.848135 2:1 3:1 4:1 5:0.555556 6:0.111111 7:-1 8:0.555556 9:0.777778 10:-1 0 1:-0.847895 2:0.111111 3:-0.777778 4:-1 5:-1 6:-1 7:-1 8:0.333333 9:-1 10:-1 1 1:-0.847478 2:-0.111111 3:-0.333333 4:-0.333333 5:0.777778 6:-0.777778 7:1 8:-0.111111 9:0.111111 10:-1 1 1:-0.846481 2:-0.777778 3:-0.111111 4:-0.555556 5:-0.555556 6:0.111111 7:0.333333 8:0.333333 9:-0.111111 10:-1 1 1:-0.845249 2:1 3:-0.333333 4:-0.555556 5:-1 6:-0.555556 7:-0.555556 8:0.111111 9:-0.111111 10:-0.777778 1 1:-0.845097 2:0.111111 3:1 4:1 5:-0.777778 6:0.555556 7:1 8:0.333333 9:-0.555556 10:-0.555556 1 1:-0.844791 2:-0.111111 3:0.111111 4:-0.111111 5:0.111111 6:1 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.844637 2:1 3:1 4:1 5:-0.333333 6:0.555556 7:-1 8:0.555556 9:1 10:-1 0 1:-0.84462 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-0.777778 1 1:-0.84439 2:-0.555556 3:0.333333 4:0.333333 5:-0.333333 6:-0.333333 7:0.777778 8:-0.333333 9:0.555556 10:-1 0 1:-0.844351 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 0 1:-0.844265 2:-0.333333 3:-1 4:-1 5:-0.555556 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1 1 1:-0.844156 2:0.333333 3:0.555556 4:0.333333 5:-0.777778 6:-0.333333 7:0.555556 8:-0.555556 9:0.555556 10:-0.777778 1 1:-0.843926 2:0.777778 3:-0.111111 4:0.555556 5:-1 6:-0.777778 7:-0.555556 8:-0.777778 9:-1 10:-0.111111 1 1:-0.843914 2:-0.111111 3:-0.555556 4:-0.555556 5:-0.333333 6:-0.777778 7:-0.333333 8:-0.555556 9:-0.333333 10:-1 1 1:-0.843667 2:1 3:-0.555556 4:0.111111 5:-0.777778 6:-0.555556 7:-0.111111 8:-0.333333 9:1 10:-0.777778 1 1:-0.843607 2:-0.111111 3:-0.111111 4:-0.111111 5:0.555556 6:1 7:0.555556 8:0.333333 9:-0.555556 10:0.333333 1 1:-0.843604 2:1 3:-0.111111 4:-0.111111 5:0.111111 6:0.555556 7:0.555556 8:0.333333 9:-1 10:-1 1 1:-0.843496 2:1 3:0.111111 4:0.111111 5:-0.555556 6:-0.333333 7:-0.111111 8:-0.555556 9:0.111111 10:-1 1 1:-0.843352 2:0.555556 3:1 4:1 5:-1 6:-0.555556 7:0.111111 8:-0.555556 9:0.777778 10:-1 1 1:-0.843228 2:0.555556 3:-0.777778 4:-0.333333 5:-1 6:-0.111111 7:-1 8:-0.111111 9:-0.333333 10:-0.333333 1 1:-0.843162 2:-0.111111 3:-0.777778 4:-0.555556 5:-1 6:0.111111 7:1 8:-0.111111 9:-1 10:-1 1 1:-0.843099 2:0.777778 3:-0.111111 4:-0.111111 5:-0.777778 6:-0.777778 7:-0.777778 8:-0.111111 9:-1 10:-1 1 1:-0.842893 2:-0.111111 3:-0.555556 4:-0.111111 5:-0.111111 6:-0.555556 7:-0.555556 8:-0.333333 9:1 10:-1 0 1:-0.842892 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-0.777778 8:-0.777778 9:-1 10:-1 1 1:-0.842769 2:0.777778 3:1 4:1 5:-1 6:1 7:0.555556 8:-0.555556 9:-0.555556 10:-1 1 1:-0.842766 2:0.111111 3:-0.555556 4:-0.333333 5:-1 6:-0.111111 7:-0.777778 8:-0.555556 9:0.777778 10:-1 0 1:-0.842757 2:-1 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.777778 9:-1 10:-1 1 1:-0.842637 2:1 3:-0.333333 4:-0.777778 5:-1 6:-0.555556 7:-0.777778 8:-0.333333 9:-0.555556 10:1 0 1:-0.842614 2:-0.333333 3:-1 4:-1 5:-1 6:-0.777778 7:-1 8:-0.555556 9:-1 10:-1