Java 7 – System variables
JAVA_HOME -> C:\Program Files\Java\jdk1.7.0_71
CLASSPATH -> %JAVA_HOME%\lib\dt.jar;%JAVA_HOME%\lib\tools.jar;
PATH -> %JAVA_HOME%\bin;
Scala 2.10.x – System variables
SCALA_HOME -> C:\Program Files\scala
PATH -> %SCALA_HOME%\bin;
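To confirm the variables are picked up, a quick check (not part of the original steps) is to open a new command prompt, start the Scala REPL with scala, and paste the lines below; the expected values depend on your install paths.

// Paste into the Scala REPL to verify the JDK and environment variables
println("java.version = " + System.getProperty("java.version"))  // e.g. 1.7.0_71
println("JAVA_HOME    = " + System.getenv("JAVA_HOME"))
println("SCALA_HOME   = " + System.getenv("SCALA_HOME"))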
Scala IDE for Eclipse
File -> New -> Scala Project -> Project name: SparkMLlibTest -> Next -> Finish
src -> New -> Package -> name:com.sparkmltest.scala -> Finish
Scala Library container -> Build Path -> Configure Build Path… -> Libraries -> Edit -> Fixed Scala Library container: 2.10.x -> Finish -> OK
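As a quick sanity test (not part of the original walkthrough; the object name is arbitrary), run a scratch object in the new project to confirm the fixed library container is really 2.10.x:

object VersionCheck {
  def main(args: Array[String]): Unit = {
    // Should print the 2.10.x version configured in the Fixed Scala Library container
    println(scala.util.Properties.versionString)
  }
}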
// For the Scala API, Spark 1.5.2 uses Scala 2.10.x
SparkMLlibTest -> New -> Folder -> name: lib -> Finish
Copy the Spark-assembly-1.5.1-hadoop2.6.0.jar package into the lib folder.
Spark-assembly-1.5.1-hadoop2.6.0.jar -> Build Path -> Add to Build Path
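As an alternative to adding the assembly jar by hand (this is a suggestion of mine, not part of the original steps), the same dependencies can be declared in an sbt build; the artifacts below are published for Scala 2.10, and the 2.10.6 patch version is an assumption.

// build.sbt – a minimal sketch matching the Spark 1.5.1 / Scala 2.10.x setup above
name := "SparkMLlibTest"

scalaVersion := "2.10.6"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "1.5.1",
  "org.apache.spark" %% "spark-sql"   % "1.5.1",
  "org.apache.spark" %% "spark-mllib" % "1.5.1"
)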
// Example 1: a Spark program that reads a local file and prints its contents.
com.sparkmltest.scala -> New -> Scala Object/Scala Class -> Name: SimpleTest -> Finish
package com.sparkmltest.scala

import org.apache.spark.{SparkConf, SparkContext}

object SimpleTest {
  def main(args: Array[String]): Unit = {
    // Run locally, using all available cores
    val conf = new SparkConf().setAppName("SimpleTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc) // not used in this example
    // Read a local text file as an RDD of lines and print each one
    val rdd = sc.textFile("D:/Scala workspace/simplefile.txt")
    rdd.foreach(x => println(x))
    sc.stop()
  }
}
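// Example 2: train a multilayer perceptron classifier on the sample LIBSVM data shipped with Spark.
com.sparkmltest.scala -> New -> Scala Object/Scala Class -> Name: MultilayersPerceptronTest -> Finish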
package com.sparkmltest.scala

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row

object MultilayersPerceptronTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MultilayersPerceptronTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._

    // Load the training data in LIBSVM format and convert it to a DataFrame
    val data = MLUtils.loadLibSVMFile(sc, "D:/Scala workspace/spark-1.5.1-bin-hadoop2.6/data/mllib/sample_multiclass_classification_data.txt").toDF()

    // Split the data into train and test sets
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
    val train = splits(0)
    val test = splits(1)

    // Specify layers for the neural network:
    // input layer of size 4 (features), two intermediate layers of size 5 and 4, and an output layer of size 3 (classes)
    val layers = Array[Int](4, 5, 4, 3)

    // Create the trainer and set its parameters
    val trainer = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(100)

    // Train the model
    val model = trainer.fit(train)

    // Compute precision on the test set
    val result = model.transform(test)
    val predictionAndLabels = result.select("prediction", "label")
    val evaluator = new MulticlassClassificationEvaluator()
      .setMetricName("precision")
    println("Precision: " + evaluator.evaluate(predictionAndLabels))

    sc.stop()
  }
}
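To use the trained model on new data rather than just scoring the test split, one option is a small sketch like the one below, assumed to be appended inside main() after the model is trained (the feature values are made-up examples; only their length must match the input layer size of 4):

// A minimal sketch: build a tiny DataFrame of feature vectors and let the model score it
import org.apache.spark.mllib.linalg.Vectors

val newSamples = sqlContext.createDataFrame(Seq(
  (0.0, Vectors.dense(-0.2, 0.5, -0.1, 0.3)),
  (0.0, Vectors.dense(0.1, -0.4, 0.2, -0.6))
)).toDF("label", "features")

// The classifier reads the "features" column and appends a "prediction" column
model.transform(newSamples)
  .select("features", "prediction")
  .show()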