import org.apache.log4j.{Level, Logger} import org.apache.spark.graphx.{Edge, _} import org.apache.spark.rdd.RDD import org.apache.spark.{SparkConf, SparkContext}
/**
 * GraphX walkthrough on a tiny six-person graph.
 *
 * Vertices carry `(name, age)`; edges carry an `Int` weight. The program prints
 * the result of property queries, map transformations, structural operations,
 * vertex joins, neighbourhood aggregation and a Pregel single-source
 * shortest-path computation.
 *
 * The entry point is an explicit `main` rather than `extends App`: the Spark
 * documentation warns that subclasses of `scala.App` may not work correctly,
 * because fields initialised through `delayedInit` can be observed
 * uninitialised from closures.
 */
object Practice {

  /** Shape of the demo graph: `(name, age)` vertices and `Int` edge weights. */
  private type SocialGraph = Graph[(String, Int), Int]

  /** Separator line printed under every section title. */
  private val Separator = "**********************************************************"

  /** Vertex attribute used by the join demo: the person plus their in/out degree. */
  final case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

  /**
   * Picks the `(vertexId, degree)` pair with the larger degree.
   *
   * @param a first candidate
   * @param b second candidate
   * @return `a` when its degree is strictly greater, otherwise `b`
   */
  def max(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
    if (a._2 > b._2) a else b

  /**
   * Runs every demo section against a local four-thread Spark context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("SimpleGraphX").setMaster("local[4]")
    val sc = new SparkContext(conf)
    // Always release the context, even when one of the demos throws.
    try {
      val graph = buildGraph(sc)
      showProperties(graph)
      showTransformations(graph)
      showStructuralOps(graph)
      val userGraph = showJoins(graph)
      showNeighborAggregation(graph)
      showOldestFollower(userGraph)
      showShortestPaths(graph, sourceId = 5L)
    } finally {
      sc.stop()
    }
  }

  /** Builds the fixed six-vertex, eight-edge demo graph. */
  private def buildGraph(sc: SparkContext): SocialGraph = {
    val vertexArray = Array(
      (1L, ("Alice", 28)),
      (2L, ("Bob", 27)),
      (3L, ("Charlie", 65)),
      (4L, ("David", 42)),
      (5L, ("Ed", 55)),
      (6L, ("Fran", 50))
    )
    val edgeArray = Array(
      Edge(2L, 1L, 7),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 4),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(2L, 5L, 2),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 3)
    )
    val vertexRDD: RDD[(Long, (String, Int))] = sc.parallelize(vertexArray)
    val edgeRDD: RDD[Edge[Int]] = sc.parallelize(edgeArray)
    Graph(vertexRDD, edgeRDD)
  }

  /** Prints a section title followed by the separator line. */
  private def banner(title: String): Unit = {
    println(title)
    println(Separator)
  }

  /** Prints one "name is age" line per vertex. */
  private def printPeople(vertices: RDD[(VertexId, (String, Int))]): Unit =
    vertices.collect().foreach { case (_, (name, age)) => println(s"$name is $age") }

  /** Prints one "src to dst att weight" line per edge. */
  private def printEdges(edges: RDD[Edge[Int]]): Unit =
    edges.collect().foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

  /** Prints every vertex id followed by the id, name and age of each collected neighbour. */
  private def printNeighbors(neighbors: RDD[(VertexId, Array[(VertexId, (String, Int))])]): Unit =
    neighbors.collect().foreach { case (id, incoming) =>
      println(s"id: $id")
      incoming.foreach { case (srcId, (name, age)) =>
        println(s" $srcId (name: $name age: $age)")
      }
    }

  /** Property queries: vertex filter, edge filter, triplet filter and maximum degrees. */
  private def showProperties(graph: SocialGraph): Unit = {
    banner("属性演示")
    println("找出图中年龄大于30的顶点:")
    printPeople(graph.vertices.filter { case (_, (_, age)) => age > 30 })
    println()

    println("找出图中属性大于5的边:")
    printEdges(graph.edges.filter(_.attr > 5))
    println()

    println("列出边属性>5的tripltes:")
    graph.triplets.filter(_.attr > 5).collect().foreach { t =>
      println(s"${t.srcAttr._1} likes ${t.dstAttr._1}")
    }
    println()

    val maxOut = graph.outDegrees.reduce(max)
    val maxIn = graph.inDegrees.reduce(max)
    val maxAll = graph.degrees.reduce(max)
    println(s"max of outDegrees:$maxOut max of inDegrees:$maxIn max of Degrees:$maxAll")
    println()
  }

  /** Map transformations over vertices, edges and triplets. */
  private def showTransformations(graph: SocialGraph): Unit = {
    banner("转换操作")
    println("顶点的转换操作,顶点age + 10:")
    // mapVertices returns the new *attribute* only; the vertex id must not be
    // re-wrapped into it, otherwise the printed "name"/"age" are the id and a tuple.
    printPeople(graph.mapVertices { case (_, (name, age)) => (name, age + 10) }.vertices)
    println()

    println("边的转换操作,边的属性*2:")
    printEdges(graph.mapEdges(e => e.attr * 2).edges)
    println()

    println("三元组的转换操作,边的属性为端点的age相加:")
    // The section title promises the *sum* of both endpoint ages.
    printEdges(graph.mapTriplets(tri => tri.srcAttr._2 + tri.dstAttr._2).edges)
    println()
  }

  /** Structural operations: vertex-induced subgraph and edge reversal. */
  private def showStructuralOps(graph: SocialGraph): Unit = {
    banner("结构操作")
    println("顶点年纪>30的子图:")
    // Strictly greater than 30, matching the title and the earlier vertex filter.
    val subGraph = graph.subgraph(vpred = (_, vd) => vd._2 > 30)
    println("子图所有顶点:")
    printPeople(subGraph.vertices)
    println()
    println("子图所有边:")
    printEdges(subGraph.edges)
    println()

    println("反转整个图:")
    val reverseGraph = graph.reverse
    println("反转图所有顶点:")
    printPeople(reverseGraph.vertices)
    println()
    println("反转图所有边:")
    printEdges(reverseGraph.edges)
    println()
  }

  /**
   * Join demo: attaches in/out degree to every vertex and prints the result.
   *
   * @return the graph whose vertices are [[User]] values with both degrees filled in
   */
  private def showJoins(graph: SocialGraph): Graph[User, Int] = {
    banner("连接操作")

    // Create a new graph whose vertex type is User, converting from the original graph.
    val initialUserGraph: Graph[User, Int] =
      graph.mapVertices { case (_, (name, age)) => User(name, age, 0, 0) }

    // Vertices without incoming/outgoing edges are absent from the degree RDDs,
    // hence the outer join with a default of 0.
    val userGraph = initialUserGraph
      .outerJoinVertices(initialUserGraph.inDegrees) {
        case (_, u, inDegOpt) => User(u.name, u.age, inDegOpt.getOrElse(0), u.outDeg)
      }
      .outerJoinVertices(initialUserGraph.outDegrees) {
        case (_, u, outDegOpt) => User(u.name, u.age, u.inDeg, outDegOpt.getOrElse(0))
      }

    println("连接图的属性:")
    userGraph.vertices.collect().foreach { case (_, u) =>
      println(s"${u.name} inDeg: ${u.inDeg} outDeg: ${u.outDeg}")
    }
    println()

    println("出度和入读相同的人员:")
    userGraph.vertices
      .filter { case (_, u) => u.inDeg == u.outDeg }
      .collect()
      .foreach { case (_, u) => println(u.name) }
    println()

    userGraph
  }

  /** Collects the source vertices of incoming edges in two ways: collectNeighbors and aggregateMessages. */
  private def showNeighborAggregation(graph: SocialGraph): Unit = {
    banner("聚合操作")
    println("collectNeighbors:获取当前节点source节点的id和属性")
    printNeighbors(graph.collectNeighbors(EdgeDirection.In))

    println("aggregateMessages版本:")
    printNeighbors(
      graph.aggregateMessages[Array[(VertexId, (String, Int))]](
        ctx => ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr))),
        _ ++ _
      )
    )
  }

  /** For every user, prints the oldest vertex that has an edge pointing at them. */
  private def showOldestFollower(userGraph: Graph[User, Int]): Unit = {
    banner("聚合操作")
    println("找出年纪最大的追求者:")

    val oldestFollower: VertexRDD[(String, Int)] =
      userGraph.aggregateMessages[(String, Int)](
        ctx => ctx.sendToDst((ctx.srcAttr.name, ctx.srcAttr.age)),
        (a, b) => if (a._2 > b._2) a else b
      )

    userGraph.vertices
      .leftJoin(oldestFollower) { (_, user, optOldestFollower) =>
        optOldestFollower match {
          case None            => s"${user.name} does not have any followers."
          case Some((name, _)) => s"${name} is the oldest follower of ${user.name}."
        }
      }
      .collect()
      .foreach { case (_, str) => println(str) }
    println()
  }

  /**
   * Pregel single-source shortest paths along edge direction, using edge
   * weights as distances.
   *
   * @param graph    the demo graph
   * @param sourceId vertex from which distances are measured
   */
  private def showShortestPaths(graph: SocialGraph, sourceId: VertexId): Unit = {
    banner("聚合操作")

    // Distance 0 at the source, "unreached" (infinity) everywhere else.
    val initialGraph =
      graph.mapVertices((id, _) => if (id == sourceId) 0.0 else Double.PositiveInfinity)

    println("找出5到各顶点的最短距离:")
    val sssp = initialGraph.pregel(Double.PositiveInfinity, Int.MaxValue, EdgeDirection.Out)(
      // Vertex program: keep the shorter of the known and the newly received distance.
      (id, dist, newDist) => {
        println("||||" + id)
        math.min(dist, newDist)
      },
      // Send a message only when going through this edge shortens the destination's distance.
      triplet => {
        println(">>>>" + triplet.srcId)
        val candidate = triplet.srcAttr + triplet.attr
        if (candidate < triplet.dstAttr) Iterator((triplet.dstId, candidate))
        else Iterator.empty
      },
      // Merge concurrent messages by keeping the minimum.
      (a, b) => math.min(a, b)
    )
    sssp.triplets.collect().foreach(println)
  }
}