Spark Graphx计算有向环

在工作过程中有使用到Spark Graphx做一些图的计算,开发环境如下:
开发工具:IDEA
JDK:1.7.0_80
Maven:3.3.9

对于图计算,其实有很多技术框架可供选择,例如 Python 的 networkx、Spark 的 GraphX,阿里也有开源框架。此处我们使用 GraphX 作为计算框架;由于只是个人的简单案例,就以单机环境实现,输入数据也比较简单。

在 GraphX 中,求环这类操作通常可以用消息聚合方法(如 aggregateMessages)或者 Pregel 来实现,可以参考官方文档(Spark GraphX Programming Guide),其中有类似的案例。话不多说,上代码:

package com.pnlorf.graphx.pnlorf.graphx.circle

import org.apache.spark.graphx._
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

/**
 * Finds directed cycles in a small, hard-coded input graph with the GraphX Pregel API.
 *
 * Every vertex accumulates the edge paths that end at it. After the Pregel run, each path
 * whose first edge starts at the label where its last edge ends is reported as a cycle.
 *
 * date: 2019/10/12
 * package: com.pnlorf.graphx.pnlorf
 */
object CircleGraph {

  /** Edge paths collected at a vertex; Pregel messages have the same shape. */
  private type Paths = ArrayBuffer[ArrayBuffer[EdgeInfo]]

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("Graphx_Circle")
    val sc: SparkContext = new SparkContext(conf)
    try {
      printCycles(sc)
    } finally {
      // Stop the context even when the job fails, so its resources are released.
      sc.stop()
    }
  }

  /**
   * Builds the sample graph, collects edge paths with Pregel and prints every cycle found.
   *
   * @param sc the Spark context used to create the input RDDs
   */
  private def printCycles(sc: SparkContext): Unit = {
    val myVertices = sc.parallelize(Array(
      (1L, "A"),
      (2L, "B"),
      (3L, "C"),
      (4L, "D"),
      (5L, "E"),
      (6L, "F"),
      (7L, "G"),
      (8L, "H"),
      (9L, "I")
    ))

    val myEdges = sc.parallelize(Array(
      Edge(1L, 2L, new EdgeInfo("1", "2")),
      Edge(2L, 3L, new EdgeInfo("2", "3")),
      Edge(3L, 4L, new EdgeInfo("3", "4")),
      Edge(4L, 5L, new EdgeInfo("4", "5")),
      Edge(5L, 1L, new EdgeInfo("5", "1")),
      Edge(5L, 3L, new EdgeInfo("5", "3")),
      Edge(6L, 7L, new EdgeInfo("6", "7")),
      Edge(7L, 6L, new EdgeInfo("7", "6")),
      Edge(7L, 8L, new EdgeInfo("7", "8")),
      Edge(8L, 7L, new EdgeInfo("8", "7")),
      Edge(1L, 9L, new EdgeInfo("1", "9")),
      Edge(9L, 1L, new EdgeInfo("9", "1"))
    ))

    // Edge-count threshold: a cycle is printed only when it has strictly MORE edges than this.
    val minSize = 2
    // Passed to Pregel as the maximum number of iterations.
    val maxSize = 5

    // The vertex labels are not used by the search; every vertex starts with no paths.
    val graph: Graph[Paths, EdgeInfo] =
      Graph(myVertices, myEdges).mapVertices((_, _) => new ArrayBuffer[ArrayBuffer[EdgeInfo]]())

    val result = Pregel(graph, initialMsg(), maxSize, EdgeDirection.Out)(vprog, sendMsg, mergeMsg).cache()

    try {
      result.vertices
        .flatMap { case (_, paths) => paths.filter(isCycle) }
        // All rotations of one cycle have the same size, so filtering before the
        // shuffle gives the same result as filtering after de-duplication.
        .filter(_.size > minSize)
        .map(cycle => (canonicalKey(cycle), cycle))
        // Keep one representative per cycle; which rotation survives is not specified.
        .reduceByKey((first, _) => first)
        .values
        .collect()
        .foreach { cycle =>
          println("==========================")
          cycle.foreach(edge => println(edge.toString))
          println("*************************")
        }
    } finally {
      result.unpersist(false)
    }
  }

  /**
   * Initial Pregel message delivered to every vertex.
   *
   * @return an empty collection of paths
   */
  private def initialMsg(): Paths = new ArrayBuffer[ArrayBuffer[EdgeInfo]]()

  /**
   * Vertex program: adds the received paths to the paths already stored at the vertex.
   *
   * @param vid  vertex id
   * @param vd   paths currently stored at the vertex
   * @param msgs paths received in this iteration
   * @return a new buffer holding the stored paths followed by the received ones
   */
  private def vprog(vid: Long, vd: Paths, msgs: Paths): Paths = vd ++ msgs

  /**
   * Sends paths along one edge to its destination vertex.
   *
   * When the source vertex holds no path yet, a new one-edge path is started. Otherwise every
   * path stored at the source is extended by this edge, except paths in which some edge already
   * points to this edge's `to` label (the path would visit that label twice).
   *
   * @param edgeTriplet the edge together with the attributes of its two end vertices
   * @return a single message addressed to the destination vertex
   */
  private def sendMsg(edgeTriplet: EdgeTriplet[Paths, EdgeInfo]): Iterator[(Long, Paths)] = {
    val edge = edgeTriplet.attr
    val outgoing: Paths =
      if (edgeTriplet.srcAttr.isEmpty) {
        ArrayBuffer(ArrayBuffer(edge))
      } else {
        edgeTriplet.srcAttr
          .filter(path => !path.exists(_.to == edge.to))
          .map(path => path :+ edge)
      }
    Iterator((edgeTriplet.dstId, outgoing))
  }

  /** Combines two messages addressed to the same vertex by concatenating their paths. */
  private def mergeMsg(a1: Paths, a2: Paths): Paths = a1 ++ a2

  /** A path is a cycle when its first edge starts at the label where its last edge ends. */
  private def isCycle(path: ArrayBuffer[EdgeInfo]): Boolean =
    path.nonEmpty && path.head.fm.equalsIgnoreCase(path.last.to)

  /**
   * Direction-aware de-duplication key for a cycle.
   *
   * The same cycle is collected once per vertex on it, each time as a different rotation of
   * the same edge sequence. Rotating the sequence so that its smallest (fm, to) pair comes
   * first makes all rotations compare equal, while a cycle that visits the same labels in the
   * opposite direction still gets a different key.
   *
   * @param cycle the edges of the cycle, in traversal order
   * @return the (fm, to) pairs of the cycle, rotated to start at the smallest pair
   */
  private def canonicalKey(cycle: Seq[EdgeInfo]): Vector[(String, String)] = {
    val edges = cycle.map(e => (e.fm, e.to)).toVector
    if (edges.isEmpty) {
      edges
    } else {
      val start = edges.indexOf(edges.min)
      edges.drop(start) ++ edges.take(start)
    }
  }

}

另一个自定义的类:

package com.pnlorf.graphx.pnlorf.graphx.circle

/**
 * Edge attribute holding two labels, `fm` and `to`.
 *
 * Two instances are equal when both labels are equal, and the hash code is derived from the
 * same two fields. Both fields are mutable, so an instance should not be changed while it is
 * stored in a hash-based collection.
 *
 * date: 2019/12/9
 * package: com.pnlorf.graphx.pnlorf
 */
class EdgeInfo extends Serializable {

  /** First label of the edge; empty until set. */
  var fm: String = ""

  /** Second label of the edge; empty until set. */
  var to: String = ""

  /**
   * Creates an edge attribute with both labels set.
   *
   * @param fm value for the `fm` field
   * @param to value for the `to` field
   */
  def this(fm: String, to: String) = {
    this()
    this.fm = fm
    this.to = to
  }

  /** Tells whether `other` is an [[EdgeInfo]] and may therefore be compared with this one. */
  def canEqual(other: Any): Boolean = other match {
    case _: EdgeInfo => true
    case _ => false
  }

  /** Field-wise equality on `fm` and `to`. */
  override def equals(other: Any): Boolean = other match {
    case that: EdgeInfo => that.canEqual(this) && this.fm == that.fm && this.to == that.to
    case _ => false
  }

  /** Combines the hash codes of `fm` and `to` with the multiplier 31. */
  override def hashCode(): Int = 31 * fm.hashCode + to.hashCode

  /** Text form listing both labels and the hash code. */
  override def toString: String = s"EdgeInfo(fm=$fm, to=$to, hashCode=${hashCode()})"
}

以上代码可直接执行,结果输出如下(注意:代码只保留边数大于 minSize 的环,因此图中由两条边组成的环,例如 6→7→6、7→8→7、1→9→1,不会出现在输出中):

==========================
EdgeInfo(fm=4, to=5, hashCode=1665)
EdgeInfo(fm=5, to=1, hashCode=1692)
EdgeInfo(fm=1, to=2, hashCode=1569)
EdgeInfo(fm=2, to=3, hashCode=1601)
EdgeInfo(fm=3, to=4, hashCode=1633)
*************************
==========================
EdgeInfo(fm=4, to=5, hashCode=1665)
EdgeInfo(fm=5, to=3, hashCode=1694)
EdgeInfo(fm=3, to=4, hashCode=1633)
*************************

由于是写个简单样例,可能考虑的不够周全,有问题还希望大家多多包涵,有问题大家一起讨论!

谢谢!

你可能感兴趣的:(Spark)