DotParser.scala:
package scaladot

import scala.language.implicitConversions
import scala.util.parsing.combinator._
import scala.util.parsing.combinator.syntactical._
import scala.util.parsing.combinator.lexical._

/**
 * A parser for the GraphViz "dot" language. This code is in the public domain.
 *
 * The grammar is written against the token stream produced by [[DotLexer]].
 * Punctuation and keywords that carry no information are dropped explicitly
 * with `~>` / `<~`, so every semantic action sees only the values it needs.
 *
 * @author Ross Judson
 */
class DotParser extends StdTokenParsers with ImplicitConversions {

  // Fill in the abstract members required by StdTokenParsers.
  type Tokens = DotLexer
  val lexical = new Tokens

  // Configure lexical parsing.
  lexical.reserved ++= List("strict", "graph", "digraph", "node", "edge", "subgraph")
  // The double quote is deliberately NOT registered as a delimiter: no grammar
  // rule consumes it, and as a delimiter it would be tokenised before the lexer
  // gets the chance to report an unterminated string.
  lexical.delimiters ++= List("{", "}", "[", "]", ":", "=", ";", ",", "->", "--")

  // Import only the token extractors. A wildcard import of `lexical._` would
  // also bring in lexical.Parser and lexical.~, which clash with the members of
  // the same name inherited by this class.
  import lexical.{Identifier, NumericLit, StringLit}

  /** Maps `None` to the empty identifier and `Some(s)` to `s`.
    * Retained for source compatibility; the grammar below does not rely on it.
    */
  implicit def emptyIdentifier(n: Option[String]): String = n.getOrElse("")

  /** Projects the left component out of every element of a list of sequence
    * results. The type of the right component is irrelevant, hence the
    * existential type. Retained for source compatibility; the grammar below
    * does not rely on it.
    */
  implicit def convertList[B](lst: List[~[B, _]]): List[B] = lst.map(_._1)

  private val compassPoints = Set("n", "ne", "e", "se", "s", "sw", "w", "nw")

  // graph : [ strict ] (graph | digraph) [ ID ] '{' stmt_list '}'
  lazy val dot: Parser[Graph] =
    opt("strict") ~ graph_kind ~ opt(ID) ~ ("{" ~> stmt_list <~ "}") ^^ {
      case strict ~ directed ~ id ~ statements =>
        Graph(strict.isDefined, directed, id.getOrElse(""), statements: _*)
    }

  /** Yields `true` for `digraph` and `false` for `graph`. */
  private lazy val graph_kind: Parser[Boolean] =
    "graph" ^^^ false | "digraph" ^^^ true

  // stmt_list : [ stmt [ ';' ] [ stmt_list ] ]
  lazy val stmt_list: Parser[List[Statement]] = rep(stmt <~ opt(";"))

  // stmt : node_stmt
  //      | edge_stmt
  //      | attr_stmt
  //      | ID '=' ID
  //      | subgraph
  //
  // A subgraph is only accepted as a statement of its own when it is not
  // followed by an edge operator; otherwise it is the head of an edge and must
  // be left for edge_stmt to consume.
  lazy val stmt: Parser[Statement] =
    (subgraph <~ not(edge_op)) | attr_set | edge_stmt | attr_stmt | node_stmt

  // ID '=' ID
  lazy val attr_set: Parser[Attr] =
    (ID <~ "=") ~ a_value ^^ {
      case name ~ ((quoted, value)) => Attr(name, Some(value), quoted)
    }

  // attr_stmt : (graph | node | edge) attr_list
  lazy val attr_stmt: Parser[AttrList] =
    attr_list_type ~ attr_list ^^ {
      case kind ~ attrs => AttrList(kind, attrs: _*)
    }

  /** One of the keywords `graph`, `node` or `edge`, yielded as its own text. */
  lazy val attr_list_type: Parser[String] =
    keyword("graph") | keyword("node") | keyword("edge")

  // attr_list : '[' [ a_list ] ']' [ attr_list ]
  lazy val attr_list: Parser[List[Attr]] =
    rep("[" ~> a_list <~ "]") ^^ (_.flatten)

  // a_list : ID [ '=' ID ] [ ',' | ';' ] [ a_list ]
  // The separator is optional, so "[a=b c=d]", "[a=b, c=d]" and "[a=b; c=d]"
  // are all accepted.
  lazy val a_list: Parser[List[Attr]] = rep(a_part <~ opt(attr_sep))

  private lazy val attr_sep: Parser[String] = keyword(",") | keyword(";")

  /** A single attribute, with or without a value. */
  lazy val a_part: Parser[Attr] =
    ID ~ opt("=" ~> a_value) ^^ {
      case name ~ Some((quoted, value)) => Attr(name, Some(value), quoted)
      case name ~ None                  => Attr(name, None, false)
    }

  /** An attribute value paired with a flag telling whether it was written as
    * a quoted string in the source.
    */
  lazy val a_value: Parser[(Boolean, String)] =
    accept("string", { case StringLit(v) => (true, v) }) |
      (ID ^^ { v => (false, v) })

  /** The contents of a quoted string token. */
  lazy val a_string: Parser[String] =
    accept("string", { case StringLit(v) => v })

  // edge_stmt : (node_id | subgraph) edgeRHS [ attr_list ]
  // Both "->" and "--" are accepted. The operator itself is not recorded in
  // the resulting Edge.
  lazy val edge_stmt: Parser[Edge] =
    edge_end ~ rep1(edge_op ~> edge_end) ~ attr_list ^^ {
      case head ~ rest ~ attrs => Edge("?", attrs, (head :: rest): _*)
    }

  private lazy val edge_op: Parser[String] = keyword("->") | keyword("--")

  private lazy val edge_end: Parser[EdgeComponent] = node_id | subgraph

  // node_stmt : node_id [ attr_list ]
  lazy val node_stmt: Parser[Node] =
    node_id ~ attr_list ^^ {
      case node ~ attrs => Node(node.id, node.port, attrs: _*)
    }

  // node_id : ID [ port ]
  lazy val node_id: Parser[Node] =
    ID ~ opt(port) ^^ { case name ~ p => Node(name, p) }

  // port : ':' ID [ ':' compass_pt ]
  //      | ':' compass_pt
  // A lone identifier after the colon is always taken as the port id.
  lazy val port: Parser[Port] =
    ":" ~> ID ~ opt(":" ~> ID) ^^ { case id ~ compass => Port(id, compass) }

  // subgraph : [ subgraph [ ID ] ] '{' stmt_list '}'
  // The "subgraph [ID]" header is optional; an anonymous subgraph gets the
  // empty identifier.
  lazy val subgraph: Parser[Subgraph] =
    opt("subgraph" ~> opt(ID)) ~ ("{" ~> stmt_list <~ "}") ^^ {
      case header ~ statements =>
        Subgraph(header.flatten.getOrElse(""), statements: _*)
    }

  // compass_pt : (n | ne | e | se | s | sw | w | nw)
  // Compass points are not reserved words, so the lexer delivers them as
  // ordinary identifiers; match on the identifier text.
  lazy val compass_pt: Parser[String] =
    accept("compass point", { case Identifier(pt) if compassPoints(pt) => pt })

  /** A DOT identifier: a quoted string, a plain identifier, or a numeral. */
  lazy val ID: Parser[String] = IDs | IDi | IDn

  lazy val IDs: Parser[String] = accept("string", { case StringLit(n) => n })

  lazy val IDi: Parser[String] = accept("identifier", { case Identifier(n) => n })

  lazy val IDn: Parser[String] = accept("number", { case NumericLit(n) => n })
}
DotLexer.scala:
package scaladot

import scala.util.parsing.combinator._
import scala.util.parsing.combinator.syntactical._
import scala.util.parsing.combinator.lexical._
import scala.util.parsing.input.CharArrayReader.EofCh

/**
 * Lexer for the GraphViz "dot" language.
 *
 * Produces string literals, numeric literals, keywords/delimiters and
 * identifiers. Keywords and delimiters are whatever has been registered in
 * `reserved` and `delimiters`.
 */
class DotLexer extends StdLexical with ImplicitConversions {

  /** Recognises one token. Alternatives are tried in order, so the malformed
    * number rules must come before the plain number rule.
    */
  override def token: Parser[Token] =
    ( string ^^ (StringLit(_))
    | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) }
    | '-' ~> whitespace ~> number ~ letter ^^ {
        case num ~ l => ErrorToken("Invalid number format : -" + num + l)
      }
    | '-' ~> whitespace ~> number ^^ { num => NumericLit("-" + num) }
    | number ^^ (NumericLit(_))
    | EofCh ^^^ EOF
    | delim
    | '\"' ~> failure("Unterminated string")
    | id ^^ checkKeyword
    | failure("Illegal character")
    )

  /** An identifier: a letter or underscore followed by any number of letters,
    * digits or underscores. It must consume at least one character; a parser
    * that succeeds on empty input would make the "Illegal character"
    * alternative of `token` unreachable and would let the scanner hand out
    * empty identifiers forever without advancing.
    */
  def id: Parser[String] =
    idStart ~ rep(idStart | digit) ^^ { case first ~ rest => (first :: rest).mkString }

  private def idStart: Parser[Char] = letter | elem("underscore", _ == '_')

  /** Turns identifier text into a Keyword if it is reserved, an Identifier otherwise. */
  def checkKeyword(strRep: String): Token =
    if (reserved contains strRep) Keyword(strRep) else Identifier(strRep)

  /** A string is a collection of zero or more Unicode characters, wrapped in
    * double quotes, using backslash escapes (cf. http://www.json.org/).
    * The result is the unescaped content without the surrounding quotes.
    */
  def string: Parser[String] =
    '\"' ~> rep(charSeq | chrExcept('\"', '\n', EofCh) ^^ (_.toString)) <~ '\"' ^^ (_.mkString)

  override def whitespace: Parser[Any] = rep(whitespaceChar)

  /** An unsigned number: integer part, optional fraction, optional exponent. */
  def number: Parser[String] =
    intPart ~ opt(fracPart) ~ opt(expPart) ^^ {
      case i ~ f ~ e => i + optString(".", f) + optString("", e)
    }

  def intPart: Parser[String] = zero | intList

  def intList: Parser[String] =
    nonzero ~ rep(digit) ^^ { case x ~ y => (x :: y).mkString }

  /** The digits after the decimal point (the point itself is not included). */
  def fracPart: Parser[String] = '.' ~> rep(digit) ^^ (_.mkString)

  def expPart: Parser[String] =
    exponent ~ opt(sign) ~ rep1(digit) ^^ {
      case e ~ s ~ d => e.toString + optString("", s) + d.mkString
    }

  /** `pre` followed by the option's content, or the empty string for `None`. */
  private def optString[A](pre: String, a: Option[A]): String =
    a.map(x => pre + x.toString).getOrElse("")

  def zero: Parser[String] = '0' ^^^ "0"

  def nonzero: Parser[Char] = elem("nonzero digit", d => d.isDigit && d != '0')

  def exponent: Parser[Char] = elem("exponent character", d => d == 'e' || d == 'E')

  def sign: Parser[Char] = elem("sign character", d => d == '-' || d == '+')

  /** A backslash escape sequence, yielded as the text it stands for. */
  def charSeq: Parser[String] =
    ( '\\' ~ '\"' ^^^ "\""
    | '\\' ~ '\\' ^^^ "\\"
    | '\\' ~ '/'  ^^^ "/"
    | '\\' ~ 'b'  ^^^ "\b"
    | '\\' ~ 'f'  ^^^ "\f"
    | '\\' ~ 'n'  ^^^ "\n"
    | '\\' ~ 'r'  ^^^ "\r"
    | '\\' ~ 't'  ^^^ "\t"
    | '\\' ~> 'u' ~> unicodeBlock
    )

  val hexDigits: Set[Char] = "0123456789abcdefABCDEF".toSet

  def hexDigit: Parser[Char] = elem("hex digit", hexDigits.contains(_))

  /** Four hex digits, yielded as the single UTF-16 code unit they denote.
    * Four hex digits always fit in a Char, so no charset round trip is needed.
    */
  private def unicodeBlock: Parser[String] =
    repN(4, hexDigit) ^^ { digits => Integer.parseInt(digits.mkString, 16).toChar.toString }
}
And finally, an example of usage, together with the AST for DOT:
package scaladot

/** Example entry point: parses a DOT document and prints the resulting AST. */
object Dot extends DotParser {

  /** Parses `input` as a complete DOT document.
    *
    * Prints "Success!" on success and the parser's failure value otherwise.
    *
    * @return the parsed graph, or `None` if the input could not be parsed
    */
  def parse(input: String): Option[Graph] =
    phrase(dot)(new lexical.Scanner(input)) match {
      case Success(result, _) =>
        println("Success!")
        Some(result)
      case noSuccess =>
        println(noSuccess)
        None
    }

  def main(args: Array[String]): Unit = {
    val x = parse(""" digraph acm { hello -> world; test:up:n -> world; style = filled; subgraph cluster { node [style=filled,color=white]; toast -> bingo; zot -> bingo; zot -> test; style=filled; color=lightgrey; label = "Below"; } } """)
    println(x)
  }
}

/** Base class of every node of the DOT syntax tree; knows how to render itself as DOT text. */
abstract class DotComponent {

  override def toString: String = {
    val b = new StringBuilder
    buildString(0, b)
    b.toString()
  }

  /** Appends `level + 1` spaces to `b`. */
  private def indent(level: Int, b: StringBuilder): Unit =
    b.append(" " * (level + 1))

  /** Appends the DOT text of this component to `b`.
    *
    * @param level nesting depth, used for indentation
    * @param b     the builder that receives the output
    */
  def buildString(implicit level: Int, b: StringBuilder): Unit = {

    // Renders `things` at depth `lev`, writing `sep` between consecutive elements.
    def between(sep: String, things: Seq[DotComponent], lev: Int): Unit =
      for ((thing, index) <- things.zipWithIndex) {
        if (index > 0) b.append(sep)
        thing.buildString(lev, b)
      }

    // Like `between`, wrapped in `before`/`after`; writes nothing at all for an empty list.
    def betweenList(before: String, sep: String, after: String,
                    things: Seq[DotComponent], lev: Int): Unit =
      if (things.nonEmpty) {
        b.append(before)
        between(sep, things, lev)
        b.append(after)
      }

    this match {
      case Port(id, compass) =>
        b.append(id)
        compass.foreach(pt => b.append(':').append(pt))

      case Graph(strict, digraph, id, statements @ _*) =>
        indent(level, b)
        if (strict) b.append("strict ")
        b.append(if (digraph) "digraph " else "graph ")
        // The braces are written unconditionally so that a graph without
        // statements still renders as a well-formed (empty) graph.
        b.append(id).append(" {\n")
        between("\n", statements, level + 1)
        b.append("}\n")

      case AttrList(kind, attrs @ _*) =>
        indent(level, b)
        b.append(kind)
        betweenList(" [", ",", "]", attrs, 0)

      case Attr(n, Some(v), q) =>
        indent(level, b)
        b.append(n).append('=')
        if (q) {
          // Embedded double quotes must be escaped, otherwise the emitted
          // string literal would end early.
          b.append('"').append(v.replace("\"", "\\\"")).append('"')
        } else {
          b.append(v)
        }

      case Attr(n, _, _) =>
        b.append(n)

      case Edge(_, attrs, nodes @ _*) =>
        indent(level, b)
        // NOTE(review): always rendered with "->"; Edge does not carry its operator.
        between(" -> ", nodes, level)
        betweenList(" [", ",", "]", attrs, 0)

      case Subgraph(id, statements @ _*) =>
        indent(level, b)
        // The opening brace is written unconditionally so that it always
        // balances the closing brace below, even for an empty subgraph.
        b.append("subgraph ").append(id).append(" {\n")
        if (statements.nonEmpty) {
          between("\n", statements, level + 1)
          b.append("\n")
        }
        indent(level, b)
        b.append("}\n")

      case Node(id, port, attrs @ _*) =>
        b.append(id)
        port.foreach { p =>
          b.append(':')
          p.buildString(level, b)
        }
        betweenList(" [", ", ", " ]\n", attrs, level)
    }
  }
}

/** Implemented by DOT components that are allowed to have an identity */
trait Identified extends DotComponent {
  val id: String
}

/** Implemented by DOT components that are statements, for use within graphs and subgraphs. */
trait Statement extends DotComponent

/** Implemented by DOT components that have a list of attributes associated with them. */
trait Attributed extends DotComponent {
  val attrs: Seq[Attr]
}

/** Implemented by DOT components that can participate in an edge; currently Node and Subgraph. */
trait EdgeComponent extends Identified

/** The abstract base for the two graph components of DOT -- graph (digraph) and subgraph. */
abstract class AbstractGraph extends DotComponent with Identified {
  val statements: Seq[Statement]
}

/** Nodes can have an optional port identifier. The port identifier can have an optional compass direction. */
case class Port(id: String, compass: Option[String]) extends DotComponent

case class Graph(strict: Boolean, digraph: Boolean, id: String, statements: Statement*) extends AbstractGraph

//
// Statements
//

case class AttrList(kind: String, attrs: Attr*) extends Statement with Attributed

case class Attr(name: String, value: Option[String], quoted: Boolean) extends Statement

case class Edge(id: String, attrs: Seq[Attr], nodes: EdgeComponent*)
  extends Identified with Statement with Attributed

case class Subgraph(id: String, statements: Statement*)
  extends AbstractGraph with EdgeComponent with Statement

case class Node(id: String, port: Option[Port], attrs: Attr*)
  extends EdgeComponent with Statement with Attributed