从Sun的javac源码中抽取出来的LL(1) Java语法

该语法抽取自 OpenJDK 6 build 17 中 javac 的语法分析器,源文件为 j2se/src/share/classes/com/sun/tools/javac/parser/Parser.java。
该代码以 GPLv2许可证开源。

注意Parser类的注释:
/** The parser maps a token sequence into an abstract syntax
 *  tree. It operates by recursive descent, with code derived
 *  systematically from an LL(1) grammar. For efficiency reasons, an
 *  operator precedence scheme is used for parsing binary operation
 *  expressions.
 *
 *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
 *  you write code that depends on this, you do so at your own risk.
 *  This code and its internal interfaces are subject to change or
 *  deletion without notice.</b>
 */

Sun JDK 6 中的 javac 采用递归下降与运算符优先级相结合的解析方式:整体上是递归下降,只在解析二元运算表达式时改用运算符优先级方式,以提高解析效率。

下面是从各个语法分析方法前的注释中提取出来的LL(1)语法。顺序有调整。
该语法采用 EBNF 记法,其中 [] 表示可选(0 或 1 个),{} 表示任意个(0 或多个),() 表示分组;引号内的是字面量(多数规则用双引号,个别规则用单引号);全大写且未加引号的名字(如 PACKAGE、IDENTIFIER、COMMA、DOT)是词法记号(token)名,其余未加引号的名字是语法规则名。
有几条规则(如 SuperSuffix)出现了多个定义;它们提取自不同方法的注释,内容本不应互相矛盾,但我还没有仔细核对这些定义之间到底有哪些出入。
CompilationUnit = [ { "@" Annotation } PACKAGE Qualident ";"] {ImportDeclaration} {TypeDeclaration}

AnnotationsOpt = { '@' Annotation }

ImportDeclaration = IMPORT [ STATIC ] Ident { "." Ident } [ "." "*" ] ";"

TypeDeclaration = ClassOrInterfaceOrEnumDeclaration
                | ";"

ClassOrInterfaceOrEnumDeclaration = ModifiersOpt
         (ClassDeclaration | InterfaceDeclaration | EnumDeclaration)

ModifiersOpt = { Modifier }
Modifier = PUBLIC | PROTECTED | PRIVATE | STATIC | ABSTRACT | FINAL
         | NATIVE | SYNCHRONIZED | TRANSIENT | VOLATILE | "@"
         | "@" Annotation

Annotation              = "@" Qualident [ "(" AnnotationFieldValues ")" ]

AnnotationFieldValues   = "(" [ AnnotationFieldValue { "," AnnotationFieldValue } ] ")"

AnnotationFieldValue    = AnnotationValue
                        | Identifier "=" AnnotationValue

AnnotationValue          = ConditionalExpression
                        | Annotation
                        | "{" [ AnnotationValue { "," AnnotationValue } ] [","] "}"

ClassDeclaration = CLASS Ident TypeParametersOpt [EXTENDS Type]
                   [IMPLEMENTS TypeList] ClassBody

InterfaceDeclaration = INTERFACE Ident TypeParametersOpt
                       [EXTENDS TypeList] InterfaceBody

EnumDeclaration = ENUM Ident [IMPLEMENTS TypeList] EnumBody

EnumBody = "{" { EnumeratorDeclarationList } [","]
                [ ";" {ClassBodyDeclaration} ] "}"

EnumeratorDeclaration = AnnotationsOpt [TypeArguments] IDENTIFIER [ Arguments ] [ "{" ClassBody "}" ]

TypeList = Type {"," Type}

ClassBody     = "{" {ClassBodyDeclaration} "}"
InterfaceBody = "{" {InterfaceBodyDeclaration} "}"

ClassBodyDeclaration =
    ";"
  | [STATIC] Block
  | ModifiersOpt
    ( Type Ident
      ( VariableDeclaratorsRest ";" | MethodDeclaratorRest )
    | VOID Ident MethodDeclaratorRest
    | TypeParameters (Type | VOID) Ident MethodDeclaratorRest
    | Ident ConstructorDeclaratorRest
    | TypeParameters Ident ConstructorDeclaratorRest
    | ClassOrInterfaceOrEnumDeclaration
    )
InterfaceBodyDeclaration =
    ";"
  | ModifiersOpt Type Ident
    ( ConstantDeclaratorsRest | InterfaceMethodDeclaratorRest ";" )

MethodDeclaratorRest =
    FormalParameters BracketsOpt [Throws TypeList] ( MethodBody | [DEFAULT AnnotationValue] ";")
VoidMethodDeclaratorRest =
    FormalParameters [Throws TypeList] ( MethodBody | ";")
InterfaceMethodDeclaratorRest =
    FormalParameters BracketsOpt [THROWS TypeList] ";"
VoidInterfaceMethodDeclaratorRest =
    FormalParameters [THROWS TypeList] ";"
ConstructorDeclaratorRest =
    "(" FormalParameterListOpt ")" [THROWS TypeList] MethodBody

QualidentList = Qualident {"," Qualident}

Qualident = Ident { DOT Ident }

TypeParametersOpt = ["<" TypeParameter {"," TypeParameter} ">"]

TypeParameter = TypeVariable [TypeParameterBound]
TypeParameterBound = EXTENDS Type {"&" Type}
TypeVariable = Ident

FormalParameters = "(" [ FormalParameterList ] ")"
FormalParameterList = [ FormalParameterListNovarargs , ] LastFormalParameter
FormalParameterListNovarargs = [ FormalParameterListNovarargs , ] FormalParameter

FormalParameter = { FINAL | '@' Annotation } Type VariableDeclaratorId
LastFormalParameter = { FINAL | '@' Annotation } Type '...' Ident | FormalParameter

MethodBody = Block

Statement =
     Block
   | IF ParExpression Statement [ELSE Statement]
   | FOR "(" ForInitOpt ";" [Expression] ";" ForUpdateOpt ")" Statement
   | FOR "(" FormalParameter : Expression ")" Statement
   | WHILE ParExpression Statement
   | DO Statement WHILE ParExpression ";"
   | TRY Block ( Catches | [Catches] FinallyPart )
   | SWITCH ParExpression "{" SwitchBlockStatementGroups "}"
   | SYNCHRONIZED ParExpression Block
   | RETURN [Expression] ";"
   | THROW Expression ";"
   | BREAK [Ident] ";"
   | CONTINUE [Ident] ";"
   | ASSERT Expression [ ":" Expression ] ";"
   | ";"
   | ExpressionStatement
   | Ident ":" Statement

Block = "{" BlockStatements "}"

BlockStatements = { BlockStatement }
BlockStatement  = LocalVariableDeclarationStatement
                | ClassOrInterfaceOrEnumDeclaration
                | [Ident ":"] Statement
LocalVariableDeclarationStatement
                = { FINAL | '@' Annotation } Type VariableDeclarators ";"

ParExpression = "(" Expression ")"

ForInit = StatementExpression MoreStatementExpressions
        |  { FINAL | '@' Annotation } Type VariableDeclarators

ForUpdate = StatementExpression MoreStatementExpressions

VariableDeclarators = VariableDeclarator { "," VariableDeclarator }

VariableDeclaratorsRest = VariableDeclaratorRest { "," VariableDeclarator }
ConstantDeclaratorsRest = ConstantDeclaratorRest { "," ConstantDeclarator }

VariableDeclarator = Ident VariableDeclaratorRest
ConstantDeclarator = Ident ConstantDeclaratorRest

VariableDeclaratorRest = BracketsOpt ["=" VariableInitializer]
ConstantDeclaratorRest = BracketsOpt "=" VariableInitializer

VariableDeclaratorId = Ident BracketsOpt

CatchClause     = CATCH "(" FormalParameter ")" Block

SwitchBlockStatementGroups = { SwitchBlockStatementGroup }
SwitchBlockStatementGroup = SwitchLabel BlockStatements
SwitchLabel = CASE ConstantExpression ":" | DEFAULT ":"

MoreStatementExpressions = { COMMA StatementExpression }

Expression = Expression1 [ExpressionRest]
ExpressionRest = [AssignmentOperator Expression1]
AssignmentOperator = "=" | "+=" | "-=" | "*=" | "/=" |
                     "&=" | "|=" | "^=" |
                     "%=" | "<<=" | ">>=" | ">>>="
Type = Type1
TypeNoParams = TypeNoParams1
StatementExpression = Expression
ConstantExpression = Expression

Expression1   = Expression2 [Expression1Rest]
Type1         = Type2
TypeNoParams1 = TypeNoParams2

Expression1Rest = ["?" Expression ":" Expression1]

Expression2   = Expression3 [Expression2Rest]
Type2         = Type3
TypeNoParams2 = TypeNoParams3

Expression2Rest = {infixop Expression3}
                | Expression3 INSTANCEOF Type
infixop         = "||"
                | "&&"
                | "|"
                | "^"
                | "&"
                | "==" | "!="
                | "<" | ">" | "<=" | ">="
                | "<<" | ">>" | ">>>"
                | "+" | "-"
                | "*" | "/" | "%"

Expression3    = PrefixOp Expression3
               | "(" Expr | TypeNoParams ")" Expression3
               | Primary {Selector} {PostfixOp}
Primary        = "(" Expression ")"
               | Literal
               | [TypeArguments] THIS [Arguments]
               | [TypeArguments] SUPER SuperSuffix
               | NEW [TypeArguments] Creator
               | Ident { "." Ident }
                 [ "[" ( "]" BracketsOpt "." CLASS | Expression "]" )
                 | Arguments
                 | "." ( CLASS | THIS | [TypeArguments] SUPER Arguments | NEW [TypeArguments] InnerCreator )
                 ]
               | BasicType BracketsOpt "." CLASS
PrefixOp       = "++" | "--" | "!" | "~" | "+" | "-"
PostfixOp      = "++" | "--"
Type3          = Ident { "." Ident } [TypeArguments] {TypeSelector} BracketsOpt
               | BasicType
TypeNoParams3  = Ident { "." Ident } BracketsOpt
Selector       = "." [TypeArguments] Ident [Arguments]
               | "." THIS
               | "." [TypeArguments] SUPER SuperSuffix
               | "." NEW [TypeArguments] InnerCreator
               | "[" Expression "]"
TypeSelector   = "." Ident [TypeArguments]
SuperSuffix    = Arguments | "." Ident [Arguments]

SuperSuffix = Arguments | "." [TypeArguments] Ident [Arguments]

BasicType = BYTE | SHORT | CHAR | INT | LONG | FLOAT | DOUBLE | BOOLEAN

ArgumentsOpt = [ Arguments ]

Arguments = "(" [Expression { COMMA Expression }] ")"

TypeArgumentsOpt = [ TypeArguments ]

TypeArguments  = "<" TypeArgument {"," TypeArgument} ">"

TypeArgument = Type
             | "?"
             | "?" EXTENDS Type {"&" Type}
             | "?" SUPER Type

BracketsOpt = {"[" "]"}

BracketsSuffixExpr = "." CLASS
BracketsSuffixType =

Creator = Qualident [TypeArguments] ( ArrayCreatorRest | ClassCreatorRest )

InnerCreator = Ident [TypeArguments] ClassCreatorRest

ArrayCreatorRest = "[" ( "]" BracketsOpt ArrayInitializer
                       | Expression "]" {"[" Expression "]"} BracketsOpt )

ClassCreatorRest = Arguments [ClassBody]

ArrayInitializer = "{" [VariableInitializer {"," VariableInitializer}] [","] "}"

VariableInitializer = ArrayInitializer | Expression

Ident = IDENTIFIER

Literal =
    INTLITERAL
  | LONGLITERAL
  | FLOATLITERAL
  | DOUBLELITERAL
  | CHARLITERAL
  | STRINGLITERAL
  | TRUE
  | FALSE
  | NULL


目前只是把源码中的注释原样提取了出来,还没有检查是否存在提取错误或遗漏。总之先记下来,以后慢慢看。

P.S. OpenJDK 项目下有一个 Compiler Grammar 项目,其中包含一个用 ANTLR 编写的语法文件 Java.g,值得参考。

你可能感兴趣的:(java,.net,Scheme,J2SE,sun)