学习一些完整的语法
CSV
grammar CSV;

/** A CSV file is one header row followed by at least one data row. */
file
    : hdr row+
    ;

/** The header has exactly the same shape as any other row. */
hdr
    : row
    ;

/** Comma-separated fields terminated by a newline, optionally preceded by
 *  a carriage return.
 */
row
    : field ( ',' field )* '\r'? '\n'
    ;

/** A field is bare text, a quoted string, or nothing at all. */
field
    : TEXT
    | STRING
    |                   // empty field, e.g. the middle one in: a,,b
    ;

// Unquoted text: a run of characters that are not a comma, a newline,
// a carriage return, or a double quote.
TEXT
    : ~[",\r\n]+
    ;

// Quoted text: quote-quote is an escaped quote.
STRING
    : '"' ( '""' | ~["] )* '"'
    ;
JSON
// Derived from http://json.org
grammar JSON;

/** Entry rule: the document is a single object or a single array. */
json
    : object
    | array
    ;

/** Braces around zero or more comma-separated pairs ("{}" is the empty object). */
object
    : '{' ( pair ( ',' pair )* )? '}'
    ;

/** A string key, a colon, and a value. */
pair
    : STRING ':' value
    ;

/** Brackets around zero or more comma-separated values ("[]" is the empty array). */
array
    : '[' ( value ( ',' value )* )? ']'
    ;

/** Any JSON value; object and array make the grammar recursive. */
value
    : STRING
    | NUMBER
    | object        // recursion
    | array         // recursion
    | 'true'        // keywords
    | 'false'
    | 'null'
    ;
// A double-quoted string: any mix of escape sequences and characters other
// than the double quote and the backslash.
STRING
    : '"' ( ESC | ~["\\] )* '"'
    ;

// Backslash escape: one of the single-character escapes, or 'u' followed by
// exactly four hex digits.
fragment ESC     : '\\' ( ["\\/bfnrt] | UNICODE ) ;
fragment UNICODE : 'u' HEX HEX HEX HEX ;
fragment HEX     : [0-9a-fA-F] ;

NUMBER
    : '-'? INT '.' [0-9]+ EXP?  // 1.35, 1.35E-9, 0.3, -4.5
    | '-'? INT EXP              // 1e10, -3e4, 1e05
    | '-'? INT                  // -3, 45
    ;

// Integer part: no leading zeros.
fragment INT : '0' | [1-9] [0-9]* ;

// Exponent. The digits are matched directly instead of through INT because
// JSON permits leading zeros in the exponent (1e05, 2E-007); only the integer
// part is restricted. "\-" is needed since "-" means "range" inside [...].
fragment EXP : [Ee] [+\-]? [0-9]+ ;

// Whitespace between tokens is discarded.
WS  : [ \t\n\r]+ -> skip ;
DOT
/** Derived from http://www.graphviz.org/doc/info/lang.html.
    Comments pulled from spec.
 */
grammar DOT;

/** Entry rule: optional STRICT, then GRAPH or DIGRAPH, an optional name,
 *  and a braced statement list.
 */
graph       : STRICT? ( GRAPH | DIGRAPH ) id? '{' stmt_list '}' ;

/** Zero or more statements; the ';' after each one is optional. */
stmt_list   : ( stmt ';'? )* ;

stmt
    : node_stmt
    | edge_stmt
    | attr_stmt
    | id '=' id             // one id assigned to another
    | subgraph
    ;

/** An attribute list introduced by the GRAPH, NODE or EDGE keyword. */
attr_stmt   : ( GRAPH | NODE | EDGE ) attr_list ;

/** One or more bracketed groups; a group may be empty ("[]"). */
attr_list   : ( '[' a_list? ']' )+ ;

/** Entries of the form id or id=id, each optionally followed by a comma. */
a_list      : ( id ( '=' id )? ','? )+ ;

/** A node or subgraph, one or more edge hops, then optional attributes. */
edge_stmt   : ( node_id | subgraph ) edgeRHS attr_list? ;

/** One or more hops, each an edge operator followed by its target. */
edgeRHS     : ( edgeop ( node_id | subgraph ) )+ ;

edgeop      : '->' | '--' ;

node_stmt   : node_id attr_list? ;

node_id     : id port? ;

/** ':' id, optionally followed by a second ':' id. */
port        : ':' id ( ':' id )? ;

/** A braced statement list; the SUBGRAPH keyword (and its optional name)
 *  may be left out entirely.
 */
subgraph    : ( SUBGRAPH id? )? '{' stmt_list '}' ;

/** Anything usable as an identifier: plain ID, quoted string, HTML string
 *  or numeral.
 */
id
    : ID
    | STRING
    | HTML_STRING
    | NUMBER
    ;
// "The keywords node, edge, graph, digraph, subgraph, and strict are
// case-independent"
// Every letter is therefore matched by a two-character set holding its
// lower-case and upper-case forms.
STRICT      : [sS] [tT] [rR] [iI] [cC] [tT] ;
GRAPH       : [gG] [rR] [aA] [pP] [hH] ;
DIGRAPH     : [dD] [iI] [gG] [rR] [aA] [pP] [hH] ;
NODE        : [nN] [oO] [dD] [eE] ;
EDGE        : [eE] [dD] [gG] [eE] ;
SUBGRAPH    : [sS] [uU] [bB] [gG] [rR] [aA] [pP] [hH] ;
/** "a numeral [-]?(.[0-9]+ | [0-9]+(.[0-9]*)? )" */
NUMBER
    : '-'? ( '.' DIGIT+
           | DIGIT+ ( '.' DIGIT* )?
           )
    ;

fragment DIGIT  : [0-9] ;

/** "any double-quoted string ("...") possibly containing escaped quotes" */
// Non-greedy loop: the string ends at the first quote that is not part of
// a backslash-quote pair.
STRING
    : '"' ( '\\"' | . )*? '"'
    ;

/** "Any string of alphabetic ([a-zA-Z\200-\377]) characters, underscores
 *  ('_') or digits ([0-9]), not beginning with a digit"
 */
ID
    : LETTER ( LETTER | DIGIT )*
    ;

// The spec's octal range \200-\377 is written here as \u0080-\u00FF.
fragment LETTER : [_a-zA-Z\u0080-\u00FF] ;

/** "HTML strings, angle brackets must occur in matched pairs, and
 *  unescaped newlines are allowed."
 */
HTML_STRING
    : '<' ( TAG | ~[<>] )* '>'
    ;

// A nested <...> element inside an HTML string.
fragment TAG    : '<' .*? '>' ;
// Block comment; the non-greedy loop stops at the first comment terminator.
COMMENT
    : '/*' .*? '*/' -> skip
    ;

// Line comment: everything up to, but not including, the end of the line.
// The line terminator is left for WS to discard, so a comment on the last
// line of a file that has no trailing newline is still recognised instead
// of producing a token recognition error.
LINE_COMMENT
    : '//' ~[\r\n]* -> skip
    ;

/** "a '#' character is considered a line output from a C preprocessor (e.g.,
 *  # 34 to indicate line 34 ) and discarded"
 */
// Matched the same way as LINE_COMMENT so that a final line without a
// newline does not break lexing.
PREPROC
    : '#' ~[\r\n]* -> skip
    ;

// Whitespace, including the newlines left behind by the two rules above.
WS
    : [ \t\n\r]+ -> skip
    ;
Cymbol
/** Simple statically-typed programming language with functions and variables
 *  taken from "Language Implementation Patterns" book.
 */
grammar Cymbol;

/** A file is one or more function or variable declarations, in any order. */
file
    : ( functionDecl | varDecl )+
    ;

/** Variable declaration with an optional initializer, e.g. "int x = 1;". */
varDecl
    : type ID ( '=' expr )? ';'
    ;

// Only these three built-in type names are accepted; this rule has no
// alternative for user-defined types.
type
    : 'float'
    | 'int'
    | 'void'
    ;

/** Function definition, e.g. "void f(int x) {...}". */
functionDecl
    : type ID '(' formalParameters? ')' block
    ;

/** One or more comma-separated parameters. */
formalParameters
    : formalParameter ( ',' formalParameter )*
    ;

formalParameter
    : type ID
    ;

/** Braced statement block; may be empty. */
block
    : '{' stat* '}'
    ;

stat
    : block
    | varDecl
    | 'if' expr 'then' stat ( 'else' stat )?
    | 'return' expr? ';'
    | expr '=' expr ';'     // assignment
    | expr ';'              // func call
    ;
/** Expressions. Among the left-recursive binary alternatives, the one listed
 *  first binds tightest: '*' over '+'/'-' over '=='.
 */
expr
    : ID '(' exprList? ')'      // func call like f(), f(x), f(1,2)
    // One or more index suffixes, so a[i][j] is accepted as well as a[i].
    // The loop is attached to ID (rather than making the alternative
    // left-recursive) so the parse tree for a single index is unchanged.
    | ID ( '[' expr ']' )+      // array index like a[i], a[i][j]
    | '-' expr                  // unary minus
    | '!' expr                  // boolean not
    | expr '*' expr
    | expr ( '+' | '-' ) expr
    | expr '==' expr            // equality comparison (lowest priority op)
    | ID                        // variable reference
    | INT
    | '(' expr ')'
    ;

/** Comma-separated argument list of a function call. */
exprList
    : expr ( ',' expr )*
    ;
// Identifier: a letter followed by any number of letters or digits.
ID
    : LETTER ( LETTER | [0-9] )*
    ;

fragment LETTER : [a-zA-Z] ;

INT
    : [0-9]+
    ;

// Whitespace is discarded, including the newline that ends a line comment.
WS
    : [ \t\n\r]+ -> skip
    ;

// Single-line comment: everything up to, but not including, the end of the
// line. Not requiring the newline lets a comment on the last line of a file
// without a trailing newline lex cleanly; the newline, when present, is
// skipped by WS.
SL_COMMENT
    : '//' ~[\r\n]* -> skip
    ;
R
/**
derived from http://svn.r-project.org/R/trunk/src/main/gram.y
http://cran.r-project.org/doc/manuals/R-lang.html#Parser
*/
grammar R;

/** Entry rule: any number of terminated expressions or blank lines, then
 *  end of input. Each expression must be followed by ';' or a newline.
 */
prog
    : ( expr_or_assign ( ';' | NL )
      | NL
      )*
      EOF
    ;

/** An expression, or an assignment whose right-hand side may itself be an
 *  assignment (the rule recurses on the right, so a <- b <- c groups as
 *  a <- (b <- c)).
 */
expr_or_assign
    : expr ( '<-' | '=' | '<<-' ) expr_or_assign
    | expr
    ;
/** R expressions. For the left-recursive operator alternatives, an
 *  alternative listed earlier binds tighter than one listed later.
 */
expr
    : expr '[[' sublist ']' ']'             // '[[' follows R's yacc grammar
    | expr '[' sublist ']'
    | expr ( '::' | ':::' ) expr
    | expr ( '$' | '@' ) expr
    // Exponentiation groups right to left in R: 2^3^2 is 2^(3^2).
    // Without <assoc=right> ANTLR would group it as (2^3)^2.
    | <assoc=right> expr '^' expr
    | ( '-' | '+' ) expr
    | expr ':' expr
    | expr USER_OP expr                     // anything wrapped in %: '%' .* '%'
    | expr ( '*' | '/' ) expr
    | expr ( '+' | '-' ) expr
    | expr ( '>' | '>=' | '<' | '<=' | '==' | '!=' ) expr
    | '!' expr
    | expr ( '&' | '&&' ) expr
    | expr ( '|' | '||' ) expr
    | '~' expr
    | expr '~' expr
    | expr ( '->' | '->>' | ':=' ) expr
    | 'function' '(' formlist? ')' expr     // define function
    | expr '(' sublist ')'                  // call function
    | '{' exprlist '}'                      // compound statement
    | 'if' '(' expr ')' expr
    | 'if' '(' expr ')' expr 'else' expr
    | 'for' '(' ID 'in' expr ')' expr
    | 'while' '(' expr ')' expr
    | 'repeat' expr
    | '?' expr                              // get help on expr, usually string or ID
    | 'next'
    | 'break'
    | '(' expr ')'
    | ID
    | STRING
    | HEX
    | INT
    | FLOAT
    | COMPLEX
    | 'NULL'
    | 'NA'
    | 'Inf'
    | 'NaN'
    | 'TRUE'
    | 'FALSE'
    ;
/** Body of a compound statement: expressions separated by ';' or newlines.
 *  A separator need not be followed by an expression, and the whole list
 *  may be empty.
 */
exprlist
    : expr_or_assign ( ( ';' | NL ) expr_or_assign? )*
    |
    ;

/** Formal parameter list of a function definition. */
formlist
    : form ( ',' form )*
    ;

/** A formal parameter: a name, a name with a default value, or '...'. */
form
    : ID ( '=' expr )?
    | '...'
    ;

/** Argument list of a call or subscript. */
sublist
    : sub ( ',' sub )*
    ;

/** One argument: an expression; a name, string or NULL followed by '=' and
 *  an optional value; '...'; or nothing at all.
 */
sub
    : expr
    | ID '=' expr?
    | STRING '=' expr?
    | 'NULL' '=' expr?
    | '...'
    |
    ;
// Hexadecimal literal with an optional L suffix, e.g. 0x1F or 0X1fL.
HEX
    : '0' [xX] HEXDIGIT+ [Ll]?
    ;

// Decimal digits with an optional L suffix. INT is declared before FLOAT,
// so a plain digit run, which FLOAT's second alternative also matches, is
// tokenized as INT.
INT
    : DIGIT+ [Ll]?
    ;

fragment HEXDIGIT : [0-9a-fA-F] ;

FLOAT
    : DIGIT+ '.' DIGIT* EXP? [Ll]?
    | DIGIT+ EXP? [Ll]?
    | '.' DIGIT+ EXP? [Ll]?
    ;

fragment DIGIT : [0-9] ;

// Exponent part. It refers to the INT rule, so the exponent digits follow
// INT's definition.
fragment EXP : [Ee] [+\-]? INT ;

// An INT or FLOAT immediately followed by 'i'.
COMPLEX
    : INT 'i'
    | FLOAT 'i'
    ;
// String literal in double or single quotes. Inside, a backslash may only
// appear as the start of an escape sequence matched by ESC.
STRING
    : '"'  ( ESC | ~[\\"] )*? '"'
    | '\'' ( ESC | ~[\\'] )*? '\''
    ;

// Escape sequences. The first alternative includes the escaped backslash
// ('\\' followed by '\\'); without it a literal such as "a\\b" could not be
// tokenized at all.
fragment ESC
    : '\\' ( [abtnfrv] | '"' | '\'' | '\\' )
    | UNICODE_ESCAPE
    | HEX_ESCAPE
    | OCTAL_ESCAPE
    ;

// Backslash-u followed by four hex digits, bare or wrapped in braces.
fragment UNICODE_ESCAPE
    : '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
    | '\\' 'u' '{' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT '}'
    ;

// Backslash followed by one to three octal digits.
fragment OCTAL_ESCAPE
    : '\\' [0-3] [0-7] [0-7]
    | '\\' [0-7] [0-7]
    | '\\' [0-7]
    ;

// R writes hex escapes as backslash, 'x', then one or two hex digits. The
// 'x' is required: without it this rule rejected "\x41" and instead
// accepted sequences such as backslash-c that R does not define.
fragment HEX_ESCAPE
    : '\\' 'x' HEXDIGIT HEXDIGIT?
    ;
// Identifier. It either starts with a letter, or starts with '.' followed by
// a letter, '_' or '.' (so never '.' followed directly by a digit); letters,
// digits, '_' and '.' may follow.
ID
    : '.' ( LETTER | [_.] ) ( LETTER | DIGIT | [_.] )*
    | LETTER ( LETTER | DIGIT | [_.] )*
    ;

fragment LETTER : [a-zA-Z] ;

// User-defined infix operator: shortest text enclosed in a pair of '%'.
USER_OP
    : '%' .*? '%'
    ;

// A comment runs through the end of its line and is emitted as an NL token,
// so the parser sees it as a line terminator.
COMMENT
    : '#' .*? '\r'? '\n' -> type(NL)
    ;

// Match both UNIX and Windows newlines
NL
    : '\r'? '\n'
    ;

// Only spaces and tabs are skipped; newlines are NL tokens seen by the parser.
WS
    : [ \t]+ -> skip
    ;