代码为QueryParser.jj,语法为JavaCC实现的LL(k)文法:
完整文档:http://lucene.apache.org/java/2_0_0/queryparsersyntax.html

和正则一样:
?表示0个或1个
+表示一个或多个
*表示0个或多个


以下是Token部分:

_NUM_CHAR:: = [ " 0 " - " 9 " ] // 数字
_ESCAPED_CHAR:: =   " \\ "  [  " \\ " " + " " - " " ! " " ( " " ) " " : " " ^ " " [ " " ] " " \ "" " { " " } " " ~ " " * " " ? "  ] //特殊字符,即反斜杠\后跟一个被转义的字符
_TERM_START_CHAR :: = ~[ "   " " \t " " \n " " \r " " + " " - " " ! " " ( " " ) " " : " " ^ " " [ " " ] " " \ "" " { " " } " " ~ " " * " " ? "  ] //TERM的起始字符,除了列出的其它字符都可以
_TERM_CHAR:: = ( < _TERM_START_CHAR >   |   < _ESCAPED_CHAR >   |   " - "   |   " + "  )   // TERM可使用字符
_WHITESPACE:: =  (  "   "   |   " \t "   |   " \n "   |   " \r " ) // 空格、制表符、换行和回车

 

< DEFAULT >  TOKEN:
 AND::
= ( " AND "   |   " && " )
 OR::
= ( " OR "   |   " || " )
 NOT::
= ( " NOT "   |   " ! " )
 PLUS::
= " + "
 MINUS::
= " - "
 LPAREN::
= " ( "
 RPAREN::
= " ) "
 COLON::
= " : "
 STAR::
= " * "
 CARAT::
= " ^ "   // 后接Boost,原文<CARAT:     "^" > : Boost;在JavaCC中,token定义后面的": Boost"表示匹配该token之后词法状态切换到Boost(即本文所说的"后接Boost")
 QUOTED:: = " \ ""  (~[ " \ "" ] |   " \\\ "" )+  " \ ""   //  表示用"包起来的字符串,字符"开始,中间由不是"的符号或者连着的这两个符号\"组成,字符"结束
 TERM:: =< _TERM_START_CHAR >  ( < _TERM_CHAR > ) *
 FUZZY_SLOP::
= " ~ "  ( ( < _NUM_CHAR > ) +  (  " . "  ( < _NUM_CHAR > ) +  ) ?  ) ?   // 字符~开始,而后是数字.Lucene支持模糊查询,例如"roam~"或"roam~0.8",The value is between 0 and 1,算法为the Levenshtein Distance, or Edit Distance algorithm
 PREFIXTERM:: = ( < _TERM_START_CHAR >   |   " * " ) ( < _TERM_CHAR > ) *   " * "   // 前缀查找,表示以某某开头的查询, 字符表示为"something*",开头允许通配符*,中间可有字符也可没有, 结尾必须是*
 WILDTERM:: = ( < _TERM_START_CHAR >   |  [  " * " " ? "  ]) ( < _TERM_CHAR >   |  ( [  " * " " ? "  ] )) *   // 类似上面,但同时支持?字符,结尾可以是字符也可以是* ?。使用[]表示or关系时,不需要使用|,只要,号分割即可
 RANGEIN_START:: = " [ "   // 在RangeQuery中,[或{表示了是否包含边界条件本身, 用字符表示为"[begin TO end]" 或者"{begin TO end}",后接RangeIn
 RANGEEX_START:: = " { "   // 同上,后接RangeEx

< Boost >  TOKEN:
 NUMBER::
= ( < _NUM_CHAR > ) +  (  " . "  ( < _NUM_CHAR > ) +  ) ?   // 后接DEFAULT, 整数或小数

< RangeIn >  TOKEN:
 RANGEIN_TO::
= " TO "
 RANGEIN_END::
= " ] "   // 后接DEFAULT, RangeIn的结束
 RANGEIN_QUOTED:: =   " \ ""  (~[ " \ "" ] |   " \\\ "" )+  " \ ""   // 同上述QUOTED,表示用"包起来的字符串
 RANGEIN_GOOP:: =  ( ~[ "   " " ] "  ]) +   // 1个或多个不是空格和]的符号,这样就能提取出[]中的内容

< RangeEx >  TOKEN :
 RANGEEX_TO::
= " TO "
 RANGEEX_END::
= " } "   // 后接DEFAULT, RangeEx的结束
 RANGEEX_QUOTED:: = " \ ""  (~[ " \ "" ] |   " \\\ "" )+  " \ ""   // 同上述QUOTED,表示用"包起来的字符串
 RANGEEX_GOOP:: = ( ~[ "   " " } "  ]) +   // 1个或多个不是空格和}的符号,这样就能提取出{}中的内容


< DEFAULT, RangeIn, RangeEx >  SKIP : {
  
  < < _WHITESPACE > >
}
// 所有空格和回车被忽略



以下为解析部分

 

Conjunction:: = [ < AND >  { ret  =  CONJ_AND; }  |   < OR >   { ret  =  CONJ_OR; }  ]  // 连接符,整体用[]包住,表示可以省略
Modifiers:: = [ < PLUS >  { ret  =  MOD_REQ; }  |   < MINUS >  { ret  =  MOD_NOT; }  |   < NOT >  { ret  =  MOD_NOT; } ]  // + - !符号,整体用[]包住,表示可以省略
Query:: = Modifiers Clause (Conjunction Modifiers Clause) *
Clause::
= [( < TERM >   < COLON >|< STAR >   < COLON > )]  // btw:代码中LOOKAHEAD(2)表示使用LL(2)
         (Term |< LPAREN >  Query  < RPAREN >  ( < CARAT >   < NUMBER > ) ? )   // 子句. 疑问:这儿语法有点特别,仿佛允许 *:(*:dog)这样的语法,很奇怪
Term:: = (
    (
< TERM >|< STAR >|< PREFIXTERM >|< WILDTERM >|< NUMBER > ) [ < FUZZY_SLOP > ] [ < CARAT >< NUMBER > [ < FUZZY_SLOP > ] ] 
    
|  (  < RANGEIN_START >  ( < RANGEIN_GOOP >|< RANGEIN_QUOTED > ) [  < RANGEIN_TO >  ] ( < RANGEIN_GOOP >|< RANGEIN_QUOTED > )  < RANGEIN_END >  ) [  < CARAT >  boost =< NUMBER >  ]  // 这儿看出range必须同时有两端,不能只有一端 
     |  (  < RANGEEX_START >  ( < RANGEEX_GOOP >|< RANGEEX_QUOTED > ) [  < RANGEEX_TO >  ] ( < RANGEEX_GOOP >|< RANGEEX_QUOTED > )  < RANGEEX_END >  )[  < CARAT >  boost =< NUMBER >  ]  // 在RangeQuery中,[或{表示了是否包含边界条件本身, 用字符表示为"[begin TO end]" 或者"{begin TO end}";此处为{}形式,对应RangeEx
     |   < QUOTED >  [  < FUZZY_SLOP >  ] [  < CARAT >  boost =< NUMBER >  ]  )  // 被""包含的内容


btw: javacc中,如果使用[],则其中的内容允许出现0次或1次(与(...)?等价)