lex和yacc解析SQL语句语法解析(1)

  最近因项目需要解析sql,并在sql语句的Group By字段中加入附加信息。系统的开发环境和语言如下。

  项目运行环境 : Linux Red Hat Enterprise 5

       开发语言 :  c, shell, perl

          基础库 : glib-2.24

 

 

  我们先从总体上来对lex和yacc的功能进行认识。lex的功能主要是完成词法的分析,yacc主要功能是完成语法的分析;这样词法分析工具和语法工具已经具备,这样我们就可以整合这两个工具来完成sql语句的分析或者是自定义语句的分析工作。

   在开始代码工作之前,我们首先要明确我们的目标。

  

   目标: 通过词法分析构建语法分析的要素;然后通过语法分析,构建语法树;最后我们对语法树进行处理,得到我们想要的结果。

 

    1、通过lex分析定义的词;(词是构建语法的最基本单元,语法是建立在词的基础之上)

    2、通过yacc分析语法,构建语法树。

    3、通过对语法树的分析,生成或计算出我们想要的结果。

 

  我们明确了目标之后,接下来准备开始词法分析的代码编写和分析。在开始词法代码编写之前,我们根据自己的需要先完成语法树节点结构体的定义。根据分析我们定义的结构体如下:

 

// sqlAnalysis.y

/*
 * Node of the SQL syntax tree.
 *
 * The lexer creates one node per token and the grammar actions link the
 * nodes together through lpNext / lpSub.
 */
typedef struct sqlNodeType{
        int type;                       /* token code of the node (ST, FM, NAME, ...) */
        double dVal;                    /* numeric value; set for NUMBER tokens */
        char text[48];                  /* token text, truncated to fit the buffer */
        struct sqlNodeType *lpNext;     /* next node on the same level */
        struct sqlNodeType *lpSub;      /* child node */
}STSqlNode,*LPSTSqlNode;

 

 

/* Semantic value type shared by the lexer (yylval) and the grammar actions. */
%union {
        int nVal;               /* integer semantic value */
        LPSTSqlNode lpStNode;   /* syntax-tree node; the lexer actions store the node for each token here */
};

   

定义完之后,然后定义词的规则

// sqlAnalysis.l

 

/* Optionally signed integer or decimal literal: 42, -7, 3.14 */
number          [-]?[0-9]+(\.[0-9]+)?
/* Identifier; '(' ')' '.' '*' are allowed after the first character so that
   count(id) or t.* is scanned as a single token */
name            [_a-zA-Z][_0-9a-zA-Z()\.\*]*
/* Right-hand operand of a condition: ${placeholder}, an identifier, or a number */
variable        (\$\{[_a-zA-Z][_0-9a-zA-Z()\.]*\}|[_a-zA-Z][_0-9a-zA-Z()\.\*]*|[-]?[0-9]+(\.[0-9]+)?)
/* A single blank, newline or tab */
whitespace      [ \n\t]
/* Keywords: only all-lower, all-upper and capitalised spellings are accepted */
st              (select|SELECT|Select)
fm              (from|FROM|From)
wh              (where|WHERE|Where)
gy              (group[ ]+by|GROUP[ ]+BY|Group[ ]+By)
oy              (order[ ]+by|ORDER[ ]+BY|Order[ ]+By)
/* Statement terminator and list separator */
terminator      [;]
nsplit          [,]
/* Logical connective between conditions */
whsplit         (and|AND|or|OR|Or|And)
/* Alias keyword */
define          (AS|as|As|aS)
/* Comparison operators. This must be an alternation: a character class would
   match only one character and could never recognise <= or >= */
operator        (<=|>=|<|>|=)
/* Parentheses of a sub-query; at least one blank is required on both sides */
lborder         ([ ]+\([ ]+)
rborder         ([ ]+\)[ ]+)
/* Index id in square brackets, 1 to 24 characters long: [idx_01] */
indId           (\[[a-zA-Z0-9_]{1,24}\])

 

以上是一个SELECT sql语句简单的词规则,为了简便我们就定义简单一些。

定义完词规则之后,然后根据这些规则进行相应的处理,我们简单的处理:

 

{indId} {
                /* Bracketed index id; the text is stored including the brackets. */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=INDID;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return INDID;
        }

{st}  {
                /* Keywords are stored in a canonical upper-case spelling. */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=ST;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"SELECT");
                return ST;
        }

{fm}    {
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=FM;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"FROM");
                return FM;
        }

{wh}    {
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=WH;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"WHERE");
                return WH;
        }

{gy}    {
                /* Any run of blanks between the two words collapses to one. */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=GY;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"GROUP BY");
                return GY;
        }

{oy}    {
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=OY;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"ORDER BY");
                return OY;
        }

{define}  {
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=DEFINE;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"AS");
                return DEFINE;
           }

{whsplit} {
                /* AND / OR: the spelling used in the input is kept. */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=WHSPLIT;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return WHSPLIT;
          }

{operator} {
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type= OPERATOR;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return OPERATOR;
           }

{nsplit}        {
                        yylval.lpStNode = mallocSqlNode();
                        yylval.lpStNode->type= NSPLIT;
                        snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                        return NSPLIT;
                }

{lborder}       {
                        /* The stored text includes the blanks around the parenthesis. */
                        yylval.lpStNode = mallocSqlNode();
                        yylval.lpStNode->type= LBORDER;
                        snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                        return LBORDER;
                }

{rborder}       {
                        yylval.lpStNode = mallocSqlNode();
                        yylval.lpStNode->type= RBORDER;
                        snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                        return RBORDER;
                }

{name}  {
                /*
                 * Must be listed before {variable}: both match an identifier
                 * with the same length and flex then picks the earlier rule.
                 * With {variable} first, NAME could never be returned.
                 */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=NAME;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return NAME;
        }

{number} {
                /*
                 * Listed before {variable} for the same tie-breaking reason.
                 * The literal is kept both as a double and as text, so NUMBER
                 * nodes carry their text like every other token.
                 */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=NUMBER;
                yylval.lpStNode->dVal = strtod(yytext,NULL);
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return NUMBER;
         }

{variable} {
                /*
                 * Reached only for the ${...} placeholder form; plain
                 * identifiers and numbers are taken by the two rules above.
                 */
                yylval.lpStNode = mallocSqlNode();
                yylval.lpStNode->type=VARIABLE;
                snprintf(yylval.lpStNode->text,sizeof(yylval.lpStNode->text),"%s",yytext);
                return VARIABLE;
           }

{terminator}    {
                        /* No node is created for the terminator. */
                        return TERMINATOR;
                }

{whitespace}    ;
%%

 

这里一般不是太复杂,因为词法分析的结果只是为语法分析而作的准备。是为了语法树的构建而准备。

这样词法分析就这样做好了,是很简单吧。词法分析写好了,接着我们开始编写语法分析代码。

 

语法分析代码编写,首先定义处理规则,

 

/*
 * NOTE(review): oper() is user-defined and not shown here. OPT_BROTHER
 * presumably links its operands on the same level and OPT_SON presumably
 * hangs the later operands below the first one; confirm against oper().
 */

/* Zero or more statements, each followed by an index id and a terminator. */
program:
        program stmt INDID TERMINATOR {
                /* $2 is the tree built for the statement. */
        }
        |
        ;

/* SELECT ... FROM ... [WHERE ...] [GROUP BY ...] [ORDER BY ...] */
stmt:
         st_list fm_list                                {$$=oper(OPT_BROTHER,2,$1,$2);}
        |st_list fm_list oy_list                        {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |st_list fm_list gy_list                        {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |st_list fm_list wh_list                        {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |st_list fm_list wh_list gy_list                {$$=oper(OPT_BROTHER,4,$1,$2,$3,$4);}
        |st_list fm_list gy_list oy_list                {$$=oper(OPT_BROTHER,4,$1,$2,$3,$4);}
        |st_list fm_list wh_list oy_list                {$$=oper(OPT_BROTHER,4,$1,$2,$3,$4);}
        |st_list fm_list wh_list gy_list oy_list        {$$=oper(OPT_BROTHER,5,$1,$2,$3,$4,$5);}
        ;

/* SELECT column list */
st_list:
        ST name_list                    {$$=oper(OPT_SON,2,$1,$2);}
        ;

/* FROM clause: table names and parenthesised sub-queries, comma separated,
   each optionally followed by AS alias */
fm_list:
        FM NAME                         {$$=oper(OPT_SON,2,$1,$2);}
        |FM LBORDER stmt RBORDER        {$$=oper(OPT_SON,4,$1,$2,$3,$4);}
        |fm_list NSPLIT NAME            {$$=oper(OPT_SON,3,$1,$2,$3);}
        |fm_list NSPLIT LBORDER stmt RBORDER {$$=oper(OPT_SON,5,$1,$2,$3,$4,$5);}
        |fm_list DEFINE NAME            {$$=oper(OPT_SON,3,$1,$2,$3);}
        ;

/* ORDER BY clause */
oy_list:
        OY name_list                    {$$=oper(OPT_SON,2,$1,$2);}
        ;

/* GROUP BY clause */
gy_list:
        GY name_list                    {$$=oper(OPT_SON,2,$1,$2);}
        ;

/* WHERE clause */
wh_list:
        WH whname_list                  {$$=oper(OPT_SON,2,$1,$2);}
        ;

/* Comma separated names, each optionally followed by AS alias */
name_list:
         NAME                           {$$=oper(OPT_BROTHER,1,$1);}
        |name_list DEFINE NAME          {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |name_list NSPLIT NAME          {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        ;

/* Conditions joined by AND / OR */
whname_list:
        whname                          {$$=oper(OPT_BROTHER,1,$1);}
        |whname_list WHSPLIT whname     {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        ;

/* One comparison; the right-hand operand may be a VARIABLE, a NAME or a NUMBER */
whname:
         NAME OPERATOR VARIABLE         {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |NAME OPERATOR NAME             {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        |NAME OPERATOR NUMBER           {$$=oper(OPT_BROTHER,3,$1,$2,$3);}
        ;

%%

 

 

 

以上就是SQL SELECT语句语法分析的大体规则,这规则能完成select语句大部分的功能的分析,例如这样形式的语句,

    select id,name from table;

    select count(id),sum(bal) from (select id,name,bal from table) where name='sss' group by class;

     等类似语句的分析。

 

   在规则中的oper函数可以自己定义,然后根据你的情况和需求来处理,通过这些规则我们将根据自己需要构建语法树或者图,

    最后我们将在这里得到这个树的根节点,

      program stmt INDID TERMINATOR {

                /* $2 is the value of stmt: the root node of the tree built for the statement just parsed */
                displayTree($2);

       }

然后在这里可以对这个树进行处理,

 

 

 

你可能感兴趣的:(lex和yacc解析SQL语句语法解析(1))