PASCAL语言子集的词法、语法分析器之实现

针对简单的文法(PASCAL语言子集),制作相应的词法分析器和递归下降的语法分析器。

       文法要求如下:

1、  关键字、标识符、数字等:

1.begin         2.if             3.then         4.while       5.do            6.end          10.标识符           11.数字

13.+             14.-            15.*            16./             17.:             18.:=          20.<                     21.<>

22.<=          23.>           24.>=         25.=           26.;             27.(            28.)

2、  文法规则:

程序 → begin 语句串 end

语句串 → 语句 { ; 语句 }

语句 → 赋值语句 | 条件语句 | 循环语句

赋值语句 → 变量 := 表达式

条件语句 → if 条件 then ( 语句 | 程序 )

循环语句 → while 条件 do ( 语句 | 程序 )

表达式 → 项 { + 项 | - 项 }

条件 → 表达式 关系符 表达式

关系符 → < | <> | <= | > | >= | =

项 → 因子 { * 因子 | / 因子 }

因子 → 变量 | 数字 | ( 表达式 )

变量 → 标识符


一、  词法分析器

词法分析器的任务是清除源文件中多余的空格、换行、制表符等,识别文法符号,并按顺序输出识别出的单词及其种别编号(每行一个,格式为“单词,编号”),供语法分析器调用。源程序以“#”作为结束符,其种别编号为 0;无法识别的字符输出一行“Error!”。

 

代码如下:

 

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

/* Minimal boolean vocabulary used by the scanner. */
#define BOOL  int
#define TRUE  1
#define FALSE 0

/* Capacity of the token buffer; the character queue holds twice as much. */
#define MAXSIZE 50

 

 

/* Element type stored in the buffer queue. */
typedef char datatype;

/*
 * Circular buffer queue of source characters.
 *
 * `front` is the index just before the oldest element and `rear` is the
 * index of the newest one.  The queue is empty when the two are equal and
 * full when advancing `rear` would make them equal, so it holds at most
 * MAXSIZE*2 - 1 characters.
 */
typedef struct
{
  datatype data[MAXSIZE*2];
  int front;
  int rear;
}Queue;

/* Put the queue into its empty state: both indices point at the last slot. */
void setnull(Queue *q)
{
  const int last = MAXSIZE*2 - 1;

  q->rear = last;
  q->front = last;
}

/* Return TRUE when the queue holds no characters, FALSE otherwise. */
BOOL empty(Queue *q)
{
  return (q->front != q->rear) ? FALSE : TRUE;
}

/*
 * Return TRUE when no further character can be enqueued, i.e. when the
 * slot after `rear` (with wrap-around) is the `front` slot.
 */
BOOL full(Queue *q)
{
  int next = (q->rear + 1) % (MAXSIZE*2);

  return (next == q->front) ? TRUE : FALSE;
}

/* Return the number of characters currently stored in the queue. */
int quantity(Queue *q)
{
  int count = q->rear - q->front;

  /* A negative difference means `rear` has wrapped around past the end. */
  return (count >= 0) ? count : count + MAXSIZE*2;
}

/*
 * Peek at the oldest character without removing it.
 * Returns 0 when the queue is empty.
 */
datatype front(Queue *q)
{
  if(!empty(q))
       return q->data[(q->front + 1) % (MAXSIZE*2)];
  return 0;
}

/*
 * Append character `x` to the queue.
 * Returns TRUE on success, FALSE (queue unchanged) when the queue is full.
 */
BOOL enqueue(Queue *q,datatype x)
{
  if(full(q))
       return FALSE;

  /* Advance with wrap-around.  Written without `++` because
     `q->rear = ++q->rear % n` modifies q->rear twice with no sequencing
     between the two writes, which is undefined behaviour. */
  q->rear = (q->rear + 1) % (MAXSIZE*2);
  q->data[q->rear] = x;
  return TRUE;
}

/*
 * Remove and return the oldest character.
 * Returns 0 when the queue is empty.
 */
datatype dequeue(Queue *q)
{
  if(empty(q))
       return 0;

  /* Advance with wrap-around.  Written without `++` because
     `q->front = ++q->front % n` modifies q->front twice with no sequencing
     between the two writes, which is undefined behaviour. */
  q->front = (q->front + 1) % (MAXSIZE*2);
  return q->data[q->front];
}

 

/* Text of the token most recently recognized by the scanner. */
char token[MAXSIZE];

/* Reserved words; a keyword's category code is its index here plus one. */
char *rwtab[6] = {
  "begin", "if", "then", "while", "do", "end"
};

/* Category code of the most recent token (0 for '#', -1 for an error). */
int syn;

/* Queue of source characters waiting to be scanned. */
Queue prog;

 

/* Return TRUE when `ch` is an ASCII letter (a-z or A-Z), FALSE otherwise. */
BOOL letter(char ch)
{
  BOOL lower = (ch >= 'a' && ch <= 'z');
  BOOL upper = (ch >= 'A' && ch <= 'Z');

  return (lower || upper) ? TRUE : FALSE;
}

/* Return TRUE when `ch` is a decimal digit (0-9), FALSE otherwise. */
BOOL digit(char ch)
{
  return (ch < '0' || ch > '9') ? FALSE : TRUE;
}

void saner()                                /*扫描器*/

{

  int i;

  char ch;

  for(i=0;i<50;i++)

       token[i]=0;

  i=0;

  do                                        /*去除多余空格、换行及制表符*/

  {

       ch=dequeue(&prog);

  }while(ch==' ' || ch=='\n' || ch=='\t');

  if(letter(ch))                            /*识别标识符(编号10)*/

  {

       while(1)

       {

           token[i++]=ch;

           ch=front(&prog);

           if(letter(ch) || digit(ch))

                dequeue(&prog);

           else

                break;

       }

       token[i]='\0';

       syn=10;

       for(i=0;i<6;i++)

           if(!strcmp(token,rwtab[i]))

                syn=i+1;                    /*识别关键字(编号1到6)*/

  }

  else if(digit(ch))                   /*识别无符号整数(编号11)*/

  {

       while(1)

       {

           token[i++]=ch;

           ch=front(&prog);

           if(digit(ch))

                dequeue(&prog);

           else

                break;

       }

       token[i]='\0';

       syn=11;

  }

  else

       switch(ch)

       {

       case '#':                        /*识别结束符‘#’(编号0)*/

           syn=0;

           token[i++]='#';

           token[i]='\0';

           break;

       case '+':                        /*识别‘+’(编号13)*/

           syn=13;

           token[i++]='+';

           token[i]='\0';

           break;

       case '-':                        /*识别‘-’(编号14)*/

           syn=14;

           token[i++]='-';

           token[i]='\0';

           break;

       case '*':                        /*识别‘*’(编号15)*/

           syn=15;

           token[i++]='*';

           token[i]='\0';

           break;

       case '/':                        /*识别‘/’(编号16)*/

           syn=16;

           token[i++]='/';

           token[i]='\0';

           break;

       case ':':

           token[i++]=':';

           ch=front(&prog);

           switch(ch)

           {

           case '=':                   /*识别‘:=’(编号18)*/

                syn=18;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*识别‘:’(编号17)*/

                syn=17;

                token[i]='\0';

                break;

           }

           break;

       case '<':

           token[i++]='<';

           ch=front(&prog);

           switch(ch)

           {

           case '>':                   /*识别‘<>’(编号21)*/

                syn=21;

                token[i++]='>';

                token[i]='\0';

                dequeue(&prog);

                break;

           case '=':                   /*识别‘<=’(编号22)*/

                syn=22;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*识别‘<’(编号20)*/

                syn=20;

                token[i]='\0';

                break;

           }

           break;

       case '>':

           token[i++]='>';

           ch=front(&prog);

           switch(ch)

           {

           case '=':                   /*识别‘>=’(编号24)*/

                syn=24;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*识别‘>’(编号23)*/

                syn=23;

                token[i]='\0';

                break;

           }

           break;

       case '=':                        /*识别‘=’(编号25)*/

           syn=25;

           token[i++]='=';

           token[i]='\0';

           break;

       case ';':                        /*识别‘;’(编号26)*/

           syn=26;

           token[i++]=';';

           token[i]='\0';

           break;

       case '(':                        /*识别‘(’(编号27)*/

           syn=27;

           token[i++]='(';

           token[i]='\0';

           break;

       case ')':                        /*识别‘)’(编号28)*/

           syn=28;

           token[i++]=')';

           token[i]='\0';

           break;

       default:                         /*出错!*/

           syn=-1;

           break;

       }

}

 

main(int argc,char* argv[])

{

  FILE *in,*out;

  int i;

  char ch;

  const char ofname[]="scaned.txt";

  setnull(&prog);                           /*缓冲队列初始化*/

  switch(argc)

  {

  case 2:

       if(!(in=fopen(argv[1],"r")))

       {

           printf("The file is not exist!");

           exit(1);

       }

       out=fopen(ofname,"w");

       break;

  case 3:

       if(!(in=fopen(argv[1],"r")))

       {

           printf("The file is not exist!");

           exit(1);

       }

       out=fopen(argv[2],"w");

       break;

  }

  do

  {

       switch(argc)

       {

       case 1:

           do

           {

                ch=getchar();

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));

           if(!(out=fopen(ofname,"a")))

                out=fopen(ofname,"w");

           break;

       case 2:

           do

           {

                ch=fgetc(in);

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));         

           if(ch=='#')

                fclose(in);

           break;

       case 3:

           do

           {

                ch=fgetc(in);

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));         

           if(ch=='#')

                fclose(in);

           break;

       default:

           printf("Input error!!");

           break;

       }

       do

       {

           saner();

           switch(syn)

           {

           case 0:

                fputc('#',out);

                fputc(',',out);

                fputc('0',out);

                fputc('\n',out);

                break;

           case -1:

                fprintf(out,"Error!\n");

                break;

           default:

                i=0;

                do

                {

                     fputc(token[i++],out);

                }while(token[i]!='\0');

                fputc(',',out);

                i=syn/10;

                if(i!=0)

                     fputc(i+48,out);

                fputc(syn%10+48,out);

                fputc('\n',out);

                break;

           }

       }while(syn!=0 && (quantity(&prog) > MAXSIZE || ch=='#'));

  }while(ch!='#');

  fclose(out);

}

 

 
二、  语法分析器

语法分析器的任务是根据词法分析的结果判断是否符合文法规则,并以一定形式输出语法树。(这里按逆波兰式输出。其中符号“!”代表条件语句运算符;符号“@”代表循环语句运算符)

代码如下:

 

#include <stdio.h>

#include <stdlib.h>

#include <conio.h>

/* Minimal boolean vocabulary used by the parser. */
#define BOOL  int
#define TRUE  1
#define FALSE 0

/* Capacity of the text buffer of one token. */
#define MAXSIZE 50

 

/* One token as read back from the scanner's output file. */
typedef struct
{
    int no;               /* category code parsed from the digits after the comma */
    char str[MAXSIZE];    /* token text that preceded the comma */
}Element;

 

/* Current look-ahead token shared by all parsing routines. */
Element ch;

/* Token list being parsed. */
FILE *in;

/* Destination of the reverse-Polish output. */
FILE *out;

 

/* Forward declarations: one routine per grammar symbol, plus two helpers. */
void scan();      /* read the next token into ch */
void error(int error); /* print the message for an error code and exit */
void P();         /* program */              /* P → begin S end */
void S();         /* statement list */       /* S → SS { ; SS } */
void SS();        /* statement */            /* SS → S1 | S2 | S3 */
void S1();        /* assignment statement */ /* S1 → V := E */
void S2();        /* conditional statement *//* S2 → if SS2 then ( SS | P ) */
void S3();        /* loop statement */       /* S3 → while SS2 do ( SS | P ) */
void E();         /* expression */           /* E → T { + T | - T } */
void SS2();       /* condition */            /* SS2 → E R E */
void R();         /* relational operator */  /* R → < | <> | <= | > | >= | = */
void T();         /* term */                 /* T → F { * F | / F } */
void F();         /* factor */               /* F → V | N | ( E ) */
void V();         /* variable */             /* V → W */
void W();         /* identifier */
void N();         /* number */

 

void scan()

{

    char buffer;

    int i=0;

    int temp=0;

    do

    {

         buffer=fgetc(in);

         ch.str[i++]=buffer;

    }while(buffer!=',');

    while(TRUE)

    {

         buffer=fgetc(in);

         if(buffer!='\n')

         {

             temp=10*temp+buffer-48;

         }

         else

             break;

    }

    ch.no = temp;

    ch.str[--i]='\0';

}

 

/*
 * Print the diagnostic for error code `n`, wait for a key press via
 * getch(), then terminate the program with exit status 1.
 * Codes without a dedicated message print "Compile failed!".
 */
void error(int n)
{
    const char *msg;

    switch(n)
    {
    case 1:  msg="标识符begin错误!";   break;
    case 2:  msg="标识符if错误!";      break;
    case 3:  msg="标识符then错误!";    break;
    case 4:  msg="标识符while错误!";   break;
    case 5:  msg="标识符do错误!";      break;
    case 6:  msg="标识符end错误!";     break;
    case 7:  msg="不是有效的句子!";    break;
    case 8:  msg="判断语句出错!";      break;
    case 9:  msg="循环语句出错!";      break;
    case 10: msg="不是正确的标识符!";  break;
    case 11: msg="条件表达式错误!";    break;
    case 12: msg="算术表达式错误!";    break;
    case 18: msg="赋值语句错误!";      break;
    case 28: msg="缺少')'!";           break;
    default: msg="Compile failed!";    break;
    }

    printf("%s",msg);
    getch();
    exit(1);
}

 

/*
 * Program:  P → begin S end
 * Reports error 1 when `begin` (code 1) is missing and error 6 when
 * `end` (code 6) is missing.
 */
void P()
{
    if(ch.no!=1)
    {
         error(1);
         return;
    }

    scan();
    S();

    if(ch.no!=6)
    {
         error(6);
         return;
    }
    scan();
}

 

/*
 * Statement list:  S → SS { ; SS }
 * Parses one statement, then another after every ';' (code 26).
 */
void S()
{
    for(SS(); ch.no==26; SS())
         scan();                 /* consume the ';' before the next statement */
}

 

/*
 * Statement:  SS → S1 | S2 | S3
 * Dispatches on the look-ahead token: identifier (10) starts an assignment,
 * `if` (2) a conditional, `while` (4) a loop; anything else is error 7.
 */
void SS()
{
    if(ch.no==10)
         S1();
    else if(ch.no==2)
         S2();
    else if(ch.no==4)
         S3();
    else
         error(7);
}

 

/*
 * Assignment:  S1 → V := E
 * Emits the variable, then the expression, then ":= " (postfix order).
 * Reports error 18 when ':=' (code 18) is missing.
 */
void S1()
{
    V();

    if(ch.no!=18)
    {
         error(18);
         return;
    }

    scan();
    E();
    fputs(":= ",out);
}

 

void S2()

{

    if(ch.no==2)

    {

         scan();

         SS2();

         if(ch.no==3)

         {

             scan();

             switch(ch.no)

             {

             case 10:

             case 2:

             case 4:

                  SS();

                  break;

             case 1:

                  P();

                  break;

             default:

                  error(8);

                  break;

             }

             fputc('!',out);

             fputc(' ',out);

         }

         else

             error(3);

    }

    else

         error(2);

}

 

void S3()

{

    if(ch.no==4)

    {

         scan();

         SS2();

         if(ch.no==5)

         {

             scan();

             switch(ch.no)

             {

             case 10:

             case 2:

             case 4:

                  SS();

                  break;

             case 1:

                  P();

                  break;

             default:

                  error(9);

                  break;

             }

             fputc('@',out);

             fputc(' ',out);

         }

         else

             error(5);

    }

    else

         error(4);

}

 

/* Variable:  V → W.  A variable is simply an identifier. */
void V()
{
    W();
}

 

/*
 * Expression:  E → T { + T | - T }
 * Emits each operand followed by its operator (postfix order).
 *
 * '+' (code 13) and '-' (code 14) are handled in one loop so they may be
 * mixed in any order.  The earlier form ran a '+' loop followed by a '-'
 * loop and therefore rejected inputs such as `a - b + c`.
 */
void E()
{
    T();
    while(ch.no==13 || ch.no==14)
    {
         char op = (ch.no==13) ? '+' : '-';

         scan();
         T();
         fputc(op,out);
         fputc(' ',out);
    }
}

 

void SS2()

{

    int temp;

    E();

    temp=ch.no;

    R();

    E();

    switch(temp)

    {

    case 20:

         fputc('<',out);

         fputc(' ',out);

         break;

    case 21:

         fputc('<',out);

         fputc('>',out);

         fputc(' ',out);

         break;

    case 22:

         fputc('<',out);

         fputc('=',out);

         fputc(' ',out);

         break;

    case 23:

         fputc('>',out);

         fputc(' ',out);

         break;

    case 24:

         fputc('>',out);

         fputc('=',out);

         fputc(' ',out);

         break;

    case 25:

         fputc('=',out);

         fputc(' ',out);

         break;

    }

}

 

void R()

{

    switch(ch.no)

    {

    case 20:

         scan();

         break;

    case 21:

         scan();

         break;

    case 22:

         scan();

         break;

    case 23:

         scan();

         break;

    case 24:

         scan();

         break;

    case 25:

         scan();

         break;

    default:

         error(11);

         break;

    }

}

 

/*
 * Identifier: writes the identifier's text followed by a space to `out`
 * and advances to the next token.  Reports error 10 when the current
 * token is not an identifier (code 10).
 */
void W()
{
    if(ch.no!=10)
    {
         error(10);
         return;
    }

    fputs(ch.str,out);
    fputc(' ',out);
    scan();
}

 

/*
 * Term:  T → F { * F | / F }
 * Emits each factor followed by its operator (postfix order).
 *
 * '*' (code 15) and '/' (code 16) are handled in one loop so they may be
 * mixed in any order.  The earlier form ran a '*' loop followed by a '/'
 * loop and therefore rejected inputs such as `a / b * c`.
 */
void T()
{
    F();
    while(ch.no==15 || ch.no==16)
    {
         char op = (ch.no==15) ? '*' : '/';

         scan();
         F();
         fputc(op,out);
         fputc(' ',out);
    }
}

 

void F()

{

    switch(ch.no)

    {

    case 10:

         V();

         break;

    case 11:

         N();

         break;

    case 27:

         scan();

         E();

         if(ch.no==28)

             scan();

         else

             error(28);

         break;

    default:

         error(12);

         break;

    }

}

 

/*
 * Number: writes the number's text followed by a space to `out` and
 * advances to the next token.  Reports error 12 when the current token is
 * not a number (code 11).
 */
void N()
{
    if(ch.no!=11)
    {
         error(12);
         return;
    }

    fputs(ch.str,out);
    fputc(' ',out);
    scan();
}

 

/*
 * Parser driver: reads the token list from "scaned.txt", parses it as a
 * program and writes the reverse-Polish form to "compiled.txt".
 *
 * On success the output is terminated with '#', "Success!" is printed and
 * the program waits for a key press.  Any failure, including being unable
 * to open either file, goes through error(), which prints a message and
 * exits with status 1.
 */
int main(void)
{
    const char* input="scaned.txt";
    const char* output="compiled.txt";

    if(!(in=fopen(input,"r")))
         error(-1);

    /* Checked because every parsing routine writes to `out`. */
    if(!(out=fopen(output,"w")))
    {
         fclose(in);
         error(-1);
    }

    scan();                      /* prime the look-ahead token */
    P();

    if(ch.no==0)                 /* the whole input up to '#' was consumed */
    {
         printf("Success!");
         fputc('#',out);
    }
    else
         error(-1);

    fclose(in);
    fclose(out);
    getch();
    return 0;
}

 

你可能感兴趣的:(PASCAL语言子集的词法、语法分析器之实现)