【词法分析器 】 编译原理作业

自己写的版本:

问题:

1:考虑到是嵌套接收,浮点型处理起来稍显麻烦(还要判断是否出现两个小数点等),故没有实现

2:对于一些特殊符号,如三元运算符,格式控制符%d等分析不到位

3:类别应该分的更细,否则用唯一的symbol(sym)标识的话无法进行后续的语法分析

4:没有用文件指针,数据输入要在控制台,不利于交互

 

#include <iostream>

#include <string.h>

#include <cstdio>

using namespace std;

// Shared lexer state used by function() and main():
//   index     - current read position in the input buffer s
//   sub_index - current write position in the token buffer str
//   num       - value of the most recent numeric token (values that do not
//               fit the checked range are reported as Error via sym == -1)
//   sym       - category code of the most recent token, as assigned in
//               function(): 0 end marker '#', 1 keyword, 2 number,
//               3 identifier, 4 operator, 5 delimiter, -1 error
// NOTE(review): some <string.h> implementations also declare a legacy
// index() function in the global namespace, which would clash with the
// variable below - confirm on the target toolchain.
int index,sub_index,num,sym;

// Character currently being examined by the scanner.
char ch;

char s[50000+10];   // input buffer, filled with cin.get(ch) until '#' is read (newlines and spaces do not end the input)

char str[15];       // text of the current non-numeric token; numeric tokens are reported through num instead

char keyword[27][10]={

    "auto","break","case","char","int","long","double","float","const",

    "continue","default","do","else","enum","extern","for","goto","return",

    "sizeof","static","struct","switch","typedef","union","unsigned","void","while"

};// 27 basic C keywords for now; the list is probably still incomplete





/// Returns the character at position `index` of the input buffer `s` and
/// advances `index`. Positions outside the buffer read as '\0', so the
/// scanner can never walk off either end of the array. `index` is always
/// advanced, which keeps a following `index--` push-back symmetric.
static char next_source_char()
{
    const int pos = index++;
    return (pos >= 0 && pos < static_cast<int>(sizeof(s))) ? s[pos] : '\0';
}

/// Scans an operator whose first character is the current `ch`.
/// If the next input character is one of `followers`, it is appended to form
/// a two-character operator; otherwise it is pushed back for the next token.
/// The result is stored in `str` with category code 4 (operator).
static void scan_operator(const char *followers)
{
    sub_index = 0;
    str[sub_index++] = ch;
    ch = next_source_char();
    // strchr() would also match the terminating NUL, so exclude it explicitly.
    if (ch != '\0' && strchr(followers, ch) != 0) {
        str[sub_index++] = ch;
    } else {
        index--;                      // not part of this operator: give it back
    }
    sym = 4;
}

/// Scans the next token from the global buffer `s`, starting at `index`.
///
/// Results are delivered through the globals:
///   sym - 0 end marker, 1 keyword, 2 number, 3 identifier, 4 operator,
///         5 delimiter, -1 number out of range
///   str - token text (all tokens except numbers)
///   num - token value (numbers only)
///
/// White space and `//` line comments are skipped. Identifiers longer than
/// the capacity of `str` are truncated instead of overflowing the buffer.
/// A '\0' in the input (i.e. the end of the filled part of the buffer) is
/// treated like the '#' end marker, so a missing terminator cannot make the
/// scanner run away.
void function()
{
    const int kMaxNumber = (1 << 15) - 1;                       // largest accepted literal
    const int kStrLast   = static_cast<int>(sizeof(str)) - 1;   // slot reserved for the NUL

    // Skip white space and any number of consecutive line comments.
    // A loop is used instead of recursion so that a long run of comment
    // lines cannot exhaust the stack.
    for (;;) {
        for (int i = 0; i <= kStrLast; i++) str[i] = '\0';
        sub_index = 0;

        ch = next_source_char();
        while (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f') {
            ch = next_source_char();
        }

        if (ch != '/') break;
        if (next_source_char() != '/') {
            index--;                  // plain '/' or '/=': handled as an operator below
            break;
        }
        // Line comment: discard everything up to the end of the line. Stop at
        // '\0' as well, so a comment on the very last line cannot run past
        // the end of the input.
        do {
            ch = next_source_char();
        } while (ch != '\n' && ch != '\0');
        if (ch == '\0') index--;      // let the next round report end of input
    }

    // Identifier or keyword.
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_') {
        while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
               (ch >= '0' && ch <= '9') || ch == '_') {
            if (sub_index < kStrLast) str[sub_index++] = ch;   // truncate overlong names
            ch = next_source_char();
        }
        index--;                      // give back the character that ended the name
        sym = 3;
        for (int i = 0; i < 27; i++) {
            if (!strcmp(str, keyword[i])) {
                sym = 1;
                break;
            }
        }
        return;
    }

    // Unsigned integer literal.
    if (ch >= '0' && ch <= '9') {
        bool out_of_range = false;
        num = 0;
        while (ch >= '0' && ch <= '9') {
            // Stop accumulating once the limit is exceeded; this keeps the
            // arithmetic free of signed overflow for arbitrarily long digit runs.
            if (!out_of_range) {
                num = num * 10 + (ch - '0');
                if (num > kMaxNumber) out_of_range = true;
            }
            ch = next_source_char();
        }
        index--;
        sym = out_of_range ? -1 : 2;
        return;
    }

    switch (ch) {
    // One- and two-character operators.
    case '<': scan_operator("=<");  break;    // <  <=  <<
    case '>': scan_operator("=>");  break;    // >  >=  >>
    case '+': scan_operator("=+");  break;    // +  +=  ++
    case '-': scan_operator("=->"); break;    // -  -=  --  ->
    case '&': scan_operator("=&");  break;    // &  &=  &&
    case '|': scan_operator("=|");  break;    // |  |=  ||
    case '/': case '!': case '*': case '^': case '=': case '%':
        scan_operator("=");                   // x  x=
        break;

    // Delimiters.
    case '(': case ')': case ';': case '[': case ']':
    case ',': case '{': case '}':
        sym = 5;
        str[0] = ch;
        break;

    // End marker.
    case '#':
        sym = 0;
        str[0] = ch;
        break;

    // End of the filled buffer without an explicit '#': report the end
    // marker anyway and stay on this position for any further calls.
    case '\0':
        index--;
        sym = 0;
        str[0] = '#';
        break;

    // Anything else is reported as a single-character operator.
    default:
        sym = 4;
        str[0] = ch;
        break;
    }
}



/// Reads source text from standard input up to and including the first '#',
/// then prints one line per token produced by function():
///   "Error"          for an out-of-range number (sym == -1)
///   "[2,<value>]"    for numbers
///   "[<sym>,<text>]" for everything else
/// Reading also stops at end of input or when the buffer is full; in both
/// cases a '#' is appended so the scanner always finds its end marker.
int main()
{
    //freopen("in.txt","r",stdin);
//    freopen("out.txt","w",stdout);

    cout << "Please input the program code:(over with'#') "<< endl;

    // One slot is kept free so the end marker can always be stored.
    const int kLastSlot = static_cast<int>(sizeof(s)) - 1;
    bool terminated = false;

    index = 0;
    // cin.get() is tested directly: on EOF or a stream error it fails and
    // leaves ch unchanged, which would otherwise loop forever.
    while (index < kLastSlot && cin.get(ch)) {
        s[index++] = ch;
        if (ch == '#') {
            terminated = true;
            break;
        }
    }
    if (!terminated) {
        s[index++] = '#';
    }

    index = 0;
    do {
        function();
        if (sym == -1) {
            cout << "Error" <<endl;
        } else if (sym == 2) {
            cout<<"["<<sym<<","<<num<<"]"<<endl;
        } else {
            cout <<"["<<sym<<","<<str<<"]"<<endl;
        }
    } while (sym != 0);

    return 0;
}

 

 

 

看到一个别人实现的不错版本:

1 调用ctype虽然简易,但在一些类型符的判断上会出现一些小错误

2 txt文件光标不能直接放在文件末(当然通过改主函数也可以避免问题~)

3 所有标识符用二维数组处理很好,整齐简便,又有文件读写操作,健壮性强,值得学习。

 

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <ctype.h>

#include <malloc.h>

#include <conio.h>

#define NULL 0

#define true 1

#define false 0

// Source file being scanned; opened and closed by main().
FILE *fp;

char ch;

// Word tables consulted by search(). The literals are never modified, so they
// are held through pointers to const.

// wordtype 1: keywords
const char *keyword[34]={"auto","break","case","char","const","continue","default","do","double",
"else","enum","extern","float","for","goto","if","int","long","register",
"return","short","signed","sizeof","static","struct","switch","typedef", "printf",
"union","unsigned","void","volatile","while","main"};

// wordtype 2: arithmetic operators
const char *operatornum[6]={"+","-","*","/","++","--"};

// wordtype 3: comparison operators
const char *comparison[8]={"<","<=","=",">",">=","<>","==","!="};

// wordtype 4: separators
const char *interpunction[8]={",",";",":=",".","(",")","{","}"};

// wordtype 5: special symbols
const char *biaoshifu[6]={"%","$","^","&","_","#"};

// wordtype 6: comment markers. All three slots are initialised; a slot left
// empty would be a null pointer and must never reach strcmp().
const char *zhushifu[3]={"//","/*","*/"};

// wordtype 7: logical operators
const char *luoji[3]={"&&","||","!"};

//////////////////////////////////////////////////////////////////////////////////////////

// Reports whether searchstr is listed in the word table selected by wordtype.
//
//   searchstr - NUL-terminated word to look up
//   wordtype  - 1 keywords, 2 arithmetic operators, 3 comparison operators,
//               4 separators, 5 special symbols, 6 comment markers,
//               7 logical operators
//
// Returns 1 when the word is found, 0 when it is not found or wordtype does
// not name a table. Each lookup consults exactly one table.
char search(char searchstr[],int wordtype)
{
    const char **table;
    int count;
    int i;

    switch (wordtype)
    {
    case 1: table = keyword;       count = (int)(sizeof(keyword)/sizeof(keyword[0]));             break;
    case 2: table = operatornum;   count = (int)(sizeof(operatornum)/sizeof(operatornum[0]));     break;
    case 3: table = comparison;    count = (int)(sizeof(comparison)/sizeof(comparison[0]));       break;
    case 4: table = interpunction; count = (int)(sizeof(interpunction)/sizeof(interpunction[0])); break;
    case 5: table = biaoshifu;     count = (int)(sizeof(biaoshifu)/sizeof(biaoshifu[0]));         break;
    case 6: table = zhushifu;      count = (int)(sizeof(zhushifu)/sizeof(zhushifu[0]));           break;
    case 7: table = luoji;         count = (int)(sizeof(luoji)/sizeof(luoji[0]));                 break;
    default: return 0;
    }

    for (i = 0; i < count; i++)
    {
        // Skip empty slots so a partially initialised table cannot crash strcmp().
        if (table[i] != 0 && strcmp(table[i], searchstr) == 0)
            return 1;
    }
    return 0;
}

////////////////////////

// Handles a word that starts with a letter.
//
// Starting with ch, consumes letters and digits from fp, then prints the word
// either as a keyword (code 1) or as a user-defined identifier (code 2).
// "main" is in the keyword table but is deliberately printed as an identifier.
// Words longer than the local buffer are truncated for printing; the excess
// characters are still consumed so scanning resumes after the whole word.
//
// Returns the first character after the word, or (char)EOF at end of file.
char letterprocess (char ch)//letter handler
{
    char letter[20];
    const int last = (int)sizeof(letter) - 1;   // slot reserved for the NUL
    int len = 0;
    // Work on an int: the <ctype.h> functions require an unsigned char value
    // or EOF, and fgetc() needs an int to represent EOF distinctly.
    int c = (unsigned char)ch;

    while (c != EOF && isalnum(c) != 0)
    {
        if (len < last)
            letter[len++] = (char)c;
        c = fgetc(fp);
    }
    letter[len] = '\0';

    if (search(letter,1) && strcmp(letter,"main") != 0)
        printf("<1,关键字,%s>\n",letter);//keyword, code 1
    else
        printf("<2,自定义标识符,%s>\n",letter);//user-defined identifier, code 2

    return (char)c;
}

/////////////////////////

// Handles a token that starts with a digit.
//
// Starting with ch, consumes a run of digits from fp and prints it as a
// number (code 3). If a letter follows the digits directly, the rest of the
// word up to the next white space (or end of file) is consumed as well and
// the whole text is reported as an illegal identifier.
// Text longer than the local buffer is truncated for printing; the excess
// characters are still consumed.
//
// Returns the first character after the token, or (char)EOF at end of file.
char numberprocess(char ch)//number handler
{
    char num[20];
    const int last = (int)sizeof(num) - 1;   // slot reserved for the NUL
    int len = 0;
    // int, not char: required by <ctype.h> and needed to recognise EOF.
    int c = (unsigned char)ch;

    while (c != EOF && isdigit(c) != 0)
    {
        if (len < last)
            num[len++] = (char)c;
        c = fgetc(fp);
    }

    if (c != EOF && isalpha(c) != 0)
    {
        // isspace(EOF) is 0, so end of file must be tested explicitly or
        // this loop would never finish.
        while (c != EOF && isspace(c) == 0)
        {
            if (len < last)
                num[len++] = (char)c;
            c = fgetc(fp);
        }
        num[len] = '\0';
        printf("错误!非法标识符:%s\n",num);
        return (char)c;
    }

    num[len] = '\0';
    printf("<3,数字,%s>\n",num);//number, code 3
    return (char)c;
}

/////////////////////////

// Handles everything that is neither a letter nor a digit.
//
// A white-space character is simply skipped. Otherwise the run of characters
// up to the next white space, letter, digit or end of file is collected and
// looked up in the word tables in this order: arithmetic operators,
// comparison operators, separators, special symbols, comment markers,
// logical operators. A run found in none of them is reported as illegal.
// Runs longer than the local buffer are truncated for the lookup; the excess
// characters are still consumed.
//
// Returns the first character after the run, or (char)EOF at end of file.
char otherprocess(char ch)
{
    char other[20];
    const int last = (int)sizeof(other) - 1;   // slot reserved for the NUL
    int len = 0;
    // int, not char: required by <ctype.h> and needed to recognise EOF.
    int c = (unsigned char)ch;

    if (isspace(c) != 0)
        return (char)fgetc(fp);

    // Both isspace(EOF) and isalnum(EOF) are 0, so end of file must be tested
    // explicitly; otherwise a file ending in punctuation loops forever.
    while (c != EOF && isspace(c) == 0 && isalnum(c) == 0)
    {
        if (len < last)
            other[len++] = (char)c;
        c = fgetc(fp);
    }
    other[len] = '\0';

    if (search(other,2))
        printf("<4,运算符,%s>\n",other);//arithmetic operator, code 4
    else if (search(other,3))
        printf("<4,运算符,%s>\n",other);//comparison operator, code 4
    else if (search(other,4))
        printf("<5,分隔符号,%s>\n",other);//separator, code 5
    else if (search(other,5))
        printf("<%s,特殊标识符号>\n",other);
    else if (search(other,6))
        printf("<%s,注释符号>\n",other);
    else if (search(other,7))
        printf("<%s,逻辑运算符号>\n",other);
    else
        printf("错误!非法字符:%s\n",other);

    return (char)c;
}

///////////////////////////

// Opens the source file, feeds it character by character to the letter,
// number and "other" handlers until end of file, then waits for a key press.
// Returns 0 on success and 1 when the source file cannot be opened.
int main ()
{
    char str;
    int status = 0;

    printf("**********************************词法分析器************************************\n");

    fp = fopen("F:\\写的代码\\tt\\源程序.txt","r");
    if (fp == NULL)
    {
        printf("源程序无法打开!\n");
        status = 1;
    }
    else
    {
        str = fgetc(fp);
        // The handlers hand back EOF narrowed to char, so compare against the
        // same narrowed value; a plain `str != EOF` never becomes false on
        // platforms where char is unsigned.
        while (str != (char)EOF)
        {
            if (isalpha((unsigned char)str) != 0)
                str = letterprocess(str);
            else if (isdigit((unsigned char)str) != 0)
                str = numberprocess(str);
            else
                str = otherprocess(str);
        }

        fclose(fp);
        fp = NULL;

        printf("词法分析结束,谢谢使用!\n");
        printf("点任意键退出!\n");
    }

    getch();   // keep the console window open until a key is pressed
    return status;
}

 

你可能感兴趣的:(编译原理)