编译原理 -词法分析添加注释过滤功能

编译原理实验:查了半天资料也没找到合适的实现,于是在原有代码的基础上做了修改,为词法分析器加上了注释过滤功能。目前没有考虑注释符号出现在引号(字符串)内的情况(主要是自己懒)。代码大体上是基于《编译原理》第三版中的示例改写的,写法不算特别好,仅供大家参考。

#include 
#include 
#include 
#include 
#include 
#include 
// Sentinel string stored as the last entry of KEY_WORDS. reserve() walks the
// table until it reaches this value, so new keywords must be inserted before it.
// NOTE(review): names that begin with an underscore followed by an uppercase
// letter are reserved for the implementation in C++; consider renaming (every
// use in this file would have to change together).
#define _KEY_WORD_END "waiting for your expanding"
// One token produced by scaner().
struct WORD
{
    // Token category code as assigned by scaner(): 1..7 for the keywords in
    // KEY_WORDS, 10 for identifiers, 20 for numbers, 21..40 for operators and
    // delimiters, -1 for an error and 1000 for end of input.
    int typenum;
    // Token text. Points either at a string literal or at the global token
    // buffer, so it is read-only and never owned by this struct. Declared
    // const because binding a string literal to a plain char* is ill-formed
    // since C++11.
    const char * word;
};
using namespace std;

// Scanner state used by the helper functions in this file.
char input[255] = {0};  // source text to be scanned, NUL-terminated
char token[255] = {0};  // text of the identifier/number currently being built
int  p_input = 0;       // index of the next unread character in input
int  p_token = 0;       // number of characters currently stored in token
char ch = '\0';         // most recently read character

// Reserved words recognised by reserve(); the keyword at index i is reported
// with code i+1. The table is terminated by the _KEY_WORD_END sentinel.
// The elements are const char* because string literals are immutable and may
// not be bound to a plain char* since C++11.
const char* KEY_WORDS[]={"main","int","char","if","else","for","while",_KEY_WORD_END};

// Reads the next token from input; defined further down in this file.
WORD* scaner();
int main()
{
    int over=1;
    WORD* oneword=new WORD;
    printf("Enter Your words(end with $):");
    scanf("%[^$]s",input);
    p_input=0;
    printf("Your words:\n%s\n",input);
    while(over<1000&&over!=-1){

        oneword=scaner();
        if(oneword->typenum<1000)
        printf("(%d,%s)",oneword->typenum,oneword->word);
        over=oneword->typenum;
    }
    printf("\npress # to exit:");
    scanf("%[^#]s",input);
    return 0;
}
// Reads the next character of input into ch, advances the read position and
// returns the character that was read.
char m_getch(){
    ch=input[p_input++];
    return ch;
}
// Skips blanks: while ch is a whitespace character, keeps reading the next
// character of input into ch. On return ch holds the first non-blank
// character (possibly the terminating NUL).
// Tabs and carriage returns are skipped as well as spaces and line feeds;
// otherwise tab-indented or CRLF-terminated source would reach scaner() as an
// unknown character and end the scan with an ERROR token.
void getbc(){
    while(ch==' '||ch=='\t'||ch=='\n'||ch=='\r'){
        ch=input[p_input];
        p_input=p_input+1;
    }
}
// Appends ch to the token buffer and keeps the buffer NUL-terminated.
// A character that would not fit (one slot is always kept for the NUL) is
// dropped instead of being written past the end of token.
void concat(){
    const int capacity=static_cast<int>(sizeof(token));
    if(p_token<0||p_token>=capacity-1)
        return;
    token[p_token]=ch;
    p_token=p_token+1;
    token[p_token]='\0';
}
// Returns 1 when ch is an ASCII letter (a-z or A-Z), otherwise 0.
int letter(){
    const bool is_lower=(ch>='a'&&ch<='z');
    const bool is_upper=(ch>='A'&&ch<='Z');
    return (is_lower||is_upper)?1:0;
}
// Returns 1 when ch is a decimal digit ('0'..'9'), otherwise 0.
int digit(){
    return ('0'<=ch&&ch<='9')?1:0;
}
// Looks token up in KEY_WORDS. Returns the 1-based position of the matching
// keyword, or 10 (the identifier code) when token is not a keyword. The search
// stops at the _KEY_WORD_END sentinel.
int reserve(){
    for(int idx=0;strcmp(KEY_WORDS[idx],_KEY_WORD_END)!=0;++idx){
        if(strcmp(KEY_WORDS[idx],token)==0)
            return idx+1;
    }
    return 10;
}
// Moves the read position back by one character so that the character most
// recently consumed will be read again. ch itself is left unchanged.
void retract(){
    --p_input;
}
// Placeholder that always returns a null pointer; nothing in the visible
// source calls it.
// NOTE(review): the name suggests a decimal-to-binary conversion that was
// never implemented - confirm before relying on it.
char* dtb(){
    return nullptr;
}


// Skips one comment starting at the current character.
//
// Expects ch to hold '/' with p_input one past it. Depending on the character
// that follows:
//   "//"  - consumes up to the end of the line (or end of input), then skips
//           blanks so that ch holds the first character after the comment.
//   "/*"  - consumes up to and including the closing "*/", then skips blanks.
//           If the comment is never closed, the '/' is printed as a division
//           token (25,/) and scanning resumes at the '*'.
//   other - not a comment: the look-ahead is undone and ch is left as '/'.
// Does nothing when ch is not '/'.
void filter()
{
    if(ch!='/')
        return;

    // Index in input of the '/' currently held in ch (the read position is
    // always one past the character in ch).
    const int start=p_input-1;

    m_getch();
    if(ch=='/')
    {
        // Line comment: read until a line feed or the end of input.
        m_getch();
        while(ch!='\0'&&ch!='\n')
            m_getch();
        getbc();
    }
    else if(ch=='*')
    {
        // Block comment: read until "*/" is found.
        m_getch();
        bool closed=false;
        while(ch!='\0')
        {
            if(ch=='*')
            {
                // Look at the character after '*'. When it is not '/', it is
                // left in ch so the loop examines it again; this handles runs
                // such as "**/".
                m_getch();
                if(ch=='/')
                {
                    m_getch();
                    getbc();
                    closed=true;
                    break;
                }
            }
            else
                m_getch();
        }
        if(!closed){
            // Unterminated comment: the '/' is emitted here by hand because
            // returning with ch=='/' would make the caller invoke filter()
            // again forever. Scanning then restarts at the '*' after it.
            printf("(%d,%s)",25,"/");
            p_input=start+1;
            m_getch();
            return;
        }
    }
    else
    {
        // Not a comment: push the look-ahead character back and restore ch,
        // otherwise the caller would see the character after '/' and the '/'
        // itself would be lost.
        retract();
        ch='/';
    }
}


// Stores the category code and text in w and returns w.
// The text is either a string literal or the global token buffer. The cast
// keeps the assignment well-formed whether WORD::word is declared char* or
// const char*; the stored text must never be written through.
static WORD* fill_token(WORD* w,int typenum,const char* text){
    w->typenum=typenum;
    w->word=const_cast<char*>(text);
    return w;
}

// Handles an operator that may be followed by '='. Reads one more character:
// if it is '=', the two-character token is produced; otherwise the character
// is pushed back and the single-character result is produced.
static WORD* fill_with_optional_eq(WORD* w,int pair_type,const char* pair_text,
                                   int single_type,const char* single_text){
    m_getch();
    if(ch=='=')
        return fill_token(w,pair_type,pair_text);
    retract();
    return fill_token(w,single_type,single_text);
}

// Reads the next token from input, starting at p_input.
//
// Returns a WORD allocated with new (the caller releases it with delete):
//   keyword            -> code from reserve() (1..7), text in token
//   identifier         -> 10, text in token
//   unsigned integer   -> 20, text in token
//   operator/delimiter -> 21..40 with its literal text
//   end of input       -> 1000, "OVER"
//   anything else      -> -1, "ERROR" (a lone '!' is an error as well)
// Blanks and comments in front of the token are skipped.
WORD* scaner(){
    WORD* myword=new WORD;
    p_token=0;
    m_getch();
    // Blanks must be skipped before comments are filtered, not after.
    getbc();
    // Loop so that several comments in a row are all skipped, e.g.
    //   //x
    //   //y
    while(ch=='/'&&(input[p_input]=='/'||input[p_input]=='*'))
        filter();

    if(letter()){
        // Identifier or keyword: a letter followed by letters and digits.
        while(letter()||digit()){
            concat();
            m_getch();
        }
        retract();
        return fill_token(myword,reserve(),token);
    }
    if(digit()){
        // Unsigned integer: a run of digits.
        while(digit()){
            concat();
            m_getch();
        }
        retract();
        return fill_token(myword,20,token);
    }

    switch(ch){
        case '=':   return fill_with_optional_eq(myword,39,"==",21,"=");
        case '>':   return fill_with_optional_eq(myword,37,">=",35,">");
        case '<':   return fill_with_optional_eq(myword,38,"<=",36,"<");
        // '!' is only valid as part of "!=".
        case '!':   return fill_with_optional_eq(myword,40,"!=",-1,"ERROR");
        case '+':   return fill_token(myword,22,"+");
        case '-':   return fill_token(myword,23,"-");
        case '*':   return fill_token(myword,24,"*");
        case '/':   return fill_token(myword,25,"/");
        case '(':   return fill_token(myword,26,"(");
        case ')':   return fill_token(myword,27,")");
        case '[':   return fill_token(myword,28,"[");
        case ']':   return fill_token(myword,29,"]");
        case '{':   return fill_token(myword,30,"{");
        case '}':   return fill_token(myword,31,"}");
        case ',':   return fill_token(myword,32,",");
        case ':':   return fill_token(myword,33,":");
        case ';':   return fill_token(myword,34,";");
        case '\0':  return fill_token(myword,1000,"OVER");
        default:    return fill_token(myword,-1,"ERROR");
    }
}

你可能感兴趣的:(编译原理)