编译原理 —— C语言词法分析程序(C++实现)

测试文件:
编译原理 —— C语言词法分析程序(C++实现)_第1张图片

运行结果:
编译原理 —— C语言词法分析程序(C++实现)_第2张图片
单词种别码表:
编译原理 —— C语言词法分析程序(C++实现)_第3张图片


源代码

lex.h

#ifndef LEX_H_INCLUDED
#define LEX_H_INCLUDED

#include 
#include 
#include 
#include 
#include 
using namespace std;

typedef struct  /* 单词二元组的结构 */
{
    int typenum;    /* 种别码 */
    string token;    /* 自身字符串 */
} WORD;


WORD* scaner(FILE *fp); // 对文件扫描读取
int keyOrIdentifier(string token);  // 判断是关键字还是标识符
#endif // LEX_H_INCLUDED

lex.cpp

#include "lex.h"

#define _KEY_WORD_END "waiting for your expanding"  /* 定义关键字结束标志 */
string KEY_WORDS[]= {"main","return","if","else","for","include","printf","int","char",_KEY_WORD_END};   /*可扩充的关键字词组*/
int line_num=1;
/* 判断是标识符还是关键字*/
int keyOrIdentifier(string token)
{
    int i=0;
    while(KEY_WORDS[i]!=_KEY_WORD_END)
    {
        if(KEY_WORDS[i]==token)
        {
            return i+1;
        }
        i=i+1;
    }
    return 10;
}

WORD* scaner(FILE *fp)
{
    char ch;
    string token="";
    WORD* myword=new WORD;
    myword->typenum=0;
    myword->token="";

    ch=fgetc(fp);

    /* 判断回车 */
    if(int(ch)==10)
    {
        line_num++;
        return (myword);
    }
    /* 判断空格 */
    else if(isspace(ch))
    {
        return (myword);
    }
    /* 标识符及关键字 */
    else if(isalpha(ch))    //如果首字符是字母
    {
        while(isalpha(ch)||isdigit(ch))
        {
            token=token+ch;
            ch=fgetc(fp);
        }
        fseek( fp, -1, SEEK_CUR );  //当获取的字符既不是字母也不是数字,则指针依然指向该字符
        myword->typenum=keyOrIdentifier(token);  // 判断获取的字符串是关键字还是标识符,获取它对应的类型
        myword->token=token;
        return(myword);
    }

    /* 数字 */
    else if(isdigit(ch))
    {
        while(isdigit(ch))
        {
            token=token+ch;
            ch=fgetc(fp);
        }
        fseek( fp, 0, SEEK_CUR );
        myword->typenum=20;
        myword->token=token;
        return(myword);
    }
    else
        switch(ch)
        {

        /* 运算符 */
        case '+':
            ch=fgetc(fp);
            if (ch=='+')
            {
                myword->typenum=16;
                myword->token="++";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=12;
            myword->token="+";
            return(myword);
            break;
        case '-':
            ch=fgetc(fp);
            if (ch=='-')
            {
                myword->typenum=17;
                myword->token="--";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=13;
            myword->token="-";
            return(myword);
            break;
        case '*':
            myword->typenum=14;
            myword->token="*";
            return(myword);
            break;
        /* 除号和注释 */
        case '/':
            ch=fgetc(fp);
            if (ch=='/')
            {
                ch=fgetc(fp);
                while(int(ch)!=10&&ch!=EOF) // 发现注释符号,则一直读取,直到遇到回车为止
                {
                    ch=fgetc(fp);
                }
                if(int(ch)==10)
                {
                    line_num++;
                }
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=15;
            myword->token="/";
            return(myword);
            break;
        case '(':
            myword->typenum=21;
            myword->token="(";
            return(myword);
            break;
        case '=':
            ch=fgetc(fp);
            if (ch=='=')
            {
                myword->typenum=18;
                myword->token="==";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=20;
            myword->token="=";
            return(myword);
            break;
        case '!':
            ch=fgetc(fp);
            if (ch=='=')
            {
                myword->typenum=19;
                myword->token="!=";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=-1;
            myword->token="ERROR";
            return(myword);
            break;
        case '>':
            ch=fgetc(fp);
            if (ch=='=')
            {
                myword->typenum=37;
                myword->token=">=";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=35;
            myword->token=">";
            return(myword);
            break;
        case '<':
            ch=fgetc(fp);
            if (ch=='=')
            {
                myword->typenum=38;
                myword->token="<=";
                return(myword);
            }
            fseek( fp, 0, SEEK_CUR );
            myword->typenum=36;
            myword->token="<";
            return(myword);
            break;


        /* 界符 */
        case ')':
            myword->typenum=22;
            myword->token=")";
            return(myword);
            break;
        case '[':
            myword->typenum=23;
            myword->token="[";
            return(myword);
            break;
        case ']':
            myword->typenum=24;
            myword->token="]";
            return(myword);
            break;
        case '{':
            myword->typenum=25;
            myword->token="{";
            return(myword);
            break;
        case '}':
            myword->typenum=26;
            myword->token="}";
            return(myword);
            break;
        case ',':
            myword->typenum=27;
            myword->token=",";
            return(myword);
            break;
        case ':':
            myword->typenum=28;
            myword->token=":";
            return(myword);
            break;
        case ';':
            myword->typenum=29;
            myword->token=";";
            return(myword);
            break;
        case '"':
            myword->typenum=34;
            myword->token="\"";
            return(myword);
            break;


        /* 结束符*/
        case EOF:
            myword->typenum=1000;
            myword->token="OVER";
            return(myword);
            break;

        /* 错误*/
        default:
            myword->typenum=-1;
            myword->token="ERROR";
            cout<<"第"<<line_num<<"行发生了错误!"<<endl;
            return(myword);
        }
}

main.cpp

/*
    输入:所给文法的源程序字符串
    输出:二元组(syn,token 或 sum)构成的序列
    其中,syn为单词种别码;token为存放的单词自身字符串;sum为整型常数
*/
#include "lex.h"

int main()
{
    WORD* oneword=new WORD;
    FILE *fp;
    int over=1; //如果词法解析出现错误,则结束读取
    if( (fp=fopen("input.txt","rt")) == NULL ){
        cout<<"Cannot open file, press any key to exit!"<<endl;
    }
    while(over<1000&&over!=-1)
    {
        oneword=scaner(fp);
        if(oneword->typenum<1000&&oneword->typenum>0)
            cout<<"("<<oneword->typenum<<","<<oneword->token<<")"<<endl;
        over=oneword->typenum;
    }
    fclose(fp);
    system("pause");

    return 0;
}

你可能感兴趣的:(编译原理)