一个简单的词法分析程序

最近上《编译原理》这门课程,老师要求我们自己写一个词法分析程序。要求如下:

要求

状态图

首先我们将文法转化为状态图~

状态图

符号表

然后我们建立好符号表~

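| 类别编码 | 单词名称 | 类别编码 | 单词名称 | 类别编码 | 单词名称 |
| --- | --- | --- | --- | --- | --- |
| 1 | const | 11 | end | 20 | `>` |
| 2 | var | 12 | read | 21 | `:=` |
| 3 | procedure | 13 | write | 22 | `+` |
| 4 | odd | 14 | 变量名 | 23 | `-` |
| 5 | if | 15 | 数字 | 24 | `*` |
| 6 | then | 16 | `<>` | 25 | `/` |
| 7 | while | 17 | `<=` | 26 | `(` |
| 8 | do | 18 | `<` | 27 | `)` |
| 9 | call | 19 | `>=` | 28 | `;` |
| 10 | begin | | | | |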

代码

#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using namespace std;

/// Lexical analyzer that splits the contents of a text file into
/// (category code, lexeme) pairs.
///
/// Category codes:
///   1..13  reserved words, in the order they appear in `rwtab`
///   14     identifier
///   15     unsigned integer literal
///   16 <>  17 <=  18 <   19 >=  20 >   21 :=
///   22 +   23 -   24 *   25 /   26 (   27 )   28 ;
class LexicalAnalysis{

    /// Reserved words; the word at index i has category code i + 1.
    static const std::vector<std::string> rwtab;

private:
    std::ifstream in;       // source file being analysed
    std::ofstream out;      // not used by any method of this class
    std::string buffer;     // whole input, one '\n' appended after every line
    std::vector<std::pair<int, std::string>> words;  // tokens found by Scanner()

    // The character tests below are deliberately ASCII-only range checks:
    // they are safe for bytes >= 0x80 (e.g. UTF-8 text), unlike <cctype>
    // functions called with a negative char.
    static bool isLetter(char c){
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    }

    static bool isDigit(char c){
        return '0' <= c && c <= '9';
    }

    static bool isBlank(char c){
        return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f';
    }

    /// Reads the whole input stream into `buffer`.
    ///
    /// The loop is driven by the result of getline rather than by eof(), so
    /// it terminates when the file could not be opened (buffer stays empty).
    /// A '\n' is re-appended after each line so that the last token of one
    /// line and the first token of the next are not glued together.
    void readFile(){
        std::string line;
        while (std::getline(in, line)){
            buffer.append(line);
            buffer.push_back('\n');
        }
    }

public:
    /// Analyses the file "test.txt" in the current working directory.
    LexicalAnalysis(){
        in.open("test.txt");
        readFile();
    }

    /// Analyses the file at `address`.
    LexicalAnalysis(const std::string& address){
        in.open(address);
        readFile();
    }

    /// Tokenizes the buffered input and stores the tokens for Print().
    ///
    /// Any tokens from a previous call are discarded first. Whitespace
    /// separates tokens; characters that belong to no token class are
    /// skipped silently.
    ///
    /// @return false if a ':' is not immediately followed by '=' (tokens
    ///         found before that point are kept); true otherwise.
    bool Scanner(){
        words.clear();

        const std::size_t len = buffer.length();
        std::size_t p = 0;  // index of the next unread character

        while (p < len){
            const char ch = buffer[p];
            if (isBlank(ch)){
                ++p;
                continue;
            }

            int syn = 0;
            std::string token;

            if (isLetter(ch)){
                // Letter followed by letters/digits: reserved word or identifier.
                // Peeking at buffer[p] before consuming avoids both losing the
                // last character at end of input and having to step back.
                while (p < len && (isLetter(buffer[p]) || isDigit(buffer[p]))){
                    token += buffer[p++];
                }
                syn = 14;
                for (std::size_t i = 0; i < rwtab.size(); ++i){
                    if (rwtab[i] == token){
                        syn = static_cast<int>(i) + 1;
                        break;
                    }
                }
            }
            else if (isDigit(ch)){
                // Run of digits; the character after it is left unread.
                while (p < len && isDigit(buffer[p])){
                    token += buffer[p++];
                }
                syn = 15;
            }
            else{
                // Operators and delimiters, some of them two characters long.
                ++p;
                token += ch;
                const char next = (p < len) ? buffer[p] : '\0';

                switch (ch){
                    case '<':
                        if (next == '>'){
                            token += next; ++p; syn = 16;
                        }
                        else if (next == '='){
                            token += next; ++p; syn = 17;
                        }
                        else{
                            syn = 18;
                        }
                        break;

                    case '>':
                        if (next == '='){
                            token += next; ++p; syn = 19;
                        }
                        else{
                            syn = 20;
                        }
                        break;

                    case ':':
                        if (next == '='){
                            token += next; ++p; syn = 21;
                        }
                        else{
                            return false;  // a lone ':' is not a token
                        }
                        break;

                    case '+': syn = 22; break;
                    case '-': syn = 23; break;
                    case '*': syn = 24; break;
                    case '/': syn = 25; break;
                    case '(': syn = 26; break;
                    case ')': syn = 27; break;
                    case ';': syn = 28; break;

                    default:
                        syn = -1;  // unknown character: skipped
                        break;
                }
            }

            if (syn > 0){
                words.push_back(std::make_pair(syn, token));
            }
        }
        return true;
    }

    /// Writes every stored token to standard output as "(code,lexeme)",
    /// one per line.
    void Print() const{
        for (const auto& word : words){
            std::cout << '(' << word.first << ',' << word.second << ')' << '\n';
        }
        std::cout.flush();
    }
};


const std::vector<std::string> LexicalAnalysis::rwtab = {
    "const", "var", "procedure", "odd", "if", "then", "while", "do", "call", "begin", "end", "read", "write"
};

/// Runs the analyzer on the default input file and prints the tokens.
/// Returns 0 on success and 1 if Scanner() reported a malformed token.
int main(){
    LexicalAnalysis test;
    const bool ok = test.Scanner();
    // Tokens recognised before a failure are still printed, as before.
    test.Print();
    if (!ok){
        std::cerr << "lexical error: malformed token in input" << std::endl;
        return 1;
    }
    return 0;
}

你可能感兴趣的:(一个简单的词法分析程序)