编译原理基础实验——c语言实现简单词法分析器(if-else实现)

C语言实现简单词法分析器(if-else)

为进一步熟悉编译原理中词法分析的实现过程,本文实现了一个针对 C 语言的简单词法分析器(代码中使用了 C++ 标准库的 fstream、string 和 vector,需按 C++ 编译)。该程序只能分析合法的 C 语言代码段并生成 token 序列,不进行预处理,也不做错误识别。

已知的问题

  1. 只能识别存储于 .txt 文件中的代码片段,且文件的最后一个字符必须为 '\n'
  2. 输入、输出文件目前只支持以绝对路径指定;使用相对路径时未能找到生成的输出文件

详细代码

//c语言实现简易词法分析程序
#include <fstream>   // fstream (in, out)
#include <iostream>  // cout, endl
#include <string>    // string (token and the literal/identifier tables)
#include <vector>    // vector (cT, sT, iT, CT)

using namespace std;

/*共有如下六类字符表
第一类:标识符(iT) (_ | a~z | A~Z)(_ | a~z | A~Z | 0~9)*
第二类:常数(CT) (1~9)(0~9)*| 0(0~7)* | 0x(0~9 | a~f| A~F)+
第三类:关键字(kT)(32) 独立定义
第四类:界符与运算符(pT) 独立定义
第五类:字符(cT)  '(o_letter | \(s_letter | x(0)*(digit | 空)(digit | 空)(digit | 空) | (0~7 | 空)(0~7 | 空)(0~7 | 空)))'
第六类: 字符串(sT)    "(字符 | digit)*"
*/

/*——————————————————程序开始——————————————————*/
// Global state shared by the scanning routines: the input and output file
// streams, the lexeme currently being assembled, and the most recently read
// character.
fstream in, out;
string token;
char tmp;
// Keyword table: the 32 reserved words recognised by the lexer. A keyword is
// reported with its 0-based position in this table plus one (see
// get_iT_or_kT), so the order of the entries must not change.
static char kT[32][20] = {
    "auto", "double", "int", "struct", "break", "else",
    "long", "switch", "case", "enum", "register", "typedef",
    "char", "extern", "return", "union", "const", "float",
    "short", "unsigned", "continue", "for", "signed", "void",
    "default", "goto", "sizeof", "volatile", "do", "while",
    "static", "if"
};
// Delimiter/operator table. An entry is reported with its 0-based position
// plus one, so the order of the entries must not change.
//   entries  0-26: operators that share their first character with another
//                  operator ("+", "+=", "++", ...)
//   entries 27-42: stand-alone single-character symbols
// "{" and "}" are listed twice (29/30 and 34/35); a front-to-back search
// always reports the first pair.
static char pT[43][10] = {
    /*  0 */ "+",  "+=", "++",
    /*  3 */ "-",  "-=", "--",
    /*  6 */ "*",  "*=",
    /*  8 */ "/",  "/=",
    /* 10 */ "<",  "<=",
    /* 12 */ ">",  ">=",
    /* 14 */ "=",  "==",
    /* 16 */ "!",  "!=",
    /* 18 */ "&",  "&&",
    /* 20 */ "|",  "||",
    /* 22 */ "%",  "%=",
    /* 24 */ "<<", ">>", "->",
    /* 27 */ "[",  "]",  "{",  "}",
    /* 31 */ ".",  "\?", ":",
    /* 34 */ "{",  "}",  ";",
    /* 37 */ "(",  ")",  "^",
    /* 40 */ ",",  "#",  "~"
};
vector<string> cT;// character-literal table; entries include the single quotes
vector<string> sT;// string-literal table; entries include the double quotes
vector<string> iT;// identifier table
vector<double> CT;// numeric-constant table

//判定函数
// True when ch is an ASCII letter, upper or lower case.
bool is_atoZ(char ch){
    const bool upper = (ch >= 'A' && ch <= 'Z');
    const bool lower = (ch >= 'a' && ch <= 'z');
    return upper || lower;
}
// True when ch is a non-zero decimal digit ('1'..'9').
bool is_1to9(char ch){
    return !(ch < '1' || ch > '9');
}
// True when ch is a decimal digit ('0'..'9').
bool is_0to9(char ch){
    return !(ch < '0' || ch > '9');
}
// True when ch is a non-zero octal digit ('1'..'7').
bool is_1to7(char ch){
    return !(ch < '1' || ch > '7');
}
// True when ch is an octal digit ('0'..'7').
bool is_0to7(char ch){
    return !(ch < '0' || ch > '7');
}
// True when ch is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
bool is_num_of_0x(char ch){
    const bool decimal = (ch >= '0' && ch <= '9');
    const bool upper_hex = (ch >= 'A' && ch <= 'F');
    const bool lower_hex = (ch >= 'a' && ch <= 'f');
    return decimal || upper_hex || lower_hex;
}
// True when ch is a hexadecimal digit other than '0':
// '1'..'9', 'a'..'f' or 'A'..'F'.
bool is_num_of_0x_nz(char ch){
    const bool decimal_nz = (ch >= '1' && ch <= '9');
    const bool upper_hex = (ch >= 'A' && ch <= 'F');
    const bool lower_hex = (ch >= 'a' && ch <= 'f');
    return decimal_nz || upper_hex || lower_hex;
}
// True when ch is whitespace that separates tokens and produces no output:
// space, newline, tab, carriage return, vertical tab or form feed.
// Carriage return is included so that files with CRLF line endings are not
// handed to the operator lookup as an unknown symbol.
bool is_none(char ch){
    switch(ch){
        case ' ':
        case '\n':
        case '\t':
        case '\r':
        case '\v':
        case '\f':
            return true;
        default:
            return false;
    }
}

// Dispatcher: maps the first character of a lexeme to the code of the routine
// that main() runs for it.
//   1  identifier starting with '_'     2  identifier or keyword
//   3  '/' (comment or division)        4  octal/hexadecimal constant
//   5  numeric constant                 6  character literal
//   7  string literal                   8  whitespace
//   9  anything else (operator/delimiter lookup)
// NOTE(review): the '0' test comes after the digit test, so code 4 is never
// returned and constants with a leading '0' are reported as code 5. The
// order is kept unchanged here.
int get_case(char c){
    if(is_atoZ(c))
        return 2;
    if(is_0to9(c))
        return 5;
    if(is_none(c))
        return 8;
    switch(c){
        case '_':
            return 1;
        case '/':
            return 3;
        case '0':
            return 4; // shadowed by the digit test above
        case '\'':
            return 6;
        case '\"':
            return 7;
        default:
            return 9;
    }
}

//具体处理
// Scans an identifier whose first character is already in tmp (main() calls
// this when that character is '_'), then writes "<iT,k>" to out, where k is
// the identifier's 1-based position in iT. An identifier seen for the first
// time is appended to iT. token is left empty on return.
void get_iT(void){
    token += tmp;
    // Look at the next character before consuming it, so the character that
    // ends the identifier simply stays in the stream. Unlike reading one
    // character too far and seeking back, this also works when the
    // identifier is the last thing in the file.
    for(;;){
        const int next = in.peek();
        if(next == fstream::traits_type::eof())
            break;
        const char ch = static_cast<char>(next);
        if(!(is_atoZ(ch) || is_0to9(ch) || ch == '_'))
            break;
        tmp = static_cast<char>(in.get());
        token += tmp;
    }
    // Find the identifier in the table, or add it at the end.
    vector<string>::size_type slot = 0;
    while(slot < iT.size() && iT[slot] != token)
        ++slot;
    if(slot == iT.size())
        iT.push_back(token);
    out << '<' << "iT" << ',' << slot + 1 << '>' << endl;
    token.clear();
}

// Scans a word whose first character is already in tmp (main() calls this
// when that character is a letter) and classifies it. Writes "<kT,k>" to out
// when the word is entry k-1 of the keyword table kT; otherwise writes
// "<iT,k>", where k is the word's 1-based position in iT (appended on first
// sight). token is left empty on return.
void get_iT_or_kT(void){
    token += tmp;
    // Look at the next character before consuming it, so the character that
    // ends the word stays in the stream. Unlike reading one character too far
    // and seeking back, this also works when the word ends the file.
    for(;;){
        const int next = in.peek();
        if(next == fstream::traits_type::eof())
            break;
        const char ch = static_cast<char>(next);
        if(!(is_atoZ(ch) || is_0to9(ch) || ch == '_'))
            break;
        tmp = static_cast<char>(in.get());
        token += tmp;
    }
    // Keywords take priority over identifiers.
    const int keyword_count = sizeof(kT) / sizeof(kT[0]);
    for(int k = 0; k < keyword_count; ++k){
        if(token == kT[k]){
            out << '<' << "kT" << ',' << k + 1 << '>' << endl;
            token.clear();
            return;
        }
    }
    // Not a keyword: find it in the identifier table, or add it at the end.
    vector<string>::size_type slot = 0;
    while(slot < iT.size() && iT[slot] != token)
        ++slot;
    if(slot == iT.size())
        iT.push_back(token);
    out << '<' << "iT" << ',' << slot + 1 << '>' << endl;
    token.clear();
}

// Handles a lexeme that starts with '/' (the '/' itself has already been
// read by the caller):
//   "//"  line comment  - discarded up to and including the newline
//   "/*"  block comment - discarded up to and including the closing "*/"
//   "/="  written as <pT,10>
//   "/"   written as <pT,9>; the following character is left in the stream
// An unterminated comment simply runs to the end of the input.
void get_other(void){
    const int next = in.peek();
    if(next == '/'){
        while(in.get(tmp)){
            if(tmp == '\n')
                return;
        }
        return;
    }
    if(next == '*'){
        in.get(); // the '*' that opens the comment
        // Remember the previous character so that a run of stars directly
        // before the slash (e.g. "**/") still closes the comment.
        char previous = '\0';
        while(in.get(tmp)){
            if(previous == '*' && tmp == '/')
                return;
            previous = tmp;
        }
        return;
    }
    if(next == '='){
        tmp = static_cast<char>(in.get());
        out << '<' << "pT" << ',' << "10" << '>' << endl;
        return;
    }
    out << '<' << "pT" << ',' << "9" << '>' << endl;
}

// Scans an octal or hexadecimal integer constant and writes "<CT,k>" to out,
// where k is the 1-based position of its value in CT (appended on first
// sight).
// Precondition: the leading '0' has been read into tmp and the character
// after it is still unread. If that character is 'x' or 'X' the digits that
// follow are read as hexadecimal; otherwise any following digits 0-7 are
// read as octal (a lone "0" yields 0).
void get_0or0x(void){
    double value = 0;
    int next = in.peek();
    if(next == 'x' || next == 'X'){
        tmp = static_cast<char>(in.get()); // the prefix letter
        for(;;){
            next = in.peek(); // the end-of-file marker matches no range below
            int digit;
            if(next >= '0' && next <= '9')
                digit = next - '0';
            else if(next >= 'a' && next <= 'f')
                digit = next - 'a' + 10;
            else if(next >= 'A' && next <= 'F')
                digit = next - 'A' + 10;
            else
                break;
            tmp = static_cast<char>(in.get());
            value = value*16 + digit;
        }
    }
    else{
        while(next >= '0' && next <= '7'){
            tmp = static_cast<char>(in.get());
            value = value*8 + (tmp - '0');
            next = in.peek();
        }
    }
    // Find the value in the constant table, or add it at the end.
    vector<double>::size_type slot = 0;
    while(slot < CT.size() && CT[slot] != value)
        ++slot;
    if(slot == CT.size())
        CT.push_back(value);
    out << '<' << "CT" << ',' << slot + 1 << '>' << endl;
}

void get_CT(void){
    bool flage = false;
    bool flag = true;
    double c_n = 0, c_l = 0;
    int p = 0, e = 0;
    c_n = tmp - '0';
    c_n += c_l*10;
    c_l = c_n;
    while(!in.eof()){
        tmp = in.get();
        if(is_0to9(tmp)){
            c_n = tmp - '0';
            c_n += c_l*10;
            c_l = c_n;
        }
        else
            break;
    }
    if(tmp == '.'){
        tmp = in.get();
        c_n = tmp - '0';
        c_n += c_l*10;
        c_n/=10;
        c_l = c_n;
        while(!in.eof()){
            tmp = in.get();
            if(is_0to9(tmp)){
                c_n = tmp - '0';
                c_n += c_l*10;
                c_n/=10;
                c_l = c_n;
            }
            else if(tmp == 'e'){
                flage = true;
                break;
            }
            else
                break;
        }
        if(flage){
            tmp = in.get();
            if(tmp == '-'){
                e = 1;
                tmp = in.get();
            }
            else if(tmp == '+'){
                e = 0;
                tmp = in.get();
            }
            p = p*10 + (tmp - '0');
            while (!in.eof()) {
                tmp = in.get();
                if(is_0to9(tmp))
                    p = p*10 + (tmp - '0');
                else{
                    if(e){
                        for(int i = 0; i < p; i += 1)
                            c_l/= 10;
                    }
                    else{
                        for(int i = 0; i < p; i += 1)
                            c_l*= 10;
                    }
                    break;
                }
            }
        }
    }
    else if(tmp == 'e'){
        tmp = in.get();
        if(tmp == '-'){
            e = 1;
            tmp = in.get();
        }
        else if(tmp == '+'){
            e = 0;
            tmp = in.get();
        }
        p = p*10 + (tmp - '0');
        while (!in.eof()) {
            tmp = in.get();
            if(is_0to9(tmp))
                p = p*10 + (tmp - '0');
            else{
                if(e){
                    for(int i = 0; i < p; i += 1)
                        c_l/= 10;
                }
                else{
                    for(int i = 0; i < p; i += 1)
                        c_l*= 10;
                }
                break;
            }
        }
    }
    in.seekg(-1, in.cur);
    for(int i = 0; i < CT.size(); i += 1){
        if(c_l == CT[i]){
            out << '<' << "CT" << ',' << i + 1 << '>' << endl;
            flag = false;
            break;
        }
    }
    if(flag){
        CT.push_back(c_l);
        out << '<' << "CT" << ',' << CT.size() << '>' << endl;
    }
    return;
}

void get_cT(void){
    token += tmp;
    tmp = in.get();
    if(tmp == '\\'){
        token += tmp;
        tmp = in.get();
        if(tmp == 'x'){
            while(!in.eof()){
                tmp = in.get();
                if(tmp == '0')
                    continue;
                else
                    break;
            }
            if(tmp == '\''){
                token += '0';
                token += '0';
                token += tmp;
            }
            else{
                tmp = in.get();
                if(tmp == '\''){
                    token += '0';
                    in.seekg(-2, in.cur);
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                }
                else{
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                }
            }
        }
        else if(is_0to7(tmp)){
            tmp = in.get();
            if(tmp == '\''){
                in.seekg(-2, in.cur);
                token += '0';
                token += '0';
                tmp = in.get();
                token += tmp;
                tmp = in.get();
                token += tmp;
            }
            else{
                tmp = in.get();
                if(tmp == '\''){
                    in.seekg(-3, in.cur);
                    token += '0';
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                }
                else{
                    in.seekg(-3, in.cur);
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                    tmp = in.get();
                    token += tmp;
                }
            }
        }
        else{
            token += tmp;
            tmp = in.get();
            token += tmp;
        }
    }
    else{
        token += tmp;
        tmp = in.get();
        token += tmp;
    }
    cT.push_back(token);
    out << '<' << "cT" << ',' << cT.size() << '>' << endl;
    token.clear();
    return;
}

// Scans a string literal whose opening quote is already in tmp, appends it
// (quotes included) to sT and writes "<sT,k>" to out, where k is the new size
// of sT (literals are not de-duplicated). token is left empty on return.
// A backslash and the character after it are copied as a pair, so an escaped
// quote (\") does not end the string. If the input ends before the closing
// quote, whatever was read is stored as-is.
void get_sT(void){
    token += tmp; // opening quote
    while(in.get(tmp)){
        token += tmp;
        if(tmp == '\\'){
            if(in.get(tmp))
                token += tmp;
        }
        else if(tmp == '\"'){
            break;
        }
    }
    sT.push_back(token);
    out << '<' << "sT" << ',' << sT.size() << '>' << endl;
    token.clear();
}

// Scans an operator or delimiter whose first character is already in tmp and
// writes "<pT,k>" to out, where k is the symbol's 1-based position in pT.
// The two-character symbol formed with the next input character is preferred
// when pT contains it ("+=" over "+"); otherwise the single character is
// looked up and the next character is left in the stream.
// A character that is not in pT at all produces no output; the global token
// buffer is not touched, so such a character cannot leak into the next lexeme.
void get_pT(void){
    const int symbol_count = sizeof(pT) / sizeof(pT[0]);

    const int next = in.peek();
    if(next != fstream::traits_type::eof()){
        string pair_symbol(1, tmp);
        pair_symbol += static_cast<char>(next);
        for(int i = 0; i < symbol_count; ++i){
            if(pair_symbol == pT[i]){
                tmp = static_cast<char>(in.get()); // second character of the symbol
                out << '<' << "pT" << ',' << i + 1 << '>' << endl;
                return;
            }
        }
    }

    const string single_symbol(1, tmp);
    for(int i = 0; i < symbol_count; ++i){
        if(single_symbol == pT[i]){
            out << '<' << "pT" << ',' << i + 1 << '>' << endl;
            return;
        }
    }
}

// Entry point: reads the source text character by character, lets get_case()
// pick the scanning routine for each lexeme, and writes the token sequence to
// the output file.
// Usage: lexer [input-file [output-file]]
// With no arguments the original hard-coded paths are used. Returns 1 when
// either file cannot be opened, 0 otherwise.
int main(int argc, char* argv[]){
    const char* in_path = argc > 1 ? argv[1] : "/Users/no1/Desktop/main.txt";
    const char* out_path = argc > 2 ? argv[2] : "/Users/no1/Desktop/tmp.txt";

    in.open(in_path, ios::in);
    if(!in.is_open()){
        // Without this check a missing file would never set eofbit and the
        // scanning loop could not terminate.
        cerr << "cannot open input file: " << in_path << endl;
        return 1;
    }
    out.open(out_path, ios::out);
    if(!out.is_open()){
        cerr << "cannot open output file: " << out_path << endl;
        in.close();
        return 1;
    }

    token.clear();
    // Testing the read itself stops the loop at end of input without handing
    // the end-of-file marker to a scanning routine.
    while(in.get(tmp)){
        switch(get_case(tmp)){
            case 1:
                get_iT();
                break;
            case 2:
                get_iT_or_kT();
                break;
            case 3:
                get_other();
                break;
            case 4:
                get_0or0x();
                break;
            case 5:
                get_CT();
                break;
            case 6:
                get_cT();
                break;
            case 7:
                get_sT();
                break;
            case 8:
                break; // whitespace: nothing to emit
            default:
                get_pT();
                break;
        }
    }
    cout << "done" << endl;
    in.close();
    out.close();
    return 0;
}

你可能感兴趣的:(编译原理基础实验——c语言实现简单词法分析器(if-else实现))