编译原理课设 词法分析

编译原理课设 第一阶段

  • GUET编译原理课设 词法分析
  • 主要参考了 GUET_曼陀罗华 的博客(好像是位研究生姐姐),改了其中一些部分,在小姐姐基础上增加了出错控制。
  • 其实有更好的方式,不过前期我是这样写的,就先贴出来,循序渐进。

正文如下:

词法分析是干什么的:

  • 过滤掉所有的空格、换行、注释
  • 把有用的东西存到 pascal[ ] 数组中

比如 BEGIN 存到pascal[0]
比如 VAR 存到pascal[1]
...
这个数组可以用于以后的语法和语义分析

  • pascal[ ] 数组是dual 类型的,除了保存单词的信息,还有单词的种类dual_type

这里写出思路,具体的小细节根据实际情况修改。
比如各类单词的类型码(编号)和助记符如下:

符号 编号 助记符
结束符 0 FINISH
BEGIN 1 BEGIN
END 2 END
IF 3 IF
THEN 4 THEN
WHILE 5 WHILE
ELSE 6 ELSE
DO 7 DO
VAR 8 VAR
INTEGER 9 INTEGER
整数 10 INT
标识符 11 ID
+ 101 ADD
- 102 SUB
* 103 MUL
/ 104 DIV
> 105 GT
= 106 EQ
< 107 LT
: 108 COLON
:= 109 COL_EQ
<> 110 NE
<= 111 LE
>= 112 GE
; 113 FIN
// 114 ANND
/* 115 ANNF
*/ 116 ANNL
, 117 CO
( 118 LL
) 119 RR
#include <cctype>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
using namespace std;
// One scanned lexical unit: a numeric type code, the payload, and a source
// position.
struct dual {
    // Token type code (see the type[] table for the codes used in this file).
    int dual_type;

    // Payload. Both members share the same storage, so only one view is
    // meaningful for a given token.
    union {
        // Token spelling as a NUL-terminated string.
        char lexeme_text[50];
        // Decimal digits, most significant first, terminated by -1
        // (the format toint() consumes); used for tokens of type 10.
        int  lexeme_num[50];
    } lexeme;

    // Source position of the token, as assigned by scaner().
    int x;
    int y;
};

// Every unit the scanner has looked at, valid or not.
dual DUAL[100];

// Units that passed validation; pasnum is the number of entries in use.
dual pascal[100];
int pasnum = 0;

// Index of the next free slot in DUAL.
int num = 0;

// Keyword spellings. Slot 0 ("sign") and slot 10 ("INT") are not keywords of
// the language; for slots 1..9 the index equals the keyword's type code.
const char * keyword[] = {
    "sign",
    "BEGIN", "END", "IF", "THEN", "WHILE",
    "ELSE", "DO", "VAR", "INTEGER",
    "INT"
};

// Characters that always form a one-character token. The unused tail of the
// array is zero-filled.
char singword[10] = { '+', '-', '*', '=', '(', ')', ',', ';' };

// Characters that may start a two-character token; '/' is listed because it
// also opens both comment forms. The unused tail of the array is zero-filled.
char doubleword[10] = { '>', '<', ':', '/' };

// Token type codes and, at the same index, their mnemonics.
int type[31] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113, 114, 115, 116, 117, 118, 119
};
const char * typesign[31] = {
    "FINISH", "BEGIN", "END", "IF", "THEN", "WHILE",
    "ELSE", "DO", "VAR", "INTEGER", "INT", "ID",
    "ADD", "SUB", "MUL", "DIV", "GT", "EQ", "LT", "COLON", "COL_EQ", "NE",
    "LE", "GE", "FIN", "ANND", "ANNF", "ANNL", "CO", "LL", "RR"
};

// Returns the index in type[] (and therefore in typesign[]) that holds
// dual_type. An unknown code yields 0, the same index as code 0.
int findSignIndex(int dual_type) {
    const int count = static_cast<int>(sizeof(type) / sizeof(type[0]));
    int pos = 0;
    while (pos < count && type[pos] != dual_type) {
        ++pos;
    }
    return pos < count ? pos : 0;
}

// Converts a digit array to its integer value.
//
// lexeme_text holds decimal digits (each 0..9), most significant first, and
// is terminated by -1. An array that starts with -1 yields 0.
//
// The value is accumulated with integer arithmetic only: going through pow()
// would compute in floating point and truncate, which can be off by one when
// pow() is not exact. A value that does not fit in int saturates at INT_MAX
// instead of overflowing, so callers can still detect "too large" with a
// simple comparison.
int toint(int lexeme_text[]) {
    int value = 0;
    for (int i = 0; lexeme_text[i] != -1; ++i) {
        const int digit = lexeme_text[i];
        // value * 10 + digit would exceed INT_MAX: clamp and stop.
        if (value > (INT_MAX - digit) / 10) {
            return INT_MAX;
        }
        value = value * 10 + digit;
    }
    return value;
}

// Returns 1 when ch is one of the single-character delimiters listed in
// singword, otherwise 0.
//
// The scan stops at the string terminator. singword is a 10-byte array whose
// tail is NUL padding; comparing against the padding would classify a NUL
// byte from the input as a delimiter.
int isSingle(char ch) {
    for (const char *p = singword; *p != '\0'; ++p) {
        if (*p == ch) {
            return 1;
        }
    }
    return 0;
}

// Returns 1 when ch can start a two-character delimiter (or a comment), i.e.
// when it is listed in doubleword, otherwise 0.
//
// The scan stops at the string terminator so that a NUL byte from the input
// is not matched against the array's NUL padding.
int isDoubelStar(char ch) {
    for (const char *p = doubleword; *p != '\0'; ++p) {
        if (*p == ch) {
            return 1;
        }
    }
    return 0;
}

//处理单分界符元素
//设置标识符类型
//结构体,类型,字符值
int handSingle(dual dual_element, int dual_type, char ch) {
    dual_element.dual_type = dual_type;
    dual_element.lexeme.lexeme_text[0] = ch;
    dual_element.lexeme.lexeme_text[0] = '\0';
    //cout << "匹配到" << ch << endl;
    return 1;
}


// Prints one diagnostic line to standard output in the form
//   Error<err_type>:<row>行 <column>列 原因:<msg>
// and flushes the stream. Always returns 1.
int errMsg(int err_type, int row, int column, const char  msg[]) {
    std::ostream &out = std::cout;
    out << "Error" << err_type;
    out << ":" << row << "行 ";
    out << column << "列";
    out << " 原因:" << msg;
    out << std::endl;
    return 1;
}


int scaner() {
    char ch;
    int i, j;
    int row = 1;
    int clumn = 1;
    int scan_success_flag = 1;
    FILE * file;
    file = fopen("a.txt", "r");
    if (file == NULL) {
        return 0;
    }
    //通过getc获取字符
    ch = getc(file);



    while (ch != EOF) {

        //换行
        while (ch == '\n')
        {
            row++;
            clumn = 1;
            ch = getc(file);
        }

        //空格和tab,定义他们的长度都为1
        while (ch == ' ' || ch == '\t')
        {
            clumn++;
            ch = getc(file);
        }

        //是字母
        if (isalpha(ch)) {


            DUAL[num].lexeme.lexeme_text[0] = ch;
            //review
            DUAL[num].x = clumn;
            DUAL[num].y = row;
            //Token 下标移动到1
            j = 1;
            ch = getc(file);
            clumn++;

            //抽取出来,做成检验函数,排除其他可能
            while (isalpha(ch))
            {
                DUAL[num].lexeme.lexeme_text[j++] = ch;
                ch = getc(file);
                clumn++;
                //j > 8说明单词超长,为了防止多次输出错误信息,设置为9
                if (j == 9) {
                    //cout << "单词超长" < 65535 || tempnum < 0) {
                    errMsg(1013, row, clumn, "数字过大,溢出");
                    scan_success_flag = 0;
                }
                else {
                    DUAL[num].dual_type = 10;//整数类型
                    pascal[pasnum] = DUAL[num];
                    pasnum++;
                    //cout << "匹配到数字" << tempnum << endl;
                }

            }
            num++;

        }

        //只可能是单分界符开头的,提取首字符位置,类似 indexOf 函数
        else if (isSingle(ch)) {


            DUAL[num].x = row;
            DUAL[num].y = clumn++;
            DUAL[num].lexeme.lexeme_text[0] = ch;
            DUAL[num].lexeme.lexeme_text[1] = '\0';
            //cout << "匹配到" << ch << endl;
            switch (ch)
            {
            case '+':
                DUAL[num].dual_type = 101;
                break;
            case '-':
                DUAL[num].dual_type = 102;
                break;
            case '*':
                DUAL[num].dual_type = 103;
                break;
                //todo 除属于双分界符情况,不在此讨论
            case '=':
                DUAL[num].dual_type = 106;
                break;
            case ';':
                DUAL[num].dual_type = 113;
                break;
            case ',':
                DUAL[num].dual_type = 117;
                break;
            case '(':
                DUAL[num].dual_type = 118;
                break;
            case ')':
                DUAL[num].dual_type = 119;
                break;
            default:
                cout << "isSingle出错:" << ch << endl;
                break;
            }
            pascal[pasnum] = DUAL[num];
            pasnum++;
            ch = getc(file);
            num++;
        }
        //双分界开头的
        else if (isDoubelStar(ch))
        {
            int isNote = 0; //默认不是注释
                            //DUAL[num].lexeme.lexeme_text[0] = ch; 因为注释就不能存放进去
                            //DUAL[num].lexeme.lexeme_text[1] = '\0';
            char next_ch = getc(file);
            switch (ch)
            {
            case '<':
                //如果下一个是=,那么就是<=
                if (next_ch == '=') {
                    DUAL[num].dual_type = 111;
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = next_ch;
                    DUAL[num].lexeme.lexeme_text[2] = '\0';
                    ch = getc(file);
                    //cout << "<=" << endl;

                }
                else if (next_ch == '>') {
                    DUAL[num].dual_type = 110;
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = next_ch;
                    DUAL[num].lexeme.lexeme_text[2] = '\0';
                    ch = getc(file);
                    //cout << "< >" << endl;

                }
                else { //否则是单分界
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = '\0';
                    DUAL[num].dual_type = 107;
                    //作用相当于 getc(file),为下一次进入一级while循环做准备
                    ch = next_ch;
                    //cout << "<" << endl;
                }
                break;
            case '>':
                //如果下一个是=,那么就是>=
                if (next_ch == '=') {
                    DUAL[num].dual_type = 112;
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = next_ch;
                    DUAL[num].lexeme.lexeme_text[2] = '\0';
                    ch = getc(file);
                    //cout << ">=" << endl;

                }
                else { //否则是单分界
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = '\0';
                    DUAL[num].dual_type = 105;
                    ch = next_ch;
                    //cout << ">" << endl;
                }
                break;
            case ':':
                //如果下一个是=,那么就是:=
                if (next_ch == '=') {
                    DUAL[num].dual_type = 109;
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = next_ch;
                    DUAL[num].lexeme.lexeme_text[2] = '\0';
                    ch = getc(file);
                    //cout << ":=" << endl;

                }
                else { //否则出错
                    DUAL[num].dual_type = 108;
                    ch = next_ch;
                    errMsg(1014, row, clumn, "期待的 '=' 没有出现,':'之后缺少 '=' ");
                    scan_success_flag = 0;
                }
                break;
            case '/':
                //单行注释
                if (next_ch == '/') {
                    row++;
                    clumn = 1;
                    isNote = 1;
                    //cout << "// 检测到单行注释" << endl;
                    ch = getc(file);
                    while (ch != '\n')
                    {
                        ch = getc(file);
                    }
                }
                //多行注释
                else if (next_ch == '*')
                {
                    isNote = 1;
                    char ch1 = getc(file);
                    char ch2 = getc(file);
                    while (ch1 != '*' || ch2 != '/')
                    {
                        //处理坐标
                        if (ch1 == '\n') {
                            row++;
                            clumn = 1;
                        }
                        else
                        {
                            clumn++;
                        }
                        if (ch2 == '\n') {
                            row++;
                            clumn = 1;
                        }
                        else
                        {
                            clumn++;
                        }

                        //分析字符
                        if (ch2 == '*') {
                            ch1 = ch2;
                            ch2 = getc(file);
                        }
                        //包含了ch1 == ‘*’且ch2 != '/的情况
                        else
                        {
                            ch1 = getc(file);
                            ch2 = getc(file);
                        }

                        //出错控制
                        if (ch1 == EOF || ch2 == EOF)
                        {
                            //没有期待的/出现或者已经到头
                            //cout << "多行注释出错" << endl;
                            errMsg(1015, row, clumn, "没有期待的 '*/' 出现,不合法的注释");
                            break;
                        }
                    }
                    ch = getc(file);

                }
                //排除其他可能,这只是一个单纯除号
                else
                {
                    DUAL[num].dual_type = 104;
                    DUAL[num].lexeme.lexeme_text[0] = ch;
                    DUAL[num].lexeme.lexeme_text[1] = '\0';
                    //作用相当于 getc(file),为下一次进入一级while循环做准备
                    ch = next_ch;
                }

            default:
                break;
            }
            if (!isNote) {
                pascal[pasnum] = DUAL[num];
                pasnum++;
                num++;
            }

        }
        //其他字符
        else
        {
            errMsg(1016, row, clumn, "非法字符");
            scan_success_flag = 0;
            ch = getc(file);
        }
    }
    return scan_success_flag;
}

int main() {
    int i;
    if (scaner()) {
        cout << "====== 分析成功 ======" << endl;
    }
    cout << endl << "====== 输出扫描合法的词元记录 ======" << endl;
    cout << endl << " 单词       类型     助记符" << endl;
    for (i = 0; i < pasnum; i++) {
        //整数类型
        if (pascal[i].dual_type == 10) {
            cout << " " << std::left << setw(12) << toint(pascal[i].lexeme.lexeme_num) << setw(8) << pascal[i].dual_type << typesign[findSignIndex(pascal[i].dual_type)]<< endl;
        }
        else
        {
            cout << " " << std::left << setw(12) << pascal[i].lexeme.lexeme_text << setw(8) << pascal[i].dual_type << typesign[findSignIndex(pascal[i].dual_type)] << endl;
            findSignIndex(pascal[i].dual_type);
        }
    }
    system("pause");
    return 0;
}

测试文件:a.txt

BEGIN
464645545454
VAR a b;
C:=2*Pi*R;
IF A >= 3 THEN A:=(2*6+1)*9
ELSE A:=3*6;
A++;
653
// 666注释
/*
* 多行的
*注释~
*/
/*cdgvvvd
s*/
+
-
*
/
>=
<=
<>
:=
(
)
<
>
#
END

运行结果:

你可能感兴趣的:(编译原理)