编译原理课设 第一阶段
- GUET编译原理课设 词法分析
- 主要参考了 GUET_曼陀罗华 的博客(好像是位研究生姐姐),改了其中一些部分,在小姐姐基础上增加了出错控制。
- 其实有更好的方式,不过前期我是这样写的,就先贴出来,循序渐进。
正文如下:
词法分析是干什么的:
- 过滤掉所有的空格、换行、注释
- 把有用的东西存到 `pascal[]` 数组中,比如 `BEGIN` 存到 `pascal[0]`,`VAR` 存到 `pascal[1]`,依此类推。
这个数组可以用于以后的语法和语义分析
- `pascal[]` 数组是 `dual` 类型的,除了保存单词的信息,还保存单词的种类 `dual_type`。
这里写出思路,具体的小细节根据实际情况修改。
比如 `关键字数组`、`类型码` 等。
符号 | 编号 | 助记符 |
---|---|---|
结束符 | 0 | FINISH |
BEGIN | 1 | BEGIN |
END | 2 | END |
IF | 3 | IF |
THEN | 4 | THEN |
WHILE | 5 | WHILE |
ELSE | 6 | ELSE |
DO | 7 | DO |
VAR | 8 | VAR |
INTEGER | 9 | INTEGER |
整数 | 10 | INT |
标识符 | 11 | ID |
+ | 101 | ADD |
- | 102 | SUB |
* | 103 | MUL |
/ | 104 | DIV |
> | 105 | GT |
= | 106 | EQ |
< | 107 | LT |
: | 108 | COLON |
:= | 109 | COL_EQ |
<> | 110 | NE |
<= | 111 | LE |
>= | 112 | GE |
; | 113 | FIN |
// | 114 | ANND |
/* | 115 | ANNF |
*/ | 116 | ANNL |
, | 117 | CO |
( | 118 | LL |
) | 119 | RR |
#include <cctype>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
using namespace std;
// One scanned token: its numeric type code, its text (or digits), and the
// source position recorded by the scanner.
struct dual {
    // Numeric type code of the token (see the `type` / `typesign` tables).
    int dual_type;
    // Token payload. Words and delimiters use lexeme_text as a C string;
    // integer tokens are read back through lexeme_num by toint().
    union {
        char lexeme_text[50];
        int lexeme_num[50];
    } lexeme;
    // Source position of the token.
    // NOTE(review): scaner() stores column in x / row in y for words, but
    // row in x / column in y for single-character delimiters.
    int x;
    int y;
};

// Working buffer of tokens being scanned; `num` is the current index into it.
dual DUAL[100];
int num = 0;

// Tokens that passed validation; `pasnum` is the number of entries stored.
dual pascal[100];
int pasnum = 0;
// Keyword table. Entries 1..9 sit at the index equal to their type code.
// NOTE(review): the purpose of the "sign" (index 0) and "INT" (index 10)
// entries is not visible in this file - confirm before relying on them.
const char * keyword[] = {
    "sign",
    "BEGIN", "END", "IF", "THEN", "WHILE", "ELSE", "DO", "VAR", "INTEGER",
    "INT"
};

// Characters that always form a single-character delimiter.
char singword[10] = { '+', '-', '*', '=', '(', ')', ',', ';', '\0' };

// Characters that may start a two-character delimiter; '/' is included
// because it also starts both comment forms.
char doubleword[10] = { '>', '<', ':', '/', '\0' };

// Type codes and their mnemonics; type[i] is named by typesign[i].
int type[31] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
    114, 115, 116,
    117, 118, 119
};
const char * typesign[31] = {
    "FINISH", "BEGIN", "END", "IF", "THEN", "WHILE", "ELSE", "DO", "VAR", "INTEGER", "INT", "ID",
    "ADD", "SUB", "MUL", "DIV", "GT", "EQ", "LT", "COLON", "COL_EQ", "NE", "LE", "GE", "FIN",
    "ANND", "ANNF", "ANNL",
    "CO", "LL", "RR"
};
// Returns the position of dual_type within the `type` table, i.e. the index
// to use with `typesign`. An unknown code yields 0, the same index as the
// first table entry.
int findSignIndex(int dual_type) {
    const int kTypeCount = 31;
    int pos = 0;
    while (pos < kTypeCount && type[pos] != dual_type) {
        ++pos;
    }
    return pos < kTypeCount ? pos : 0;
}
// Converts an array of decimal digits, terminated by -1, into an integer.
//
// lexeme_text: digit values, most significant first, ending with a -1
//              sentinel (the sentinel itself is not part of the number).
// Returns the numeric value; an empty digit list yields 0. Values that do not
// fit in an int are clamped to INT_MAX (or INT_MIN), so a caller's range
// check still sees an out-of-range result instead of undefined behaviour.
//
// The accumulation is pure integer arithmetic. The previous pow()-based sum
// went through double and was truncated back to int on every step, which can
// produce off-by-one results when pow() is not exact.
int toint(int lexeme_text[]) {
    long long value = 0;
    for (int i = 0; lexeme_text[i] != -1; ++i) {
        value = value * 10 + lexeme_text[i];
        if (value > INT_MAX) {
            return INT_MAX;
        }
        if (value < INT_MIN) {
            return INT_MIN;
        }
    }
    return static_cast<int>(value);
}
// Tells whether ch is one of the single-character delimiters in `singword`.
//
// Returns 1 when ch is listed, 0 otherwise. Only the characters before the
// terminating NUL are compared: scanning the whole 10-byte array would also
// compare against its zero padding and report '\0' as a delimiter.
int isSingle(char ch) {
    const int capacity = static_cast<int>(sizeof(singword));
    for (int i = 0; i < capacity && singword[i] != '\0'; i++) {
        if (ch == singword[i]) {
            return 1;
        }
    }
    return 0;
}
// Tells whether ch can start a two-character delimiter (or a comment), i.e.
// whether it is listed in `doubleword`.
//
// Returns 1 when ch is listed, 0 otherwise. Only the characters before the
// terminating NUL are compared: a fixed bound of 5 would also compare against
// the terminator at index 4 and report '\0' as a delimiter start.
int isDoubelStar(char ch) {
    const int capacity = static_cast<int>(sizeof(doubleword));
    for (int i = 0; i < capacity && doubleword[i] != '\0'; i++) {
        if (ch == doubleword[i]) {
            return 1;
        }
    }
    return 0;
}
//处理单分界符元素
//设置标识符类型
//结构体,类型,字符值
int handSingle(dual dual_element, int dual_type, char ch) {
dual_element.dual_type = dual_type;
dual_element.lexeme.lexeme_text[0] = ch;
dual_element.lexeme.lexeme_text[0] = '\0';
//cout << "匹配到" << ch << endl;
return 1;
}
// Prints one diagnostic line to standard output: the error code, the row and
// column, and the reason text, followed by a flushed end-of-line.
// Always returns 1.
int errMsg(int err_type, int row, int column, const char msg[]) {
    std::cout << "Error" << err_type << ":";
    std::cout << row << "行 " << column << "列";
    std::cout << " 原因:" << msg << std::endl;
    return 1;
}
// Scans the file "a.txt" character by character, fills DUAL[] with the tokens
// it recognises and copies accepted tokens into pascal[] (advancing pasnum).
//
// Returns 0 when the file cannot be opened; otherwise returns
// scan_success_flag, which starts at 1 and is cleared by most error branches.
//
// NOTE(review): part of this function is missing from this copy of the
// source. The identifier branch below runs straight into the tail of what
// looks like the integer-literal branch (see the note inside the inner loop),
// so keyword lookup and digit scanning are not visible here.
// NOTE(review): the file opened here is never closed in this function.
// NOTE(review): neither DUAL[num] nor pascal[pasnum] is bounds-checked against
// the array size of 100.
int scaner() {
// NOTE(review): getc() returns int; keeping the result in a char makes the
// comparison with EOF depend on whether char is signed.
char ch;
// NOTE(review): i is never used in this function.
int i, j;
int row = 1;
int clumn = 1;
int scan_success_flag = 1;
FILE * file;
file = fopen("a.txt", "r");
if (file == NULL) {
return 0;
}
// Fetch the first character with getc
ch = getc(file);
while (ch != EOF) {
// Line breaks: advance the row counter and reset the column
while (ch == '\n')
{
row++;
clumn = 1;
ch = getc(file);
}
// Spaces and tabs: each one counts as a single column
// NOTE(review): if this run of blanks is followed by '\n' or by end of file,
// control still reaches the if/else chain below in the same iteration and
// ends in the final "illegal character" branch. The same happens when the
// newline loop above reads EOF.
while (ch == ' ' || ch == '\t')
{
clumn++;
ch = getc(file);
}
// Starts with a letter
if (isalpha(ch)) {
DUAL[num].lexeme.lexeme_text[0] = ch;
//review
// NOTE(review): here x holds the column and y the row; the single-delimiter
// branch below stores them the other way round.
DUAL[num].x = clumn;
DUAL[num].y = row;
// Move the lexeme index to 1
j = 1;
ch = getc(file);
clumn++;
// TODO (original author): extract this into a validation function and rule
// out the other cases
while (isalpha(ch))
{
DUAL[num].lexeme.lexeme_text[j++] = ch;
ch = getc(file);
clumn++;
// j > 8 means the word is too long; testing for exactly 9 keeps the error
// from being reported more than once
if (j == 9) {
// NOTE(review): source text is missing here. The original line was a
// disabled debug print for "word too long" fused with the fragment
// `65535 || tempnum < 0) {` of a later condition. As a result the statements
// below (number-overflow error, integer token storage) sit inside the
// identifier loop, and the code that terminates the identifier and stores it
// is absent.
errMsg(1013, row, clumn, "数字过大,溢出");
scan_success_flag = 0;
}
else {
DUAL[num].dual_type = 10;// integer type
pascal[pasnum] = DUAL[num];
pasnum++;
// (disabled debug output: matched number tempnum)
}
}
num++;
}
// Can only start with a single-character delimiter; record the position of
// its first character (similar to an indexOf function)
else if (isSingle(ch)) {
DUAL[num].x = row;
DUAL[num].y = clumn++;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = '\0';
// (disabled debug output: matched ch)
switch (ch)
{
case '+':
DUAL[num].dual_type = 101;
break;
case '-':
DUAL[num].dual_type = 102;
break;
case '*':
DUAL[num].dual_type = 103;
break;
// todo: division belongs to the two-character delimiter case and is not
// handled here
case '=':
DUAL[num].dual_type = 106;
break;
case ';':
DUAL[num].dual_type = 113;
break;
case ',':
DUAL[num].dual_type = 117;
break;
case '(':
DUAL[num].dual_type = 118;
break;
case ')':
DUAL[num].dual_type = 119;
break;
default:
// Reached when isSingle() accepted a character that has no case above; the
// token is still copied to pascal[] below.
cout << "isSingle出错:" << ch << endl;
break;
}
pascal[pasnum] = DUAL[num];
pasnum++;
ch = getc(file);
num++;
}
// Starts with a character that may begin a two-character delimiter
else if (isDoubelStar(ch))
{
int isNote = 0; // not a comment by default
// DUAL[num].lexeme.lexeme_text[0] = ch;  (not stored here, because a comment
// must not be stored)
//DUAL[num].lexeme.lexeme_text[1] = '\0';
// One character of lookahead decides between the one- and two-character forms
char next_ch = getc(file);
switch (ch)
{
case '<':
// If the next character is '=', the token is <=
if (next_ch == '=') {
DUAL[num].dual_type = 111;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = next_ch;
DUAL[num].lexeme.lexeme_text[2] = '\0';
ch = getc(file);
//cout << "<=" << endl;
}
// If the next character is '>', the token is <>
else if (next_ch == '>') {
DUAL[num].dual_type = 110;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = next_ch;
DUAL[num].lexeme.lexeme_text[2] = '\0';
ch = getc(file);
//cout << "< >" << endl;
}
else { // otherwise it is a single-character delimiter
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = '\0';
DUAL[num].dual_type = 107;
// Plays the role of getc(file): the lookahead becomes the current character
// for the next pass of the outer while loop
ch = next_ch;
//cout << "<" << endl;
}
break;
case '>':
// If the next character is '=', the token is >=
if (next_ch == '=') {
DUAL[num].dual_type = 112;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = next_ch;
DUAL[num].lexeme.lexeme_text[2] = '\0';
ch = getc(file);
//cout << ">=" << endl;
}
else { // otherwise it is a single-character delimiter
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = '\0';
DUAL[num].dual_type = 105;
ch = next_ch;
//cout << ">" << endl;
}
break;
case ':':
// If the next character is '=', the token is :=
if (next_ch == '=') {
DUAL[num].dual_type = 109;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = next_ch;
DUAL[num].lexeme.lexeme_text[2] = '\0';
ch = getc(file);
//cout << ":=" << endl;
}
else { // otherwise it is an error
// NOTE(review): the lexeme text is not written in this branch, yet the
// record is still copied to pascal[] after the switch.
DUAL[num].dual_type = 108;
ch = next_ch;
errMsg(1014, row, clumn, "期待的 '=' 没有出现,':'之后缺少 '=' ");
scan_success_flag = 0;
}
break;
case '/':
// Single-line comment
if (next_ch == '/') {
// NOTE(review): row is incremented here, but the loop below stops ON the
// '\n', so the outer newline loop increments row again for the same line.
row++;
clumn = 1;
isNote = 1;
// (disabled debug output: single-line comment detected)
ch = getc(file);
// NOTE(review): this loop has no EOF test; a comment on the last line
// without a trailing newline never leaves it.
while (ch != '\n')
{
ch = getc(file);
}
}
// Multi-line comment
else if (next_ch == '*')
{
isNote = 1;
// Characters are examined as a sliding pair (ch1, ch2) until the pair is
// '*' followed by '/'
char ch1 = getc(file);
char ch2 = getc(file);
while (ch1 != '*' || ch2 != '/')
{
// Update the position counters
if (ch1 == '\n') {
row++;
clumn = 1;
}
else
{
clumn++;
}
if (ch2 == '\n') {
row++;
clumn = 1;
}
else
{
clumn++;
}
// Examine the characters
if (ch2 == '*') {
ch1 = ch2;
ch2 = getc(file);
}
// Also covers the case ch1 == '*' and ch2 != '/'
else
{
ch1 = getc(file);
ch2 = getc(file);
}
// Error handling
if (ch1 == EOF || ch2 == EOF)
{
// The expected '/' never appeared, or the input has ended
// (disabled debug output: multi-line comment error)
// NOTE(review): unlike the other error branches, this one does not clear
// scan_success_flag.
errMsg(1015, row, clumn, "没有期待的 '*/' 出现,不合法的注释");
break;
}
}
ch = getc(file);
}
// Every other possibility is ruled out: this is a plain division sign
else
{
DUAL[num].dual_type = 104;
DUAL[num].lexeme.lexeme_text[0] = ch;
DUAL[num].lexeme.lexeme_text[1] = '\0';
// Plays the role of getc(file): the lookahead becomes the current character
// for the next pass of the outer while loop
ch = next_ch;
}
// No break here: control falls through into default, which only breaks.
default:
break;
}
// Comments are skipped; everything else is recorded as a token
if (!isNote) {
pascal[pasnum] = DUAL[num];
pasnum++;
num++;
}
}
// Any other character
else
{
errMsg(1016, row, clumn, "非法字符");
scan_success_flag = 0;
ch = getc(file);
}
}
return scan_success_flag;
}
// Runs the scanner, then prints every accepted token from pascal[] as a
// three-column table: lexeme, numeric type code, mnemonic.
int main() {
    const bool scanned_ok = scaner() != 0;
    if (scanned_ok) {
        cout << "====== 分析成功 ======" << endl;
    }

    cout << endl << "====== 输出扫描合法的词元记录 ======" << endl;
    cout << endl << " 单词 类型 助记符" << endl;

    int i = 0;
    while (i < pasnum) {
        const int code = pascal[i].dual_type;
        // Look the mnemonic up once per row.
        const char * mnemonic = typesign[findSignIndex(code)];

        cout << " " << std::left << setw(12);
        if (code == 10) {
            // Integer tokens keep their digits in lexeme_num.
            cout << toint(pascal[i].lexeme.lexeme_num);
        }
        else {
            cout << pascal[i].lexeme.lexeme_text;
        }
        cout << setw(8) << code << mnemonic << endl;
        ++i;
    }

    // Keeps the console window open (relies on the shell's "pause" command).
    system("pause");
    return 0;
}
测试文件:a.txt
BEGIN
464645545454
VAR a b;
C:=2*Pi*R;
IF A >= 3 THEN A:=(2*6+1)*9
ELSE A:=3*6;
A++;
653
// 666注释
/*
* 多行的
*注释~
*/
/*cdgvvvd
s*/
+
-
*
/
>=
<=
<>
:=
(
)
<
>
#
END
运行结果: