对一个简单语言的子集编制一个一遍扫描的词法分析程序。
(1)理解词法分析在编译程序中的作用
(2)加深对有穷自动机模型的理解
(3)掌握词法分析程序的实现方法和技术
(1)待分析的简单语言的词法
关键字
begin if then while do end
运算符和界符
:= + - * / < <= > >= <> = ; ( ) #
其他单词是标识符(ID)和整型常数(NUM),通过以下正规式定义:
ID=letter(letter|digit)*
NUM=digitdigit*
空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字,词法分析阶段通常被忽略。
(2)各种单词符号对应的种别编码
(3)词法分析程序的功能
输入:所给文法的源程序字符串
输出:二元组(syn,token或sum)构成的序列。
syn为单词种别码;
token为存放的单词自身字符串;
sum为整型常数。
例如:对源程序begin x:=9;if x>0 then x:=2*x+1/3;end# 经词法分析后输出如下序列:(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if)……
#include
#include
#include
/* Sentinel string stored as the last entry of the keyword table; the
 * keyword lookup stops when it reaches an entry equal to this string. */
#define _KEY_WORD_END "waiting fou your expanding"/* end-of-keyword-table marker */
using namespace std;
/*
 * One token produced by the lexical scanner.
 *
 * `word` is a non-owning pointer: the scanner points it either at a string
 * literal or at the shared token buffer. It is therefore declared
 * `const char *`, because storing a string literal in a plain `char *` is
 * not valid C++ since C++11.
 */
struct WORD
{
    int typenum;      /* kind code of the token */
    const char *word; /* text of the token (never written through) */
};
char input[255];      /* source-program character buffer */
char token[255] = ""; /* text of the token currently being assembled */
int p_input;          /* index of the next unread character in input */
int p_token;          /* index of the next free slot in token */
char ch;              /* most recently read source character */
/* Keyword table, terminated by the _KEY_WORD_END sentinel. The entries are
 * string literals, so the element type must be `const char *`. */
const char *rwtab[] = { "begin","if","then","while","do","end",_KEY_WORD_END };
WORD *scaner();       /* lexical scanner: returns the next token */
void main()
{
int over = 1;
int count = 0;
WORD *oneword = new WORD;
printf("Enter Your words(end with #):");
scanf("%[^#]s", input); /*输入源程序字符串到缓冲区,以#结束*/
p_input = 0;
printf("词法分析结果是:\n\n");
while (over < 1000 && over != -1)
{
oneword = scaner();
if (oneword->word == "OVER")
break;
else if (oneword->typenum < 1000)
printf("(%d,%s) ", oneword->typenum, oneword->word);
over = oneword->typenum;
count++;
if (count % 6 == 0) printf("\n");//每六行输出
}
system("pause");
}
/* Loads the character under the input cursor into the global ch, moves the
 * cursor one position forward, and returns that character. */
char m_getch()
{
    ch = input[p_input++];
    return ch;
}
/*
 * Skips whitespace: while ch is a blank, tab, line feed or carriage return,
 * replaces it with the next input character. On return ch holds the first
 * non-whitespace character.
 */
void getbc()
{
    while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
    {
        ch = input[p_input];
        p_input = p_input + 1;
    }
}
/* Appends ch to the token buffer and keeps the buffer NUL-terminated. */
void concat()
{
    token[p_token++] = ch;
    token[p_token] = '\0';
}
/* Returns 1 when ch is an ASCII letter (a-z or A-Z), otherwise 0. */
int letter()
{
    const bool lower = (ch >= 'a' && ch <= 'z');
    const bool upper = (ch >= 'A' && ch <= 'Z');
    return (lower || upper) ? 1 : 0;
}
/* Returns 1 when ch is a decimal digit ('0'-'9'), otherwise 0. */
int digit()
{
    return ('0' <= ch && ch <= '9') ? 1 : 0;
}
/*
 * Looks the current token up in the keyword table.
 * Returns the 1-based position of the matching keyword, or 10 when the
 * token is not a keyword.
 */
int reserve()
{
    for (int idx = 0; strcmp(rwtab[idx], _KEY_WORD_END) != 0; ++idx)
    {
        if (strcmp(rwtab[idx], token) == 0)
            return idx + 1;
    }
    return 10;
}
/* Moves the input cursor back by one character. */
void retract()
{
    p_input -= 1;
}
/* Placeholder that always returns a null pointer.
 * NOTE(review): the intended purpose is not visible in this file. */
char *dtb()
{
    char *nothing = NULL;
    return nothing;
}
/*
 * Stores a kind code and a token text in *w and returns w.
 * The cast lets read-only literals be stored whether WORD::word is declared
 * `char *` or `const char *`; nothing writes through the pointer.
 */
static WORD *set_word(WORD *w, int typenum, const char *text)
{
    w->typenum = typenum;
    w->word = const_cast<char *>(text);
    return w;
}

/*
 * Lexical scanner: skips whitespace, recognises one token starting at the
 * input cursor, and leaves the cursor just after that token.
 *
 * Returns a WORD allocated with new; the caller owns it and should delete
 * it. For identifiers, keywords and numbers, `word` points at the shared
 * token buffer, which the next call overwrites.
 *
 * Kind codes: keywords 1..6 (position in the keyword table), identifier 10,
 * number 11, end of input 100, unrecognised character -1. Operator and
 * delimiter codes are listed in the switch below.
 */
WORD *scaner()
{
    WORD *myword = new WORD;
    p_token = 0;
    m_getch();
    getbc();

    /* identifier or keyword: letter (letter | digit)* */
    if (letter())
    {
        while (letter() || digit())
        {
            concat();
            m_getch();
        }
        retract(); /* give back the character that ended the token */
        return set_word(myword, reserve(), token);
    }

    /* integer constant: digit digit* */
    if (digit())
    {
        while (digit())
        {
            concat();
            m_getch();
        }
        retract();
        return set_word(myword, 11, token);
    }

    switch (ch)
    {
    /* single-character operators and delimiters */
    case '+': return set_word(myword, 13, "+");
    case '-': return set_word(myword, 14, "-");
    case '*': return set_word(myword, 15, "*");
    case '/': return set_word(myword, 16, "/");
    case ';': return set_word(myword, 26, ";");
    case '(': return set_word(myword, 27, "(");
    case ')': return set_word(myword, 28, ")");
    case '[': return set_word(myword, 30, "[");
    case ']': return set_word(myword, 31, "]");
    case '{': return set_word(myword, 32, "{");
    case '}': return set_word(myword, 33, "}");
    case ',': return set_word(myword, 34, ",");

    /* operators that need one character of lookahead */
    case ':':
        m_getch();
        if (ch == '=')
            return set_word(myword, 18, ":="); /* the '=' is consumed as part of ":=" */
        retract();
        return set_word(myword, 17, ":");
    case '=':
        m_getch();
        if (ch == '=')
            return set_word(myword, 29, "==");
        retract();
        return set_word(myword, 25, "=");
    case '>':
        m_getch();
        if (ch == '=')
            return set_word(myword, 24, ">=");
        retract();
        return set_word(myword, 23, ">");
    case '<':
        m_getch();
        if (ch == '=')
            return set_word(myword, 22, "<=");
        /* NOTE(review): 21 is the code left unused between "<" (20) and
         * "<=" (22); confirm against the kind-code table. */
        if (ch == '>')
            return set_word(myword, 21, "<>");
        retract();
        return set_word(myword, 20, "<");
    case '!':
        m_getch();
        if (ch == '=')
            return set_word(myword, 40, "!=");
        retract(); /* a lone '!' is not a token */
        return set_word(myword, -1, "ERROR");

    case '\0': /* end of the input buffer */
        return set_word(myword, 100, "OVER");
    default:
        return set_word(myword, -1, "ERROR");
    }
}
如果遇到const char转char*的错误可以点击
如果遇到scanf不安全的问题可以点击
运行结果:
题目3:把NFA确定化为DFA的算法实现
设计内容及要求:构造一程序,实现:将给定的NFA M(其状态转换矩阵及初态、终态信息保存在指定文件中)确定化为DFA M,输出DFA M(其状态转换矩阵及初态、终态信息保存在指定文件中)。
#include
#include
// Capacity used for fixed-size arrays in this program.
#define MAXS 100
using namespace std;
string NODE;// set of node names
string CHANGE;// set of input (terminal) symbols
int N;// number of NFA edges
// One NFA transition: start node, edge label ("*" stands for an
// epsilon move) and end node.
struct edge
{
    string first, change, last;
};
// One row of a state-transition table.
struct chan
{
    // State label of this row (a set of node names, or a single letter once renamed).
    string ltab;
    // Transition target per input symbol, indexed like CHANGE.
    string jihe[MAXS];
};
// Writes `a` blanks to standard output; writes nothing when a <= 0.
void kong(int a)
{
    while (a-- > 0)
        cout << ' ';
}
// Sorts the characters of `a` in place by their position in NODE (bubble
// sort), so that two strings holding the same set of nodes compare equal.
void paixu(string &a)
{
    const string::size_type n = a.length();
    for (string::size_type pass = 0; pass < n; pass++)
    {
        // Stop one short of the end: the pair (i, i + 1) must lie inside the string.
        for (string::size_type i = 0; i + 1 < n; i++)
        {
            if (NODE.find(a[i]) > NODE.find(a[i + 1]))
            {
                const char tmp = a[i];
                a[i] = a[i + 1];
                a[i + 1] = tmp;
            }
        }
    }
}
// Depth-first helper for eclouse(): follows every epsilon edge ("*") that
// leaves node `c`, appends each target not yet in `he`, and recurses into
// each target once. `visited` records the nodes already expanded.
static void eclouse_visit(char c, string &he, edge b[], string &visited)
{
    visited += c;
    for (int k = 0; k < N; k++)
    {
        if (c != b[k].first[0] || b[k].change != "*")
            continue;
        if (he.find(b[k].last) == string::npos)
            he += b[k].last;
        if (visited.find(b[k].last[0]) == string::npos)
            eclouse_visit(b[k].last[0], he, b, visited);
    }
}

// Adds to `he` every node reachable from node `c` through epsilon edges
// (edges labelled "*") among the N edges in b. `c` itself is not added.
// Nodes are appended in the order a depth-first walk meets them. Each node
// is expanded only once, so the walk also terminates on epsilon cycles.
void eclouse(char c, string &he, edge b[])
{
    string visited;
    eclouse_visit(c, he, b, visited);
}
// Fills he.jihe[m] with the nodes reached on input symbol CHANGE[m]:
// first from every node of he.ltab, then from every node that was already
// in he.jihe[m] when the function was entered. Duplicates are not added.
void move(chan &he, int m, edge b[])
{
    string &targets = he.jihe[m];
    const char symbol = CHANGE[m];
    const int sourceCount = he.ltab.length();
    const int seededCount = targets.length(); // measured before anything is appended

    for (int s = 0; s < sourceCount; ++s)
    {
        for (int e = 0; e < N; ++e)
        {
            const edge &arc = b[e];
            if (symbol != arc.change[0] || he.ltab[s] != arc.first[0])
                continue;
            if (targets.find(arc.last[0]) == string::npos)
                targets += arc.last[0];
        }
    }

    for (int s = 0; s < seededCount; ++s)
    {
        for (int e = 0; e < N; ++e)
        {
            const edge &arc = b[e];
            if (symbol != arc.change[0] || targets[s] != arc.first[0])
                continue;
            if (targets.find(arc.last[0]) == string::npos)
                targets += arc.last[0];
        }
    }
}
//输出
void outputfa(int len, int h, chan *t)
{
int i, j, m;
cout << " I ";
for (i = 0; i < len; i++)
cout << 'I' << CHANGE[i] << " ";
cout << endl << "-------------------------" << endl;
for (i = 0; i < h; i++)
{
cout << ' ' << t[i].ltab;
m = t[i].ltab.length();
for (j = 0; j < len; j++)
{
kong(8 - m);
m = t[i].jihe[j].length();
cout << t[i].jihe[j];
}
cout << endl;
}
}
void main()
{
edge *b = new edge[MAXS];
int i, j, k, m, n, h, x, y, len;
bool flag;
string jh[MAXS], endnode, ednode, sta;
cout << "请输入NFA各边信息(起点 条件[空为*] 终点),以#结束:" << endl;
for (i = 0; i < MAXS; i++)
{
cin >> b[i].first;
if (b[i].first == "#")
break;
cin >> b[i].change >> b[i].last;
}
N = i;
/*for(j=0;j
for (i = 0; i < N; i++)
{
if (NODE.find(b[i].first) > NODE.length())
NODE += b[i].first;
if (NODE.find(b[i].last) > NODE.length())
NODE += b[i].last;
if ((CHANGE.find(b[i].change) > CHANGE.length()) && (b[i].change != "*"))
CHANGE += b[i].change;
}
len = CHANGE.length();
cout << "结点中属于终态的是:" << endl;
cin >> endnode;
for (i = 0; i < endnode.length(); i++)
if (NODE.find(endnode[i]) > NODE.length())
{
cout << "所输终态不在集合中,错误!" << endl;
return;
}
//cout<<"endnode="<
chan *t = new chan[MAXS];
t[0].ltab = b[0].first;
h = 1;
eclouse(b[0].first[0], t[0].ltab, b);//求e-clouse
//cout<
for (i = 0; i < h; i++)
{
for (j = 0; j < t[i].ltab.length(); j++)
for (m = 0; m < len; m++)
eclouse(t[i].ltab[j], t[i].jihe[m], b);//求e-clouse
for (k = 0; k < len; k++)
{
//cout<";
move(t[i], k, b);//求move(I,a)
//cout<
for (j = 0; j < t[i].jihe[k].length(); j++)
eclouse(t[i].jihe[k][j], t[i].jihe[k], b);//求e-clouse
}
for (j = 0; j < len; j++)
{
paixu(t[i].jihe[j]);//对集合排序以便比较
for (k = 0; k < h; k++)
{
flag = operator==(t[k].ltab, t[i].jihe[j]);
if (flag)
break;
}
if (!flag&&t[i].jihe[j].length())
t[h++].ltab = t[i].jihe[j];
}
}
cout << endl << "状态转换矩阵如下:" << endl;
outputfa(len, h, t);//输出状态转换矩阵
//状态重新命名
string *d = new string[h];
NODE.erase();
cout << endl << "重命名:" << endl;
for (i = 0; i < h; i++)
{
sta = t[i].ltab;
t[i].ltab.erase();
t[i].ltab = 'A' + i;
NODE += t[i].ltab;
cout << '{' << sta << "}=" << t[i].ltab << endl;
for (j = 0; j < endnode.length(); j++) {
if (sta.find(endnode[j]) < sta.length())
d[1] = ednode += t[i].ltab;
}
for (k = 0; k < h; k++) {
for (m = 0; m < len; m++) {
if (sta == t[k].jihe[m])
t[k].jihe[m] = t[i].ltab;
}
}
}
for (i = 0; i < NODE.length(); i++) {
if (ednode.find(NODE[i]) > ednode.length())
d[0] += NODE[i];
}
endnode = ednode;
cout << endl << "DFA如下:" << endl;
outputfa(len, h, t);
//输出DFA
cout << "其中终态为:" << endnode << endl;//DFA最小化
m = 2;
sta.erase();
flag = 0;
for (i = 0; i < m; i++)
{
//cout<<"d["<
for (k = 0; k < len; k++)
{
//cout<<"I"<
y = m;
for (j = 0; j < d[i].length(); j++)
{
for (n = 0; n < y; n++)
{
if (d[n].find(t[NODE.find(d[i][j])].jihe[k]) < d[n].length() || t[NODE.find(d[i][j])].jihe[k].length() == 0)
{
if (t[NODE.find(d[i][j])].jihe[k].length() == 0)
x = m;
else
x = n;
if (!sta.length())
{
sta += x + 48;
}
else
if (sta[0] != x + 48)
{
d[m] += d[i][j];
flag = 1;
d[i].erase(j, 1);
//cout<
j--;
}
break;
//跳出n
}
}//n
}//j
if (flag)
{
m++; flag = 0;
}
//cout<<"sta="<
sta.erase();
}//k
}//i
cout << endl << "集合划分:";
for (i = 0; i < m; i++)
cout << "{" << d[i] << "}";
cout << endl;
//状态重新命名
chan *md = new chan[m];
NODE.erase();
cout << endl << "重命名:" << endl;
for (i = 0; i < m; i++)
{
md[i].ltab = 'A' + i;
NODE += md[i].ltab;
cout << "{" << d[i] << "}=" << md[i].ltab << endl;
}
for (i = 0; i < m; i++)
for (k = 0; k < len; k++)
for (j = 0; j < h; j++)
{
if (d[i][0] == t[j].ltab[0])
{
for (n = 0; n < m; n++)
{
if (!t[j].jihe[k].length())
break;
else if (d[n].find(t[j].jihe[k]) < d[n].length())
{
md[i].jihe[k] = md[n].ltab;
break;
}
}
break;
}
}
ednode.erase();
for (i = 0; i < m; i++)
for (j = 0; j < endnode.length(); j++)
if (d[i].find(endnode[j]) < d[i].length() && ednode.find(md[i].ltab))
ednode += md[i].ltab;
endnode = ednode;
cout << endl << "最小化DFA如下:" << endl;
outputfa(len, m, md);
cout << "其中终态为:" << endnode << endl;
system("pause");
}