1.实验要求
(1)从源程序文件中读取有效字符流并将其分析识别单词符号,转换成二元组内部表示形式输出。
(2)可视化方式展示词法分析识别过程或者词法分析器工作原理(选做)。
(3)实验时间4学时。
(4)实验完成后,要提交实验报告(包括源程序清单)。
2.实验内容
2.1主程序设计考虑:
主程序的说明部分为各种表格和变量安排空间(关键字和特殊符号表)。
id 和ci 数组分别存放标识符和常数;还有一些为造表填表设置的变量。
主程序的工作部分建议设计成便于调试的循环结构。每个循环处理一个单词;调用词法分析过程;输出每个单词的内部码(种别编码,属性值)。建议从文件中读取要分析的符号串。
2.2词法分析过程考虑
该过程根据输入单词的第一个有效字符(有时还需读第二个字符),判断单词种别,产生种别编码。对于标识符和常数,需分别与标识符表和常数表中已登记的元素相比较,如表中已有该元素,则记录其在表中的位置;如未出现过,则将标识符按顺序填入数组 id 中,将常数存入数组 ci 中,并记录其在表中的位置。
注:所有识别出的单词都用二元组表示。第一个表示单词的种别编码。例如:关键字的 t=1;标识符的 t=2;常数 t=3;运算符 t=4;界符 t=5。第二个为该单词在各自表中的指针或内部码值(常数表和标识符表是在编译过程中建立起来的。其 i 值是根据它们在源程序中出现的顺序确定的)。
关键字和特殊符号如下(表中数字只是标记,不代表种别编码,种别编码自己定义):
将词法分析程序设计成独立一遍扫描源程序的结构。参考设计流程图如下:
图1 词法分析程序流程图
// Exper1.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Reserved words. Analysis reports a match as category t=1 together with the
// 1-based position in this table. "void" appears twice; a lookup with Judge
// returns the first occurrence, so the last slot is never reported.
static string KeyWord[15] = {
    "int", "char", "float", "void", "if",
    "else", "do", "const", "while", "scanf",
    "printf", "return", "main", "read", "void"
};

// Operators. Analysis reports a match as category t=2.
static string Operator[23] = {
    "+", "-", "*", "/", "%",
    "=", "==", ">", "<", "!=", ">=", "<=",
    "&&", "||", "!", "<>",
    "++", "--",
    "/=", "*=", "+=", "-=", "%="
};

// Delimiters. Analysis reports a match as category t=3.
static string Boundary[9] = {
    "(", ")", "{", "}", ";", ",", "\"", "[", "]"
};

// Identifier table, filled in order of first appearance (category t=4).
static string Id[1000];
// Constant table, filled in order of first appearance (category t=5).
static string Const[1000];

// Number of slots currently used in Id and Const respectively.
static int Id_num = 0;
static int Const_num = 0;
// Appends a single blank to `out` unless `out` is empty or already ends with
// one, so any amount of skipped text leaves at most one separator behind.
static void AppendSeparator(string& out) {
    if (!out.empty() && out[out.length() - 1] != ' ') {
        out += ' ';
    }
}

// Preprocessing pass: strips comments and normalises whitespace.
//
// str  - the source lines (without line terminators)
// Len  - number of entries in str
// Str  - output; the cleaned text is appended to whatever it already holds
//
// Rules applied:
//  * "//" discards the rest of the line.
//  * "/*" ... "*/" is discarded and may span lines. Nested openers are
//    counted, so an inner "/*" needs its own "*/" (kept from the original
//    design; this differs from standard C, where comments do not nest).
//  * Every run of blanks, tabs, CR/LF/VT characters, every line break and
//    every removed comment collapses to one blank, so that tokens which were
//    separated in the source can never be glued together in the output.
void Pretreatment(string str[], int Len, string& Str) { // preprocessing
    int depth = 0; // nesting depth of the block comment currently open
    for (int i = 0; i < Len; i++) {
        const string& line = str[i];
        const string::size_type len = line.length();
        for (string::size_type j = 0; j < len; j++) {
            const char cur = line[j];
            // Explicit bounds check instead of indexing one past the end.
            const char next = (j + 1 < len) ? line[j + 1] : '\0';

            if (depth != 0) { // inside a block comment: only look for delimiters
                if (cur == '/' && next == '*') {
                    depth++;
                    j++; // consume the '*' so "/*/" is not also read as "*/"
                }
                else if (cur == '*' && next == '/') {
                    depth--;
                    j++;
                    if (depth == 0) {
                        AppendSeparator(Str); // a comment separates tokens
                    }
                }
                continue;
            }

            if (cur == '/' && next == '/') {
                break; // line comment: ignore the remainder of this line
            }
            if (cur == '/' && next == '*') {
                depth++;
                j++; // consume the '*' of the opener
                continue;
            }
            if (cur == ' ' || cur == '\t' || cur == '\n' || cur == '\r' || cur == '\v') {
                AppendSeparator(Str);
                continue;
            }
            Str += cur; // ordinary character, including a lone '/' operator
        }
        if (depth == 0) {
            AppendSeparator(Str); // the line break itself separates tokens
        }
    }
}
// Returns true when c is an ASCII letter (either case) or an underscore,
// i.e. a character that may start an identifier.
bool IsLetter(char c) {
    const bool lower = ('a' <= c && c <= 'z');
    const bool upper = ('A' <= c && c <= 'Z');
    return lower || upper || c == '_';
}
// Returns true when c is one of the ASCII decimal digits '0'..'9'.
bool IsNumber(char c) {
    return !(c < '0' || c > '9');
}
/*复杂写法
int IsKeyword(string s) {//判断是否为关键字
for (int i = 0; i < 15; i++) {
if (s==KeyWord[i]) {
return i+1;
break;
}
}
return 0;
}
int IsId(string s) {//判断标识符中是否已经存在
for (int i = 0; i < Id_num; i++) {
if (s == Id[i]) {
return i + 1;
}
}
return 0;
}
int IsConst(string s) {//判断该常数是否已在常数表中
for (int i = 0; i < Const_num; i++) {
if (s == Const[i]) {
return i + 1;
break;
}
}
return 0;
}
int IsBoundary(string s) {//判断是否为界符
for (int i = 0; i < 9; i++) {
if (s == Boundary[i]) {
return i + 1;
}
}
return 0;
}
int IsOperator(string s) {//判断是否为运算符
for (int i = 0; i < 23; i++) {
if (s == Operator[i]) {
return i + 1;
}
}
return 0;
}
*/
// Linear lookup of s in the first len entries of the table str.
//
// s    - the word to look for (taken by const reference: no copy per call)
// str  - the table to search; never modified, so const tables are accepted
// len  - number of entries to examine; zero or negative examines nothing
//
// Returns the 1-based position of the first matching entry, or -1 when no
// entry matches.
int Judge(const string& s, const string str[], int len) {
    for (int i = 0; i < len; i++) {
        if (s == str[i]) {
            return i + 1;
        }
    }
    return -1;
}
// Writes one classified token as "<word>\t<kind>\t<category>\t内部编码<code>",
// first to the result file and then to the console.
static void EmitToken(ostream& file, const string& word, const char* kind,
                      const char* category, int code) {
    ostream* sinks[2] = { &file, &cout };
    for (int k = 0; k < 2; k++) {
        *sinks[k] << word << "\t" << kind << "\t" << category << "\t"
                  << "内部编码" << code << endl;
    }
}

// Reports text that matches neither a delimiter nor an operator.
static void EmitIllegal(ostream& file, const string& word) {
    file << word << "\t非法符号" << endl;
    cout << word << "\t非法符号" << endl;
}

// Returns the 1-based position of word in table, appending it first when it
// is not present yet. Returns 0 (and stores nothing) when the table is full.
static int Intern(const string& word, string table[], int& count, int capacity) {
    const int found = Judge(word, table, count);
    if (found != -1) {
        return found;
    }
    if (count >= capacity) {
        return 0;
    }
    table[count++] = word;
    return count;
}

// Lexical analysis of the preprocessed text str.
//
// Each token is printed to the console and to "Out.txt" with its category:
//   keyword t=1, operator t=2, delimiter t=3, identifier t=4, constant t=5.
// The code printed after "内部编码" is the 1-based position in the matching
// table; identifiers and constants are added to Id / Const on first sight
// (code 0 means the table was full and the word was not stored).
// After the token list, the contents of Id and Const are printed.
//
// Scanning rules:
//  * letter or '_' starts a word made of letters, '_' and digits;
//  * a digit starts a run of digits;
//  * a single delimiter character is a token on its own;
//  * any other run of characters (up to the next blank, letter, digit or
//    delimiter) is split greedily into the longest operators from Operator;
//    characters that start no operator are reported as illegal.
// A token that reaches the end of the input is terminated there.
void Analysis(string str) { // token-by-token analysis
    ofstream outfile("Out.txt");
    if (!outfile) {
        // Console output is still produced; only the file copy is lost.
        cerr << "error: cannot open Out.txt" << endl;
    }

    const int Len = static_cast<int>(str.length());
    const int keywordCount = static_cast<int>(sizeof(KeyWord) / sizeof(KeyWord[0]));
    const int operatorCount = static_cast<int>(sizeof(Operator) / sizeof(Operator[0]));
    const int boundaryCount = static_cast<int>(sizeof(Boundary) / sizeof(Boundary[0]));
    const int idCapacity = static_cast<int>(sizeof(Id) / sizeof(Id[0]));
    const int constCapacity = static_cast<int>(sizeof(Const) / sizeof(Const[0]));

    // Longest operator spelling; bounds the greedy match below.
    int maxOpLen = 0;
    for (int k = 0; k < operatorCount; k++) {
        const int width = static_cast<int>(Operator[k].length());
        if (width > maxOpLen) {
            maxOpLen = width;
        }
    }

    int i = 0;
    while (i < Len) {
        const char first = str[i];

        if (first == ' ') { // separators carry no token
            i++;
            continue;
        }

        if (IsLetter(first)) { // keyword or identifier
            int end = i + 1;
            while (end < Len && (IsLetter(str[end]) || IsNumber(str[end]))) {
                end++;
            }
            const string word = str.substr(i, end - i);
            const int keyword = Judge(word, KeyWord, keywordCount);
            if (keyword != -1) {
                EmitToken(outfile, word, "关键字", "t=1", keyword);
            }
            else {
                EmitToken(outfile, word, "标识符", "t=4", Intern(word, Id, Id_num, idCapacity));
            }
            i = end;
            continue;
        }

        if (IsNumber(first)) { // unsigned integer constant
            int end = i + 1;
            while (end < Len && IsNumber(str[end])) {
                end++;
            }
            const string word = str.substr(i, end - i);
            EmitToken(outfile, word, "常数", "t=5", Intern(word, Const, Const_num, constCapacity));
            i = end;
            continue;
        }

        const string single(1, first);
        const int boundary = Judge(single, Boundary, boundaryCount);
        if (boundary != -1) { // delimiters are always one character
            EmitToken(outfile, single, "界符", "t=3", boundary);
            i++;
            continue;
        }

        // Collect the run of symbol characters starting here.
        int end = i + 1;
        while (end < Len) {
            const char ch = str[end];
            if (ch == ' ' || IsLetter(ch) || IsNumber(ch)
                || Judge(string(1, ch), Boundary, boundaryCount) != -1) {
                break;
            }
            end++;
        }

        // Split the run into operators, longest match first.
        string illegal; // consecutive characters that start no operator
        int pos = i;
        while (pos < end) {
            int width = end - pos;
            if (width > maxOpLen) {
                width = maxOpLen;
            }
            int code = -1;
            for (; width >= 1; width--) {
                code = Judge(str.substr(pos, width), Operator, operatorCount);
                if (code != -1) {
                    break;
                }
            }
            if (code == -1) {
                illegal += str[pos];
                pos++;
                continue;
            }
            if (!illegal.empty()) { // flush pending garbage before the operator
                EmitIllegal(outfile, illegal);
                illegal.clear();
            }
            EmitToken(outfile, str.substr(pos, width), "运算符", "t=2", code);
            pos += width;
        }
        if (!illegal.empty()) {
            EmitIllegal(outfile, illegal);
        }
        i = end;
    }

    cout << endl << endl << "新加入的标识符:";
    outfile << endl << endl << "新加入的标识符:";
    for (int k = 0; k < Id_num; k++) {
        cout << Id[k] << " ";
        outfile << Id[k] << " ";
    }
    cout << endl << endl << "新加入的常数:";
    outfile << endl << endl << "新加入的常数:";
    for (int k = 0; k < Const_num; k++) {
        cout << Const[k] << " ";
        outfile << Const[k] << " ";
    }
    outfile.close();
}
// Entry point: reads "Source.txt" line by line, echoes it, runs the
// preprocessing pass, echoes the cleaned text and then runs the lexical
// analysis (which writes "Out.txt").
// Returns 1 after printing "error" when Source.txt cannot be opened.
int main()
{
    ifstream infile("Source.txt");
    if (!infile) {
        cout << "error" << endl;
        return 1; // nothing to analyse
    }

    // Growable storage: the input may have any number of lines.
    vector<string> BefSource;
    string line;
    while (getline(infile, line)) { // stops cleanly at EOF, no phantom last line
        BefSource.push_back(line);
    }
    infile.close();

    const int Len = static_cast<int>(BefSource.size());
    cout << "从文件中读取到的程序:" << endl;
    for (int i = 0; i < Len; i++) {
        cout << BefSource[i] << endl;
    }
    cout << endl << endl;

    // Pretreatment takes a plain array; an empty file passes a null pointer
    // together with Len == 0, so nothing is dereferenced.
    string* lines = 0;
    if (!BefSource.empty()) {
        lines = &BefSource[0];
    }
    string AftSource;
    Pretreatment(lines, Len, AftSource);
    cout << endl << "预处理后的程序:" << endl;
    cout << AftSource << endl << endl << endl;
    Analysis(AftSource);
}
这里的 Out.txt 是输出结果的文件,运行前可以为空文件;Source.txt 中需要存入待分析的源代码。