本程序由C/C++编写,运用了自顶向下语法分析的设计方法和实现原理,实现了:LL(1)分析表的构造、LL(1)分析过程、LL(1)分析器 的构造。
程序以专题 1 词法分析程序的输出为语法分析的输入,实现了 LL(1)分析中控制程序(表驱动程序),完成了以下描述赋值语句的 LL(1)文法的 LL(1)分析过程:
G[S]:S→V=E
E→TE′
E′→ATE′|ε
T→FT′
T′→MFT′|ε
F→ (E)|i
A→+|-
M→*|/
V→i
主要应用了C++提供的vector、string、set、map等数据结构,并用C语言实现了txt文件的读写。其中,vector用于遍历符号串和建立分析栈;string用于读写文件中的符号串、或进行符号串的存储和处理;set主要用于存储Vt和Vn的集合;map用于保存First集和Follow集的映射关系。
利用数据结构的组合,程序构造了LL(1)分析表、LL(1)分析器并实现了相应的功能,实现了设计要求中(1)构造该文法的 LL(1)分析表、(2)输入串应是词法分析的输出二元式序列的要求。
不包括main函数,本程序共封装了12个函数实现目标功能:
首先,由Read函数读取LL(1)文法的文件,初始化Vt、Vn等,并把产生式拆分成左部和右部的集合;
而后由getFirst函数获取所有非终结符号的First集,其中每个非终结符号的First集计算由OneFirst函数完成;
再利用getFollow函数获取所有非终结符的Follow集,repeat函数是Follow集计算的辅助函数,用于处理产生式右部非终结符的Follow集合;
parseTable函数用于构造LL(1)的分析表,并将分析表打印到屏幕。LL1analysis函数则实现了具体的分析和匹配过程,同时将每一步的分析过程打印到屏幕。
此外,isVn和isVt分别用于判断符号是不是非终结符和终结符,getVn和getVtPlus分别获取符号在非终结符集合Vn和终结符集合VtPlus(Vt加上结束符#)中的下标。
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
using namespace std;
// Most recent token produced by nextUnit ("i" when the token's type field is "1").
string p;
// Start symbol of the grammar, taken by Read from the first grammar production.
string start;
// Concatenation of every token read by nextUnit; printed by LL1analysis.
string alike;
// LL(1) parse table: a cell holds a production right-hand side, an empty string means "no entry".
string table[100][100];
// Vt plus the end-of-input marker "#" (built at the end of Read).
set<string> VtPlus;
set<string> Vt; // set of terminal symbols
set<string> Vn; // set of non-terminal symbols
set<string> PP; // set of productions (raw grammar lines)
map<string, set<string> > PPspl; // productions split into left side -> set of alternatives
map<string, set<string> > first; // FIRST sets
map<string, set<string> > follow; // FOLLOW sets
// Separator line printed by main once the analysis has finished.
string endLine = "*********************************************************************************************************";
// Forward declarations of the pipeline stages that main calls in this order.
void Read();
void getFirst();
void getFollow();
void parseTable();
void LL1analysis();
// Program entry point: prints the banner, runs the five pipeline stages in
// order (grammar input, FIRST sets, FOLLOW sets, parse table, analysis) and
// finishes with the separator line.
int main() {
    const string frame = "***********************************";
    cout << frame << endl;
    cout << "LL1语法分析器程序" << endl;
    cout << frame << endl;

    typedef void (*Stage)();
    const Stage pipeline[] = { Read, getFirst, getFollow, parseTable, LL1analysis };
    const int stageCount = static_cast<int>(sizeof(pipeline) / sizeof(pipeline[0]));
    for (int k = 0; k < stageCount; ++k) {
        pipeline[k]();
    }

    cout << endLine << endl;
    return 0;
}
void Read() {
string line;
ifstream in("LL1.txt");
if (in) {
getline(in, line);
PP.insert(line);
start = line.substr(0, 1);
follow[line.substr(0, 1)].insert("#"); // 加入起始符的FOLLOW集
while (getline(in, line)) PP.insert(line);
}
int position = 0;
for (set<string>::iterator it = PP.begin(); it != PP.end(); it++) {
string temp = *it;
for (int i = 0; i < temp.length(); i++) {
position = temp.find("->");
string s = temp.substr(0, position);
string ss = temp.substr(position + 2);
set<string>sss;
string t;
for (int j = 0; j < ss.length(); j++) {
if (ss[j] == '|') {
sss.insert(t);
t = "";
}
else t.append(ss.substr(j, 1));
}
sss.insert(t);
PPspl.insert(pair<string, set<string> >(s, sss));
}
}
for (set<string>::iterator it = PP.begin(); it != PP.end(); it++) {
string temp = *it;
for (int i = 0; i < temp.length(); i++) {
if ((temp[i] == '-' && temp[i + 1] == '>') || temp[i] == '|' || temp[i] == '>') continue;
if (temp[i] >= 'A' && temp[i] <= 'Z') {
if (temp[i + 1] == '\'') { // 有'则读入俩作为一个非终结符
Vn.insert(temp.substr(i, 2));
i++;
}
else Vn.insert(temp.substr(i, 1));
}
else Vt.insert(temp.substr(i, 1));
}
}
cout << endl << "非终结符号集合:" << endl;
cout << " ";
for (set<string>::iterator it = Vn.begin(); it != Vn.end(); it++) cout << *it << " ";
cout << endl;
cout << endl << "终结符号集合:" << endl;
cout << " ";
for (set<string>::iterator it = Vt.begin(); it != Vt.end(); it++) cout << *it << " ";
cout << endl;
VtPlus = Vt;
VtPlus.insert("#");
}
// Returns true when s is a member of the non-terminal set Vn.
bool isVn(string s) {
    return Vn.count(s) != 0;
}
// Returns true when s is a member of the terminal set Vt.
bool isVt(string s) {
    return Vt.count(s) != 0;
}
// Returns the zero-based position of s in the iteration order of Vn,
// or -1 when s is not a non-terminal.
// (Previously control fell off the end of the function for an unknown
// symbol, which is undefined behaviour.)
int getVn(string s) {
    int index = 0;
    for (set<string>::const_iterator it = Vn.begin(); it != Vn.end(); ++it, ++index) {
        if (*it == s) return index;
    }
    return -1;
}
// Returns the zero-based position of s in the iteration order of VtPlus
// (the terminals plus "#"), or -1 when s is not contained in it.
// (Previously control fell off the end of the function for an unknown
// symbol, which is undefined behaviour.)
int getVtPlus(string s) {
    int index = 0;
    for (set<string>::const_iterator it = VtPlus.begin(); it != VtPlus.end(); ++it, ++index) {
        if (*it == s) return index;
    }
    return -1;
}
// Computes FIRST(s) for the non-terminal s, stores it in first[s] and returns it.
//
// Every alternative of s is scanned from left to right:
//   - the alternative "^" contributes "^";
//   - a symbol followed by an apostrophe is treated as a two-character
//     non-terminal, any other symbol in Vn as a one-character non-terminal;
//     FIRST of that non-terminal (computed recursively) minus "^" is added;
//   - any other symbol is added as a terminal and ends the scan.
// Scanning continues past a symbol only if its FIRST set contains "^"; when
// the last symbol is reached that way, "^" is added to FIRST(s).
// The recursion has no guard, so a left-recursive grammar never terminates.
// If s has no production, first[s] is created empty and returned.
set<string> OneFirst(string s) {
    if (PPspl.count(s) != 0) {
        const set<string> bodies = PPspl[s];
        set<string>::const_iterator body = bodies.begin();
        while (body != bodies.end()) {
            const string rhs = *body;
            ++body;

            if (rhs == "^") {
                first[s].insert("^");
                continue;
            }

            const int last = static_cast<int>(rhs.length()) - 1;
            bool everySymbolNullable = false;
            int pos = 0;
            while (pos <= last) {
                string symbol;
                bool nonTerminal = true;
                // rhs[pos + 1] is at most rhs[size()], which reads as '\0'.
                if (rhs[pos + 1] == '\'') {
                    symbol = rhs.substr(pos, 2);
                    pos++; // step onto the apostrophe
                }
                else {
                    symbol = rhs.substr(pos, 1);
                    nonTerminal = isVn(symbol);
                }

                bool nullable = false;
                if (nonTerminal) {
                    const set<string> inner = OneFirst(symbol);
                    for (set<string>::const_iterator m = inner.begin(); m != inner.end(); ++m) {
                        if (*m == "^") nullable = true;
                        else first[s].insert(*m);
                    }
                }
                else {
                    first[s].insert(symbol);
                }

                if (!nullable) break;
                if (pos == last) everySymbolNullable = true;
                pos++;
            }
            if (everySymbolNullable) first[s].insert("^");
        }
    }
    return first[s];
}
// Computes the FIRST set of every left-hand side in PPspl via OneFirst and
// then prints all entries of `first`, one non-terminal per line.
void getFirst() {
    map<string, set<string> >::iterator rule = PPspl.begin();
    while (rule != PPspl.end()) {
        OneFirst(rule->first);
        ++rule;
    }

    cout << endl << "对应的first集合" << endl;
    map<string, set<string> >::const_iterator entry = first.begin();
    while (entry != first.end()) {
        cout << " " << entry->first << ":";
        const set<string>& symbols = entry->second;
        for (set<string>::const_iterator sym = symbols.begin(); sym != symbols.end(); ++sym) {
            cout << " " << *sym << " ";
        }
        cout << endl;
        ++entry;
    }
}
void repeat() {
for (map<string, set<string> >::iterator it = PPspl.begin(); it != PPspl.end(); it++) {
string left = it->first;
set<string>right = it->second;
for (set<string>::iterator ii = right.begin(); ii != right.end(); ii++) {
string temp = *ii;
for (int j = temp.length() - 1; j > 0; j--) {
string now;
if (temp[j] == '\'') {
now = temp.substr(j - 1, 2);
j--;
}
else now = temp.substr(j, 1);
if (isVt(now)) break; // 产生式的最后是终结符
else { // 产生式的最后是非终结符
set<string>aa = follow[left];
for (set<string>::iterator pp = aa.begin(); pp != aa.end(); pp++) {
follow[now].insert(*pp);
}
}
if (first[now].find("^") == first[now].end()) break;
}
}
}
}
void getFollow() {
for (map<string, set<string> >::iterator it = PPspl.begin(); it != PPspl.end(); it++) {
string left = it->first;
set<string>right = it->second;
for (set<string>::iterator ii = right.begin(); ii != right.end(); ii++) {
string temp = *ii;
for (int i = 0; i < temp.length(); i++) {
if (isVt(temp.substr(i, 1))) continue;
else if (i + 1 < temp.length() && temp[i + 1] == '\'') {
if (isVt(temp.substr(i + 2, 1))) {
follow[temp.substr(i, 2)].insert(temp.substr(i + 2, 1));
i++;
}
else {//非终结符+非终结符,把非终结符的first集加入follow集
string N;
if (i + 3 < temp.length() && temp[i + 3] == '\'') N = temp.substr(i + 2, 2);
else N = temp.substr(i + 2, 1);
set<string> ff = first[N];
for (set<string>::iterator nn = ff.begin(); nn != ff.end(); nn++) {
if (*nn != "^")
follow[temp.substr(i, 2)].insert(*nn);
}
}
}
else {
if (i + 1 < temp.length() && isVt(temp.substr(i + 1, 1))) {//非终结符+终结符
follow[temp.substr(i, 1)].insert(temp.substr(i + 1, 1));
i++;
}
else {//非终结符+非终结符N,把非终结符N的first集加入follow集中
string N;
if (i + 2 < temp.length() && temp[i + 2] == '\'') N = temp.substr(i + 1, 2);
else N = temp.substr(i + 1, 1);
set<string> ff = first[N];
for (set<string>::iterator nn = ff.begin(); nn != ff.end(); nn++) {
if (*nn != "^")
follow[temp.substr(i, 1)].insert(*nn);
}
}
}
}
}
}
int i = 5;
while (i--) repeat();
cout << endl << "对应的follow集合" << endl;
for (map<string, set<string> >::iterator it = follow.begin(); it != follow.end(); it++) {
cout << " " << it->first << ": ";
for (set<string>::iterator ii = it->second.begin(); ii != it->second.end(); ii++) {
cout << *ii << " ";
}
cout << endl;
}
}
void parseTable() {
int cntNon = 0;
for (map<string, set<string> >::iterator it = PPspl.begin(); it != PPspl.end(); it++, cntNon++) {
for (set<string>::iterator tmp = it->second.begin(); tmp != it->second.end(); tmp++) {
if (isVt((*tmp).substr(0, 1))) {
if (*tmp != "^") {
table[cntNon][getVtPlus((*tmp).substr(0, 1))] = *tmp;
}
if (*tmp == "^") {
// 终结符是空符号,遍历左部Follow集,,对应位置写入空
for (map<string, set<string> >::iterator in = follow.begin(); in != follow.end(); in++) {
if (in->first == it->first) {
for (set<string>::iterator ii = in->second.begin(); ii != in->second.end(); ii++) {
if (isVt(*ii) || (*ii) == "#") table[cntNon][getVtPlus(*ii)] = "^";
}
}
}
}
}
else { //如产生式右部的第一个是非终结符,遍历它的First集更新
for (map<string, set<string> >::iterator in = first.begin(); in != first.end(); in++) {
if (in->first == (*tmp).substr(0, 1)) {
for (set<string>::iterator ii = in->second.begin(); ii != in->second.end(); ii++) {
if (*ii == "^") {//如有空符号,遍历左部的Follow集,对应位置写入空
for (map<string, set<string> >::iterator in = follow.begin(); in != follow.end(); in++) {
if (in->first == it->first) {
for (set<string>::iterator ii = in->second.begin(); ii != in->second.end(); ii++) {
if (isVt(*ii) || (*ii) == "#") { table[cntNon][getVtPlus(*ii)] = "^"; }
}
}
}
}
else table[cntNon][getVtPlus(*ii)] = *tmp;
}
}
}
}
}
}
//打印LL(1)分析表
cout << "---------------------------------LL(1)分析表------------------------------" << endl;
cout << "-----------------------------------------------------------------------------------------" << endl;
cout << "\t|";
for (set<string>::iterator in = VtPlus.begin(); in != VtPlus.end(); in++)
cout << *in << "\t|";
cout << endl;
cout << "-----------------------------------------------------------------------------------------" << endl;
set<string>::iterator it = Vn.begin();
for (int i = 0; i < Vn.size(); i++, it++) {
cout << *it << "\t|";
for (int j = 0; j < VtPlus.size(); j++) {
cout << table[i][j] << "\t|";
}
cout << endl;
cout << "-----------------------------------------------------------------------------------------";
cout << endl;
}
// cout << "******************************************************************************************" << endl;
}
// Reads the next token line from *fp and publishes it through the globals.
//
// A line is parsed as "(type,?value?)": the type is the text between '(' and
// the first ','; the value starts two characters after that comma and ends
// two characters before the last ')', i.e. one delimiter character on each
// side of the value is skipped (presumably quotes - confirm against the lexer
// output format).
// The value is echoed to stdout. `p` is set to the value, or to "i" when the
// type is "1", and `p` is appended to `alike`.
//
// Returns true when a token was read, false at end of file or when fp / *fp
// is NULL. Lines that do not have the expected shape (blank lines, missing
// '(' ',' ')') are skipped; they used to produce an empty token.
// At most 31 characters of a line are read at a time.
bool nextUnit(FILE** fp) {
    if (fp == NULL || *fp == NULL) return false;

    char buf[32];
    while (fgets(buf, sizeof(buf), *fp) != NULL) {
        const string word = buf;
        const string::size_type open = word.find('(');
        const string::size_type comma = word.find(',');
        const string::size_type close = word.find_last_of(')');
        if (open == string::npos || comma == string::npos || close == string::npos) continue;
        if (comma < open || close < comma + 3) continue; // too short to hold a type and a value

        const string typevalue = word.substr(open + 1, comma - open - 1);
        const string value = word.substr(comma + 2, close - comma - 3);
        cout << value;
        p = value;
        if (typevalue == "1") p = "i";
        alike += p;
        return true;
    }
    return false;
}
// Table-driven LL(1) analysis of the token sequence stored in "test1.txt".
//
// The tokens are read with nextUnit, echoed, and pushed in reverse order on
// top of "#" so that the next input symbol is always at the back of `left`.
// The analysis stack starts as "#" followed by the start symbol. Each step
// prints the stack, the remaining input and the action taken:
//   - identical top symbols are matched and removed ("#" on both ends the
//     analysis successfully);
//   - otherwise the stack top must be a non-terminal with a table entry for
//     the current input symbol; it is replaced by the entry's symbols pushed
//     in reverse order ("^" pushes nothing);
//   - anything else is reported as a failure.
//
// Fixes compared with the previous version:
//   - the input file is opened read-only, the result of fopen is checked and
//     the file is closed again;
//   - the table row is looked up with the complete stack symbol; a primed
//     symbol such as E' was first looked up in the row of E, which could
//     expand it with the wrong production and push it twice;
//   - an unprimed non-terminal with a "^" entry now prints its step as well;
//   - a primed symbol inside a production is pushed as exactly two characters;
//   - negative or out-of-range table indexes are treated as "no entry".
void LL1analysis() {
    vector<string> myStack; // analysis stack, top at the back
    vector<string> left;    // remaining input, next symbol at the back
    vector<string> temp;    // tokens in reading order

    FILE* fp = fopen("test1.txt", "r");
    if (fp == NULL) {
        cerr << "无法打开输入文件 test1.txt" << endl;
        return;
    }

    left.push_back("#");
    cout << endl;
    cout << "表达式:";
    while (nextUnit(&fp)) {
        temp.push_back(p);
    }
    fclose(fp);
    cout << endl;
    cout << "可以抽象为:" << alike << endl;
    cout << endl;

    // Push the tokens in reverse so the first token ends up on top.
    for (vector<string>::reverse_iterator it = temp.rbegin(); it != temp.rend(); ++it)
        left.push_back(*it);

    cout << "---------------------------------------------------------------------------------------------" << endl;
    cout << "|" << setw(20) << "分析栈" << "|" << setw(20) << "剩余输入串" << "|" << setw(25) << "对应产生式 |" << endl;
    cout << "---------------------------------------------------------------------------------------------" << endl;

    myStack.push_back("#");
    myStack.push_back(start);

    const int rowLimit = static_cast<int>(sizeof(table) / sizeof(table[0]));
    const int colLimit = static_cast<int>(sizeof(table[0]) / sizeof(table[0][0]));

    while (!myStack.empty() && !left.empty()) {
        string outputs;
        for (vector<string>::size_type i = 0; i < myStack.size(); i++) outputs += myStack[i];
        cout << "|" << setw(20) << outputs;
        outputs = "";
        for (int i = static_cast<int>(left.size()) - 1; i >= 0; i--) outputs += left[i];
        cout << "|" << setw(20) << outputs;

        const string f1 = myStack.back();
        const string f2 = left.back();

        if (f1 == f2) {
            if (f1 == "#") { // stack and input are both exhausted
                cout << "|" << setw(25) << "全部匹配成功!" << endl;
                return;
            }
            myStack.pop_back();
            left.pop_back();
            cout << "|" << setw(15) << "'" << f1 << "'" << "匹配成功" << endl;
            continue;
        }

        // Only a symbol that owns productions can be expanded.
        string production;
        if (PPspl.count(f1) > 0) {
            const int row = getVn(f1);
            const int col = getVtPlus(f2);
            if (row >= 0 && row < rowLimit && col >= 0 && col < colLimit) production = table[row][col];
        }
        if (production.empty()) {
            cout << "|" << setw(20) << "匹配失败" << endl;
            return;
        }

        myStack.pop_back();
        if (production != "^") {
            // Push right to left so the leftmost symbol ends up on top.
            for (int k = static_cast<int>(production.length()) - 1; k >= 0; k--) {
                if (production[k] == '\'' && k > 0) {
                    myStack.push_back(production.substr(k - 1, 2));
                    k--;
                }
                else myStack.push_back(production.substr(k, 1));
            }
        }
        cout << "|" << setw(20) << f1 << "->" << production << endl;
    }
}