编制一个读单词过程,从输入的源程序中,识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。并依次输出各个单词的内部编码及单词符号自身值。
如源程序为C语言。输入如下一段:
main()
{
int a=-5,b=4,j;
if(a>=b)
j=a-b;
else
j=b-a;
}
要求输出如下
(2,”main”) (5,”(”) (5,”)”)
(5,”{”) (1,”int”) (2,”a”)
(4,”=”) (3,”-5”) (5,”,”)
(2,”b”) (4,”=”) (3,”4”)
(5,”,”) (2,”j”) (5,”;”)
(1,”if”) (5,”(”) (2,”a”)
(4,”>=”) (2,”b”) (5,”)”)
(2,”j”) (4,”=”) (2,”a”)
(4,”-”) (2,”b”) (5,”;”)
(1,”else”) (2,”j”) (4,”=”)
(2,”b”) (4,”-”) (2,”a”)
(5,”;”) (5,”}”)
程序语言的单词符号一般分为五种:基本保留字、标识符、常数、运算符、界符(分隔符)。
识别单词:掌握单词的构成规则很重要。例如,标识符的构成规则为:字母 +(字母 / 数字 / 下划线)*,即以字母开头,后面跟任意个字母、数字或下划线。
大多数程序设计语言的单词符号都可以用转换图来识别,
如图
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-Hsvoq2oJ-1636038897507)(C:\Users\Lunatic\Desktop\编译原理实验\实验一\1.png)]
词法分析器输出的单词符号常常表示为二元式
即单词种别、单词符号的属性值
词法分析时,常常会用到超前搜索方法。
如当前待分析字符串为“a>+” ,当前字符为“>” ,此时,分析器到底是将其分析为大于关系运算符还是大于等于关系运算符呢?
显然,只有知道下一个字符是什么才能下结论。于是分析器读入下一个字符’+’ ,这时可知应将’>’ 解释为大于运算符。但此时,超前读了一个字符’+’ ,所以要回退一个字符,词法分析器才能正常运行。
预处理工作包括对空白符、跳格符、回车符和换行符等编辑性字符的处理,及删除注解等。由一个预处理子程序来完成。
设计方法:
把扫描器作为语法分析的一个过程,当语法分析需要一个单词时,就调用扫描器。 扫描器从初态出发,当识别一个单词后便进入终态,送出二元式。
增加内容总结
增加了更多关键字,关键字列表如下
char *key[34] = {"if", "else", "for", "while", "do", "return", "break", "continue", "auto", "double",
"union", "const", "float", "short", "unsigned", "void", "default", "sizeof",
"main", "static", "switch", "continue","float"
};
增加对小数的处理
main.cpp
,读取输入内容并逐个字符读取以进行分析
#include "immintrin.h"
#include
#include
#include
#include "iostream"
#include "for_digit.h"
#include "for_vocabulary.h"
#include "special.h"
/*
 * Entry point of the scanner: opens the source file, then repeatedly looks at
 * the current character and hands it to the handler for that token class.
 * Each handler consumes its token and returns the first character after it,
 * which becomes the new current character.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main() {
    fp = fopen(R"(C:\Users\Lunatic\Desktop\test.c)", "r");
    if (fp == NULL) {
        perror("fopen");
        return 1;
    }

    buffer = fgetc(fp); /* fgetc(): read one character from the file */

    /*
     * `buffer` is a plain char, so comparing it with EOF is unreliable (it
     * never matches where char is unsigned and matches a 0xFF data byte where
     * char is signed). The stream's own end-of-file/error state is used
     * instead; it is set by whichever read hit the end of the file.
     */
    while (!feof(fp) && !ferror(fp)) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        const unsigned char current = (unsigned char) buffer;

        if (isspace(current)) {
            /* blanks, tabs, newlines, carriage returns: skip */
            buffer = fgetc(fp);
        } else if (isalpha(current) || buffer == '_') {
            /* keyword or identifier (identifiers may start with '_') */
            buffer = vocabularyProcess(buffer);
        } else if (isdigit(current)) {
            /* numeric constant */
            buffer = digitProcess(buffer);
        } else {
            /* operators, delimiters, comments, ... */
            buffer = special_char(buffer);
        }
    }

    fclose(fp);
    return 0;
}
key.h
定义保留字
//
// Created by Lunatic on 2021/5/12.
//
#ifndef T1_KEY_H
#define T1_KEY_H
/* Input stream shared by all token handlers. */
FILE *fp;
/* Character currently under examination by the main scanning loop. */
char buffer;
/*
 * Reserved-word table. checkType() reports a match as (index + 1), so the
 * position of every existing entry is kept stable. The three slots that used
 * to repeat "switch", "continue" and "float" (and therefore could never be
 * matched) now hold keywords that were missing from the table, and the last
 * slot holds "volatile".
 */
const char *key[34] = {"if", "else", "for", "while", "do", "return", "break", "continue", "auto", "double",
                       "int", "struct", "long", "switch", "case", "File", "register", "typedef", "char", "extern",
                       "union", "const", "float", "short", "unsigned", "void", "default", "sizeof",
                       "main", "static", "enum", "goto", "signed", "volatile"
};
/*
 * Shared scanner state. The token handlers store in `id` the category code of
 * the token they just emitted (1 keyword, 2 identifier, 3 number, 4 operator;
 * not every path updates it), and special_char() reads it to decide whether a
 * leading '+'/'-' belongs to a numeric literal. It starts at 4.
 * `atype` is not referenced in the code shown alongside this header.
 */
[[maybe_unused]] int atype, id = 4;
#endif //T1_KEY_H
for_vocabulary.h
处理单词
//
// Created by Lunatic on 2021/5/12.
//
#ifndef T1_FOR_VOCABULARY_H
#define T1_FOR_VOCABULARY_H
#include "check.h"
char vocabularyProcess(char buffer) {
int position; /*保留字数组中的位置*/
int i = -1;
char words[20];
while ((isalpha(buffer)) || (isdigit(buffer)) || buffer == '_') {
/*读一个完整的单词放入alphatp数组中*/
words[++i] = buffer;
buffer = fgetc(fp);
}
words[i + 1] = '\0';
position = checkType(words, 1); // check the type of the words
if (position != 0) {
printf("%s, (1.Key Words,%d)\n", words, position - 1);
id = 1;
} else {
printf("(%s ,2.Identifier)\n", words);
id = 2;
}
return (buffer);
}
#endif //T1_FOR_VOCABULARY_H
check.h
确定单词是保留字还是标识符
//
// Created by Lunatic on 2021/5/12.
//
#ifndef T1_CHECK_H
#define T1_CHECK_H
/* Token categories: 1 keyword, 2 identifier, 3 constant, 4 operator, 5 delimiter */

/*
 * Decides whether a word is a reserved word.
 *
 * text: NUL-terminated word to look up.
 * type: lookup kind; only 1 (reserved-word lookup) is implemented.
 *
 * Returns (index in `key` + 1) of the first matching entry, or 0 when the
 * word is not in the table. Any other `type`, or a null `text`, also yields 0;
 * previously control ran off the end of the function in that case.
 */
int checkType(char *text, int type) {
    if (type != 1 || text == NULL) {
        return 0;
    }

    /* Walk the whole table instead of a hard-coded bound; unused (null)
     * slots at the end of the array are skipped. */
    const size_t count = sizeof key / sizeof key[0];
    for (size_t i = 0; i < count; ++i) {
        if (key[i] != NULL && strcmp(key[i], text) == 0) {
            return (int) i + 1; /* non-zero, unique per table slot */
        }
    }
    return 0; /* not a reserved word */
}
#endif //T1_CHECK_H
for_digit.h
读取数字
//
// Created by Lunatic on 2021/5/12.
//
#ifndef T1_FOR_DIGIT_H
#define T1_FOR_DIGIT_H
#include "cstdio"
#include
#include
#include "key.h"
char digitProcess(char buffer) {
int i = -1;
char digittp[20];
while ((isdigit(buffer)) || buffer == '.') { // int , float
digittp[++i] = buffer;
buffer = fgetc(fp);
}
digittp[i + 1] = '\0';
printf("(%s ,3.Number)\n", digittp);
id = 3;
return (buffer);
}
#endif //T1_FOR_DIGIT_H
special.h
其他字符处理,包括界符、操作符、小数
//
// Created by Lunatic on 2021/5/16.
//
#ifndef T1_SPECIAL_H
#define T1_SPECIAL_H
#include "special_c/get_next.h"
// Prints a block comment as one token. The two opening characters have
// already been consumed by the caller. The text is streamed to stdout rather
// than buffered, so its length is unlimited. Scanning stops after the closing
// star-slash pair or at end of file. Returns the last value read from `in`.
static int special_emit_block_comment(FILE *in) {
    int previous = 0;
    int current;

    fputs("(/*", stdout);
    while ((current = fgetc(in)) != EOF) {
        putchar(current);
        if (previous == '*' && current == '/') {
            break;
        }
        previous = current;
    }
    printf(" ,5.Boundary operator)\n");
    return current;
}

// Prints a single-line comment as one token. The two leading slashes have
// already been consumed by the caller. The terminating newline is consumed
// but not printed. Returns the last value read from `in`.
static int special_emit_line_comment(FILE *in) {
    int current;

    fputs("(//", stdout);
    while ((current = fgetc(in)) != EOF && current != '\n') {
        putchar(current);
    }
    printf(" ,5.Boundary operator)\n");
    return current;
}

/*
 * Handles every character that is neither a letter, a digit nor white space:
 * delimiters, operators, comments, signed numeric literals and the
 * quote / format-specifier case.
 *
 * c: the character to classify, already read from `fp`.
 * Returns the next character for the main loop to examine. When a look-ahead
 * character was read but does not belong to the token, that character itself
 * is returned so nothing is lost; otherwise the following character is
 * fetched with get_next_char(). At end of file the value is EOF narrowed to
 * char.
 *
 * Side effects: prints one token per call to stdout and sets the global `id`
 * to 4 after an operator or 3 after a signed number. Delimiters, comments,
 * "++" and "--" leave `id` unchanged.
 *
 * Every path now returns a value and terminates every string it prints.
 */
char special_char(char c) {
    char ch[64];
    int next;

    ch[0] = c;
    ch[1] = '\0';

    /* delimiters */
    if (c == ',' || c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '[' || c == ']') {
        printf("(%c ,5.Boundary operator)\n", ch[0]);
        return get_next_char(fp);
    }

    /* double quote: only a directly following "%x" is reported (as a format
     * specifier); otherwise the quote is skipped and the character after it
     * is handed back to the main loop */
    if (c == '"') {
        next = fgetc(fp);
        if (next != '%') {
            return (char) next;
        }
        next = fgetc(fp);
        ch[0] = '%';
        if (next != EOF) {
            ch[1] = (char) next;
            ch[2] = '\0';
        }
        printf("(%s,4.Operator)\n", ch);
        return next == EOF ? (char) EOF : get_next_char(fp);
    }

    /* '*', '%' and their compound assignments */
    if (c == '*' || c == '%') {
        next = fgetc(fp);
        id = 4;
        if (next == '=') {
            ch[1] = '=';
            ch[2] = '\0';
            printf("(%s,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* '/', "/=", and both comment forms */
    if (c == '/') {
        next = fgetc(fp);
        if (next == '=') {
            ch[1] = '=';
            ch[2] = '\0';
            printf("(%s,4.Operators )\n", ch);
            id = 4;
            return get_next_char(fp);
        }
        if (next == '*') {
            next = special_emit_block_comment(fp);
            return next == EOF ? (char) EOF : get_next_char(fp);
        }
        if (next == '/') {
            next = special_emit_line_comment(fp);
            return next == EOF ? (char) EOF : get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        id = 4;
        return (char) next;
    }

    /* '=', '!', '<', '>' optionally followed by '=' */
    if (c == '=' || c == '!' || c == '<' || c == '>') {
        next = fgetc(fp);
        id = 4;
        if (next == '=') {
            ch[1] = '=';
            ch[2] = '\0';
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* '+' / '-': sign of a literal, compound assignment, "++"/"--", or a
     * plain operator */
    if (c == '+' || c == '-') {
        next = fgetc(fp);

        /* Directly after an operator, a sign followed by a digit starts a
         * signed numeric literal. */
        if (id == 4 && next != EOF && isdigit(next)) {
            size_t length = 1;
            while (next != EOF && (isdigit(next) || next == '.')) {
                if (length < sizeof ch - 1) {
                    ch[length++] = (char) next;
                }
                next = fgetc(fp);
            }
            ch[length] = '\0';
            printf("(%s ,3.Number)\n", ch);
            id = 3; /* the last token is a number, not an operator */
            return (char) next;
        }

        if (next == '=') {
            ch[1] = '=';
            ch[2] = '\0';
            if (c == '+') {
                printf("(%s,4.Operators )\n", ch);
            } else {
                printf("(%s,4. Operators )\n", ch); /* spacing kept as originally emitted */
            }
            id = 4;
            return get_next_char(fp);
        }

        if (next == c) {
            /* "++" or "--"; `id` is left alone so that a following '-' is
             * not mistaken for the sign of a literal */
            ch[1] = c;
            ch[2] = '\0';
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }

        printf("(%s ,4.Operators )\n", ch);
        id = 4;
        return (char) next; /* hand the look-ahead character back */
    }

    /* '&', '|', '^' and the doubled forms "&&" and "||" */
    if (c == '&' || c == '^' || c == '|') {
        next = fgetc(fp);
        id = 4;
        if (next == c && c != '^') {
            ch[1] = c;
            ch[2] = '\0';
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* remaining single-character operators */
    if (c == '.' || c == '?' || c == ':' || c == '~') {
        printf("(%s ,4.Operators )\n", ch);
        id = 4;
        return get_next_char(fp);
    }

    /* Anything else (for example '#'): report it on stderr and move on, so
     * the scanner always makes progress. */
    fprintf(stderr, "unrecognized character '%c' skipped\n", c);
    return get_next_char(fp);
}
#endif //T1_SPECIAL_H
scanf 和 printf 语句中各类单词的分析测试程序 test.c
int t;
int main() {
// keywords
a+-
File *fp;
register int a=-5.455.5 =4,j;
char b;
if(a!=b)
j=a-b;
else j=b-a;
for (int i = 0; i < MAX_SIZE; i++) {
continue;
break;
}
/*
* numbers
*/
int c = 1.1;
// operators
d = b && c || f;
// test
if (a > b) {
do {
b = b+1;
} while (a = b);
float e = b;
printf("%f",e);
}
return 0;
}
void test(ch) {
break;
}
typedef struct node{
static int data[20];
const short top;
} SqStack, node;
识别结果过长,结果仅粘贴文本内容及部分截图
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-2lsJ2cdR-1636038897515)(C:\Users\Lunatic\Desktop\编译原理实验\实验一\1.jpg)]
int, (1.Key Words,10)
(t ,2.Identifier)
(; ,5.Boundary operator)
int, (1.Key Words,10)
main, (1.Key Words,28)
(( ,5.Boundary operator)
() ,5.Boundary operator)
({ ,5.Boundary operator)
(// keywords ,5.Boundary operator)
(a ,2.Identifier)
(+ ,4.Operators )
File, (1.Key Words,15)
(fp ,2.Identifier)
(; ,5.Boundary operator)
register, (1.Key Words,16)
int, (1.Key Words,10)
(a ,2.Identifier)
(= ,4.Operators )
(-5.455.5 ,3.Number)
(, ,5.Boundary operator)
(=5.455.5 ,4.Operators )
(4 ,3.Number)
(, ,5.Boundary operator)
(j ,2.Identifier)
(; ,5.Boundary operator)
char, (1.Key Words,18)
(b ,2.Identifier)
(; ,5.Boundary operator)
if, (1.Key Words,0)
(( ,5.Boundary operator)
(a ,2.Identifier)
(!= ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
(j ,2.Identifier)
(= ,4.Operators )
(a ,2.Identifier)
(- ,4.Operators )
(b ,2.Identifier)
(; ,5.Boundary operator)
else, (1.Key Words,1)
(j ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(- ,4.Operators )
(a ,2.Identifier)
(; ,5.Boundary operator)
for, (1.Key Words,2)
(( ,5.Boundary operator)
int, (1.Key Words,10)
(i ,2.Identifier)
(= ,4.Operators )
(0 ,3.Number)
(; ,5.Boundary operator)
(i ,2.Identifier)
(< ,4.Operators )
(MAX_SIZE ,2.Identifier)
(; ,5.Boundary operator)
(i ,2.Identifier)
(+ ,4.Operators )
() ,5.Boundary operator)
({ ,5.Boundary operator)
continue, (1.Key Words,7)
(; ,5.Boundary operator)
break, (1.Key Words,6)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
(/*
* numbers
*/ ,5.Boundary operator)
int, (1.Key Words,10)
(c ,2.Identifier)
(= ,4.Operators )
(1.1 ,3.Number)
(; ,5.Boundary operator)
(// operators ,5.Boundary operator)
(d ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(&& ,4.Operators )
(c ,2.Identifier)
(|| ,4.Operators )
(f ,2.Identifier)
(; ,5.Boundary operator)
(// test ,5.Boundary operator)
if, (1.Key Words,0)
(( ,5.Boundary operator)
(a ,2.Identifier)
(> ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
({ ,5.Boundary operator)
do, (1.Key Words,4)
({ ,5.Boundary operator)
(b ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(+ ,4.Operators )
(; ,5.Boundary operator)
(} ,5.Boundary operator)
while, (1.Key Words,3)
(( ,5.Boundary operator)
(a ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
(; ,5.Boundary operator)
float, (1.Key Words,22)
(e ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(; ,5.Boundary operator)
(printf ,2.Identifier)
(( ,5.Boundary operator)
(%f,4.Operator)
(, ,5.Boundary operator)
(e ,2.Identifier)
() ,5.Boundary operator)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
return, (1.Key Words,5)
(0 ,3.Number)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
void, (1.Key Words,25)
(test ,2.Identifier)
(( ,5.Boundary operator)
(ch ,2.Identifier)
() ,5.Boundary operator)
({ ,5.Boundary operator)
break, (1.Key Words,6)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
typedef, (1.Key Words,17)
struct, (1.Key Words,11)
(node ,2.Identifier)
({ ,5.Boundary operator)
static, (1.Key Words,29)
int, (1.Key Words,10)
(data ,2.Identifier)
([ ,5.Boundary operator)
(20 ,3.Number)
(] ,5.Boundary operator)
(; ,5.Boundary operator)
const, (1.Key Words,21)
short, (1.Key Words,23)
(top ,2.Identifier)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
(SqStack ,2.Identifier)
(, ,5.Boundary operator)
(node ,2.Identifier)
(; ,5.Boundary operator)