上周帮一个刚刚找到自己方向的好友做了一个编译原理的课程实验,要求是做一个词法分析器,具体要求如下:
对下述文法和单词表定义的语言设计编制一个语法分析器。
(1)单词符号及种别表

| 单词符号 | 种别编码 | 单词值 |
| --- | --- | --- |
| `main` | 1 | |
| `int` | 2 | |
| `float` | 3 | |
| `double` | 4 | |
| `char` | 5 | |
| `if` | 6 | |
| `else` | 7 | |
| `do` | 8 | |
| `while` | 9 | |
| `l(l\|d)*` | 10 | 内部字符串 |
| `( +\|-\|ε ) dd*(.dd* \| ε)( e ( +\|-\|ε ) dd*\|ε)` | 20 | 二进制数值表示 |
| `=` | 21 | |
| `+` | 22 | |
| `-` | 23 | |
| `*` | 24 | |
| `/` | 25 | |
| `(` | 26 | |
| `)` | 27 | |
| `{` | 28 | |
| `}` | 29 | |
| `,` | 30 | |
| `;` | 31 | |
| `>` | 32 | |
| `>=` | 33 | |
| `<` | 34 | |
| `<=` | 35 | |
| `==` | 36 | |
| `!=` | 37 | |

(2)语法结构定义
<表达式> ::= <项>{ +<项>|-<项>}
<项> ::= <因子>{*<因子>|/<因子>}
<因子> ::=ID|num|(<表达式>)
num::= ( +|-|ε ) 数字数字*(.数字数字* | ε)( e ( +|-|ε ) 数字数字*|ε)
ID::=字母(字母|数字)*
字母::=a|b|c…|z|A|B|C…|Z
数字::=0|1|2…|9
本来不是个很难的程序,但我还是写得比较复杂。有这方面心得的朋友可以和我联系,一起探讨一下吧~~~我把自己简陋的代码贴在下面:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Placeholder used to pre-fill token before each scan; a lone '.' is never a valid token. */
#define INVALUECHAR '.'
/* Size of the input line buffer. */
#define MAXCHARNUM 80
/* Size of the token buffer, including the terminating NUL. */
#define TOKENLENGTH 15
/* Number of reserved words stored in rwtab. */
#define RETABLENGTH 9
/*
 * Status codes used throughout this file. NOTE: they are inverted relative to
 * C truth values (TRUE is 0, FALSE is -1), so results must always be compared
 * explicitly, e.g. `TRUE == IsNumber(c)`, never used directly as a condition.
 */
#define TRUE 0
#define FALSE (-1)

/* syn: category code of the most recently scanned token.
 * sum: numeric value slot that main prints for category 20 when it is non-zero. */
int syn, sum = 0;
/* Text of the most recently scanned token (NUL-terminated by Analyze). */
char token[TOKENLENGTH] = { '\0' };
/* Reserved words; the word at index n has category code n + 1. */
const char *rwtab[RETABLENGTH] = { "main", "int", "float", "double", "char", "if", "else", "do", "while" };

/* Returns TRUE if num is a decimal digit, FALSE otherwise. */
int IsNumber(char num);
/* Returns the integer value of the first i characters of token. */
int GetSum(int i);
/* Consumes the exponent part of a number, starting at the 'e' at string[num]. */
int HoldE(int *local, char *string, int num, int *i);
/* Handles the first non-digit character c met after the integer part of a number. */
int NumberOP(int *local, char *string, int num, int *i, char c);
/* Scans the longest token starting at string[num]. */
int Analyze(char *string, int num);
- int main()
- {
- int p = 0, i, j;
- char string[MAXCHARNUM] = { '\0' };
- printf("please input a string:\n");
- //读取输入的字符串,超过长度限定就让用户重新输入
- do {
- scanf("%c", &string[p++]);
- if (MAXCHARNUM <= p) {
- printf("\nYou have input more than 80 characters!\n");
- printf("Please input again:\n");
- string[0] = '\0';
- p = 0;
- }
- }while ('\n' != string[p-1]); //字符串以'\n'结束
- //遍历输入的字符串,忽略回车和空格
- for (i = 0; i < p; i++) {
- if ((' ' == string[i]) || ('\n' == string[i]))
- continue;
- else {
- j = Analyze(string, i); //当出现错误时Analyze返回FALSE,正确时返回找到字串的最后一个字符的位置
- if (FALSE == j) {
- printf("\nYour input is wrong!\n");
- return 1;
- }
- i = j; //将i赋值为查找到字串的最后一个字符的位置
- switch (syn) {
- //-1为出现非法字符,0为正常结束,11为整型数的sum输出
- case -1: { printf("\nYou have input illegal characters!\nSo it ended!\n");
- break; }
- case 0: { break; }
- case 20: { if (0 != sum) {
- printf("( %-5d%15d )\n", syn, sum);
- sum = 0;
- }
- else
- printf("( %-5d%15s )\n", syn, token);
- break; }
- default: { printf("( %-5d%15s )\n", syn, token);
- break; }
- }
- }
- }
- return 0;
- }
- //判断num是否为数字
- int IsNumber(char num)
- {
- if (('0' <= num) && ('9' >= num))
- return TRUE;
- else
- return FALSE;
- }
- //求token前i个数字的加权和
- int GetSum(int i)
- {
- int sum, j;
- char a[TOKENLENGTH];
- for (j = 0; j < i; j++)
- a[j] = token[j];
- a[j] = '\0';
- sum = atoi(a);
- return sum;
- }
- //读取浮点数时遇到e的处理
- int HoldE(int *local, char *string, int num, int *i)
- {
- char ch = 'e';
- token[(*i)++] = ch;
- ch = string[++num];
- //e的后一位为数字
- if (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- *local = num - 1; //改变string中下标需要指向的位置
- return TRUE;
- }
- //e的后一位为'+'或'-'
- else if (('+' == ch) || ('-' == ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- if (TRUE != IsNumber(ch))
- return FALSE;
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- *local = num - 1;
- return TRUE;
- }
- //其他情况就出现错误
- else
- return FALSE;
- }
- //读取浮点数时遇到第一个非数字的字符的处理
- int NumberOP(int *local, char *string, int num, int *i, char c)
- {
- int result = TRUE;
- char ch = c;
- //这个非数字字符为小数点
- if ('.' == ch) {
- token[(*i)++] = ch;
- ch = string[++num];
- //小数点后一位如果不是数字就出现错误
- if (TRUE != IsNumber(ch))
- return FALSE;
- while (TRUE == IsNumber(ch)) {
- token[(*i)++] = ch;
- ch = string[++num];
- }
- //小数点之后一串数字过后遇到e
- if ('e' == ch) {
- result = HoldE(local, string, num, i);
- if (FALSE == result)
- return FALSE;
- }
- else
- *local = num - 1;
- }
- //这个字符为e
- else if ('e' == ch) {
- result = HoldE(local, string, num, i);
- if (FALSE == result)
- return FALSE;
- }
- //其他情况就不用再读取了,直接修改local并返回
- else
- *local = num - 1;
- syn = 20;
- return TRUE;
- }
- //查询最长字串
- int Analyze(char *string, int num)
- {
- int m, i = 0, local = num, n, flag = 0;
- int result = TRUE;
- char ch;
- for (m = 0; m < TOKENLENGTH; m++)
- token[m++] = INVALUECHAR;
- ch = string[num];
- //第一种情况为字符
- if (((ch <= 'z') && (ch >= 'a')) || ((ch <= 'Z') && (ch >= 'A'))) {
- while(((ch <= 'z') && (ch >= 'a')) || (( ch <= 'Z') && (ch >= 'A')) || (TRUE == IsNumber(ch))) {
- token[i++] = ch;
- //token长度限定
- if (i >= (TOKENLENGTH - 1))
- return FALSE;
- else
- ch = string[++num];
- }
- local = num - 1;
- //syn预设为l(l | d)*的形式
- syn = 10;
- token[i] = '\0';
- //遍历关键字数组,如果找到就修改syn并
- for (n = 0; n < RETABLENGTH; n++)
- if (0 == strcmp(token, rwtab[n])) {
- syn = n + 1;
- break;
- }
- }
- //第二种情况为遇到'+'、'-'或数字
- else if (('+' == ch) || ('-' == ch) || (TRUE == IsNumber(ch))) {
- token[i++] = ch;
- //如果为数字就用flag标记一下
- if (TRUE == IsNumber(ch))
- flag = 1;
- ch = string[++num];
- //下一位也为数字
- if (TRUE == IsNumber(ch)) {
- token[i++] = ch;
- ch = string[++num];
- while (TRUE == IsNumber(ch)) {
- token[i++] = ch;
- ch = string[++num];
- }
- //遇到第一个
- result = NumberOP(&local, string, num, &i, ch);
- if (FALSE == result)
- return FALSE;
- }
- //下一位不为数字要分情况,当第一位为数字时执行下列操作,
- //第一位为'+'、'-'时,下一位必须为数字
- else if (1 == flag) {
- result = NumberOP(&local, string, num, &i, ch);
- if (FALSE == result)
- return FALSE;
- }
- //当第一位为'+'、'-'时,下一位不为数字证明'+'、'-'已经是最长的字串了
- else {
- if ('+' == token[i-1])
- syn = 22;
- else
- syn = 23;
- }
- }
- else {
- switch (ch) {
- case '=': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 36;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 21;
- break; }
- case '*': { syn = 24;
- token[i++] = ch;
- break; }
- case '/': { syn = 25;
- token[i++] = ch;
- break; }
- case '(': { syn = 26;
- token[i++] = ch;
- break; }
- case ')': { syn = 27;
- token[i++] = ch;
- break; }
- case '{': { syn = 28;
- token[i++] = ch;
- break; }
- case '}': { syn = 29;
- token[i++] = ch;
- break; }
- case ',': { syn = 30;
- token[i++] = ch;
- break; }
- case ';': { syn = 31;
- token[i++] = ch;
- break; }
- case '>': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 33;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 32;
- break; }
- case '<': { token[i++] = ch;
- ch = string[++num];
- if ('=' == ch) {
- syn = 35;
- token[i++] = ch;
- local = num;
- break;
- }
- syn = 34;
- break; }
- case '!': { token[i++] = ch;
- ch = string[++num];
- if ('=' != ch) {
- return FALSE;
- }
- syn = 37;
- token[i++] = ch;
- local = num;
- break; }
- case '\n': { syn = 0;
- token[i++] = ch;
- break; }
- default: { syn = -1;
- break; }
- }
- }
- token[i] = '\0';
- return local;
- }