临时写了一个词法分析程序，大概可以分析所有 C 语言程序，至少可以分析它自己。代码有较强的可维护性，时间和空间效率都不低；采用一种类似字典树的结构（可能更接近自动机），可以识别任意长度的分界符，单、双引号中的反斜杠转义字符也能处理。报错位置合理，信息清晰。muuuuuuuuuuuuuua!
#include <cstdio>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// ---- Table sizes ------------------------------------------------------------
const int KEY_WORD_NUM( 22 );        // number of entries in key_word
const int DIVIDE_WORD_NUM( 49 );     // number of entries in divide_word
const int CHARACTER_SET_NUM( 256 );  // not referenced by the code visible in this file

// ---- Token type tags ----------------------------------------------------------
const string NUM1( "NUM1" );                        // numeric constant without a decimal point
const string NUM2( "NUM2" );                        // numeric constant containing a decimal point
const string CONSTANT_STRING( "CONSTANT_STRING" );  // single- or double-quoted literal
const string ID( "ID" );                            // identifier that is not a keyword

// ---- Comment openers (these are also entries of divide_word) -----------------
const string ANNOTATIONL1( "//" );  // line comment
const string ANNOTATIONL2( "/*" );  // block comment

// Words reported as keywords instead of identifiers.
const string key_word[ KEY_WORD_NUM ] = {
    "if", "else", "for", "while", "do", "int", "double", "char",
    "read", "write", "const", "vector", "struct", "map", "void", "bool",
    "print", "scanf", "return", "inline", "true", "false"
};

// Every delimiter / operator spelling the scanner recognises.
const string divide_word[ DIVIDE_WORD_NUM ] = {
    "!", "~", "(", ")", "{", "}", "[", "]",
    "+", "-", "*", "/", "%", "=", "^", "&", "|",
    "&&", "||", "<<", ">>", "++", "--",
    "<", ">", "<=", ">=", "!=", "==",
    "+=", "-=", "/=", "*=", "%=", "^=", "&=", "|=",
    ">>=", "<<=",
    ".", ",", ":", ";",
    "'", "\"", "\\", "#",
    "//", "/*"
};
/// One state of the delimiter trie stored in state_graph.
struct Node {
    string str;              // characters spelled on the path from the root to this state
    bool is_terminator;      // set to true by the trie builder when `str` is a complete delimiter
    map < char, int > next;  // outgoing edges: character -> index of the child state

    /// Creates an empty, non-terminating state with no outgoing edges.
    Node() : str(), is_terminator( false ), next() {}

    /// Creates a non-terminating state labelled `str` with no outgoing edges.
    Node( const string &str ) : str( str ), is_terminator( false ), next() {}
};
/// A single token produced by the scanner.
struct Word {
    string type;  // token category tag, or the spelling itself for keywords and delimiters
    string val;   // token text as it was read from the input

    /// Creates a token whose type and value are both empty.
    Word() : type(), val() {}

    /// Creates a token with type `str` and text `val`.
    Word( const string &str, const string &val ) : type( str ), val( val ) {}

    /// Writes the token to stdout as " <type> <val>" followed by a newline.
    void print() {
        const char *type_text = type.c_str();
        const char *val_text = val.c_str();
        printf( " %s %s\n", type_text, val_text );
    }
};
// ---- Scanner state shared by the functions below ------------------------------
int now_line{ 1 };                   // line counter; read() advances it on every '\n'
char ch{ 0 };                        // most recently read input character (the lookahead)
map < string, int > key_word_id;     // keyword spelling -> its index in key_word
map < char, int > delimiter_head;    // characters that can start a delimiter (the mapped value is not read in this file)
vector < Node > state_graph;         // delimiter trie; element 0 is the root
vector < Word > ans;                 // tokens collected so far, in input order
string error_information;            // text substituted into the messages of PrintError
string fin_name;                     // name of the source file given by the user
string fout_name;                    // name of the result file given by the user
/// Reads the next character of standard input into `c`.
///
/// On success `c` holds the character read, and `now_line` is advanced when
/// that character is a newline. When nothing can be read (end of input or a
/// read error) `c` is set to EOF and `now_line` is left untouched.
///
/// @param c receives the character read, or EOF when input is exhausted.
void read( char &c ){
    // scanf returns the number of items stored. Check it BEFORE looking at c:
    // on failure c still holds the previous character, and testing that stale
    // value would count a trailing '\n' a second time.
    if( scanf( "%c", &c ) != 1 ) {
        c = EOF;
        return;
    }
    if( c == '\n' ) now_line++;
}
/// Returns true when `c` is an ASCII letter or an underscore.
inline bool Is_Character( char &c ){
    if( c == '_' ) return true;
    if( c >= 'a' && c <= 'z' ) return true;
    return c >= 'A' && c <= 'Z';
}
/// Returns true when `c` is one of the decimal digits '0'..'9'.
inline bool Is_Number( char &c ){
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';
    return !( below_zero || above_nine );
}
/// Returns true when `c` is a key of delimiter_head, i.e. some delimiter
/// starts with that character.
inline bool Is_delimiter_head( char &c ){
    return delimiter_head.find( c ) != delimiter_head.end();
}
/// Rebuilds every lookup structure used by the scanner and resets its output.
///
/// Fills key_word_id and delimiter_head from the constant tables, builds the
/// delimiter trie in state_graph (index 0 is the root), empties `ans` and
/// resets `now_line` to 1.
void InitTestScan(){
    // Keyword spelling -> index in key_word.
    key_word_id.clear();
    for( int k = 0; k < KEY_WORD_NUM; ++k ) {
        key_word_id[ key_word[k] ] = k;
    }

    // First character of every delimiter; later entries overwrite earlier ones.
    delimiter_head.clear();
    for( int d = 0; d < DIVIDE_WORD_NUM; ++d ) {
        delimiter_head[ divide_word[d][0] ] = d;
    }

    // Delimiter trie: insert each delimiter one character at a time.
    state_graph.clear();
    state_graph.push_back( Node() );
    for( int d = 0; d < DIVIDE_WORD_NUM; ++d ) {
        const string &word = divide_word[d];
        int cursor = 0;
        for( string::size_type pos = 0; pos < word.length(); ++pos ) {
            const char symbol = word[pos];
            map < char, int >::iterator edge = state_graph[cursor].next.find( symbol );
            if( edge != state_graph[cursor].next.end() ) {
                // Edge already exists: just follow it.
                cursor = edge->second;
                continue;
            }
            // New edge: the child is appended at the end of the vector, so its
            // index is the current size. Everything needed from the parent is
            // taken before push_back, which may relocate the elements.
            const int child = (int)state_graph.size();
            const string prefix = state_graph[cursor].str + symbol;
            state_graph[cursor].next[ symbol ] = child;
            state_graph.push_back( Node( prefix ) );
            cursor = child;
        }
        state_graph[cursor].is_terminator = true;
    }

    ans.clear();
    now_line = 1;
}
/// Prints the message that belongs to error code `op` on stdout.
///
/// Codes 3 and 4 embed `error_information`, code 5 embeds both
/// `error_information` and `now_line`, codes 6 and 7 embed `now_line`.
/// Any other value of `op` prints nothing.
///
/// @param op error code in the range 1..7.
void PrintError( int op ){
    switch( op ) {
    case 1:  // source file could not be opened
        printf( "ERROR: fail to open the source program !\n" );
        break;
    case 2:  // output file could not be opened (the original author noted this as not expected in practice)
        printf( "ERROR: fail to open the output file !\n" );
        break;
    case 3:  // malformed numeric constant
        printf( "ERROR: the number %s is not comply with the rules !\n" , error_information.c_str() );
        break;
    case 4:  // character sequence that is not a complete delimiter
        printf( "ERROR: %s is not a divide word !\n" , error_information.c_str() );
        break;
    case 5:  // character that starts no known token
        printf( "ERROR: the character %s in line %d was not clear !\n" , error_information.c_str(), now_line );
        break;
    case 6:  // unterminated double-quoted literal
        printf( "ERROR: missing terminating \" character in line %d !\n", now_line );
        break;
    case 7:  // unterminated single-quoted literal
        printf( "ERROR: missing terminating \' character in line %d !\n", now_line );
        break;
    default:
        break;
    }
}
/// Asks the user for the source and output file names and redirects stdin to
/// the source file and stdout to the output file.
///
/// The names are stored in `fin_name` and `fout_name`.
///
/// @return 0 on success, 1 when the source file cannot be opened, 2 when the
///         output file cannot be opened. The codes match PrintError.
int InputTestScan(){
    printf( "input the name of the source program...\n" );
    cin >> fin_name;

    // Probe the source file first so that a wrong name is reported before the
    // second prompt and before any stream has been redirected.
    FILE *probe = fopen( fin_name.c_str(), "r" );
    if( probe == NULL ) return 1;
    fclose( probe );

    printf( "input the name of the output file...\n" );
    cin >> fout_name;

    // freopen returns a null pointer on failure; report it instead of letting
    // the scanner run on a dead stream.
    if( freopen( fin_name.c_str(), "r", stdin ) == NULL ) return 1;
    if( freopen( fout_name.c_str(), "w", stdout ) == NULL ) return 2;
    return 0;
}
int TestScan(){
int ret = InputTestScan();
if( ret != 0 ) return ret;
InitTestScan();
read( ch );
while( ch != EOF ){
while( ch==' ' || ch=='\t' || ch=='\n' ){
read( ch );
if( ch == EOF ) return 0;
}
if( Is_Character(ch) == true ) { //处理标识符
string str;
while( Is_Character(ch) == true || Is_Number(ch) == true ) {
str += ch;
read( ch );
}
if( key_word_id.count(str) > 0 ) {
ans.push_back( Word(str, str) );
}
else {
ans.push_back( Word(ID, str) );
}
}
else if( Is_Number(ch) == true ) { //处理数字常量
string str;
bool is_decimal = false;
while( Is_Number(ch) == true ) {
str +=ch;
read( ch );
}
if( ch == '.' ) {
is_decimal = true;
str +=ch;
read( ch );
while( Is_Number(ch) == true ) {
str +=ch;
read( ch );
}
}
if( is_decimal == true ) {
ans.push_back( Word( NUM2, str ) );
if( str.back() == '.' ) {
error_information = str ;
return 3;
}
}
else{
ans.push_back( Word( NUM1, str ) );
}
}
else if ( ch == '\'' ) { //处理单引号
string str;
str +=ch;
read( ch );
while( ch!='\'' && ch!='\n' ) {
if( ch == '\\' ) {
str +=ch;
read( ch );
}
str +=ch;
read( ch );
}
if( ch != '\'' ) {
return 7;
}
str +=ch;
ans.push_back( Word(CONSTANT_STRING, str) );
read( ch );
}
else if ( ch == '"' ) { //处理双引号
string str;
str +=ch;
read( ch );
while( ch!='"' && ch!='\n' ) {
if( ch == '\\' ) {
str +=ch;
read( ch );
}
str +=ch;
read( ch );
}
if( ch != '"' ) {
return 6;
}
str +=ch;
ans.push_back( Word(CONSTANT_STRING, str) );
read( ch );
}
else if ( Is_delimiter_head(ch) == true ) { //处理分界符
int node_index = 0;
while( state_graph[node_index].next.count(ch) > 0 ) {
node_index = state_graph[node_index].next[ch] ;
read( ch );
}
Node &now_node = state_graph[node_index];
if( now_node.str == ANNOTATIONL1 ) { //处理注释1
while( ch != '\n' ) read( ch );
read( ch );
}
else if( now_node.str == ANNOTATIONL2 ) { //处理注释2
char pre_ch = ch;
read( ch );
while( !(pre_ch == '*' && ch == '/') ) {
pre_ch = ch;
read( ch );
}
read( ch );
}
else if( state_graph[node_index].is_terminator == false ) { //标识符不合法
error_information = state_graph[node_index].str ;
return 4;
}
else { //合法标识符
ans.push_back( Word(now_node.str, now_node.str) );
}
}
else{ //不合法字符
error_information = ch;
return 5;
}
}
}
void output(){
printf( "Lexical analysis completed !\n" );
for( int i=0; i<ans.size(); i++ ) {
ans[i].print();
}
printf( "----------------------------------------------------------------\n" );
printf(" Statistical information: \n" );
int num_int = 0, num_double = 0, num_string = 0, num_variable = 0, num_keyword = 0, num_divide_word = 0 ;
for( int i=0; i<ans.size() ;i++ ) {
if( ans[i].type == NUM1) {
num_int++ ;
}
else if( ans[i].type == NUM2 ) {
num_double++ ;
}
else if( ans[i].type == CONSTANT_STRING ) {
num_string++ ;
}
else if( ans[i].type == ID ) {
num_variable++ ;
}
else if( key_word_id.count( ans[i].type ) > 0 ) {
num_keyword++ ;
}
else {
num_divide_word++ ;
}
}
printf(" num_int = %d\n num_double = %d\n num_string = %d\n num_variable = %d\n num_keyword = %d\n num_divide_word = %d\n ",
num_int, num_double, num_string, num_variable, num_keyword, num_divide_word ) ;
}
/// Points stdout back at the interactive console after it was redirected to a
/// file. "CON" only names the console on Windows; elsewhere it would create a
/// regular file called CON, so the controlling terminal is used instead.
///
/// @return true when stdout was reopened successfully.
static bool ReopenConsole(){
#ifdef _WIN32
    const char *console_device = "CON";
#else
    const char *console_device = "/dev/tty";
#endif
    return freopen( console_device, "w", stdout ) != NULL;
}

/// Runs the scanner, writes the token list to the output file on success and
/// reports the outcome on the console.
int main()
{
    const int flag = TestScan();
    if( flag == 0 ) {
        // stdout still points at the output file here.
        output();
    }
    if( !ReopenConsole() ) {
        // No console available (for example no controlling terminal): stdout
        // is unusable after a failed freopen, so fall back to stderr.
        fprintf( stderr, "ERROR: fail to reopen the console, scan finished with status %d\n", flag );
        return 0;
    }
    if( flag == 0 ) {
        printf( "Lexical analysis completed, the results have been saved in the \"%s\" file !\n", fout_name.c_str() );
    }
    else {
        PrintError( flag );
    }
    return 0;
}