编程实战:类C语法的编译型脚本解释器(二)

系列入口:编程实战:类C语法的编译型脚本解释器(系列)-CSDN博客

        现在开始解释所有的设计思想和源代码。先从外围入手,最后会进入到一个巨大的解析语法的类。

        本文介绍TOKEN和变量。

目录

一、TOKEN

1.1 定义Token类型

1.2 将脚本拆解为Token

1.3 TryGetKeyword识别关键字

1.4 TryGetNumber识别数值

1.5 其余TryGetXXXX略 

二、变量


一、TOKEN

1.1 定义Token类型

        token是编程语言的基本单元,是最小单位,包括分隔符、标识符、操作符、关键字、数字、字符串字面值,不包括空白。在C和类C语法中,空白字符包括换行都会被忽略(但预处理程序并非如此,所以预处理是额外的东西)。

        所有编译程序首先都会把源代码分解成一系列token,本代码也是如此。token的相关定义如下:

	// Capacity of each entry in the fixed-size string tables (predefined keywords,
	// operators, type names). Other identifiers may be of any length.
	enum { TOKEN_BUF_LEN = 128 };
	// A lexical token: one element of the script once whitespace has been removed.
	struct Token
	{
		enum types { DELIMITER = 0, OPERATOR, IDENTIFIER, NUMBER, KEYWORD, STRING };

		types type;// token category
		string text;// token text
		size_t pos;// position of the token in the source code

		Token(types _type, char const* _text, size_t _pos) :type(_type), text(_text), pos(_pos) {}

		// Formats the token as "<pos> <TYPE NAME> <text>" for debug output.
		string ToString()const
		{
			STATIC_C const char typestr[][TOKEN_BUF_LEN] = { "DELIMITER","OPERATOR","IDENTIFIER","NUMBER","KEYWORD","STRING" };// must stay in the same order as types
			char buf[TOKEN_BUF_LEN * 2];
			string ret;
			// pos is a size_t: convert it explicitly so the argument type matches the
			// %lu conversion (passing a size_t to %ld is a format/argument mismatch).
			sprintf(buf, "%03lu %-12s ", static_cast<unsigned long>(pos), typestr[type]);
			ret = buf;
			ret += text.c_str();
			return ret;
		}
	};

        每个Token包含类型和文本(对应源代码中的表现形式),同时为了调试需要,增加了pos记录在脚本中的位置。

1.2 将脚本拆解为Token

        作为编译的第一步,显然是将源代码分解为token。

        这一步由一个类来实现:

class CTokens
{
		public:
			vector m_tokens;//解析出的语法元素
			bool ToTokens(string& source)
			{
				string::size_type pos = 0;
				while (GetToken(source, pos));
				return true;
			}
};

        解析出的token保存在m_tokens中,而函数ToTokens()仅仅是循环调用GetToken()解析出一个一个token而已。 

        GetToken()是关键的主控函数:

			bool GetToken(string& source, string::size_type& pos)
			{
				Token::types type;
				string token;
				char c;
				bool isInComment = false;
				while (pos < source.size())
				{
					c = source[pos];
					if (isInComment)
					{
						if ('\n' == c)
						{
							isInComment = false;
						}
						++pos;
						continue;
					}
					if ('/' == c && pos + 1 < source.size() && '/' == source[pos + 1])
					{
						isInComment = true;
						pos += 2;
						continue;
					}
					if (!IsBlank(c))break;
					++pos;
				}
				if (source.size() == pos)return false;
				if (TryGetKeyword(source.c_str(), pos, token))
				{
					type = Token::KEYWORD;
				}
				else if (TryGetNumber(source.c_str(), pos, token))
				{
					type = Token::NUMBER;
				}
				else if (TryGetString(source.c_str(), pos, token))
				{
					type = Token::STRING;
				}
				else if (TryGetDelimiter(source.c_str(), pos, token))
				{
					type = Token::DELIMITER;
				}
				else if (TryGetOperator(source.c_str(), pos, token))
				{
					type = Token::OPERATOR;
				}
				else if (TryGetIdentifier(source.c_str(), pos, token))
				{
					type = Token::IDENTIFIER;
				}
				else
				{
					CmyException::Throw(__FILE__, __LINE__, source.c_str(), pos, "无法识别的符号");
					return false;
				}
				m_tokens.push_back(Token(type, token.c_str(), pos - token.size()));
				return true;
			}

        这个函数的流程不复杂,先跳过空白和注释(仅支持“//”单行注释),然后依次尝试每种token,每种尝试如果成功会修改当前位置pos(通过引用参数),如果失败则不会修改pos。

        TryGetXXXX这一组函数每个都不复杂,不过调用顺序有名堂,关键字是最优先的,这就保证关键字不可能被用作变量名。

1.3 TryGetKeyword识别关键字

        这个复杂一些,由几个函数组合而成:

			// Finds the longest entry of headset that str starts with.
			//   headset: table of fixed-size strings; its last entry must be an empty string
			//   ret:     receives the longest matching entry, or "" when none matches
			// Returns true when at least one entry matched.
			bool IsStartWith(char const* str, char const (*headset)[TOKEN_BUF_LEN], string& ret)const
			{
				ret = "";
				for (char const (*entry)[TOKEN_BUF_LEN] = headset; (*entry)[0] != '\0'; ++entry)
				{
					size_t const entryLen = strlen(*entry);
					if (0 != strncmp(*entry, str, entryLen))continue;
					// keep only the longest candidate seen so far
					if (entryLen > ret.size())ret = *entry;
				}
				return !ret.empty();
			}
			bool IsKeyword(char const* str, string& key)const
			{
				STATIC_C char const buf[][TOKEN_BUF_LEN] = {
					"asm","default","float","operator","static_cast","union",
					"auto","delete","for","private","struct","unsigned",
					"bool","do","friend","protected","switch","using",
					"break","double","goto","public","template","virtual",
					"case","dynamic_cast","if","register","this","void",
					"catch","else","inline","reinterpret_cast","throw","volatile",
					"char","enum","int","return","true","wchar_t",
					"class","explicit","long","short","try","while",
					"const","export","mutable","signed","typedef",
					"const_cast","extern","namespace","sizeof","typeid",
					"continue","false","new","static","typename","string",
					""
				};//必须以空串结尾
				return IsStartWith(str, buf, key);
			}
			// Tries to read a keyword at source[pos].
			//   pos: advanced past the keyword on success, untouched on failure
			//   ret: receives the keyword text on success
			// A reserved word only counts as a keyword when it is followed by the end
			// of the text, a blank, a delimiter or an operator.
			bool TryGetKeyword(char const* source, string::size_type& pos, string& ret)
			{
				string matched;
				if (!IsKeyword(source + pos, matched))return false;

				char const* const after = source + pos + matched.size();
				string ignoredOperator;// IsOperator needs an output argument; its value is not used
				bool const atBoundary = '\0' == *after
					|| IsBlank(*after)
					|| IsDelimiter(*after)
					|| IsOperator(after, ignoredOperator);
				if (!atBoundary)return false;

				ret = matched;
				pos += matched.size();
				return true;
			}

         规则其实也很简单:以关键字开头,并且其后是{空白、分隔符、操作符}之一或者已经到达脚本结尾,则为一个关键字。

1.4 TryGetNumber识别数值

        数值是以数字(或后面紧跟数字的小数点)开头,由字母、数字、小数点和下划线组成的串,同时还需要符合一些规则,代码里分两步进行,第一步识别出串,第二步则将串根据各种规则转换为数值:

			// Tries to read the text of a numeric literal at source[pos].
			// A literal starts with a digit, or with a '.' immediately followed by a
			// digit, and continues over letters, digits, '.' and '_'. The text is only
			// collected here; it is validated and converted later.
			//   pos: advanced past the literal on success, untouched on failure
			//   ret: receives the literal text ("" on failure)
			// Returns true when a literal was read.
			bool TryGetNumber(char const* source, string::size_type& pos, string& ret)
			{
				ret = "";
				char c = source[pos];
				bool const startsWithDigit = (c >= '0' && c <= '9');
				bool const startsWithDotDigit = ('.' == c && source[pos + 1] >= '0' && source[pos + 1] <= '9');
				if (!startsWithDigit && !startsWithDotDigit)return false;

				// In a hex literal 'e'/'E' are digits, so a following sign is an operator.
				bool const isHex = ('0' == c && ('x' == source[pos + 1] || 'X' == source[pos + 1]));
				while ((c = source[pos]) != '\0')
				{
					bool const isWordChar = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || '.' == c || '_' == c;
					bool isExponentSign = false;
					if (!isWordChar && !isHex && ('+' == c || '-' == c) && !ret.empty())
					{
						// Keep the sign of an exponent ("1e-5", "2.5E+10") inside the literal;
						// otherwise the literal would be cut after the 'e' and rejected later.
						char const prev = ret[ret.size() - 1];
						char const next = source[pos + 1];
						isExponentSign = ('e' == prev || 'E' == prev) && next >= '0' && next <= '9';
					}
					if (!isWordChar && !isExponentSign)break;
					ret += c;
					++pos;
				}
				return ret.size() != 0;
			}
			// Converts the text of a numeric literal into var.
			//   source: NUL-terminated literal text
			//   var:    var.type is set to DOUBLE (value in dValue) or LONG (value in lValue)
			// Literals with a '.' or a decimal exponent marker are parsed with strtod.
			// Everything else is parsed with strtol: base 16 after "0x"/"0X", base 8
			// after a leading '0', base 10 otherwise.
			// Accepted suffixes (case-insensitive): f, l for DOUBLE; u, l, i64 for LONG.
			// Returns true on success. Any other trailing text is reported through
			// CException::Throw, and false is returned if that call returns.
			// NOTE(review): GetToken reports errors through CmyException, this function
			// through CException - confirm which name is intended.
			bool NumberToVariable(char const* source, Variable& var)
			{
				char* endptr;
				// 'e'/'E' are ordinary digits in a hex literal, not an exponent marker.
				// Without this check "0x1E" would be classified as DOUBLE.
				bool const isHex = ('0' == source[0] && ('x' == source[1] || 'X' == source[1]));
				if (IsCharIn('.', source) || (!isHex && (IsCharIn('e', source) || IsCharIn('E', source))))
				{
					var.type = Variable::DOUBLE;
					var.dValue = strtod(source, &endptr);
				}
				else
				{
					var.type = Variable::LONG;
					long prefix = 0;
					long radix = 10;
					if (isHex)
					{
						radix = 16;
						prefix = 2;
					}
					else if ('0' == source[0])
					{
						radix = 8;
						prefix = 1;
					}
					var.lValue = strtol(source + prefix, &endptr, radix);
				}
				if (strlen(endptr) != 0)
				{
					// Text is left over after the number: only a known type suffix is allowed.
					if (Variable::DOUBLE == var.type && (0 == stricmp(endptr, "f") || 0 == stricmp(endptr, "l"))
						|| Variable::LONG == var.type && (0 == stricmp(endptr, "u") || 0 == stricmp(endptr, "l") || 0 == stricmp(endptr, "i64")))
					{
						return true;
					}
					string str;
					str = "数值常量格式错误 ";
					str += endptr;
					CException::Throw(__FILE__, __LINE__, source, endptr - source, str.c_str());
					return false;
				}
				return true;
			}

1.5 其余TryGetXXXX略 

二、变量

        我非常痛恨无类型变量,比如JavaScript,所以我在这个脚本里面使用强类型。

        变量类型做了简化,分为long、double和string。通过一个类存储所有的变量,代码如下:

	// A script variable or constant.
	// The value is stored in exactly one of lValue / dValue / strValue, selected by
	// type. A NULLVARIABLE has no type yet and takes the type of the first value
	// assigned to it; after that, assigned values are converted to the existing type.
	struct Variable
	{
		enum types { NULLVARIABLE = 0, LONG, DOUBLE, STRING };
		types type;// which of the value members below is in use
		bool isconst;// true for a constant
		long lValue;// value when type is LONG
		double dValue;// value when type is DOUBLE
		string strValue;// value when type is STRING

		Variable() :type(NULLVARIABLE), isconst(false), lValue(0), dValue(0.) {}
		bool isNull()const { return type == NULLVARIABLE; }
		bool isNumber()const { return type == LONG || type == DOUBLE; }
		bool isString()const { return type == STRING; }
		// Resets everything, including the type and the const flag.
		void clear()
		{
			type = NULLVARIABLE;
			isconst = false;
			lValue = 0;
			dValue = 0;
			strValue = "";
		}
		// Resets the stored values only; type and isconst are kept.
		void initvalue()
		{
			lValue = 0;
			dValue = 0;
			strValue = "";
		}
		// Assigns a long, converting it to the current type (LONG if still untyped).
		Variable& operator = (long v)
		{
			char buf[256];
			if (NULLVARIABLE == type)type = LONG;
			switch (type)
			{
			case LONG:lValue = v; break;
			case DOUBLE:dValue = v; break;
			case STRING:
				sprintf(buf, "%ld", v);
				strValue = buf;
				break;
			default:break;
			}
			return *this;
		}
		// Assigns a double, converting it to the current type (DOUBLE if still untyped).
		Variable& operator = (double v)
		{
			char buf[256];
			if (NULLVARIABLE == type)type = DOUBLE;
			switch (type)
			{
			case LONG:lValue = (long)v; break;
			case DOUBLE:dValue = v; break;
			case STRING:
				gcvt(v, 200, buf);
				strValue = buf;
				break;
			default:break;
			}
			return *this;
		}
		// Assigns a string, converting it to the current type (STRING if still untyped).
		Variable& operator = (string const& v)
		{
			if (NULLVARIABLE == type)type = STRING;
			switch (type)
			{
			case LONG:lValue = atol(v.c_str()); break;
			case DOUBLE:dValue = atof(v.c_str()); break;
			case STRING:strValue = v; break;
			default:break;
			}
			return *this;
		}
		// Assigns the value of another variable, converted to this variable's type
		// (or adopting v's type if this one is still untyped). isconst is not copied.
		Variable& operator = (Variable const& v)
		{
			if (NULLVARIABLE == type)type = v.type;
			switch (type)
			{
			case LONG:lValue = v.GetLong(); break;
			case DOUBLE:dValue = v.GetDouble(); break;
			case STRING:strValue = v.GetString(); break;
			default:break;
			}
			return *this;
		}
		// Unary minus; only numeric values are negated, other types are returned unchanged.
		Variable operator-()const
		{
			Variable tmp = *this;
			switch (type)
			{
			case LONG:tmp.lValue = -lValue; break;
			case DOUBLE:tmp.dValue = -dValue; break;
			default:break;
			}
			return tmp;
		}
		// Result type of combining operands of types a and b.
		// eva=true means promotion for an assignment: the left operand's type wins.
		// Returns NULLVARIABLE when the two types cannot be combined.
		static types typeUpgrade(types a, types b, bool eva = false)
		{
			if (NULLVARIABLE == a || NULLVARIABLE == b)return NULLVARIABLE;
			if (LONG == a && LONG == b)return LONG;
			if (STRING == a && STRING == b)return STRING;
			if (DOUBLE == a && DOUBLE == b)return DOUBLE;
			if (!eva)
			{
				if (DOUBLE == a && LONG == b)return DOUBLE;
				if (LONG == a && DOUBLE == b)return DOUBLE;
			}
			else
			{
				if (DOUBLE == a && LONG == b)return DOUBLE;
				if (LONG == a && DOUBLE == b)return LONG;
			}
			return NULLVARIABLE;
		}
		// Value converted to long (0 for an untyped variable).
		long GetLong()const
		{
			switch (type)
			{
			case LONG: return lValue;
			case DOUBLE: return (long)dValue;
			case STRING: return atol(strValue.c_str());
			default:return 0;
			}
		}
		// Value converted to double (0 for an untyped variable).
		double GetDouble()const
		{
			switch (type)
			{
			case LONG: return lValue;
			case DOUBLE: return dValue;
			case STRING: return atof(strValue.c_str());
			default:return 0.;
			}
		}
		// Truth value: non-zero numbers are true; strings and untyped variables are false.
		bool GetBool()const
		{
			switch (type)
			{
			case LONG: return 0 != lValue;
			case DOUBLE: return 0 != dValue;
			default:return false;
			}
		}
		// Value converted to string ("" for an untyped variable).
		string GetString()const
		{
			char buf[256];
			switch (type)
			{
			case LONG: sprintf(buf, "%ld", lValue); return buf;
			case DOUBLE: gcvt(dValue, 200, buf); return buf;
			case STRING: return strValue;
			default:return "";
			}
		}
		// Arithmetic operators. The result starts as a copy of the left operand, its
		// type becomes typeUpgrade(left, right), and the matching value member is
		// computed. If the types cannot be combined the result has type NULLVARIABLE.
		Variable operator+(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = typeUpgrade(type, b.type);
			switch (tmp.type)
			{
			case LONG:tmp.lValue = GetLong() + b.GetLong(); break;
			case DOUBLE:tmp.dValue = GetDouble() + b.GetDouble(); break;
			case STRING:tmp.strValue = GetString() + b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator-(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = typeUpgrade(type, b.type);
			switch (tmp.type)
			{
			case LONG:tmp.lValue = GetLong() - b.GetLong(); break;
			case DOUBLE:tmp.dValue = GetDouble() - b.GetDouble(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator*(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = typeUpgrade(type, b.type);
			switch (tmp.type)
			{
			case LONG:tmp.lValue = GetLong() * b.GetLong(); break;
			case DOUBLE:tmp.dValue = GetDouble() * b.GetDouble(); break;
			default:break;
			}
			return tmp;
		}
		// Throws the string literal "div zero" when the divisor is zero.
		Variable operator/(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = typeUpgrade(type, b.type);
			switch (tmp.type)
			{
			case LONG:
			{
				if (0 == b.GetLong())throw "div zero";
				tmp.lValue = GetLong() / b.GetLong(); break;
			}
			case DOUBLE:
			{
				if (0 == b.GetDouble())throw "div zero";
				tmp.dValue = GetDouble() / b.GetDouble(); break;
			}
			default:break;
			}
			return tmp;
		}
		// Defined for LONG operands only; throws the string literal "mod zero" when
		// the divisor is zero.
		Variable operator%(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = typeUpgrade(type, b.type);
			switch (tmp.type)
			{
			case LONG:
			{
				if (0 == b.GetLong())throw "mod zero";
				tmp.lValue = GetLong() % b.GetLong(); break;
			}
			default:break;
			}
			return tmp;
		}
		// Comparison and logical operators. The result is a LONG holding 0 or 1,
		// computed in the type given by typeUpgrade(left, right).
		// NOTE(review): when the operand types cannot be combined, lValue keeps the
		// value copied from the left operand - confirm this is intended.
		Variable operator>(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() > b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() > b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() > b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator<(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() < b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() < b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() < b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator>=(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() >= b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() >= b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() >= b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator<=(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() <= b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() <= b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() <= b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator==(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() == b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() == b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() == b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator!=(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() != b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() != b.GetDouble(); break;
			case STRING:tmp.lValue = GetString() != b.GetString(); break;
			default:break;
			}
			return tmp;
		}
		// Both operands have already been evaluated when this is called, so there
		// is no short-circuiting here.
		Variable operator&&(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() && b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() && b.GetDouble(); break;
			default:break;
			}
			return tmp;
		}
		Variable operator||(Variable const& b)const
		{
			Variable tmp = *this;
			tmp.type = LONG;
			switch (typeUpgrade(type, b.type))
			{
			case LONG:tmp.lValue = GetLong() || b.GetLong(); break;
			case DOUBLE:tmp.lValue = GetDouble() || b.GetDouble(); break;
			default:break;
			}
			return tmp;
		}
		// Name of a type for display. For an out-of-range value the returned pointer
		// refers to a function-local static buffer that the next such call overwrites.
		static char const* TypeStr(types type)
		{
			STATIC_C const char typestr[][TOKEN_BUF_LEN] = { "NULLVARIABLE","LONG","DOUBLE","STRING" };// must stay in the same order as types
			if(type>=0 && type<4)return typestr[type];
			else
			{
				static char buf[256];
				sprintf(buf, "错误的类型 %d", static_cast<int>(type));
				//cout << buf << endl; exit(0);
				return buf;
			}
		}
		// Debug description: indentation, constant/variable flag, type name and value.
		//   level: nesting depth; each level indents by four spaces
		string ToString(long level = 0)const
		{
			char buf[256];// used for the numeric cases only
			string value;
			switch (type)
			{
			case LONG:sprintf(buf, "%ld", lValue); value = buf; break;
			case DOUBLE:gcvt(dValue, 200, buf); value = buf; break;
			// Take the string as it is: copying it into the fixed-size buffer would
			// overflow for values of 256 characters or more.
			case STRING:value = strValue; break;
			default:value = "NULL"; break;
			}
			string ret;
			// A negative level means no indentation instead of an enormous assign() count.
			if (level > 0)ret.assign(level * 4, ' ');
			ret += " ";
			ret += (isconst ? "常量" : "变量");
			ret += "类型 ";
			ret += TypeStr(type);
			ret += " : ";
			ret += value;
			return ret;
		}
	};

        没有使用union,直接用类型和三个变量来存储,空间当然是有浪费的,但是没人知道啊。

        重载了各种类型的相互操作,都很简单,只是繁琐。

        成员变量:

类型 变量名 功能
enum types type 实际存储的类型
bool isconst 是否是常量,常量不允许修改
long lValue type为LONG时使用
double dValue type为DOUBLE时使用
string strValue type为STRING时使用

(待续)

(这里是结束)

你可能感兴趣的:(C语法的预编译脚本解释器,c语言,C++,脚本解释器)