2020年寒假?(不)认真学习《自制编程语言,基于C语言》第一个Demo,词法分析

前言

这本书的sparrow(麻雀)语言基于wren(鹪鹩)语言,是一门面向对象语言。至于其他特点我忘了,只记得Demo中部分函数的命名过于简单,简单到与其他库函数产生了冲突……

几个月前写(抄)的项目,后面的语法分析、制作C语言的面向对象、设计虚拟机等等内容没时间看下去了,只能讲讲废话了——

加前缀

C语言做的,因为没有前缀,所以一是不好看,二是容易冲突。只有一个解决办法,就是用专有的前缀。所以,我在每个函数、每个结构体、每个枚举、许多个宏的前面,都加上相同的前缀 hj

词法分析的Demo

头文件

//hj_common.h
/*
hj_common.h文件是整个项目的最底层的文件,这个文件里只有宏

整个项目是C写的,没有命名空间,所以很容易发生命名冲突!
规定:
		1.变量、结构体、枚举、函数等,所有的非宏标识符前都要有前缀 hj
		2.特殊宏前后都有两个下划线。特殊宏就是那些特别简单,分不清是枚举还是宏的
		3.所有的前缀都是hj,把整个项目所有的hj都删除了基本不影响理解
			可能影响执行。毕竟不能避免冲突和重命名了!!!


*/
#define _CRT_SECURE_NO_WARNINGS
#ifndef _hj_common_h
#define _hj_common_h
#include
#include
#include //uint32_t
#include
#include
#include
#include
#include
#include //stat 求文件大小
#include





/* Boolean type and its two values; hjBOOL is a plain char. */
#define hjBOOL char
#define hjTRUE 1
#define hjFALSE 0
/* Linkage markers: hjPUBLIC expands to nothing, hjPRIVATE expands to `static`. */
#define hjPUBLIC
#define hjPRIVATE static
/* GCC/Clang-style attribute marking an entity as intentionally unused. */
#define UNUSED __attribute__((unused))




/*
hjASSERT(condition, errMsg)
	Debug-only assertion. hjDEBUG is expected to be defined by the build
	(the original note says: in the makefile).
	With hjDEBUG defined: when `condition` is false, prints the file, line,
	function name and errMsg to stderr and calls abort().
	Without hjDEBUG: expands to a no-op expression, so `condition` is NOT evaluated.

	The condition is parenthesised so that expressions such as `a > b` are
	negated as a whole, and the body is wrapped in do{...}while(0) so the macro
	behaves like a single statement (safe in un-braced if/else).
*/
#ifdef hjDEBUG
	#define hjASSERT(condition,errMsg)\
	do{\
		if(!(condition)){\
			fprintf(stderr,"hjASSERT failed! %s:%d   IN FUNCTION %s(): %s\n",__FILE__,__LINE__,__func__,(errMsg));\
			abort();\
		}\
	}while(0)
#else
	#define hjASSERT(condition,errMsg) ((void)0)
#endif


/*
hjNOT_REACHED
	Place at a point that must never be executed. If it is reached, the file,
	line and function name are printed to stderr and the program aborts.
	Wrapped in do{...}while(0) so that `hjNOT_REACHED;` is exactly one statement
	and can be used in an un-braced if/else.
*/
#define hjNOT_REACHED \
do{\
fprintf(stderr,"hjNOT_REACHED :%s:%d   IN FUNCTION %s()\n",__FILE__,__LINE__,__func__);\
abort();\
}while(0)




/*
Forward declarations so that headers can refer to these types by pointer
before (or without) seeing their full definitions.
NOTE(review): hjParser and hjClass are only declared here; no definition is visible in this part of the project.
*/
typedef struct hjVM hjVM;
typedef struct hjLexer hjLexer;
typedef struct hjParser hjParser;
typedef struct hjClass hjClass;


#endif
//hj_core.h
/*
hj_core.h文件是整个项目的最核心的文件
		1.几乎所有底层的结构体、枚举都定义在这里
		2.只有结构体和枚举和全局变量,没有函数

*/
#ifndef _hj_core_h
#define _hj_core_h
#include"hj_common.h"


//Virtual machine state
typedef struct hjVM {
	uint32_t allocatedByte;		//running total of bytes allocated (updated by hjMemManager)
	hjLexer* curLexer;					//the lexer currently in use
}hjVM;





//Token types produced by the lexer
typedef enum hjTokenType {
	/*unknown token*/
	hjTOKEN_UNKNOWN,

	/*data types*/
	hjTOKEN_NUM,					//number
	hjTOKEN_STR,					//string
	hjTOKEN_ID,						//identifier
	hjTOKEN_EXPR,					//embedded (interpolated) expression

	/*keywords*/
	hjTOKEN_LET,					//let
	hjTOKEN_FUNCTION,		//function
	hjTOKEN_IF,						//if
	hjTOKEN_ELSE,					//else
	hjTOKEN_TRUE,					//True
	hjTOKEN_FALSE,				//False
	hjTOKEN_WHILE,				//while
	hjTOKEN_FOR,					//for
	hjTOKEN_BREAK,				//break
	hjTOKEN_CONTINUE,		//continue
	hjTOKEN_RETURN,			//return
	hjTOKEN_NIL,						//Nil
	/*classes and module import*/
	hjTOKEN_CLASS,				//class
	hjTOKEN_SELF,					//self
	hjTOKEN_STATIC,				//static
	hjTOKEN_IS,						//is
	hjTOKEN_SUPER,				//super
	hjTOKEN_IMPORT,			//import
	

	/*separators*/
	hjTOKEN_AT,						//@
	hjTOKEN_WELL,					//#
	hjTOKEN_ACCENT,			//`      accent (backtick)
	hjTOKEN_SEMI,					//;      semicolon
	hjTOKEN_BASKSLASH,		// \     backslash
	hjTOKEN_COLON,				//:      colon
	hjTOKEN_COMMA,				//,      comma
	hjTOKEN_DOT,					//.      a single dot
	hjTOKEN_DOT_DOT,			//..     two consecutive dots
	hjTOKEN_LEFT_PAREN,	//(   
	hjTOKEN_RIGHT_PAREN,	//)   
	hjTOKEN_LEFT_BRACKET,	//[   
	hjTOKEN_RIGHT_BRACKET,	//]   
	hjTOKEN_LEFT_BRACE,	//{   
	hjTOKEN_RIGHT_BRACE,	//}   


	//assignment
	hjTOKEN_ASSIGN,				//=

	/*simple binary operators*/
	hjTOKEN_ADD,					//+
	hjTOKEN_SUB,					//-
	hjTOKEN_MUL,					//*
	hjTOKEN_DIV,					// /
	hjTOKEN_MOD,					//%
	hjTOKEN_POW,					//^			author's note: the operators above are left-associative, power is right-associative

	/*bitwise operators*/
	hjTOKEN_BIT_AND,			//&
	hjTOKEN_BIT_OR,				//|
	hjTOKEN_BIT_NOT,			//~
	hjTOKEN_BIT_SHIFT_LEFT,			//<<
	hjTOKEN_BIT_SHIFT_RIGHT,			//>>



	/*logical operators*/
	hjTOKEN_LOGIC_AND,		//&&
	hjTOKEN_LOGIC_OR,		//||
	hjTOKEN_LOGIC_NOT,		//!



	/*relational operators*/
	hjTOKEN_EQ,						//==
	hjTOKEN_NEQ,					//!=
	hjTOKEN_GT,						//>
	hjTOKEN_GE,						//>=
	hjTOKEN_LT,						//<
	hjTOKEN_LE,						//<=


	/*question mark, possibly part of the ?: ternary operator*/
	hjTOKEN_QUE,					//?


	hjTOKEN_EOL,					//end of line (newline / line feed, '\n')
	hjTOKEN_EOF					//end of file

}hjTokenType;




//A single token
typedef struct hjToken {
	hjTokenType type;			 //token type
	const char* start;				 //points at the first character of the token text
	uint32_t len;						 //length of the token text
	uint32_t lineNo;				 //source line number the token is on
}hjToken;





//Lexer state
typedef struct hjLexer {
	const char* file;				//file name/path (used in error messages)
	const char* srcCode;		//source text
	const char* next;				//points at the next character to read
	char curChar;					//current character
	hjToken preToken;			//previous token
	hjToken curToken;			//current token
	hjVM* vm;						//owning virtual machine
	int Lcounter;						//parenthesis nesting counter used while inside an embedded expression
}hjLexer;








//String
//Values of this type are the elements of the symbol table (see hjSymbolTable)
typedef struct hjString {
	uint32_t len;						//string length, not counting the '\0'
	char* str;							//string contents
}hjString;



/*
String buffer.
Stores the characters of a string object directly after the header, so a
value is allocated as sizeof(hjCharValue) + number of bytes needed for start[].
The trailing array is a C99 flexible array member (`start[]`) rather than the
non-standard zero-length array `start[0]`.
*/
typedef struct hjCharValue {
	uint32_t len;						//number of characters, NOT counting the '\0'
	char start[];					//flexible array member holding the characters
}hjCharValue;







/*--------------------定义几种类型的动态数组--------------------*/
//Growable array of hjString
typedef struct  hjStringArray {
	uint32_t count;					//number of elements in use
	uint32_t capacity;			//number of elements allocated
	hjString* data;					//element buffer
}hjStringArray;



//Growable array of bytes
typedef struct  hjByteArray {
	uint32_t count;					//number of elements in use
	uint32_t capacity;			//number of elements allocated
	uint8_t* data;					//byte buffer
}hjByteArray;


//Growable array of char
typedef struct  hjCharArray {
	uint32_t count;					//number of elements in use
	uint32_t capacity;			//number of elements allocated
	char* data;						//character buffer
}hjCharArray;

//Growable array of int
typedef struct  hjIntArray {
	uint32_t count;					//number of elements in use
	uint32_t capacity;			//number of elements allocated
	int* data;							//int buffer
}hjIntArray;



//The symbol table type is simply the string array type
#define hjSymbolTable hjStringArray






#endif
//hj_error.h
/*
hj_error.h文件是错误处理模块
*/
#ifndef _hj_error_h
#define _hj_error_h
#include"hj_core.h"



//Error categories understood by hjError / hjError_
typedef enum hjErrorType {
	hjERROR_IO,			//file I/O failure
	hjERROR_MEM,		//memory allocation failure
	hjERROR_LEX,		//lexing error
	hjERROR_PARSE,		//parsing error
	hjERROR_COMPILE,	//compilation error
	hjERROR_RUNTIME		//runtime error
}hjErrorType;



//Print an error message.
//hjError prints a fixed message; hjError_ takes a printf-style format plus arguments.
//_lexer is passed as void* and is cast to hjLexer* by the implementation when it is used.
hjPUBLIC void hjError(void* _lexer, hjErrorType _errType, char* _errMsg);
hjPUBLIC void hjError_(void* _lexer, hjErrorType _errType, const char* _format,...);



/*
Convenience wrappers around hjError_, one per error category.
The IO / MEM / RUNTIME variants pass NULL as the lexer; the LEX / PARSE /
COMPILE variants take the lexer as their first argument.
The first variadic argument is used as a printf-style format string, so pass
untrusted or pre-built text as ("%s", text), never as the format itself.
*/
#define __hjERROR_IO__(...)  hjError_(NULL,hjERROR_IO,__VA_ARGS__)
#define __hjERROR_MEM__(...)  hjError_(NULL,hjERROR_MEM,__VA_ARGS__)
#define __hjERROR_LEX__(lexer,...)  hjError_(lexer,hjERROR_LEX,__VA_ARGS__)
#define __hjERROR_PARSE__(lexer,...)  hjError_(lexer,hjERROR_PARSE,__VA_ARGS__)
#define __hjERROR_COMPILE__(lexer,...)  hjError_(lexer,hjERROR_COMPILE,__VA_ARGS__)
#define __hjERROR_RUNTIME__(...)  hjError_(NULL,hjERROR_RUNTIME,__VA_ARGS__)






#endif
//hj_mem.h
/*
hj_mem.h文件是内存管理模块
内存管理模块的功能:
			1.申请内存
			2.修改空间大小
			3.释放内存

*/
#ifndef _hj_mem_h
#define _hj_mem_h
#include"hj_error.h"





//Allocate, resize or free memory: _newSize == 0 frees _ptr and returns NULL,
//otherwise _ptr is (re)allocated to _newSize bytes
hjPUBLIC void* hjMemManager(hjVM* _vm, void* _ptr, uint32_t _oldSize, uint32_t _newSize);
//Return the smallest power of two that is greater than or equal to _v
hjPUBLIC uint32_t hjNumOf2Pow_GE_v(uint32_t _v);
//Release every string held by the symbol table, then the table itself
hjPUBLIC void hjClear_SymbolTable(hjVM* _vm, hjSymbolTable* _table);




/*
Memory-management helper macros, all built on hjMemManager.
	ALLOCATE(vm,type)                  allocate one object of `type`
	ALLOCATE_EXTRA(vm,type,extraSize)  allocate one `type` plus extraSize trailing bytes
	ALLOCATE_ARRAY(vm,type,count)      allocate `count` objects of `type`
	DEALLOCATE(vm,mem)                 free `mem`
	DEALLOCATE_ARRAY(vm,arr,count)     free the array `arr` holding `count` elements
Every argument is parenthesised so expressions such as `n + 1` are sized
correctly, and the DEALLOCATE macros call hjMemManager (the un-prefixed
name MemManager does not exist).
*/
#define ALLOCATE(vm,type)  ((type*)hjMemManager((vm),NULL,0,sizeof(type)))
#define ALLOCATE_EXTRA(vm,type,extraSize)  ((type*)hjMemManager((vm),NULL,0,sizeof(type) + (extraSize)))
#define ALLOCATE_ARRAY(vm,type,count)  ((type*)hjMemManager((vm),NULL,0,sizeof(type) * (count)))
#define DEALLOCATE(vm,mem)  hjMemManager((vm),(mem),0,0)
#define DEALLOCATE_ARRAY(vm,arr,count)  hjMemManager((vm),(arr),sizeof((arr)[0]) * (count),0)










/*
Each array type below has the same four operations:
	hjInit_*   reset the array to empty (no buffer, count and capacity 0)
	hjWrite_*  append _fillCount copies of _data, growing the buffer if needed
	hjAdd_*    append a single _data
	hjClear_*  free the buffer and reset the array to empty
*/
/*--------------------string array methods--------------------*/
hjPUBLIC void hjInit_StringArray(hjStringArray* _arr);
hjPUBLIC void hjWrite_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data);
hjPUBLIC void hjClear_StringArray(hjVM* _vm, hjStringArray* _arr);


/*--------------------byte array methods--------------------*/
hjPUBLIC void hjInit_ByteArray(hjByteArray* _arr);
hjPUBLIC void hjWrite_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data);
hjPUBLIC void hjClear_ByteArray(hjVM* _vm, hjByteArray* _arr);

/*--------------------char array methods--------------------*/
hjPUBLIC void hjInit_CharArray(hjCharArray* _arr);
hjPUBLIC void hjWrite_CharArray(hjVM* _vm, hjCharArray* _arr, char _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_CharArray(hjVM* _vm, hjCharArray* _arr, char _data);
hjPUBLIC void hjClear_CharArray(hjVM* _vm, hjCharArray* _arr);


/*--------------------int array methods--------------------*/
hjPUBLIC void hjInit_IntArray(hjIntArray* _arr);
hjPUBLIC void hjWrite_IntArray(hjVM* _vm, hjIntArray* _arr, int _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_IntArray(hjVM* _vm, hjIntArray* _arr, int _data);
//NOTE(review): "Intg" looks like a typo for "Int"; the definition uses the same spelling
hjPUBLIC void hjClear_IntgArray(hjVM* _vm, hjIntArray* _arr);





#endif
//hj_lexer.h
/*
hj_lexer.h文件是词法分析器

*/
#ifndef _hj_lexer_h
#define _hj_lexer_h
#include"hj_vm.h"
#include"hj_mem.h"
#include"hj_utf8.h"






/*global variables*/
//root directory of the script files (defined in the lexer source file)
extern char* hj_rootDir;




//Read the script file at _path and return its contents as a malloc'ed, NUL-terminated string
hjPUBLIC char* hjReadScript(const char* _path);






//Initialise a lexer over _srcCode
//NOTE(review): no return type is written here (hjPUBLIC expands to nothing), so this relies on
//implicit int; it should be `void`, changed together with the definition
hjPUBLIC hjInit_Lexer(hjVM* _vm, hjLexer* _lexer, const char* _file, const char* _srcCode);
//Look at the next character without consuming it
hjPUBLIC char hjPeek_NextChar(hjLexer* _lexer);
//Read the next token into _lexer->curToken
hjPUBLIC void hjGet_NextToken(hjLexer* _lexer);
//If the current token has the expected type, advance to the next token and return hjTRUE
hjPUBLIC hjBOOL hjMatch_Token(hjLexer* _lexer, hjTokenType _expectedToken);
//Require the current token to have the expected type, then advance; otherwise report _errMsg
hjPUBLIC void hjConsume_CurToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg);
//Advance, then require the new current token to have the expected type; otherwise report _errMsg
hjPUBLIC void hjConsume_NextToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg);










//NOTE(review): the declarations below are hjPRIVATE (static) but live in a header, so every file
//that includes this header gets its own declaration of functions it never defines.

//Is the character a decimal digit?
hjPRIVATE hjBOOL hjJudge_Num(char _c);
//Is the character a letter?
hjPRIVATE hjBOOL hjJudge_Alpha(char _c);

//Decide whether the text is a keyword or a plain identifier
hjPRIVATE hjTokenType hjJudge_IdOrKey(const char* _start, uint32_t _len);
//Advance to the next character
hjPRIVATE void hjGet_NextChar(hjLexer* _lexer);
//Consume the next character only if it equals _expectedChar
hjPRIVATE hjBOOL hjMatch_NextChar(hjLexer* _lexer, char _expectedChar);


//Skip consecutive blank characters
hjPRIVATE void hjSkip_Blanks(hjLexer* _lexer);
//Skip a single-line comment
hjPRIVATE void hjSkip_CommentALine(hjLexer* _lexer);
//Skip a multi-line comment
hjPRIVATE void hjSkip_Comment(hjLexer* _lexer);
//Skip the rest of the current line
hjPRIVATE void hjSkip_ALine(hjLexer* _lexer);


//Lex an identifier
hjPRIVATE void hjLex_Id(hjLexer* _lexer, hjTokenType _token);
//Lex a string
hjPRIVATE void hjLex_Str(hjLexer* _lexer);
//Lex a \u escape and append its UTF-8 encoding to _buf
hjPRIVATE void hjLex_CodePointOfUTF8(hjLexer* _lexer, hjByteArray* _buf);






#endif
//hj_utf8.h
/*
hj_utf8.h文件是unicode utf8编码模块
*/
#ifndef _hj_utf8_h
#define _hj_utf8_h
#include"hj_common.h"



//Number of bytes needed to encode the code point _value as UTF-8 (0 if out of range)
hjPUBLIC uint32_t hjGet_ByteNumOfEncodedUTF8(int _value);
//Length in bytes of the UTF-8 sequence that starts with lead byte _byte (0 for a continuation byte)
hjPUBLIC uint32_t hjGet_ByteNumOfDecodedUTF8(uint8_t _byte);
//Encode _value as UTF-8 into _buf; returns the number of bytes written
hjPUBLIC uint8_t hjEncodeUTF8(uint8_t* _buf, int _value);
//Decode one UTF-8 sequence of at most _len bytes; returns the code point or -1 on error
hjPUBLIC int hjDecodeUTF8(const uint8_t* _bytePtr, uint32_t _len);


#endif
//hj_vm.h
/*
hj_vm.h文件是虚拟机

*/
#ifndef _hj_vm_h
#define _hj_vm_h
#include"hj_error.h"







//Initialise an existing virtual machine object
hjPUBLIC void hjInit_VM(hjVM* _vm);
//Allocate and initialise a new virtual machine
hjPUBLIC hjVM* hjNew_VM();



#endif

   \;
   \;
   \;
   \;
   \;

源文件

//hj_error.cpp
#include "hj_error.h"

/*
Print an error message to stdout in highlighted red (Windows console API).
	_lexer    optional hjLexer*; when non-NULL the file, line number and text of
	          its current token are printed before the message
	_errType  selects the "[Error_xxx]" tag printed first
	_errMsg   the message text, printed verbatim followed by a newline
Unlike hjError_, this function does not terminate the program.
*/
hjPUBLIC void hjError(void* _lexer, hjErrorType _errType, char* _errMsg) {

	//switch the console text colour to bright red
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);

	//pick the tag for this error category; unknown categories print no tag
	const char* tag = "";
	switch (_errType) {
	case hjERROR_IO:
		tag = "[Error_io]";
		break;
	case hjERROR_MEM:
		tag = "[Error_mem]";
		break;
	case hjERROR_LEX:
		tag = "[Error_lex]";
		break;
	case hjERROR_PARSE:
		tag = "[Error_parse]";
		break;
	case hjERROR_RUNTIME:
		tag = "[Error_runtime]";
		break;
	case hjERROR_COMPILE:
		tag = "[Error_compile]";
		break;
	default:
		break;
	}
	printf("%s", tag);

	if (_lexer != NULL) {
		hjLexer* myLexer = (hjLexer*)_lexer;
		//file and curToken.start may still be NULL (e.g. right after hjInit_Lexer);
		//passing NULL to %s is undefined, so substitute a placeholder
		const char* fileName = (myLexer->file != NULL) ? myLexer->file : "(null)";
		const char* tokenText = (myLexer->curToken.start != NULL) ? myLexer->curToken.start : "(null)";
		//lineNo is uint32_t, so print it as unsigned
		printf("file \"%s\",line %u,at \"%s\"=>", fileName, (unsigned)myLexer->curToken.lineNo, tokenText);
	}
	printf("%s\n", _errMsg);

	//restore the console text colour to white
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN);

}

/*
Format an error message, print it to stderr in highlighted red, then exit(1).
	_lexer    hjLexer* for LEX / PARSE / COMPILE errors, otherwise unused
	_errType  error category; decides which prefix is printed
	_format   printf-style format followed by its arguments (output is
	          truncated to the size of the internal buffer)
This function does not return.
*/
hjPUBLIC void hjError_(void* _lexer, hjErrorType _errType, const char* _format, ...) {

	char buf[512] = { '\0' };
	va_list  myVa;
	va_start(myVa, _format);
	vsnprintf(buf, sizeof(buf), _format, myVa);   //render the variadic arguments into buf
	va_end(myVa);


	//switch the console text colour to bright red
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);

	switch (_errType) {
	case hjERROR_IO:
	case hjERROR_MEM:
		//note: __FILE__/__LINE__/__func__ expand here, so they name this function, not the caller
		fprintf(stderr, "%s:%d IN FUNCTION %s():%s\n", __FILE__, __LINE__, __func__, buf);
		break;
	case hjERROR_LEX:
	case hjERROR_PARSE:
	case hjERROR_COMPILE:
		hjASSERT(_lexer != NULL, "lexer is null");
		if (_lexer == NULL) {
			//release builds compile hjASSERT away; still report the message instead of crashing
			fprintf(stderr, "%s\n", buf);
			break;
		}
		//lineNo is uint32_t, so print it as unsigned
		fprintf(stderr, "%s:%u \"%s\"\n",
			((hjLexer*)_lexer)->file,
			(unsigned)((hjLexer*)_lexer)->preToken.lineNo,
			buf
		);
		break;
	case hjERROR_RUNTIME:
		fprintf(stderr, "%s\n", buf);
		break;	
	default:
		hjNOT_REACHED;
		break;
	}

	//restore the console text colour to white
	SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN);

	//terminate the program
	exit(1);
}
//hj_mem.cpp
#include"hj_mem.h"





/*
Central allocator: allocate, resize or free a block.
	_vm       virtual machine whose allocatedByte counter is adjusted by
	          (_newSize - _oldSize); may be NULL, in which case nothing is counted
	_ptr      existing block, or NULL to allocate a fresh one
	_oldSize  size the caller believes _ptr currently has
	_newSize  requested size; 0 means "free _ptr"
Returns the (possibly moved) block, or NULL when _newSize is 0.
On allocation failure a memory error is reported through __hjERROR_MEM__.
*/
hjPUBLIC void* hjMemManager(hjVM* _vm, void* _ptr, uint32_t _oldSize, uint32_t _newSize) {

	//keep the running total of allocated bytes (unsigned arithmetic wraps correctly when shrinking)
	if (_vm != NULL) {
		_vm->allocatedByte += _newSize - _oldSize;
	}

	/*
	Never call realloc(ptr,0): its result is implementation-defined and may be
	a non-NULL pointer that cannot be used. Free explicitly instead.
	*/
	if (_newSize == 0) {
		free(_ptr);
		return NULL;
	}

	//use a temporary so a failed realloc does not lose the original pointer
	void* p = realloc(_ptr, _newSize);
	if (p == NULL) {
		__hjERROR_MEM__("memory allocate failed!");
	}
	return p;
}



/*
Return the smallest power of two that is >= _v.
An input of 0 yields 1. The value minus one is "smeared": every bit below the
highest set bit is turned on by OR-ing in right-shifted copies (shift 1, 2, 4,
8, 16), and adding one then gives the next power of two.
Inputs above 2^31 wrap around to 0 because the result does not fit in 32 bits.
*/
hjPUBLIC uint32_t hjNumOf2Pow_GE_v(uint32_t _v) {

	//treat 0 like 1 so the result is 1 instead of 0
	uint32_t smeared = (_v == 0) ? 0 : _v - 1;

	unsigned shift;
	for (shift = 1; shift <= 16; shift <<= 1) {
		smeared |= smeared >> shift;
	}

	return smeared + 1;
}


//Free the text of every entry in the symbol table, then release the table's own buffer
hjPUBLIC void hjClear_SymbolTable(hjVM* _vm, hjSymbolTable* _table) {

	uint32_t index;
	for (index = 0; index < _table->count; index++) {
		//sizes 0 -> 0: frees the string without changing the byte counter
		hjMemManager(_vm, _table->data[index].str, 0, 0);
	}

	hjClear_StringArray(_vm, _table);
}








/*--------------------字符串数组方法--------------------*/
//Reset a string array to the empty state: no buffer, zero count, zero capacity
hjPUBLIC void hjInit_StringArray(hjStringArray* _arr) {
	_arr->count = 0;
	_arr->capacity = 0;
	_arr->data = NULL;
}

//Append _fillCount copies of _data to the string array, growing the buffer
//to the next power-of-two capacity when the new element count does not fit
hjPUBLIC void hjWrite_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data, uint32_t _fillCount) {

	//element count after the write
	uint32_t needed = _arr->count + _fillCount;

	if (needed > _arr->capacity) {
		size_t bytesBefore = _arr->capacity * sizeof(hjString);
		_arr->capacity = hjNumOf2Pow_GE_v(needed);
		size_t bytesAfter = _arr->capacity * sizeof(hjString);

		//growing must never produce a smaller buffer
		hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");

		_arr->data = (hjString*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
	}

	//store the copies one after another at the end of the array
	uint32_t remaining = _fillCount;
	while (remaining > 0) {
		_arr->data[_arr->count] = _data;
		++_arr->count;
		--remaining;
	}
}

//Append a single string: a write with a fill count of one
hjPUBLIC void hjAdd_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data) {
	const uint32_t single = 1;
	hjWrite_StringArray(_vm, _arr, _data, single);
}

//Free the string array's buffer (capacity * element size bytes) and reset it to empty.
//The strings the elements point at are not freed here.
hjPUBLIC void hjClear_StringArray(hjVM* _vm, hjStringArray* _arr) {
	uint32_t bytesHeld = _arr->capacity * sizeof(_arr->data[0]);

	//new size 0 => release the buffer
	hjMemManager(_vm, _arr->data, bytesHeld, 0);

	hjInit_StringArray(_arr);
}




/*--------------------字节数组方法--------------------*/
//Reset a byte array to the empty state: no buffer, zero count, zero capacity
hjPUBLIC void hjInit_ByteArray(hjByteArray* _arr) {
	_arr->count = 0;
	_arr->capacity = 0;
	_arr->data = NULL;
}

//Append _fillCount copies of the byte _data, growing the buffer to the next
//power-of-two capacity when the new element count does not fit
hjPUBLIC void hjWrite_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data, uint32_t _fillCount) {

	//element count after the write
	uint32_t needed = _arr->count + _fillCount;

	if (needed > _arr->capacity) {
		size_t bytesBefore = _arr->capacity * sizeof(uint8_t);
		_arr->capacity = hjNumOf2Pow_GE_v(needed);
		size_t bytesAfter = _arr->capacity * sizeof(uint8_t);

		//growing must never produce a smaller buffer
		hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");

		_arr->data = (uint8_t*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
	}

	//store the copies one after another at the end of the array
	uint32_t remaining = _fillCount;
	while (remaining > 0) {
		_arr->data[_arr->count] = _data;
		++_arr->count;
		--remaining;
	}
}

//Append a single byte: a write with a fill count of one
hjPUBLIC void hjAdd_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data) {
	const uint32_t single = 1;
	hjWrite_ByteArray(_vm, _arr, _data, single);
}

//Free the byte array's buffer (capacity * element size bytes) and reset it to empty
hjPUBLIC void hjClear_ByteArray(hjVM* _vm, hjByteArray* _arr) {
	uint32_t bytesHeld = _arr->capacity * sizeof(_arr->data[0]);

	//new size 0 => release the buffer
	hjMemManager(_vm, _arr->data, bytesHeld, 0);

	hjInit_ByteArray(_arr);
}




/*--------------------字符数组方法--------------------*/
//Reset a char array to the empty state: no buffer, zero count, zero capacity
hjPUBLIC void hjInit_CharArray(hjCharArray* _arr) {
	_arr->count = 0;
	_arr->capacity = 0;
	_arr->data = NULL;
}

//Append _fillCount copies of the character _data, growing the buffer to the
//next power-of-two capacity when the new element count does not fit
hjPUBLIC void hjWrite_CharArray(hjVM* _vm, hjCharArray* _arr, char _data, uint32_t _fillCount) {

	//element count after the write
	uint32_t needed = _arr->count + _fillCount;

	if (needed > _arr->capacity) {
		size_t bytesBefore = _arr->capacity * sizeof(char);
		_arr->capacity = hjNumOf2Pow_GE_v(needed);
		size_t bytesAfter = _arr->capacity * sizeof(char);

		//growing must never produce a smaller buffer
		hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");

		_arr->data = (char*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
	}

	//store the copies one after another at the end of the array
	uint32_t remaining = _fillCount;
	while (remaining > 0) {
		_arr->data[_arr->count] = _data;
		++_arr->count;
		--remaining;
	}
}

//Append a single character: a write with a fill count of one
hjPUBLIC void hjAdd_CharArray(hjVM* _vm, hjCharArray* _arr, char _data) {
	const uint32_t single = 1;
	hjWrite_CharArray(_vm, _arr, _data, single);
}

//Free the char array's buffer (capacity * element size bytes) and reset it to empty
hjPUBLIC void hjClear_CharArray(hjVM* _vm, hjCharArray* _arr) {
	uint32_t bytesHeld = _arr->capacity * sizeof(_arr->data[0]);

	//new size 0 => release the buffer
	hjMemManager(_vm, _arr->data, bytesHeld, 0);

	hjInit_CharArray(_arr);
}





/*--------------------int数组方法--------------------*/
//Reset an int array to the empty state: no buffer, zero count, zero capacity
hjPUBLIC void hjInit_IntArray(hjIntArray* _arr) {
	_arr->count = 0;
	_arr->capacity = 0;
	_arr->data = NULL;
}

//Append _fillCount copies of the int _data, growing the buffer to the next
//power-of-two capacity when the new element count does not fit
hjPUBLIC void hjWrite_IntArray(hjVM* _vm, hjIntArray* _arr, int _data, uint32_t _fillCount) {

	//element count after the write
	uint32_t needed = _arr->count + _fillCount;

	if (needed > _arr->capacity) {
		size_t bytesBefore = _arr->capacity * sizeof(int);
		_arr->capacity = hjNumOf2Pow_GE_v(needed);
		size_t bytesAfter = _arr->capacity * sizeof(int);

		//growing must never produce a smaller buffer
		hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");

		_arr->data = (int*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
	}

	//store the copies one after another at the end of the array
	uint32_t remaining = _fillCount;
	while (remaining > 0) {
		_arr->data[_arr->count] = _data;
		++_arr->count;
		--remaining;
	}
}

//Append a single int: a write with a fill count of one
hjPUBLIC void hjAdd_IntArray(hjVM* _vm, hjIntArray* _arr, int _data) {
	const uint32_t single = 1;
	hjWrite_IntArray(_vm, _arr, _data, single);
}

//Free the int array's buffer (capacity * element size bytes) and reset it to empty
hjPUBLIC void hjClear_IntgArray(hjVM* _vm, hjIntArray* _arr) {
	uint32_t bytesHeld = _arr->capacity * sizeof(_arr->data[0]);

	//new size 0 => release the buffer
	hjMemManager(_vm, _arr->data, bytesHeld, 0);

	hjInit_IntArray(_arr);
}


//hj_utf8.cpp
#include"hj_utf8.h"




//Number of bytes the UTF-8 encoding of code point _value occupies.
//Returns 1..4 for values up to 0x7f / 0x7ff / 0xffff / 0x10ffff, and 0 for anything larger.
hjPUBLIC uint32_t hjGet_ByteNumOfEncodedUTF8(int _value) {
	hjASSERT(_value > 0, "can't encode negative value!");

	//largest code point representable with 1, 2, 3 and 4 bytes
	static const int upperBound[4] = { 0x7f, 0x7ff, 0xffff, 0x10ffff };

	uint32_t byteCount;
	for (byteCount = 1; byteCount <= 4; ++byteCount) {
		if (_value <= upperBound[byteCount - 1]) {
			return byteCount;
		}
	}

	//out of range
	return 0;
}


//Length of the UTF-8 sequence introduced by lead byte _byte.
//A continuation byte (10xxxxxx) is not a lead byte and yields 0; the patterns
//11110xxx, 1110xxxx and 110xxxxx yield 4, 3 and 2; every other byte yields 1.
hjPUBLIC uint32_t hjGet_ByteNumOfDecodedUTF8(uint8_t _byte) {

	static const struct {
		uint8_t mask;
		uint8_t pattern;
		uint32_t length;
	} leadForms[] = {
		{ 0xc0, 0x80, 0 },	//continuation byte
		{ 0xf8, 0xf0, 4 },
		{ 0xf0, 0xe0, 3 },
		{ 0xe0, 0xc0, 2 }
	};

	unsigned k;
	for (k = 0; k < sizeof(leadForms) / sizeof(leadForms[0]); ++k) {
		if ((_byte & leadForms[k].mask) == leadForms[k].pattern) {
			return leadForms[k].length;
		}
	}

	//treated as a single-byte (ASCII) character
	return 1;
}


//Encode code point _value as UTF-8 into _buf, most significant byte first.
//Returns the number of bytes written (1..4). _buf must have room for them.
//A value above 0x10ffff hits hjNOT_REACHED.
hjPUBLIC uint8_t hjEncodeUTF8(uint8_t* _buf, int _value) {

	hjASSERT(_value > 0, "can't encode negative value!");

	//one byte: 0xxxxxxx
	if (_value <= 0x7f) {
		_buf[0] = _value & 0x7f;
		return 1;
	}

	//two bytes: 110xxxxx 10xxxxxx
	if (_value <= 0x7ff) {
		_buf[0] = 0xc0 | ((_value >> 6) & 0x1f);
		_buf[1] = 0x80 | (_value & 0x3f);
		return 2;
	}

	//three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if (_value <= 0xffff) {
		_buf[0] = 0xe0 | ((_value >> 12) & 0x0f);
		_buf[1] = 0x80 | ((_value >> 6) & 0x3f);
		_buf[2] = 0x80 | (_value & 0x3f);
		return 3;
	}

	//four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if (_value <= 0x10ffff) {
		_buf[0] = 0xf0 | ((_value >> 18) & 0x07);
		_buf[1] = 0x80 | ((_value >> 12) & 0x3f);
		_buf[2] = 0x80 | ((_value >> 6) & 0x3f);
		_buf[3] = 0x80 | (_value & 0x3f);
		return 4;
	}

	hjNOT_REACHED;
	//out of range
	return 0;
}


/*
Decode one UTF-8 sequence.
	_bytePtr  first byte of the sequence
	_len      number of bytes available starting at _bytePtr
Returns the decoded code point, or -1 when the input is NULL or empty, the
lead byte is not a valid 1-4 byte lead, the sequence is cut off by _len, or a
continuation byte does not have the form 10xxxxxx.
Overlong encodings and surrogate values are not rejected.
*/
hjPUBLIC int hjDecodeUTF8(const uint8_t* _bytePtr, uint32_t _len) {

	//nothing to read: do not touch the buffer at all
	if (_bytePtr == NULL || _len == 0) return -1;

	//single-byte ASCII: 0xxxxxxx
	if (*_bytePtr <= 0x7f) return *_bytePtr;


	int value;
	uint32_t remainingBytes;

	/*read the lead byte first*/
	if ((*_bytePtr & 0xe0) == 0xc0) {  //2-byte sequence
		value = *_bytePtr & 0x1f;
		remainingBytes = 1;
	}
	else if ((*_bytePtr & 0xf0) == 0xe0) {  //3-byte sequence
		value = *_bytePtr & 0x0f;
		remainingBytes = 2;
	}
	else if ((*_bytePtr & 0xf8) == 0xf0) {  //4-byte sequence
		value = *_bytePtr & 0x07;
		remainingBytes = 3;
	}
	else { //illegal lead byte
		return -1;
	}


	/*the sequence is truncated: lead byte + continuation bytes must fit in _len*/
	if (remainingBytes >= _len) return -1;


	/*then read the continuation bytes*/
	while (remainingBytes > 0) {
		++_bytePtr;
		--remainingBytes;

		//the top two bits must be 10
		if ((*_bytePtr & 0xc0) != 0x80) return -1;

		//accumulate the low 6 bits of each continuation byte
		value = value << 6 | (*_bytePtr & 0x3f);
	}

	return value;
}

//hj_lexer.cpp
#include"hj_lexer.h"



/*definition of the global variables*/
//root directory of the script files; starts out as the empty string
char* hj_rootDir="";




/*file-private struct and lookup table*/
//One keyword entry
typedef struct hjKeyToken {
	char* keyword;					//keyword text
	uint8_t len;						//length of the keyword text
	hjTokenType type;			//token type of the keyword
}hjKeyToken;


/*
Keyword lookup table: keyword text, its length, and the token type it maps to.
The table ends with a {NULL,0,hjTOKEN_UNKNOWN} sentinel because
hjJudge_IdOrKey scans it until it meets a NULL keyword.
The boolean keywords are spelled "True" and "False", which matches the stored
lengths (4 and 5) and the comments on hjTOKEN_TRUE / hjTOKEN_FALSE; the
previous "hjTRUE"/"hjFALSE" spellings could never match with those lengths.
*/
hjKeyToken hjMap_KeyToken[] = {
	{"let",3,hjTOKEN_LET},
	{"function",8,hjTOKEN_FUNCTION},
	{"if",2,hjTOKEN_IF},
	{"else",4,hjTOKEN_ELSE},
	{"True",4,hjTOKEN_TRUE},
	{"False",5,hjTOKEN_FALSE},
	{"while",5,hjTOKEN_WHILE},
	{"for",3,hjTOKEN_FOR},
	{"break",5,hjTOKEN_BREAK},
	{"continue",8,hjTOKEN_CONTINUE},
	{"return",6,hjTOKEN_RETURN},
	{"Nil",3,hjTOKEN_NIL},
	{"class",5,hjTOKEN_CLASS},
	{"self",4,hjTOKEN_SELF},
	{"static",6,hjTOKEN_STATIC},
	{"is",2,hjTOKEN_IS},
	{"super",5,hjTOKEN_SUPER},
	{"import",6,hjTOKEN_IMPORT},
	{NULL,0,hjTOKEN_UNKNOWN}		//sentinel: marks the end of the table
};









/*
Read the whole script file at _path.
Returns a malloc'ed, NUL-terminated buffer that the caller must free().
Failures (cannot open, cannot stat, out of memory, read error) are reported
through __hjERROR_IO__ / __hjERROR_MEM__.
The terminator is placed after the bytes actually read: in text mode the
number of bytes delivered by fread can be smaller than the size reported by
stat (for example after newline translation). An empty file yields "".
*/
hjPUBLIC char* hjReadScript(const char* _path) {
	FILE* file = fopen(_path, "r");
	if (file == NULL) {
		__hjERROR_IO__("Couldn't open file \"%s\".", _path);
		return NULL;
	}

	//use the file size as an upper bound for the buffer
	struct stat myStat;
	if (stat(_path, &myStat) != 0 || myStat.st_size < 0) {
		fclose(file);
		__hjERROR_IO__("Couldn't get size of file \"%s\".", _path);
		return NULL;
	}
	size_t fileSize = (size_t)myStat.st_size;

	char* fileContent = (char*)malloc(fileSize + 1);
	if (fileContent == NULL) {
		fclose(file);
		__hjERROR_MEM__("Couldn't allocate memory for reading file \"%s\".\n", _path);
		return NULL;
	}

	//read the content byte by byte into fileContent
	size_t readCount = fread(fileContent, sizeof(char), fileSize, file);
	if (ferror(file)) {
		free(fileContent);
		fclose(file);
		__hjERROR_IO__("Couldn't read file \"%s\".", _path);
		return NULL;
	}
	fileContent[readCount] = '\0';

	fclose(file);
	return fileContent;
}








//Initialise a lexer over _srcCode.
//The current character is the first character of the source and `next`
//points at the second one. Both tokens start as an empty hjTOKEN_UNKNOWN
//token on line 1, and the parenthesis counter starts at 0.
//NOTE(review): the function has no written return type (hjPUBLIC expands to nothing) and
//returns no value; it should be `void`, changed together with the prototype in hj_lexer.h.
hjPUBLIC hjInit_Lexer(hjVM* _vm, hjLexer* _lexer, const char* _file, const char* _srcCode) {

	//the token both preToken and curToken start from
	hjToken blank;
	blank.type = hjTOKEN_UNKNOWN;
	blank.start = NULL;
	blank.len = 0;
	blank.lineNo = 1;

	_lexer->vm = _vm;
	_lexer->file = _file;
	_lexer->srcCode = _srcCode;

	//read position: current character and the one after it
	_lexer->curChar = *_srcCode;
	_lexer->next = _srcCode + 1;

	_lexer->curToken = blank;
	_lexer->preToken = blank;

	_lexer->Lcounter = 0;
}


//Return the character `next` points at, without advancing the lexer
hjPUBLIC char hjPeek_NextChar(hjLexer* _lexer) {
	const char* upcoming = _lexer->next;
	return upcoming[0];
}




/*
Read the next token from the source into _lexer->curToken.

The old curToken is first saved in preToken. Leading blanks are skipped and
curToken is preset to hjTOKEN_EOF with length 0, starting at the current
character (next - 1). If the current character is '\0' the function returns
with that EOF token. Otherwise a switch on the current character picks the
token type:
	- comments and "#!" lines are skipped, curToken.start is reset, and the
	  loop restarts via `continue`;
	- identifiers and keywords are handled by hjLex_Id, after which the
	  function returns directly;
	- for all other recognised characters the token length is computed from
	  next - start and the lexer moves on to the following character.

NOTE(review): a character with no case that is not a letter, '_' or "#!"
(digits included) reaches the empty `else` branch and returns with curToken
still hjTOKEN_EOF and without consuming the character. "<<" and ">>" have no
case either, although token types for them exist.
*/
hjPUBLIC void hjGet_NextToken(hjLexer* _lexer) {

	/*the current token becomes the previous token*/
	_lexer->preToken = _lexer->curToken;
	//skip the blanks in front of the token to be recognised
	hjSkip_Blanks(_lexer);
	//preset the current token
	_lexer->curToken.type = hjTOKEN_EOF;
	_lexer->curToken.len = 0;
	_lexer->curToken.start = _lexer->next - 1;



	for (;_lexer->curChar!='\0';) {

		switch (_lexer->curChar) {
		/*
		disabled cases:
		case '@':
			_lexer->curToken.type = hjTOKEN_AT;
			break;
		case '#':
			_lexer->curToken.type = hjTOKEN_WELL;
			break;
		case '`':	//accent
			_lexer->curToken.type = hjTOKEN_ACCENT;
			break;
		case ';':		//semicolon
			_lexer->curToken.type = hjTOKEN_SEMI;
			break;
		case '\\':     backslash
			_lexer->curToken.type = hjTOKEN_SEMI;
			break;
			*/

		case ':':		//colon
			_lexer->curToken.type = hjTOKEN_COLON;
			break;
		case ',':		//comma
			_lexer->curToken.type = hjTOKEN_COMMA;
			break;
		case '.':		//dot
			if (hjMatch_NextChar(_lexer, '.')) {//two consecutive dots
				_lexer->curToken.type = hjTOKEN_DOT_DOT;
			}
			else {//a single dot
				_lexer->curToken.type = hjTOKEN_DOT;
			}
			break;
		case '(':
			//parentheses are only counted while Lcounter is already positive
			//(hjLex_Str sets it to 1 when it meets "%(")
			if (_lexer->Lcounter > 0) {
				++_lexer->Lcounter;//one more '('
			}
			_lexer->curToken.type = hjTOKEN_LEFT_PAREN;
			break;
		case ')':
			if (_lexer->Lcounter > 0) {
				--_lexer->Lcounter;//one ')' closed
				
				//the counter reached 0: this ')' closes the embedded expression,
				//so continue lexing the rest of the string
				if (_lexer->Lcounter == 0) {
					hjLex_Str(_lexer);
					break;
				}

			}
			_lexer->curToken.type = hjTOKEN_RIGHT_PAREN;
			break;
		case '[':
			_lexer->curToken.type = hjTOKEN_LEFT_BRACKET;
			break;
		case ']':
			_lexer->curToken.type = hjTOKEN_RIGHT_BRACKET;
			break;
		case '{':
			_lexer->curToken.type = hjTOKEN_LEFT_BRACE;
			break;
		case '}':
			_lexer->curToken.type = hjTOKEN_RIGHT_BRACE;
			break;
		case '=':
			if (hjMatch_NextChar(_lexer, '=')) {//equality test
				_lexer->curToken.type = hjTOKEN_EQ;
			}
			else {//assignment
				_lexer->curToken.type = hjTOKEN_ASSIGN;
			}
			break;
		case '+':
			_lexer->curToken.type = hjTOKEN_ADD;
			break;
		case '-':
			_lexer->curToken.type = hjTOKEN_SUB;
			break;
		case '*':
			_lexer->curToken.type = hjTOKEN_MUL;
			break;
		case '/':
			if (hjMatch_NextChar(_lexer,'/') ) { //single-line comment
				hjSkip_CommentALine(_lexer);
				//the token now starts after the comment
				_lexer->curToken.start = _lexer->next - 1;
				continue;
			}
			else if (hjMatch_NextChar(_lexer, '*')) {  //multi-line comment
				hjSkip_Comment(_lexer);
				//the token now starts after the comment
				_lexer->curToken.start = _lexer->next - 1;
				continue;
			}
			else { //division
				_lexer->curToken.type = hjTOKEN_DIV;
			}
			break;
		case '%':
			_lexer->curToken.type = hjTOKEN_MOD;
			break;
		case '^':
			_lexer->curToken.type = hjTOKEN_POW;
			break;
		case '&':
			if (hjMatch_NextChar(_lexer, '&')) {//logical and
				_lexer->curToken.type = hjTOKEN_LOGIC_AND;
			}
			else {//bitwise and
				_lexer->curToken.type = hjTOKEN_BIT_AND;
			}
			break;
		case '|':
			if (hjMatch_NextChar(_lexer, '|')) {//logical or
				_lexer->curToken.type = hjTOKEN_LOGIC_OR;
			}
			else {//bitwise or
				_lexer->curToken.type = hjTOKEN_BIT_OR;
			}
			break;	
		case '!':
			if (hjMatch_NextChar(_lexer, '=')) {//not equal
				_lexer->curToken.type = hjTOKEN_NEQ;
			}
			else {//logical not
				_lexer->curToken.type = hjTOKEN_LOGIC_NOT;
			}
			break;
		case '~':
			_lexer->curToken.type = hjTOKEN_BIT_NOT;
			break;
		case '<':
			if (hjMatch_NextChar(_lexer, '=')) {//<=
				_lexer->curToken.type = hjTOKEN_LE;
			}
			else {//<
				_lexer->curToken.type = hjTOKEN_LT;
			}
			break;
		case '>':
			if (hjMatch_NextChar(_lexer, '=')) {//>=
				_lexer->curToken.type = hjTOKEN_GE;
			}
			else {//>
				_lexer->curToken.type = hjTOKEN_GT;
			}
			break;
		case '?':
			_lexer->curToken.type = hjTOKEN_QUE;
			break;
		case '"':	//double quote
			hjLex_Str(_lexer);
			break;	
		/*EOL and EOF are not handled here. Author's note: EOF cannot be handled, EOL could be handled later*/
		default:
			//the first character is a letter or an underscore
			if (hjJudge_Alpha(_lexer->curChar) || _lexer->curChar == '_') {
				hjLex_Id(_lexer,hjTOKEN_UNKNOWN);
			}
			//ignore "#!" and everything after it on that line
			else 	if (_lexer->curChar == '#' && hjMatch_NextChar(_lexer, '!')) {
				hjSkip_ALine(_lexer);

				_lexer->curToken.start = _lexer->next - 1;
				continue;
			}
			else{
				//unsupported character: the error report below is disabled
				///__hjERROR_LEX__(_lexer, "unsupport char : \"%c\" , quit.", _lexer->curChar);
			}
			//hjLex_Id has already set type and length; nothing more to do here
			return;
		}


		/*normally one pass through the switch is enough; the loop only repeats after a `continue`*/
		_lexer->curToken.len = (uint32_t)(_lexer->next - _lexer->curToken.start);
		hjGet_NextChar(_lexer);
		return;
	}
}




/*
Match a token.
Returns hjFALSE and leaves the lexer untouched when the current token is not
of the expected type; otherwise reads the next token and returns hjTRUE.
*/
hjPUBLIC hjBOOL hjMatch_Token(hjLexer* _lexer, hjTokenType _expectedToken) {

	if (_lexer->curToken.type != _expectedToken) {
		return hjFALSE;
	}

	hjGet_NextToken(_lexer);
	return hjTRUE;
}


/*
Consume the current token.
If the current token has the expected type, read the next token; otherwise
report _errMsg as a compile error.
_errMsg is passed as an argument to a "%s" format, never as the format
itself, so a '%' inside the message cannot be misread as a conversion.
*/
hjPUBLIC void hjConsume_CurToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg) {
	if (_lexer->curToken.type != _expectedToken) {
		__hjERROR_COMPILE__(_lexer, "%s", _errMsg);
		return;
	}

	hjGet_NextToken(_lexer);
}


/*
Consume the next token.
Reads the next token and requires it to have the expected type; otherwise
_errMsg is reported as a compile error.
_errMsg is passed as an argument to a "%s" format, never as the format
itself, so a '%' inside the message cannot be misread as a conversion.
*/
hjPUBLIC void hjConsume_NextToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg) {
	hjGet_NextToken(_lexer);
	if (_lexer->curToken.type != _expectedToken) {
		__hjERROR_COMPILE__(_lexer, "%s", _errMsg);
	}
}









//Is _c a decimal digit ('0'..'9', i.e. codes 48..57 in ASCII)?
hjPRIVATE hjBOOL hjJudge_Num(char _c) {
	return (_c >= '0' && _c <= '9') ? hjTRUE : hjFALSE;
}
//Is _c an ASCII letter ('A'..'Z' = 65..90 or 'a'..'z' = 97..122)?
hjPRIVATE hjBOOL hjJudge_Alpha(char _c) {
	const int isUpper = (_c >= 'A' && _c <= 'Z');
	const int isLower = (_c >= 'a' && _c <= 'z');
	return (isUpper || isLower) ? hjTRUE : hjFALSE;
}


/*
Decide whether a word is a keyword or an identifier.
	_start  first character of the word (not NUL-terminated at _len)
	_len    length of the word
Returns the keyword's token type when the word equals an entry of
hjMap_KeyToken, otherwise hjTOKEN_ID.
The scan is bounded by the table's element count and also stops at a NULL
keyword, so it is safe whether or not the table ends with a sentinel entry.
*/
hjPRIVATE hjTokenType hjJudge_IdOrKey(const char* _start, uint32_t _len) {

	const size_t entryCount = sizeof(hjMap_KeyToken) / sizeof(hjMap_KeyToken[0]);

	size_t i;
	for (i = 0; i < entryCount && hjMap_KeyToken[i].keyword != NULL; ++i) {
		//compare lengths first so memcmp never reads past the shorter text
		if (hjMap_KeyToken[i].len == _len &&
			memcmp(hjMap_KeyToken[i].keyword, _start, _len) == 0) {
			return hjMap_KeyToken[i].type;
		}
	}

	//not a keyword, so it is an identifier
	return hjTOKEN_ID;
}


//Advance by one character: the character `next` points at becomes the
//current character, and `next` moves one position further along the source
hjPRIVATE void hjGet_NextChar(hjLexer* _lexer) {
	const char* upcoming = _lexer->next;
	_lexer->curChar = *upcoming;
	_lexer->next = upcoming + 1;
}


/*
Match the next character.
When the next character equals _expectedChar it is consumed (it becomes the
current character) and hjTRUE is returned; otherwise the lexer does not move
and hjFALSE is returned.
*/
hjPRIVATE hjBOOL hjMatch_NextChar(hjLexer* _lexer,char _expectedChar) {
	if (hjPeek_NextChar(_lexer) != _expectedChar) {
		return hjFALSE;
	}

	hjGet_NextChar(_lexer);
	return hjTRUE;
}





//Skip consecutive blanks (space, tab, carriage return, newline),
//incrementing the current token's line number for every newline skipped
hjPRIVATE void hjSkip_Blanks(hjLexer* _lexer) {
	for (;;) {
		switch (_lexer->curChar) {
		case '\n':
			++_lexer->curToken.lineNo;
			/* fallthrough */
		case ' ':
		case '\t':
		case '\r':
			hjGet_NextChar(_lexer);
			break;
		default:
			//first non-blank character: stop
			return;
		}
	}
}



//Skip a single-line comment: drop the rest of the line, then any blanks that follow it
hjPRIVATE void hjSkip_CommentALine(hjLexer* _lexer) {
	hjSkip_ALine(_lexer);	//the comment itself, up to and including the newline
	hjSkip_Blanks(_lexer);	//blanks after the comment
}


/*
Skip a multi-line comment.
Called with the current character on the '*' of the opening "/" "*" pair.
Characters are consumed up to and including the closing "*" "/" pair, counting
newlines on the way, and any blanks after the comment are skipped as well.
A '*' that is not followed by '/' is ordinary comment text (previously it was
reported as an error, so a comment such as "2 * 3" could not be written).
Reaching the end of the source before the comment is closed is a lex error.
*/
hjPRIVATE void hjSkip_Comment(hjLexer* _lexer) {

	for (;;) {
		//consume comment text up to the next '*' or the end of the source
		char nextChar = hjPeek_NextChar(_lexer);
		while (nextChar != '*' && nextChar != '\0') {
			hjGet_NextChar(_lexer);
			if (_lexer->curChar == '\n') {
				++_lexer->curToken.lineNo;
			}
			nextChar = hjPeek_NextChar(_lexer);
		}

		//no '*' left: the source ended inside the comment
		if (!hjMatch_NextChar(_lexer, '*')) {
			__hjERROR_LEX__(_lexer, "expect \"*/\" before file end!");
			return;
		}

		//'*' followed by '/' closes the comment; a lone '*' is just text
		if (hjMatch_NextChar(_lexer, '/')) {
			break;
		}
	}

	//step past the closing '/'
	hjGet_NextChar(_lexer);

	//there may be blanks after the comment
	hjSkip_Blanks(_lexer);
}


//Skip the rest of the current line.
//Advances until a newline or the end of the source is the current character;
//a newline is counted and stepped over, so the lexer ends up on the first
//character of the following line.
hjPRIVATE void hjSkip_ALine(hjLexer* _lexer) {

	do {
		hjGet_NextChar(_lexer);
	} while (_lexer->curChar != '\0' && _lexer->curChar != '\n');

	if (_lexer->curChar == '\n') {
		++_lexer->curToken.lineNo;
		hjGet_NextChar(_lexer);
	}
}







//Lex an identifier or keyword.
//Consumes letters, digits and underscores starting at the current character.
//When _token is hjTOKEN_UNKNOWN the word is classified by hjJudge_IdOrKey,
//otherwise _token is used as the token type. The token length is stored too.
hjPRIVATE void hjLex_Id(hjLexer* _lexer, hjTokenType _token) {

	while (hjJudge_Num(_lexer->curChar) || hjJudge_Alpha(_lexer->curChar) || _lexer->curChar == '_') {
		hjGet_NextChar(_lexer);
	}

	//`next` is one past the first character that does not belong to the word, hence the -1
	uint32_t wordLen = (uint32_t)(_lexer->next - _lexer->curToken.start - 1);

	_lexer->curToken.type = (_token != hjTOKEN_UNKNOWN)
		? _token
		: hjJudge_IdOrKey(_lexer->curToken.start, wordLen);
	_lexer->curToken.len = wordLen;
}


/*
Lex a string literal, or the part of one up to an embedded "%(" expression.
Characters are read until:
	- '"'  : the token becomes hjTOKEN_STR;
	- "%(" : the token becomes hjTOKEN_EXPR and Lcounter is set to 1 so that
	         hjGet_NextToken can find the matching ')'; nested interpolation
	         is rejected;
	- end of source: lex error "unterminated string!".
Escape sequences \0 \a \b \f \n \r \t \" \\ and \uXXXX are decoded.
The decoded bytes are collected in a temporary byte array which is released
before returning; only the token type is recorded here.
The '%' in the "should followed by" message is written as "%%" because the
message is used as a printf-style format string.
*/
hjPRIVATE void hjLex_Str(hjLexer* _lexer) {

	hjByteArray bytes;
	hjInit_ByteArray(&bytes);

	while (1) {
		hjGet_NextChar(_lexer);

		if (_lexer->curChar == '\0') {
			__hjERROR_LEX__(_lexer, "unterminated string!");
		}
		else if (_lexer->curChar == '"') {  //closing double quote
			_lexer->curToken.type = hjTOKEN_STR;
			break;
		}
		else if (_lexer->curChar == '%') {
			if (!hjMatch_NextChar(_lexer, '(')) { //'%' must be followed by '('
				__hjERROR_LEX__(_lexer, "\"%%\" should followed by \"(\"!");
			}

			if (_lexer->Lcounter > 0) { //already inside an embedded expression
				__hjERROR_COMPILE__(_lexer, "not support nest interpolate expression");
			}

			_lexer->Lcounter = 1;
			_lexer->curToken.type = hjTOKEN_EXPR;
			break;
		}

		//ordinary character: store it as it is
		if (_lexer->curChar != '\\') {
			hjAdd_ByteArray(_lexer->vm, &bytes, _lexer->curChar);
			continue;
		}

		/*escape sequence: look at the character after the backslash*/
		hjGet_NextChar(_lexer);

		int plainByte = -1;		//stays -1 when the escape does not map to one byte
		switch (_lexer->curChar) {
		case '0':
			plainByte = '\0';
			break;
		case 'a':
			plainByte = '\a';
			break;
		case 'b':
			plainByte = '\b';
			break;
		case 'f':
			plainByte = '\f';
			break;
		case 'n':
			plainByte = '\n';
			break;
		case 'r':
			plainByte = '\r';
			break;
		case 't':
			plainByte = '\t';
			break;
		case '"':
			plainByte = '"';
			break;
		case '\\':
			plainByte = '\\';
			break;
		case 'u':
			//\uXXXX appends its own UTF-8 bytes
			hjLex_CodePointOfUTF8(_lexer, &bytes);
			break;
		default:
			__hjERROR_LEX__(_lexer, "unsupport escape \\%c", _lexer->curChar);
			break;
		}

		if (plainByte >= 0) {
			hjAdd_ByteArray(_lexer->vm, &bytes, (uint8_t)plainByte);
		}
	}


	hjClear_ByteArray(_lexer->vm, &bytes);

}


//Lex a "\uXXXX" escape: read four hexadecimal digits, combine them into one
//code point and append its UTF-8 encoding to _buf.
//  _lexer : lexer positioned right after the 'u'
//  _buf   : byte array that receives the encoded bytes
hjPRIVATE void hjLex_CodePointOfUTF8(hjLexer* _lexer, hjByteArray* _buf) {

	int codePoint = 0;
	uint8_t nibble = 0;        //deliberately lives outside the loop: it keeps its last value when an error branch is taken
	uint32_t remaining = 4;    //exactly four hex digits follow the 'u'

	while (remaining-- > 0) {
		hjGet_NextChar(_lexer);

		if (_lexer->curChar >= '0' && _lexer->curChar <= '9') {
			nibble = _lexer->curChar - '0';
		}
		else if (_lexer->curChar >= 'A' && _lexer->curChar <= 'F') {
			nibble = _lexer->curChar - 'A' + 10;
		}
		else if (_lexer->curChar >= 'a' && _lexer->curChar <= 'f') {
			nibble = _lexer->curChar - 'a' + 10;
		}
		else if (_lexer->curChar == '\0') {
			__hjERROR_LEX__(_lexer, "unterminated unicode!");
		}
		else {
			__hjERROR_LEX__(_lexer, "invalid unicode!");
		}

		//shift the digits gathered so far and merge in the new one
		codePoint = (codePoint << 4) | nibble;
	}

	uint32_t byteNum = hjGet_ByteNumOfEncodedUTF8(codePoint);
	hjASSERT(byteNum != 0, "utf8 encode bytes should be between 1 and 4 ");

	//Reserve byteNum bytes first by writing zeros, so that the encoder below
	//can write straight into _buf->data.
	hjWrite_ByteArray(_lexer->vm, _buf, 0, byteNum);

	//encode the code point over the bytes that were just reserved at the end
	hjEncodeUTF8(_buf->data + _buf->count - byteNum, codePoint);
}




//hj_vm.cpp
#include"hj_vm.h"



//Put a virtual machine into its initial state
//  _vm : the VM to reset; must not be NULL
hjPUBLIC void hjInit_VM(hjVM* _vm) {
	_vm->curLexer = NULL;       //no lexer is attached yet
	_vm->allocatedByte = 0;     //running total of allocated bytes starts at zero
}
//Create a virtual machine
//Returns a heap-allocated VM initialised by hjInit_VM, or NULL when the
//allocation fails (a message is written to stderr in that case).
//The caller owns the returned pointer and releases it with free().
hjPUBLIC hjVM* hjNew_VM() {
	hjVM* vm = (hjVM*)malloc(sizeof(hjVM));
	if (vm == NULL) {
		//never hand a NULL pointer to hjInit_VM: it writes through the pointer
		fprintf(stderr, "hjNew_VM: allocate hjVM failed!\n");
		return NULL;
	}
	hjInit_VM(vm);
	return vm;
}

主函数的文件hj.cpp

//hj.cpp
#include"hj_lexer.h"

/*
项目文件引用关系——下次做个打印文件引用关系的工具!!!
common 
|————core
|				|————error 
|				|				|————vm
|				|								|————lexer
|		        |
|				|————mem
|				|				|————lexer
|				|				|				|
|				|				|
|
|
|
|————utf8
|				|————lexer

*/

//Manual smoke test for the error reporter: builds a fake lexer state
//(file name plus a "function" token on line 50) and reports a memory error.
void test_error() {
	//calloc so that every field that is not set below is zero instead of indeterminate
	hjLexer* myLexer = (hjLexer*)calloc(1, sizeof(hjLexer));
	if (myLexer == NULL) {
		fprintf(stderr, "test_error: allocate hjLexer failed!\n");
		return;
	}

	//fill the embedded token directly; no separate heap token is needed
	myLexer->curToken.lineNo = 50;
	myLexer->curToken.start = "function";
	myLexer->curToken.len = 8;   //length of "function"
	myLexer->curToken.type = hjTOKEN_FUNCTION;

	myLexer->file = "E:\\C\\hjRun.hj";


	hjError(myLexer, hjERROR_MEM, "出错了!!!");

	//NOTE(review): assumes hjError does not keep the lexer pointer - confirm.
	//If hjError terminates the program this line is simply never reached.
	free(myLexer);

}





//Run a script file. At this stage "running" means: lex the whole file and
//print one line per token in the form  <lineNo>:<TOKEN_NAME>[<token text>]
//  _path : path of the script; everything up to and including the last '/'
//          is stored in hj_rootDir
//On an allocation failure a message is written to stderr and the function returns.
hjPRIVATE void hjRunScript(const char* _path) {
	const char* lastSlash = strrchr(_path, '/');

	if (lastSlash != NULL) {
		//length of the directory part, including the trailing '/'
		size_t dirLen = (size_t)(lastSlash - _path) + 1;
		char* root = (char*)malloc(dirLen + 1);
		if (root == NULL) {
			fprintf(stderr, "hjRunScript: allocate root directory failed!\n");
			return;
		}
		memcpy(root, _path, dirLen);
		root[dirLen] = '\0';
		//hj_rootDir takes over the buffer, so it is not freed here
		hj_rootDir = root;
	}

	
	//create the virtual machine
	hjVM* vm = hjNew_VM();
	if (vm == NULL) {
		fprintf(stderr, "hjRunScript: create virtual machine failed!\n");
		return;
	}

	//read the script
	//NOTE(review): who owns the buffer returned by hjReadScript is not visible here,
	//so it is not released in this function - confirm.
	const char* srcCode = hjReadScript(_path);

	//create the lexer
	hjLexer  myLexer;
	hjInit_Lexer(vm, &myLexer, _path, srcCode);

	//bring in the token name table (defines hjTokenArr, indexed by token type)
#include "token.list"

	for (; myLexer.curToken.type != hjTOKEN_EOF;) {
		hjGet_NextToken(&myLexer);
		//print the line number and the token type
		printf("%d:%s[", myLexer.curToken.lineNo, hjTokenArr[myLexer.curToken.type]);
		
		//print the token text: len bytes starting at start, in one call
		if (myLexer.curToken.len > 0) {
			fwrite(myLexer.curToken.start, 1, myLexer.curToken.len, stdout);
		}

		printf("]\n");
	}

	//the VM was allocated with malloc in hjNew_VM and is not used after this point
	free(vm);

}



//Program entry point.
//Usage: hj [script]
//Runs the script given as the first argument; when no argument is given the
//demo script "test.hj" is used, so starting the program without arguments
//still behaves as before.
int main(int argc,const char** argv) {

	//test_error() can be called here to exercise the error reporter by hand

	//only fall back to the demo script when the caller did not name one
	const char* scriptPath = (argc >= 2) ? argv[1] : "test.hj";

	hjRunScript(scriptPath);

#ifdef _WIN32
	//keep the console window open; "pause" only exists in the Windows shell
	system("pause");
#endif
	return 0;
}


token.list

这个无所谓,把数组放在其他文件也一样!

//Printable names of the token types.
//The table is indexed with a token's type value, so the order of the entries
//must stay in step with the hjTokenType enum (UNKNOWN first, EOF last).
//The entries are string literals, hence pointers to const.
const char* hjTokenArr[] = {
	"UNKNOWN",
	"NUM",
	"STR",
	"ID",
	"EXPR",
	"LET",
	"FUNCTION",
	"IF",
	"ELSE",
	"TRUE",
	"FALSE",
	"WHILE",
	"FOR",
	"BREAK",
	"CONTINUE",
	"RETURN",
	"NIL",
	"CLASS",
	"SELF",
	"STATIC",
	"IS",
	"SUPER",
	"IMPORT",
	"AT",
	"WELL",
	"ACCENT",
	"SEMI",
	"BACKSLASH",
	"COLON",
	"COMMA",
	"DOT",
	"DOT_DOT",
	"LEFT_PAREN",
	"RIGHT_PAREN",
	"LEFT_BRACKET",
	"RIGHT_BRACKET",
	"LEFT_BRACE",
	"RIGHT_BRACE",
	"ASSIGN",
	"ADD",
	"SUB",
	"MUL",
	"DIV",
	"MOD",
	"POW",
	"BIT_AND",
	"BIT_OR",
	"BIT_NOT",
	"BIT_SHIFT_LEFT",
	"BIT_SHIFT_RIGHT",
	"LOGIC_AND",
	"LOGIC_OR",
	"LOGIC_NOT",
	"EQ",
	"NEQ",
	"GT",
	"GE",
	"LT",
	"LE",
	"QUE",
	"EOL",
	"EOF"
};

测试文件test.hj

//   单行注释
/*
	多
	行
	注
	释
*/
import people for People
function func(){
	print("hello word")
	let p = People.new("demllie","turtle")
	p.run()
}


class Family : People{
	let father
	let mother
	let child
	new(f,m,c){
		father = f
		mother = m
		child = c

		super("Kiana","wife")
	}
}


let f = Family.new("Kiana","female","shine")
f.run()

func()


运行结果

8:IMPORT[import]
8:ID[people]
8:FOR[for]
8:ID[People]
9:FUNCTION[function]
9:ID[func]
9:LEFT_PAREN[(]
9:RIGHT_PAREN[)]
9:LEFT_BRACE[{]
10:ID[print]
10:LEFT_PAREN[(]
10:STR["hello word"]
10:RIGHT_PAREN[)]
11:LET[let]
11:ID[p]
11:ASSIGN[=]
11:ID[People]
11:DOT[.]
11:ID[new]
11:LEFT_PAREN[(]
11:STR["demllie"]
11:COMMA[,]
11:STR["turtle"]
11:RIGHT_PAREN[)]
12:ID[p]
12:DOT[.]
12:ID[run]
12:LEFT_PAREN[(]
12:RIGHT_PAREN[)]
13:RIGHT_BRACE[}]
16:CLASS[class]
16:ID[Family]
16:COLON[:]
16:ID[People]
16:LEFT_BRACE[{]
17:LET[let]
17:ID[father]
18:LET[let]
18:ID[mother]
19:LET[let]
19:ID[child]
20:ID[new]
20:LEFT_PAREN[(]
20:ID[f]
20:COMMA[,]
20:ID[m]
20:COMMA[,]
20:ID[c]
20:RIGHT_PAREN[)]
20:LEFT_BRACE[{]
21:ID[father]
21:ASSIGN[=]
21:ID[f]
22:ID[mother]
22:ASSIGN[=]
22:ID[m]
23:ID[child]
23:ASSIGN[=]
23:ID[c]
25:SUPER[super]
25:LEFT_PAREN[(]
25:STR["Kiana"]
25:COMMA[,]
25:STR["wife"]
25:RIGHT_PAREN[)]
26:RIGHT_BRACE[}]
27:RIGHT_BRACE[}]
30:LET[let]
30:ID[f]
30:ASSIGN[=]
30:ID[Family]
30:DOT[.]
30:ID[new]
30:LEFT_PAREN[(]
30:STR["Kiana"]
30:COMMA[,]
30:STR["female"]
30:COMMA[,]
30:STR["shine"]
30:RIGHT_PAREN[)]
31:ID[f]
31:DOT[.]
31:ID[run]
31:LEFT_PAREN[(]
31:RIGHT_PAREN[)]
33:ID[func]
33:LEFT_PAREN[(]
33:RIGHT_PAREN[)]
35:EOF[]

最后

除了编码风格外,文件依赖关系也变了,增加了几种token类型,更改了关键字!

hj=happy joy

参考:《自制编程语言,基于C语言》

你可能感兴趣的:(#,编译原理学习)