这本书的 sparrow(麻雀)语言基于 wren(鹪鹩)语言,是一门面向对象语言。其他特点我已经记不清了,只记得 Demo 中部分函数的命名过于简单,以至于与其他库函数产生了命名冲突。
几个月前写(抄)的项目,后面的语法分析、制作C语言的面向对象、设计虚拟机等等内容没时间看下去了,只能讲讲废话了——
C语言做的,因为没有前缀,所以一是不好看,二是容易冲突。只有一个解决办法,就是用专有的前缀。所以,我在每个函数、每个结构体、每个枚举、许多个宏的前面,都加上相同的前缀 hj
//hj_common.h
/*
hj_common.h文件是整个项目的最底层的文件,这个文件里只有宏
整个项目是C写的,没有命名空间,所以很容易发生重命名!
规定:
1.变量、结构体、枚举、函数等,所有的非宏标识符前都要有前缀 hj
2.特殊宏前后都有两个下划线。特殊宏就是那些特别简单,分不清是枚举还是宏的
3.所有的前缀都是hj,把整个项目所有的hj都删除了基本不影响理解
可能影响执行。毕竟不能避免冲突和重命名了!!!
*/
#define _CRT_SECURE_NO_WARNINGS
#ifndef _hj_common_h
#define _hj_common_h
#include
#include
#include //uint32_t
#include
#include
#include
#include
#include
#include //stat 求文件大小
#include
/* Project-local boolean type and its two values. */
#define hjBOOL char
#define hjTRUE 1
#define hjFALSE 0
/* Linkage markers: hjPUBLIC expands to nothing, hjPRIVATE expands to `static`. */
#define hjPUBLIC
#define hjPRIVATE static
/* Compiler attribute (GCC/Clang syntax) marking an entity as intentionally unused. */
#define UNUSED __attribute__((unused))
/*hjDEBUG 的定义会写在makefile里*/
#ifdef hjDEBUG
/*
 * Debug-build assertion.
 * Evaluates `condition`; when it is false, prints the failing location and
 * `errMsg` to stderr and aborts.
 * The condition is parenthesized so that expressions such as `a != b` are
 * negated as a whole, and the body is wrapped in do { } while (0) so the macro
 * behaves like a single statement (safe inside if/else without braces).
 */
#define hjASSERT(condition,errMsg)\
    do {\
        if (!(condition)) {\
            fprintf(stderr,"hjASSERT failed! %s:%d IN FUNCTION %s(): %s\n",__FILE__,__LINE__,__func__,(errMsg));\
            abort();\
        }\
    } while (0)
#else
#define hjASSERT(condition,errMsg) ((void)0)
#endif
/*
 * hjNOT_REACHED marks a code path that must never execute.
 * If it is reached, the location is printed to stderr and the program aborts.
 * Wrapped in do { } while (0) so that `hjNOT_REACHED;` is exactly one
 * statement and can be used in an unbraced if/else.
 */
#define hjNOT_REACHED \
    do {\
        fprintf(stderr,"hjNOT_REACHED :%s:%d IN FUNCTION %s()\n",__FILE__,__LINE__,__func__);\
        abort();\
    } while (0)
/*
 * Forward declarations so that other headers can refer to these types through
 * pointers before the full struct definitions are seen.
 * NOTE(review): hjParser and hjClass are only declared here; no definition is
 * visible in this part of the project.
 */
typedef struct hjVM hjVM;
typedef struct hjLexer hjLexer;
typedef struct hjParser hjParser;
typedef struct hjClass hjClass;
#endif
//hj_core.h
/*
hj_core.h文件是整个项目的最核心的文件
1.几乎所有底层的结构体、枚举都定义在这里
2.只有结构体和枚举和全局变量,没有函数
*/
#ifndef _hj_core_h
#define _hj_core_h
#include"hj_common.h"
// Virtual machine state.
typedef struct hjVM {
uint32_t allocatedByte; // running total of allocated bytes
hjLexer* curLexer; // lexer currently in use
}hjVM;
// Token types produced by the lexer.
// The order of the enumerators matters: it is used as an index into the
// token-name table (hjTokenArr), so new entries must be added to both.
typedef enum hjTokenType {
/* unknown token */
hjTOKEN_UNKNOWN,
/* data types */
hjTOKEN_NUM, // number
hjTOKEN_STR, // string
hjTOKEN_ID, // identifier
hjTOKEN_EXPR, // embedded (interpolated) expression
/* keywords */
hjTOKEN_LET, //let
hjTOKEN_FUNCTION, //function
hjTOKEN_IF, //if
hjTOKEN_ELSE, //else
hjTOKEN_TRUE, //True
hjTOKEN_FALSE, //False
hjTOKEN_WHILE, //while
hjTOKEN_FOR, //for
hjTOKEN_BREAK, //break
hjTOKEN_CONTINUE, //continue
hjTOKEN_RETURN, //return
hjTOKEN_NIL, //Nil
/* classes and module import */
hjTOKEN_CLASS, //class
hjTOKEN_SELF, //self
hjTOKEN_STATIC, //static
hjTOKEN_IS, //is
hjTOKEN_SUPER, //super
hjTOKEN_IMPORT, //import
/* separators */
hjTOKEN_AT, //@
hjTOKEN_WELL, //#
hjTOKEN_ACCENT, //' accent mark
hjTOKEN_SEMI, //; semicolon
hjTOKEN_BASKSLASH, // \ backslash (identifier spelling kept as in the original)
hjTOKEN_COLON, //: colon
hjTOKEN_COMMA, //, comma
hjTOKEN_DOT, //. single dot
hjTOKEN_DOT_DOT, //.. two consecutive dots
hjTOKEN_LEFT_PAREN, //(
hjTOKEN_RIGHT_PAREN, //)
hjTOKEN_LEFT_BRACKET, //[
hjTOKEN_RIGHT_BRACKET, //]
hjTOKEN_LEFT_BRACE, //{
hjTOKEN_RIGHT_BRACE, //}
// assignment
hjTOKEN_ASSIGN, //=
/* simple binary operators */
hjTOKEN_ADD, //+
hjTOKEN_SUB, //-
hjTOKEN_MUL, //*
hjTOKEN_DIV, // /
hjTOKEN_MOD, //%
hjTOKEN_POW, //^ the operators above are left-associative, power is right-associative
/* bitwise operators */
hjTOKEN_BIT_AND, //&
hjTOKEN_BIT_OR, //|
hjTOKEN_BIT_NOT, //~
hjTOKEN_BIT_SHIFT_LEFT, //<<
hjTOKEN_BIT_SHIFT_RIGHT, //>>
/* logical operators */
hjTOKEN_LOGIC_AND, //&&
hjTOKEN_LOGIC_OR, //||
hjTOKEN_LOGIC_NOT, //!
/* relational operators */
hjTOKEN_EQ, //==
hjTOKEN_NEQ, //!=
hjTOKEN_GT, //>
hjTOKEN_GE, //>=
hjTOKEN_LT, //<
hjTOKEN_LE, //<=
/* question mark, possibly part of the ?: ternary operator */
hjTOKEN_QUE, //?
hjTOKEN_EOL, // end of line (newline / line feed, '\n')
hjTOKEN_EOF // end of file
}hjTokenType;
// A single token: its type plus a view (pointer + length) into the source text.
typedef struct hjToken {
hjTokenType type; // token type
const char* start; // points at the first character of the token text
uint32_t len; // length of the token text
uint32_t lineNo; // source line number the token is on
}hjToken;
// Lexer state.
typedef struct hjLexer {
const char* file; // file name/path
const char* srcCode; // source code text
const char* next; // points at the character after curChar
char curChar; // current character
hjToken preToken; // previous token
hjToken curToken; // current token
hjVM* vm; // owning virtual machine
int Lcounter; // parenthesis nesting depth, used to track an embedded expression
}hjLexer;
// String value.
// Values of this type are the elements of the symbol table.
typedef struct hjString {
uint32_t len; // string length, not counting the terminating \0
char* str; // string contents
}hjString;
/*
 * String buffer.
 * Holds the characters belonging to a string object: a length header followed
 * directly by the character data in the same allocation.
 */
typedef struct hjCharValue {
    uint32_t len;   /* number of characters, NOT counting the terminating \0 */
    char start[];   /* C99 flexible array member (replaces the non-standard [0]) */
}hjCharValue;
/*--------------------定义几种类型的动态数组--------------------*/
// Growable array of hjString.
typedef struct hjStringArray {
uint32_t count; // number of elements stored
uint32_t capacity; // number of elements the buffer can hold
hjString* data; // element buffer
}hjStringArray;
// Growable array of bytes.
typedef struct hjByteArray {
uint32_t count; // number of elements stored
uint32_t capacity; // number of elements the buffer can hold
uint8_t* data; // byte buffer
}hjByteArray;
// Growable array of char.
typedef struct hjCharArray {
uint32_t count; // number of elements stored
uint32_t capacity; // number of elements the buffer can hold
char* data; // character buffer
}hjCharArray;
// Growable array of int.
typedef struct hjIntArray {
uint32_t count; // number of elements stored
uint32_t capacity; // number of elements the buffer can hold
int* data; // int buffer
}hjIntArray;
// The symbol table type is simply the string array type.
#define hjSymbolTable hjStringArray
#endif
//hj_error.h
/*
hj_error.h文件是错误处理模块
*/
#ifndef _hj_error_h
#define _hj_error_h
#include"hj_core.h"
// Error categories passed to hjError / hjError_.
typedef enum hjErrorType {
hjERROR_IO, // I/O error
hjERROR_MEM, // memory error
hjERROR_LEX, // lexer error
hjERROR_PARSE, // parser error
hjERROR_COMPILE, // compile error
hjERROR_RUNTIME // runtime error
}hjErrorType;
// Error reporting.
// hjError prints a fixed message; hjError_ takes a printf-style format plus arguments.
// _lexer is an hjLexer* passed as void*, or NULL when no lexer context applies.
hjPUBLIC void hjError(void* _lexer, hjErrorType _errType, char* _errMsg);
hjPUBLIC void hjError_(void* _lexer, hjErrorType _errType, const char* _format,...);
// Convenience wrappers around hjError_, one per error category.
// The IO/MEM/RUNTIME variants pass NULL as the lexer; the LEX/PARSE/COMPILE
// variants take the lexer as their first argument.
// The remaining arguments are a printf-style format string and its values.
#define __hjERROR_IO__(...) hjError_(NULL,hjERROR_IO,__VA_ARGS__)
#define __hjERROR_MEM__(...) hjError_(NULL,hjERROR_MEM,__VA_ARGS__)
#define __hjERROR_LEX__(lexer,...) hjError_(lexer,hjERROR_LEX,__VA_ARGS__)
#define __hjERROR_PARSE__(lexer,...) hjError_(lexer,hjERROR_PARSE,__VA_ARGS__)
#define __hjERROR_COMPILE__(lexer,...) hjError_(lexer,hjERROR_COMPILE,__VA_ARGS__)
#define __hjERROR_RUNTIME__(...) hjError_(NULL,hjERROR_RUNTIME,__VA_ARGS__)
#endif
//hj_mem.h
/*
hj_mem.h文件是内存管理模块
内存管理模块的功能:
1.申请内存
2.修改空间大小
3.释放内存
*/
#ifndef _hj_mem_h
#define _hj_mem_h
#include"hj_error.h"
// Allocate, resize or free memory (a new size of 0 frees the block).
hjPUBLIC void* hjMemManager(hjVM* _vm, void* _ptr, uint32_t _oldSize, uint32_t _newSize);
// Find the nearest power of two that is greater than or equal to _v.
hjPUBLIC uint32_t hjNumOf2Pow_GE_v(uint32_t _v);
// Clear a symbol table.
hjPUBLIC void hjClear_SymbolTable(hjVM* _vm, hjSymbolTable* _table);
/*
 * Memory-management helper macros built on hjMemManager.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. a count of `n + 1`) are sized correctly, and the DEALLOCATE macros
 * call hjMemManager (the previous `MemManager` name does not exist).
 */
/* Allocate storage for one object of `type`. */
#define ALLOCATE(vm,type) ((type*)hjMemManager((vm),NULL,0,sizeof(type)))
/* Allocate storage for one object of `type` plus `extraSize` trailing bytes. */
#define ALLOCATE_EXTRA(vm,type,extraSize) ((type*)hjMemManager((vm),NULL,0,sizeof(type) + (extraSize)))
/* Allocate storage for `count` objects of `type`. */
#define ALLOCATE_ARRAY(vm,type,count) ((type*)hjMemManager((vm),NULL,0,sizeof(type) * (count)))
/* Free the memory block `mem`. */
#define DEALLOCATE(vm,mem) hjMemManager((vm),(mem),0,0)
/* Free the array `arr` that holds `count` elements. */
#define DEALLOCATE_ARRAY(vm,arr,count) hjMemManager((vm),(arr),sizeof((arr)[0]) * (count),0)
/*
Dynamic-array operations. Each array type has the same four operations:
Init  - reset to an empty array with no buffer
Write - append _fillCount copies of _data, growing the buffer if needed
Add   - append a single element
Clear - release the buffer and reset the array
*/
/*--------------------string array operations--------------------*/
hjPUBLIC void hjInit_StringArray(hjStringArray* _arr);
hjPUBLIC void hjWrite_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data);
hjPUBLIC void hjClear_StringArray(hjVM* _vm, hjStringArray* _arr);
/*--------------------byte array operations--------------------*/
hjPUBLIC void hjInit_ByteArray(hjByteArray* _arr);
hjPUBLIC void hjWrite_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data);
hjPUBLIC void hjClear_ByteArray(hjVM* _vm, hjByteArray* _arr);
/*--------------------char array operations--------------------*/
hjPUBLIC void hjInit_CharArray(hjCharArray* _arr);
hjPUBLIC void hjWrite_CharArray(hjVM* _vm, hjCharArray* _arr, char _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_CharArray(hjVM* _vm, hjCharArray* _arr, char _data);
hjPUBLIC void hjClear_CharArray(hjVM* _vm, hjCharArray* _arr);
/*--------------------int array operations--------------------*/
hjPUBLIC void hjInit_IntArray(hjIntArray* _arr);
hjPUBLIC void hjWrite_IntArray(hjVM* _vm, hjIntArray* _arr, int _data, uint32_t _fillCount);
hjPUBLIC void hjAdd_IntArray(hjVM* _vm, hjIntArray* _arr, int _data);
// NOTE(review): the name is spelled "IntgArray"; the definition uses the same spelling.
hjPUBLIC void hjClear_IntgArray(hjVM* _vm, hjIntArray* _arr);
#endif
//hj_lexer.h
/*
hj_lexer.h文件是词法分析器
*/
#ifndef _hj_lexer_h
#define _hj_lexer_h
#include"hj_vm.h"
#include"hj_mem.h"
#include"hj_utf8.h"
/*定义全局变量*/
//文件根目录
extern char* hj_rootDir;
//读取脚本
hjPUBLIC char* hjReadScript(const char* _path);
//初始化词法分析器
hjPUBLIC hjInit_Lexer(hjVM* _vm, hjLexer* _lexer, const char* _file, const char* _srcCode);
//查看下一个字符
hjPUBLIC char hjPeek_NextChar(hjLexer* _lexer);
//获取下一个单词
hjPUBLIC void hjGet_NextToken(hjLexer* _lexer);
//匹配单词
hjPUBLIC hjBOOL hjMatch_Token(hjLexer* _lexer, hjTokenType _expectedToken);
//吸收当前单词
hjPUBLIC void hjConsume_CurToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg);
//吸收下一个单词
hjPUBLIC void hjConsume_NextToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg);
//判断是否是数字
hjPRIVATE hjBOOL hjJudge_Num(char _c);
//判断是否是字母
hjPRIVATE hjBOOL hjJudge_Alpha(char _c);
//判断是标识符还是关键字
hjPRIVATE hjTokenType hjJudge_IdOrKey(const char* _start, uint32_t _len);
//获取下一个字符
hjPRIVATE void hjGet_NextChar(hjLexer* _lexer);
//匹配下一个字符
hjPRIVATE hjBOOL hjMatch_NextChar(hjLexer* _lexer, char _expectedChar);
//跳过连续的空字符
hjPRIVATE void hjSkip_Blanks(hjLexer* _lexer);
//跳过单行注释
hjPRIVATE void hjSkip_CommentALine(hjLexer* _lexer);
//跳过多行注释
hjPRIVATE void hjSkip_Comment(hjLexer* _lexer);
//跳过一行
hjPRIVATE void hjSkip_ALine(hjLexer* _lexer);
//解析标识符
hjPRIVATE void hjLex_Id(hjLexer* _lexer, hjTokenType _token);
//解析字符串
hjPRIVATE void hjLex_Str(hjLexer* _lexer);
//解析UTF8码点
hjPRIVATE void hjLex_CodePointOfUTF8(hjLexer* _lexer, hjByteArray* _buf);
#endif
//hj_utf8.h
/*
hj_utf8.h文件是unicode utf8编码模块
*/
#ifndef _hj_utf8_h
#define _hj_utf8_h
#include"hj_common.h"
// Number of bytes needed to encode the value _value as UTF-8 (0 if out of range).
hjPUBLIC uint32_t hjGet_ByteNumOfEncodedUTF8(int _value);
// Length in bytes of the UTF-8 sequence that starts with the lead byte _byte.
hjPUBLIC uint32_t hjGet_ByteNumOfDecodedUTF8(uint8_t _byte);
// Encode _value as UTF-8 into _buf; returns the number of bytes written.
hjPUBLIC uint8_t hjEncodeUTF8(uint8_t* _buf, int _value);
// Decode one UTF-8 sequence from _bytePtr (at most _len bytes); returns -1 on invalid input.
hjPUBLIC int hjDecodeUTF8(const uint8_t* _bytePtr, uint32_t _len);
#endif
//hj_vm.h
/*
hj_vm.h文件是虚拟机
*/
#ifndef _hj_vm_h
#define _hj_vm_h
#include"hj_error.h"
// Initialize an already allocated virtual machine.
hjPUBLIC void hjInit_VM(hjVM* _vm);
// Allocate and initialize a new virtual machine.
hjPUBLIC hjVM* hjNew_VM();
#endif
\;
\;
\;
\;
\;
//hj_error.cpp
#include "hj_error.h"
/*
 * Print an error message to stdout in bright red.
 * Output layout: an optional "[Error_xxx]" tag chosen by _errType, then (when
 * _lexer is not NULL) the file, line and text position of the lexer's current
 * token, then _errMsg and a newline. The console color is restored afterwards.
 * _lexer is an hjLexer* passed as void*, or NULL.
 */
hjPUBLIC void hjError(void* _lexer, hjErrorType _errType, char* _errMsg) {
    const char* tag = NULL;

    /* switch the console text to bright red */
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);

    switch (_errType) {
    case hjERROR_IO:      tag = "[Error_io]";      break;
    case hjERROR_MEM:     tag = "[Error_mem]";     break;
    case hjERROR_LEX:     tag = "[Error_lex]";     break;
    case hjERROR_PARSE:   tag = "[Error_parse]";   break;
    case hjERROR_RUNTIME: tag = "[Error_runtime]"; break;
    case hjERROR_COMPILE: tag = "[Error_compile]"; break;
    default:              /* unknown category: no tag is printed */ break;
    }
    if (tag != NULL) {
        printf("%s", tag);
    }

    if (_lexer != NULL) {
        const hjLexer* ctx = (const hjLexer*)_lexer;
        printf("file \"%s\",line %d,at \"%s\"=>", ctx->file, ctx->curToken.lineNo, ctx->curToken.start);
    }
    printf("%s\n", _errMsg);

    /* restore the console text color (white) */
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN);
}
/*
 * Format an error message printf-style, print it to stderr and terminate the
 * program with exit(1).
 * - IO / MEM errors print this function's own file, line and name, then the message.
 * - LEX / PARSE / COMPILE errors print the lexer's file and the line of its
 *   previous token, then the message; _lexer must not be NULL for these.
 * - RUNTIME errors print only the message.
 * The formatted message is truncated to fit a 512-byte buffer.
 */
hjPUBLIC void hjError_(void* _lexer, hjErrorType _errType, const char* _format, ...) {
    char message[512] = { '\0' };
    va_list args;

    /* expand the variadic arguments into the message buffer */
    va_start(args, _format);
    vsnprintf(message, 512, _format, args);
    va_end(args);

    /* switch the console text to bright red */
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);

    if (_errType == hjERROR_IO || _errType == hjERROR_MEM) {
        fprintf(stderr, "%s:%d IN FUNCTION %s():%s\n", __FILE__, __LINE__, __func__, message);
    }
    else if (_errType == hjERROR_LEX || _errType == hjERROR_PARSE || _errType == hjERROR_COMPILE) {
        hjASSERT(_lexer != NULL, "lexer is null");
        fprintf(stderr, "%s:%d \"%s\"\n",
            ((hjLexer*)_lexer)->file,
            ((hjLexer*)_lexer)->preToken.lineNo,
            message
        );
    }
    else if (_errType == hjERROR_RUNTIME) {
        fprintf(stderr, "%s\n", message);
    }
    else {
        hjNOT_REACHED;
    }

    /* restore the console text color (white) */
    SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN);
    /* every error reported through this function is fatal */
    exit(1);
}
//hj_mem.cpp
#include"hj_mem.h"
/*
 * Central allocator: allocates, resizes or frees a block of memory.
 *   _vm      VM whose allocatedByte counter is adjusted by (_newSize - _oldSize);
 *            may be NULL, in which case no accounting is done.
 *   _ptr     block to resize/free, or NULL to allocate a fresh block.
 *   _oldSize size in bytes the caller believes _ptr currently has.
 *   _newSize requested size in bytes; 0 frees _ptr.
 * Returns the (possibly moved) block, or NULL when _newSize is 0.
 * On allocation failure a memory error is reported via __hjERROR_MEM__.
 */
hjPUBLIC void* hjMemManager(hjVM* _vm, void* _ptr, uint32_t _oldSize, uint32_t _newSize) {
    /* Both sizes are unsigned, so there is no negative case to reject. */
    if (_vm != NULL) {
        /* unsigned wrap-around makes this a correct signed delta */
        _vm->allocatedByte += _newSize - _oldSize;
    }
    /*
     * Free explicitly instead of calling realloc(ptr, 0): realloc with size 0
     * may return a non-NULL pointer that cannot be used.
     */
    if (_newSize == 0) {
        free(_ptr);
        return NULL;
    }
    /* Keep the result in a separate variable so _ptr stays valid if realloc fails. */
    void* resized = realloc(_ptr, _newSize);
    if (resized == NULL) {
        __hjERROR_MEM__("memory allocate failed!");
    }
    return resized;
}
/*
 * Return the smallest power of two that is greater than or equal to _v.
 * 0 is treated like 1 (result 1). For _v above 2^31 the result does not fit
 * in 32 bits and wraps to 0.
 * Method: take _v - 1, smear its highest set bit into every lower bit by
 * OR-ing with right shifts of 1, 2, 4, 8 and 16, then add one.
 */
hjPUBLIC uint32_t hjNumOf2Pow_GE_v(uint32_t _v) {
    uint32_t bits = (_v == 0) ? 0 : _v - 1;
    unsigned shift;
    for (shift = 1; shift < 32; shift <<= 1) {
        bits |= bits >> shift;
    }
    return bits + 1;
}
/*
 * Clear a symbol table: free the character buffer of every stored string,
 * then release the table's own storage and reset it to empty.
 */
hjPUBLIC void hjClear_SymbolTable(hjVM* _vm, hjSymbolTable* _table) {
    uint32_t idx;
    for (idx = 0; idx < _table->count; idx++) {
        hjString* entry = &_table->data[idx];
        hjMemManager(_vm, entry->str, 0, 0);
    }
    hjClear_StringArray(_vm, _table);
}
/*--------------------字符串数组方法--------------------*/
/* Reset a string array to the empty state: no buffer, zero count, zero capacity. */
hjPUBLIC void hjInit_StringArray(hjStringArray* _arr) {
    _arr->count = 0;
    _arr->capacity = 0;
    _arr->data = NULL;
}
/*
 * Append _fillCount copies of _data to the string array.
 * When the required element count exceeds the capacity, the capacity becomes
 * the nearest power of two >= the required count and the buffer is resized
 * through hjMemManager.
 */
hjPUBLIC void hjWrite_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data, uint32_t _fillCount) {
    /* elements already stored plus elements about to be added */
    uint32_t required = _arr->count + _fillCount;
    if (required > _arr->capacity) {
        size_t bytesBefore = _arr->capacity * sizeof(hjString);
        _arr->capacity = hjNumOf2Pow_GE_v(required);
        size_t bytesAfter = _arr->capacity * sizeof(hjString);
        /* growing must really enlarge the buffer */
        hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");
        _arr->data = (hjString*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
    }
    /* store the copies at the end of the array */
    while (_fillCount-- > 0) {
        _arr->data[_arr->count++] = _data;
    }
}
/* Append a single string to the array (a write with a fill count of one). */
hjPUBLIC void hjAdd_StringArray(hjVM* _vm, hjStringArray* _arr, hjString _data) {
    const uint32_t single = 1;
    hjWrite_StringArray(_vm, _arr, _data, single);
}
/* Release the string array's buffer (capacity * element size bytes) and reset it to empty. */
hjPUBLIC void hjClear_StringArray(hjVM* _vm, hjStringArray* _arr) {
    uint32_t bufferBytes = _arr->capacity * sizeof(_arr->data[0]);
    /* new size 0 => hjMemManager frees the buffer */
    (void)hjMemManager(_vm, _arr->data, bufferBytes, 0);
    hjInit_StringArray(_arr);
}
/*--------------------字节数组方法--------------------*/
/* Reset a byte array to the empty state: no buffer, zero count, zero capacity. */
hjPUBLIC void hjInit_ByteArray(hjByteArray* _arr) {
    _arr->count = 0;
    _arr->capacity = 0;
    _arr->data = NULL;
}
/*
 * Append _fillCount copies of the byte _data to the byte array.
 * When the required element count exceeds the capacity, the capacity becomes
 * the nearest power of two >= the required count and the buffer is resized
 * through hjMemManager.
 */
hjPUBLIC void hjWrite_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data, uint32_t _fillCount) {
    /* elements already stored plus elements about to be added */
    uint32_t required = _arr->count + _fillCount;
    if (required > _arr->capacity) {
        size_t bytesBefore = _arr->capacity * sizeof(uint8_t);
        _arr->capacity = hjNumOf2Pow_GE_v(required);
        size_t bytesAfter = _arr->capacity * sizeof(uint8_t);
        /* growing must really enlarge the buffer */
        hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");
        _arr->data = (uint8_t*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
    }
    /* store the copies at the end of the array */
    while (_fillCount-- > 0) {
        _arr->data[_arr->count++] = _data;
    }
}
/* Append a single byte to the array (a write with a fill count of one). */
hjPUBLIC void hjAdd_ByteArray(hjVM* _vm, hjByteArray* _arr, uint8_t _data) {
    const uint32_t single = 1;
    hjWrite_ByteArray(_vm, _arr, _data, single);
}
/* Release the byte array's buffer (capacity * element size bytes) and reset it to empty. */
hjPUBLIC void hjClear_ByteArray(hjVM* _vm, hjByteArray* _arr) {
    uint32_t bufferBytes = _arr->capacity * sizeof(_arr->data[0]);
    /* new size 0 => hjMemManager frees the buffer */
    (void)hjMemManager(_vm, _arr->data, bufferBytes, 0);
    hjInit_ByteArray(_arr);
}
/*--------------------字符数组方法--------------------*/
/* Reset a char array to the empty state: no buffer, zero count, zero capacity. */
hjPUBLIC void hjInit_CharArray(hjCharArray* _arr) {
    _arr->count = 0;
    _arr->capacity = 0;
    _arr->data = NULL;
}
/*
 * Append _fillCount copies of the character _data to the char array.
 * When the required element count exceeds the capacity, the capacity becomes
 * the nearest power of two >= the required count and the buffer is resized
 * through hjMemManager.
 */
hjPUBLIC void hjWrite_CharArray(hjVM* _vm, hjCharArray* _arr, char _data, uint32_t _fillCount) {
    /* elements already stored plus elements about to be added */
    uint32_t required = _arr->count + _fillCount;
    if (required > _arr->capacity) {
        size_t bytesBefore = _arr->capacity * sizeof(char);
        _arr->capacity = hjNumOf2Pow_GE_v(required);
        size_t bytesAfter = _arr->capacity * sizeof(char);
        /* growing must really enlarge the buffer */
        hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");
        _arr->data = (char*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
    }
    /* store the copies at the end of the array */
    while (_fillCount-- > 0) {
        _arr->data[_arr->count++] = _data;
    }
}
/* Append a single character to the array (a write with a fill count of one). */
hjPUBLIC void hjAdd_CharArray(hjVM* _vm, hjCharArray* _arr, char _data) {
    const uint32_t single = 1;
    hjWrite_CharArray(_vm, _arr, _data, single);
}
/* Release the char array's buffer (capacity * element size bytes) and reset it to empty. */
hjPUBLIC void hjClear_CharArray(hjVM* _vm, hjCharArray* _arr) {
    uint32_t bufferBytes = _arr->capacity * sizeof(_arr->data[0]);
    /* new size 0 => hjMemManager frees the buffer */
    (void)hjMemManager(_vm, _arr->data, bufferBytes, 0);
    hjInit_CharArray(_arr);
}
/*--------------------int数组方法--------------------*/
/* Reset an int array to the empty state: no buffer, zero count, zero capacity. */
hjPUBLIC void hjInit_IntArray(hjIntArray* _arr) {
    _arr->count = 0;
    _arr->capacity = 0;
    _arr->data = NULL;
}
/*
 * Append _fillCount copies of the int _data to the int array.
 * When the required element count exceeds the capacity, the capacity becomes
 * the nearest power of two >= the required count and the buffer is resized
 * through hjMemManager.
 */
hjPUBLIC void hjWrite_IntArray(hjVM* _vm, hjIntArray* _arr, int _data, uint32_t _fillCount) {
    /* elements already stored plus elements about to be added */
    uint32_t required = _arr->count + _fillCount;
    if (required > _arr->capacity) {
        size_t bytesBefore = _arr->capacity * sizeof(int);
        _arr->capacity = hjNumOf2Pow_GE_v(required);
        size_t bytesAfter = _arr->capacity * sizeof(int);
        /* growing must really enlarge the buffer */
        hjASSERT(bytesAfter > bytesBefore, "faint ... memory allocate!");
        _arr->data = (int*)hjMemManager(_vm, _arr->data, bytesBefore, bytesAfter);
    }
    /* store the copies at the end of the array */
    while (_fillCount-- > 0) {
        _arr->data[_arr->count++] = _data;
    }
}
/* Append a single int to the array (a write with a fill count of one). */
hjPUBLIC void hjAdd_IntArray(hjVM* _vm, hjIntArray* _arr, int _data) {
    const uint32_t single = 1;
    hjWrite_IntArray(_vm, _arr, _data, single);
}
/*
 * Release the int array's buffer (capacity * element size bytes) and reset it to empty.
 * The function name is spelled "IntgArray" to match its declaration in hj_mem.h.
 */
hjPUBLIC void hjClear_IntgArray(hjVM* _vm, hjIntArray* _arr) {
    uint32_t bufferBytes = _arr->capacity * sizeof(_arr->data[0]);
    /* new size 0 => hjMemManager frees the buffer */
    (void)hjMemManager(_vm, _arr->data, bufferBytes, 0);
    hjInit_IntArray(_arr);
}
//hj_utf8.cpp
#include"hj_utf8.h"
/*
 * Return how many bytes the UTF-8 encoding of _value occupies:
 * 1 for values up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, 4 up to 0x10ffff,
 * and 0 when the value is above 0x10ffff.
 * Negative values are a caller error (checked by assertion in debug builds).
 * Zero is a valid value and needs one byte, so the assertion accepts it.
 */
hjPUBLIC uint32_t hjGet_ByteNumOfEncodedUTF8(int _value) {
    hjASSERT(_value >= 0, "can't encode negative value!");
    /* plain ASCII: one byte */
    if (_value <= 0x7f) return 1;
    /* two-byte range */
    if (_value <= 0x7ff) return 2;
    /* three-byte range */
    if (_value <= 0xffff) return 3;
    /* four-byte range */
    if (_value <= 0x10ffff) return 4;
    /* out of range */
    return 0;
}
/*
 * Given the first byte of a UTF-8 sequence, return the length of the sequence
 * in bytes. Returns 0 when _byte is a continuation byte (10xxxxxx) rather than
 * a lead byte, and 1 for every byte that matches none of the multi-byte lead
 * patterns (ASCII included).
 */
hjPUBLIC uint32_t hjGet_ByteNumOfDecodedUTF8(uint8_t _byte) {
    switch (_byte >> 4) {
    case 0x8: case 0x9: case 0xa: case 0xb:
        return 0;                       /* 10xxxxxx: continuation byte */
    case 0xc: case 0xd:
        return 2;                       /* 110xxxxx */
    case 0xe:
        return 3;                       /* 1110xxxx */
    case 0xf:
        return (_byte & 0x08) ? 1 : 4;  /* 11110xxx is a 4-byte lead */
    default:
        return 1;                       /* 0xxxxxxx: ASCII */
    }
}
/*
 * Encode _value as UTF-8 into _buf, most significant byte first, and return
 * the number of bytes written (1 to 4). _buf must have room for that many bytes.
 * Negative values are a caller error (checked by assertion in debug builds).
 * Zero is valid and is written as a single 0x00 byte.
 * Values above 0x10ffff hit hjNOT_REACHED.
 */
hjPUBLIC uint8_t hjEncodeUTF8(uint8_t* _buf, int _value) {
    hjASSERT(_value >= 0, "can't encode negative value!");
    /* plain ASCII: one byte */
    if (_value <= 0x7f) {
        *_buf = _value & 0x7f;
        return 1;
    }
    /* two bytes: 110xxxxx 10xxxxxx */
    if (_value <= 0x7ff) {
        *_buf++ = 0xc0 | ((_value & 0x7c0) >> 6);
        *_buf = 0x80 | (_value & 0x3f);
        return 2;
    }
    /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    if (_value <= 0xffff) {
        *_buf++ = 0xe0 | ((_value & 0xf000) >> 12);
        *_buf++ = 0x80 | ((_value & 0xfc0) >> 6);
        *_buf = 0x80 | (_value & 0x3f);
        return 3;
    }
    /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (_value <= 0x10ffff) {
        *_buf++ = 0xf0 | ((_value & 0x1c0000) >> 18);
        *_buf++ = 0x80 | ((_value & 0x3f000) >> 12);
        *_buf++ = 0x80 | ((_value & 0xfc0) >> 6);
        *_buf = 0x80 | (_value & 0x3f);
        return 4;
    }
    hjNOT_REACHED;
    /* out of range */
    return 0;
}
/*
 * Decode one UTF-8 sequence starting at _bytePtr and return its value.
 *   _bytePtr  first byte of the sequence
 *   _len      number of bytes available at _bytePtr
 * Returns -1 when no input is available (_bytePtr is NULL or _len is 0), when
 * the lead byte is not a valid lead byte, when the sequence is cut short by
 * _len, or when a continuation byte does not have the form 10xxxxxx.
 */
hjPUBLIC int hjDecodeUTF8(const uint8_t* _bytePtr, uint32_t _len) {
    /* nothing to read: also keeps the length check below free of underflow */
    if (_bytePtr == NULL || _len == 0) return -1;

    /* single-byte ASCII: 0xxxxxxx */
    if (*_bytePtr <= 0x7f) return *_bytePtr;

    int value;
    uint32_t remainingBytes;
    /* read the payload bits of the lead byte */
    if ((*_bytePtr & 0xe0) == 0xc0) {        /* 2-byte sequence */
        value = *_bytePtr & 0x1f;
        remainingBytes = 1;
    }
    else if ((*_bytePtr & 0xf0) == 0xe0) {   /* 3-byte sequence */
        value = *_bytePtr & 0x0f;
        remainingBytes = 2;
    }
    else if ((*_bytePtr & 0xf8) == 0xf0) {   /* 4-byte sequence */
        value = *_bytePtr & 0x07;
        remainingBytes = 3;
    }
    else {                                   /* invalid lead byte */
        return -1;
    }

    /* the sequence is truncated: do not read past the available bytes */
    if (remainingBytes >= _len) return -1;

    /* accumulate the low 6 bits of each continuation byte */
    while (remainingBytes > 0) {
        ++_bytePtr;
        --remainingBytes;
        /* the top two bits must be 10 */
        if ((*_bytePtr & 0xc0) != 0x80) return -1;
        value = value << 6 | (*_bytePtr & 0x3f);
    }
    return value;
}
//hj_lexer.cpp
#include"hj_lexer.h"
/* Definition of the global declared in hj_lexer.h; starts out as the empty string. */
char* hj_rootDir="";
/* File-private struct and lookup table */
// One keyword entry.
typedef struct hjKeyToken {
char* keyword; // keyword text
uint8_t len; // length of the keyword text
hjTokenType type; // token type of the keyword
}hjKeyToken;
/*
 * Keyword lookup table: keyword text, its length, and its token type.
 * The boolean keywords are spelled "True" and "False", matching the token
 * type comments and the stored lengths 4 and 5 (the table previously held
 * "hjTRUE"/"hjFALSE", which could never match a source word).
 * The table ends with a {NULL, 0, hjTOKEN_UNKNOWN} sentinel because
 * hjJudge_IdOrKey scans until it finds a NULL keyword.
 */
hjKeyToken hjMap_KeyToken[] = {
    {"let",3,hjTOKEN_LET},
    {"function",8,hjTOKEN_FUNCTION},
    {"if",2,hjTOKEN_IF},
    {"else",4,hjTOKEN_ELSE},
    {"True",4,hjTOKEN_TRUE},
    {"False",5,hjTOKEN_FALSE},
    {"while",5,hjTOKEN_WHILE},
    {"for",3,hjTOKEN_FOR},
    {"break",5,hjTOKEN_BREAK},
    {"continue",8,hjTOKEN_CONTINUE},
    {"return",6,hjTOKEN_RETURN},
    {"Nil",3,hjTOKEN_NIL},
    {"class",5,hjTOKEN_CLASS},
    {"self",4,hjTOKEN_SELF},
    {"static",6,hjTOKEN_STATIC},
    {"is",2,hjTOKEN_IS},
    {"super",5,hjTOKEN_SUPER},
    {"import",6,hjTOKEN_IMPORT},
    {NULL,0,hjTOKEN_UNKNOWN}   /* sentinel: end of table */
};
/*
 * Read the whole script file at _path into a newly allocated, NUL-terminated
 * buffer. The caller owns the returned buffer and releases it with free().
 * Failures (cannot open, cannot stat, out of memory, read error) are reported
 * through __hjERROR_IO__ / __hjERROR_MEM__.
 *
 * The file is opened in text mode, so the number of bytes actually read can be
 * smaller than the size reported by stat (e.g. CRLF translation). The
 * terminator is therefore written after the bytes really read, not at the
 * stat size. An empty file yields an empty string rather than an error.
 */
hjPUBLIC char* hjReadScript(const char* _path) {
    FILE* file = fopen(_path, "r");
    if (file == NULL) {
        __hjERROR_IO__("Couldn't open file \"%s\".", _path);
        return NULL;
    }

    /* size of the file on disk = upper bound for what fread can deliver */
    struct stat myStat;
    if (stat(_path, &myStat) != 0) {
        fclose(file);
        __hjERROR_IO__("Couldn't get size of file \"%s\".", _path);
        return NULL;
    }
    size_t fileSize = (size_t)myStat.st_size;

    char* fileContent = (char*)malloc(1 + fileSize);
    if (fileContent == NULL) {
        fclose(file);
        __hjERROR_MEM__("Couldn't allocate memory for reading file \"%s\".\n", _path);
        return NULL;
    }

    size_t bytesRead = fread(fileContent, sizeof(char), fileSize, file);
    if (ferror(file)) {
        free(fileContent);
        fclose(file);
        __hjERROR_IO__("Couldn't read file \"%s\".", _path);
        return NULL;
    }
    fileContent[bytesRead] = '\0';

    fclose(file);
    return fileContent;
}
//初始化词法分析器
hjPUBLIC hjInit_Lexer(hjVM* _vm, hjLexer* _lexer, const char* _file, const char* _srcCode) {
//设置文件路径
_lexer->file = _file;
//设置源码字符串
_lexer->srcCode = _srcCode;
//设置当前字符
_lexer->curChar = *_srcCode;
//设置下一个字符指针
_lexer->next = _lexer->srcCode + 1;
//初始化当前单词
_lexer->curToken.lineNo = 1;
_lexer->curToken.len = 0;
_lexer->curToken.start = NULL;
_lexer->curToken.type = hjTOKEN_UNKNOWN;
//初始先前单词
_lexer->preToken = _lexer->curToken;
//初始化(计数器
_lexer->Lcounter = 0;
//初始化虚拟机
_lexer->vm = _vm;
}
/* Return the character after the current one without consuming it. */
hjPUBLIC char hjPeek_NextChar(hjLexer* _lexer) {
    const char* upcoming = _lexer->next;
    return upcoming[0];
}
/*
Scan the next token from the source into _lexer->curToken.
The previous value of curToken is saved in preToken first.
Leading whitespace is skipped; comments and "#!" lines are skipped and scanning
then continues with whatever follows them.
If the end of the source is reached, curToken.type stays hjTOKEN_EOF.
NOTE(review): a character that matches no case (for example a digit) makes the
function return without consuming it, leaving curToken.type as hjTOKEN_EOF;
the error report for that situation is commented out below.
*/
hjPUBLIC void hjGet_NextToken(hjLexer* _lexer) {
/* the current token becomes the previous token */
_lexer->preToken = _lexer->curToken;
// skip the whitespace in front of the token to be recognized
hjSkip_Blanks(_lexer);
// reset the current token; EOF is the result if nothing below matches
_lexer->curToken.type = hjTOKEN_EOF;
_lexer->curToken.len = 0;
_lexer->curToken.start = _lexer->next - 1;
for (;_lexer->curChar!='\0';) {
switch (_lexer->curChar) {
/*
(disabled token cases)
case '@':
_lexer->curToken.type = hjTOKEN_AT;
break;
case '#':
_lexer->curToken.type = hjTOKEN_WELL;
break;
case '`': //accent
_lexer->curToken.type = hjTOKEN_ACCENT;
break;
case ';': //semicolon
_lexer->curToken.type = hjTOKEN_SEMI;
break;
case '\\': backslash
_lexer->curToken.type = hjTOKEN_SEMI;
break;
*/
case ':': //colon
_lexer->curToken.type = hjTOKEN_COLON;
break;
case ',': //comma
_lexer->curToken.type = hjTOKEN_COMMA;
break;
case '.': //dot
if (hjMatch_NextChar(_lexer, '.')) {//two consecutive dots
_lexer->curToken.type = hjTOKEN_DOT_DOT;
}
else {//a single dot
_lexer->curToken.type = hjTOKEN_DOT;
}
break;
case '(':
if (_lexer->Lcounter > 0) {
++_lexer->Lcounter;//inside an embedded expression: one level deeper
}
_lexer->curToken.type = hjTOKEN_LEFT_PAREN;
break;
case ')':
if (_lexer->Lcounter > 0) {
--_lexer->Lcounter;//inside an embedded expression: one level up
if (_lexer->Lcounter == 0) {
// the embedded expression is closed: continue scanning the rest of the string
hjLex_Str(_lexer);
break;
}
}
_lexer->curToken.type = hjTOKEN_RIGHT_PAREN;
break;
case '[':
_lexer->curToken.type = hjTOKEN_LEFT_BRACKET;
break;
case ']':
_lexer->curToken.type = hjTOKEN_RIGHT_BRACKET;
break;
case '{':
_lexer->curToken.type = hjTOKEN_LEFT_BRACE;
break;
case '}':
_lexer->curToken.type = hjTOKEN_RIGHT_BRACE;
break;
case '=':
if (hjMatch_NextChar(_lexer, '=')) {//equality test
_lexer->curToken.type = hjTOKEN_EQ;
}
else {//assignment
_lexer->curToken.type = hjTOKEN_ASSIGN;
}
break;
case '+':
_lexer->curToken.type = hjTOKEN_ADD;
break;
case '-':
_lexer->curToken.type = hjTOKEN_SUB;
break;
case '*':
_lexer->curToken.type = hjTOKEN_MUL;
break;
case '/':
if (hjMatch_NextChar(_lexer,'/') ) { //single-line comment
hjSkip_CommentALine(_lexer);
_lexer->curToken.start = _lexer->next - 1;
continue;
}
else if (hjMatch_NextChar(_lexer, '*')) { //block comment
hjSkip_Comment(_lexer);
_lexer->curToken.start = _lexer->next - 1;
continue;
}
else { //division sign
_lexer->curToken.type = hjTOKEN_DIV;
}
break;
case '%':
_lexer->curToken.type = hjTOKEN_MOD;
break;
case '^':
_lexer->curToken.type = hjTOKEN_POW;
break;
case '&':
if (hjMatch_NextChar(_lexer, '&')) {//logical and
_lexer->curToken.type = hjTOKEN_LOGIC_AND;
}
else {//bitwise and
_lexer->curToken.type = hjTOKEN_BIT_AND;
}
break;
case '|':
if (hjMatch_NextChar(_lexer, '|')) {//logical or
_lexer->curToken.type = hjTOKEN_LOGIC_OR;
}
else {//bitwise or
_lexer->curToken.type = hjTOKEN_BIT_OR;
}
break;
case '!':
if (hjMatch_NextChar(_lexer, '=')) {//not equal
_lexer->curToken.type = hjTOKEN_NEQ;
}
else {//logical not
_lexer->curToken.type = hjTOKEN_LOGIC_NOT;
}
break;
case '~':
_lexer->curToken.type = hjTOKEN_BIT_NOT;
break;
case '<':
if (hjMatch_NextChar(_lexer, '=')) {//<=
_lexer->curToken.type = hjTOKEN_LE;
}
else {//<
_lexer->curToken.type = hjTOKEN_LT;
}
break;
case '>':
if (hjMatch_NextChar(_lexer, '=')) {//>=
_lexer->curToken.type = hjTOKEN_GE;
}
else {//>
_lexer->curToken.type = hjTOKEN_GT;
}
break;
case '?':
_lexer->curToken.type = hjTOKEN_QUE;
break;
case '"': //double quote: start of a string literal
hjLex_Str(_lexer);
break;
/* EOL and EOF are not handled here. Nothing can be done for EOF, but EOL could be handled later. */
default:
//the first character is a letter or an underscore
if (hjJudge_Alpha(_lexer->curChar) || _lexer->curChar == '_') {
hjLex_Id(_lexer,hjTOKEN_UNKNOWN);
}
//ignore "#!" and the rest of that line
else if (_lexer->curChar == '#' && hjMatch_NextChar(_lexer, '!')) {
hjSkip_ALine(_lexer);
_lexer->curToken.start = _lexer->next - 1;
continue;
}
else{
///__hjERROR_LEX__(_lexer, "unsupport char : \"%c\" , quit.", _lexer->curChar);
}
// hjLex_Id has already set the token length and advanced past the word
return;
}
/* Normally the switch runs once and the function returns here; a `continue` above (comment or "#!" line) makes the loop run again. */
_lexer->curToken.len = (uint32_t)(_lexer->next - _lexer->curToken.start);
hjGet_NextChar(_lexer);
return;
}
}
/*
 * Match a token.
 * When the current token has the expected type, scan the next token and
 * return hjTRUE; otherwise leave the lexer untouched and return hjFALSE.
 */
hjPUBLIC hjBOOL hjMatch_Token(hjLexer* _lexer, hjTokenType _expectedToken) {
    if (_lexer->curToken.type != _expectedToken) {
        return hjFALSE;
    }
    hjGet_NextToken(_lexer);
    return hjTRUE;
}
/*
 * Consume the current token.
 * When the current token has the expected type, scan the next token;
 * otherwise report _errMsg as a compile error.
 * _errMsg is passed as data for a "%s" format, never as the format string
 * itself, so '%' characters in the message are printed literally.
 */
hjPUBLIC void hjConsume_CurToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg) {
    if (_lexer->curToken.type == _expectedToken) {
        hjGet_NextToken(_lexer);
    }
    else {
        __hjERROR_COMPILE__(_lexer, "%s", _errMsg);
    }
}
/*
 * Consume the next token.
 * Scan the next token, then require it to have the expected type; otherwise
 * report _errMsg as a compile error.
 * _errMsg is passed as data for a "%s" format, never as the format string
 * itself, so '%' characters in the message are printed literally.
 */
hjPUBLIC void hjConsume_NextToken(hjLexer* _lexer, hjTokenType _expectedToken, const char* _errMsg) {
    hjGet_NextToken(_lexer);
    if (_lexer->curToken.type != _expectedToken) {
        __hjERROR_COMPILE__(_lexer, "%s", _errMsg);
    }
}
/* Return hjTRUE when _c is an ASCII decimal digit ('0' to '9'), hjFALSE otherwise. */
hjPRIVATE hjBOOL hjJudge_Num(char _c) {
    return (_c >= '0' && _c <= '9') ? hjTRUE : hjFALSE;
}
/* Return hjTRUE when _c is an ASCII letter ('A' to 'Z' or 'a' to 'z'), hjFALSE otherwise. */
hjPRIVATE hjBOOL hjJudge_Alpha(char _c) {
    int isUpper = (_c >= 'A' && _c <= 'Z');
    int isLower = (_c >= 'a' && _c <= 'z');
    return (isUpper || isLower) ? hjTRUE : hjFALSE;
}
/*
 * Decide whether a word is a keyword or an identifier.
 * The word (_start, _len) is already known to be one of the two; this function
 * returns the keyword's token type when the word equals an entry of
 * hjMap_KeyToken, and hjTOKEN_ID otherwise.
 * The scan is bounded by the table's element count and also stops at a NULL
 * keyword, so it stays inside the table whether or not the table carries a
 * terminating sentinel entry.
 */
hjPRIVATE hjTokenType hjJudge_IdOrKey(const char* _start, uint32_t _len) {
    const size_t entryCount = sizeof(hjMap_KeyToken) / sizeof(hjMap_KeyToken[0]);
    size_t i;
    for (i = 0; i < entryCount && hjMap_KeyToken[i].keyword != NULL; ++i) {
        if (hjMap_KeyToken[i].len == _len &&
            memcmp(hjMap_KeyToken[i].keyword, _start, _len) == 0) {
            return hjMap_KeyToken[i].type;   /* the specific keyword type */
        }
    }
    /* not a keyword, so it is an identifier */
    return hjTOKEN_ID;
}
/*
 * Advance by one character: the character `next` points at becomes the
 * current character, and `next` moves one position further in the source.
 */
hjPRIVATE void hjGet_NextChar(hjLexer* _lexer) {
    _lexer->curChar = *_lexer->next++;
}
/*
 * Match the next character.
 * When the next character equals _expectedChar it is consumed (it becomes the
 * current character) and hjTRUE is returned; otherwise nothing moves and
 * hjFALSE is returned.
 */
hjPRIVATE hjBOOL hjMatch_NextChar(hjLexer* _lexer,char _expectedChar) {
    if (hjPeek_NextChar(_lexer) != _expectedChar) {
        return hjFALSE;
    }
    hjGet_NextChar(_lexer);
    return hjTRUE;
}
/*
 * Skip consecutive whitespace (space, newline, tab, carriage return),
 * incrementing the current token's line number for every newline skipped.
 */
hjPRIVATE void hjSkip_Blanks(hjLexer* _lexer) {
    for (;;) {
        char c = _lexer->curChar;
        if (c != ' ' && c != '\n' && c != '\t' && c != '\r') {
            break;
        }
        if (c == '\n') {
            ++_lexer->curToken.lineNo;
        }
        hjGet_NextChar(_lexer);
    }
}
/* Skip a single-line comment: drop the rest of the line, then any whitespace that follows it. */
hjPRIVATE void hjSkip_CommentALine(hjLexer* _lexer) {
    hjSkip_ALine(_lexer);    /* the comment text up to and including the newline */
    hjSkip_Blanks(_lexer);   /* whitespace may follow the comment */
}
/*
 * Skip a block comment.
 * On entry the current character is the '*' of the opening slash-star pair.
 * Characters are consumed until the closing star-slash pair; the line number
 * is incremented for every newline inside the comment. On exit the current
 * character is the first non-blank character after the comment.
 * A '*' inside the comment that is not followed by '/' is ordinary comment
 * text (it used to be reported as an error).
 * Reaching the end of the source before the comment is closed is a lex error.
 */
hjPRIVATE void hjSkip_Comment(hjLexer* _lexer) {
    for (;;) {
        if (hjPeek_NextChar(_lexer) == '\0') {
            __hjERROR_LEX__(_lexer, "expect \"*/\" before file end!");
            return;
        }
        hjGet_NextChar(_lexer);
        if (_lexer->curChar == '\n') {
            ++_lexer->curToken.lineNo;
        }
        else if (_lexer->curChar == '*' && hjPeek_NextChar(_lexer) == '/') {
            hjGet_NextChar(_lexer);   /* now on the closing '/' */
            hjGet_NextChar(_lexer);   /* first character after the comment */
            break;
        }
    }
    /* whitespace may follow the comment */
    hjSkip_Blanks(_lexer);
}
/*
 * Skip the rest of the current line.
 * Advances one character first, then consumes characters up to and including
 * the next newline (incrementing the line number for it), or up to the end of
 * the source.
 */
hjPRIVATE void hjSkip_ALine(hjLexer* _lexer) {
    hjGet_NextChar(_lexer);
    while (_lexer->curChar != '\0') {
        char consumed = _lexer->curChar;
        hjGet_NextChar(_lexer);
        if (consumed == '\n') {
            ++_lexer->curToken.lineNo;
            break;
        }
    }
}
/*
 * Scan an identifier or keyword starting at curToken.start.
 * A word consists of letters, digits and underscores. When _token is not
 * hjTOKEN_UNKNOWN it is used as the token type directly; otherwise the word
 * is classified as a keyword or an identifier.
 */
hjPRIVATE void hjLex_Id(hjLexer* _lexer, hjTokenType _token) {
    while (hjJudge_Alpha(_lexer->curChar) || hjJudge_Num(_lexer->curChar) || _lexer->curChar == '_') {
        hjGet_NextChar(_lexer);
    }
    /* `next` is one past the first character that is not part of the word, hence the -1 */
    uint32_t wordLen = (uint32_t)(_lexer->next - _lexer->curToken.start - 1);
    _lexer->curToken.type = (_token == hjTOKEN_UNKNOWN)
        ? hjJudge_IdOrKey(_lexer->curToken.start, wordLen)
        : _token;
    _lexer->curToken.len = wordLen;
}
/*
 * Scan a string literal.
 * Characters are consumed until a closing double quote (token type
 * hjTOKEN_STR) or until "%(" starts an embedded expression (token type
 * hjTOKEN_EXPR, Lcounter set to 1). Escape sequences are decoded while
 * scanning. The decoded bytes are collected in a temporary byte array that is
 * released before returning.
 * Lex errors: unterminated string, '%' not followed by '(', unsupported
 * escape. A nested embedded expression is reported as a compile error.
 */
hjPRIVATE void hjLex_Str(hjLexer* _lexer) {
    hjByteArray bytes;
    hjInit_ByteArray(&bytes);

    while (1) {
        hjGet_NextChar(_lexer);
        char ch = _lexer->curChar;

        if (ch == '\0') {
            __hjERROR_LEX__(_lexer, "unterminated string!");
        }
        else if (ch == '"') {
            /* closing double quote */
            _lexer->curToken.type = hjTOKEN_STR;
            break;
        }
        else if (ch == '%') {
            /* '%' must introduce an embedded expression: "%(" */
            if (!hjMatch_NextChar(_lexer, '(')) {
                __hjERROR_LEX__(_lexer, "\"%\" should followed by \"(\"!");
            }
            if (_lexer->Lcounter > 0) {
                __hjERROR_COMPILE__(_lexer, "not support nest interpolate expression");
            }
            _lexer->Lcounter = 1;
            _lexer->curToken.type = hjTOKEN_EXPR;
            break;
        }

        /* ordinary character: store it as is */
        if (_lexer->curChar != '\\') {
            hjAdd_ByteArray(_lexer->vm, &bytes, _lexer->curChar);
            continue;
        }

        /* escape sequence: look at the character after the backslash */
        hjGet_NextChar(_lexer);
        uint8_t decoded = 0;
        int isSimple = 1;
        switch (_lexer->curChar) {
        case '0':  decoded = '\0'; break;
        case 'a':  decoded = '\a'; break;
        case 'b':  decoded = '\b'; break;
        case 'f':  decoded = '\f'; break;
        case 'n':  decoded = '\n'; break;
        case 'r':  decoded = '\r'; break;
        case 't':  decoded = '\t'; break;
        case '"':  decoded = '"';  break;
        case '\\': decoded = '\\'; break;
        case 'u':
            /* \uXXXX: appends the UTF-8 bytes itself */
            hjLex_CodePointOfUTF8(_lexer, &bytes);
            isSimple = 0;
            break;
        default:
            __hjERROR_LEX__(_lexer, "unsupport escape \\%c", _lexer->curChar);
            isSimple = 0;
            break;
        }
        if (isSimple) {
            hjAdd_ByteArray(_lexer->vm, &bytes, decoded);
        }
    }

    hjClear_ByteArray(_lexer->vm, &bytes);
}
/*
 * Scan the four hexadecimal digits that follow "\u" and append the UTF-8
 * encoding of that value to _buf.
 * Reaching the end of the source or meeting a non-hex character is a lex error.
 */
hjPRIVATE void hjLex_CodePointOfUTF8(hjLexer* _lexer, hjByteArray* _buf) {
    int codePoint = 0;
    uint8_t nibble = 0;
    uint32_t remaining;

    /* accumulate the value of the four hex digits */
    for (remaining = 4; remaining > 0; --remaining) {
        hjGet_NextChar(_lexer);
        char c = _lexer->curChar;
        if (c == '\0') {
            __hjERROR_LEX__(_lexer, "unterminated unicode!");
        }
        else if (c >= '0' && c <= '9') {
            nibble = c - '0';
        }
        else if (c >= 'a' && c <= 'f') {
            nibble = c - 'a' + 10;
        }
        else if (c >= 'A' && c <= 'F') {
            nibble = c - 'A' + 10;
        }
        else {
            __hjERROR_LEX__(_lexer, "invalid unicode!");
        }
        codePoint = (codePoint * 16) | nibble;
    }

    uint32_t byteNum = hjGet_ByteNumOfEncodedUTF8(codePoint);
    hjASSERT(byteNum != 0, "utf8 encode bytes should be between 1 and 4 ");
    /*
     * Reserve byteNum bytes by appending zeros first, so that the encoder can
     * write straight into _buf->data.
     */
    hjWrite_ByteArray(_lexer->vm, _buf, 0, byteNum);
    /* encode into the bytes just reserved at the end of the buffer */
    uint8_t* tail = _buf->data + _buf->count - byteNum;
    hjEncodeUTF8(tail, codePoint);
}
//hj_vm.cpp
#include"hj_vm.h"
/* Initialize a virtual machine: no lexer attached yet and zero bytes accounted for. */
hjPUBLIC void hjInit_VM(hjVM* _vm) {
    _vm->curLexer = NULL;
    _vm->allocatedByte = 0;
}
/*
 * Allocate and initialize a new virtual machine.
 * The caller owns the returned object and releases it with free().
 * An allocation failure is reported as a memory error instead of being
 * ignored (the NULL pointer used to be passed on to hjInit_VM).
 */
hjPUBLIC hjVM* hjNew_VM() {
    hjVM* vm = (hjVM*)malloc(sizeof(hjVM));
    if (vm == NULL) {
        __hjERROR_MEM__("allocate hjVM failed!");
        return NULL;
    }
    hjInit_VM(vm);
    return vm;
}
//hj.cpp
#include"hj_lexer.h"
/*
项目文件引用关系——下次做个打印文件引用关系的工具!!!
common
|————core
| |————error
| | |————vm
| | |————lexer
| |
| |————mem
| | |————lexer
| | | |
| | |
|
|
|
|————utf8
| |————lexer
*/
/*
 * Manual smoke test for hjError: builds a lexer whose current token is the
 * word "function" on line 50 of a sample file and prints a memory error.
 * The lexer lives on the stack and is zero-initialized, so nothing is leaked
 * and no field is left indeterminate (the previous version used two unchecked
 * malloc calls that were never freed).
 */
void test_error() {
    hjLexer myLexer = { 0 };
    myLexer.curToken.lineNo = 50;
    myLexer.curToken.start = "function";
    myLexer.curToken.type = hjTOKEN_FUNCTION;
    myLexer.file = "E:\\C\\hjRun.hj";
    hjError(&myLexer, hjERROR_MEM, "出错了!!!");
}
/*
 * Run a script file: currently this tokenizes the file and prints every token
 * as "line:TYPE[text]".
 * The directory part of _path (up to and including the last '/') is stored in
 * the global hj_rootDir. The VM and the source buffer are released before
 * returning; the hj_rootDir buffer stays alive because the global keeps
 * pointing at it.
 */
hjPRIVATE void hjRunScript(const char* _path) {
    const char* lastSlash = strrchr(_path, '/');
    if (lastSlash != NULL) {
        size_t dirLen = (size_t)(lastSlash - _path) + 1;   /* includes the '/' */
        char* root = (char*)malloc(dirLen + 1);
        if (root == NULL) {
            __hjERROR_MEM__("allocate root directory failed!");
            return;
        }
        memcpy(root, _path, dirLen);
        root[dirLen] = '\0';
        hj_rootDir = root;
    }
    /* create the virtual machine */
    hjVM* vm = hjNew_VM();
    /* read the script */
    char* srcCode = hjReadScript(_path);
    /* create the lexer */
    hjLexer myLexer;
    hjInit_Lexer(vm, &myLexer, _path, srcCode);
    /* import the token-name table (defines hjTokenArr) */
#include "token.list"
    while (myLexer.curToken.type != hjTOKEN_EOF) {
        hjGet_NextToken(&myLexer);
        /* line number, token type name, then the token text */
        printf("%u:%s[", (unsigned)myLexer.curToken.lineNo, hjTokenArr[myLexer.curToken.type]);
        printf("%.*s", (int)myLexer.curToken.len, myLexer.curToken.start);
        printf("]\n");
    }
    /* the tokens point into srcCode, so release it only after the loop */
    free(srcCode);
    free(vm);
}
/*
 * Entry point. Runs the script named by the first command-line argument, or
 * "test.hj" when no argument is given.
 * The previous version overwrote argc and argv[1] unconditionally, which
 * ignored the command line and made its argument check unreachable.
 */
int main(int argc,const char** argv) {
    const char* scriptPath = (argc >= 2) ? argv[1] : "test.hj";
    //test_error();
    hjRunScript(scriptPath);
    /* keep the console window open (Windows) */
    system("pause");
    return 0;
}
//token.list(文件名无所谓,把这个数组放在其他文件里也一样!)
/*
 * Printable names of the token types, indexed by hjTokenType value.
 * The order must match the order of the hjTokenType enumerators exactly.
 */
char* hjTokenArr[] = {
    "UNKNOWN",
    "NUM",
    "STR",
    "ID",
    "EXPR",
    "LET",
    "FUNCTION",
    "IF",
    "ELSE",
    "TRUE",
    "FALSE",
    "WHILE",
    "FOR",
    "BREAK",
    "CONTINUE",
    "RETURN",
    "NIL",
    "CLASS",
    "SELF",
    "STATIC",
    "IS",
    "SUPER",
    "IMPORT",
    "AT",
    "WELL",
    "ACCENT",
    "SEMI",
    "BACKSLASH",
    "COLON",
    "COMMA",
    "DOT",
    "DOT_DOT",
    "LEFT_PAREN",
    "RIGHT_PAREN",
    "LEFT_BRACKET",
    "RIGHT_BRACKET",
    "LEFT_BRACE",
    "RIGHT_BRACE",
    "ASSIGN",
    "ADD",
    "SUB",
    "MUL",
    "DIV",
    "MOD",
    "POW",
    "BIT_AND",
    "BIT_OR",
    "BIT_NOT",
    "BIT_SHIFT_LEFT",
    "BIT_SHIFT_RIGHT",
    "LOGIC_AND",
    "LOGIC_OR",
    "LOGIC_NOT",
    "EQ",
    "NEQ",
    "GT",
    "GE",
    "LT",
    "LE",
    "QUE",
    "EOL",
    "EOF"
};
\;
\;
\;
\;
// 单行注释
/*
多
行
注
释
*/
import people for People
function func(){
print("hello word")
let p = People.new("demllie","turtle")
p.run()
}
class Family : People{
let father
let mother
let child
new(f,m,c){
father = f
mother = m
child = c
super("Kiana","wife")
}
}
let f = Family.new("Kiana","female","shine")
f.run()
func()
8:IMPORT[import]
8:ID[people]
8:FOR[for]
8:ID[People]
9:FUNCTION[function]
9:ID[func]
9:LEFT_PAREN[(]
9:RIGHT_PAREN[)]
9:LEFT_BRACE[{]
10:ID[print]
10:LEFT_PAREN[(]
10:STR["hello word"]
10:RIGHT_PAREN[)]
11:LET[let]
11:ID[p]
11:ASSIGN[=]
11:ID[People]
11:DOT[.]
11:ID[new]
11:LEFT_PAREN[(]
11:STR["demllie"]
11:COMMA[,]
11:STR["turtle"]
11:RIGHT_PAREN[)]
12:ID[p]
12:DOT[.]
12:ID[run]
12:LEFT_PAREN[(]
12:RIGHT_PAREN[)]
13:RIGHT_BRACE[}]
16:CLASS[class]
16:ID[Family]
16:COLON[:]
16:ID[People]
16:LEFT_BRACE[{]
17:LET[let]
17:ID[father]
18:LET[let]
18:ID[mother]
19:LET[let]
19:ID[child]
20:ID[new]
20:LEFT_PAREN[(]
20:ID[f]
20:COMMA[,]
20:ID[m]
20:COMMA[,]
20:ID[c]
20:RIGHT_PAREN[)]
20:LEFT_BRACE[{]
21:ID[father]
21:ASSIGN[=]
21:ID[f]
22:ID[mother]
22:ASSIGN[=]
22:ID[m]
23:ID[child]
23:ASSIGN[=]
23:ID[c]
25:SUPER[super]
25:LEFT_PAREN[(]
25:STR["Kiana"]
25:COMMA[,]
25:STR["wife"]
25:RIGHT_PAREN[)]
26:RIGHT_BRACE[}]
27:RIGHT_BRACE[}]
30:LET[let]
30:ID[f]
30:ASSIGN[=]
30:ID[Family]
30:DOT[.]
30:ID[new]
30:LEFT_PAREN[(]
30:STR["Kiana"]
30:COMMA[,]
30:STR["female"]
30:COMMA[,]
30:STR["shine"]
30:RIGHT_PAREN[)]
31:ID[f]
31:DOT[.]
31:ID[run]
31:LEFT_PAREN[(]
31:RIGHT_PAREN[)]
33:ID[func]
33:LEFT_PAREN[(]
33:RIGHT_PAREN[)]
35:EOF[]
除了编码风格外,文件依赖关系也变了,增加了几种token类型,更改了关键字!
hj=happy joy
参考:《自制编程语言,基于C语言》