tcc 是一个很有趣的小型 C 编译器,其最大的特点是可以实现即时编译 C 代码,无需产生中间文件即可生成 native code,且将内部功能全部作为库函数暴露出来,可以很方便地嵌入到自己的应用中。
目前有一些现成的 lua 模块可以将 tcc 引入到 lua 里,用其即时生成 C-function 供 lua 使用。其中 lua-tcc 是最简单的一个,但代码年代较久,需要进行 patch 才可同最新的 tcc-0.9.25 一同工作,patch 后的代码参见这里。
下面是一个例子,对比了 lua 原生的 json 解析器 json4lua 和通过 tcc 内联的 c json 解析器 yajl 的效率(需预先安装 json4lua、lua-tcc 以及 yajl):
-- Benchmark: json4lua (pure Lua) versus the yajl C parser compiled at run
-- time through lua-tcc. Requires json4lua, lua-tcc and libyajl (1.x API).
local json = require("json")
local tcc = require("lua_tcc")

-- Compile the C binding in memory; only `parse_json` is exported and the
-- generated code is linked against libyajl.
local yajl = tcc.compile([=[
/* NOTE(review): the header names were lost from the published listing; the
 * six below are reconstructed from the APIs used -- confirm for your setup. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <lua.h>
#include <lauxlib.h>
#include <yajl/yajl_parse.h>

/* FIXME: maximum container nesting level hardcoded to 512 */
#define MAX_NEST_LEVEL 512

typedef enum { not_in_container, in_map, in_array } container_t;

/* Parser state shared by all yajl callbacks. */
typedef struct {
    lua_State *L;
    int curr_level;                          /* index of the innermost open container */
    container_t env[MAX_NEST_LEVEL];         /* kind of container open at each level */
    int start_of_container[MAX_NEST_LEVEL];  /* 1 until the first map key is seen */
} parse_ctx_t;

/* A value sits on top of the Lua stack. Inside an array it is appended to
 * the table just below it. Inside a map it stays on the stack; the pair is
 * stored when the next key arrives or the map ends. Note that a JSON null in
 * an array assigns nil and therefore does not extend the array. */
static int store_value(parse_ctx_t *c)
{
    if (c->env[c->curr_level] == in_array) {
        lua_rawseti(c->L, -2, lua_objlen(c->L, -2) + 1);
    }
    return 1;
}

/* Push a new table and enter a container of the given kind. Returning 0
 * makes yajl cancel the parse, which is reported as a parsing error. */
static int open_container(parse_ctx_t *c, container_t kind)
{
    /* Refuse to nest deeper than the fixed-size bookkeeping arrays allow. */
    if (c->curr_level + 1 >= MAX_NEST_LEVEL) {
        return 0;
    }
    /* Each level needs room for its table plus one pending key and value. */
    if (!lua_checkstack(c->L, 3)) {
        return 0;
    }
    lua_newtable(c->L);
    c->curr_level++;
    c->env[c->curr_level] = kind;
    c->start_of_container[c->curr_level] = 1;
    return 1;
}

/* Leave the current container; the finished table is then an ordinary
 * value of the enclosing container. */
static int close_container(parse_ctx_t *c)
{
    c->curr_level--;
    return store_value(c);
}

int parse_null(void *ctx)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    lua_pushnil(c->L);
    return store_value(c);
}

int parse_boolean(void *ctx, int bool_val)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    lua_pushboolean(c->L, bool_val);
    return store_value(c);
}

int parse_integer(void *ctx, long int_val)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    lua_pushinteger(c->L, (lua_Integer)int_val);
    return store_value(c);
}

int parse_double(void *ctx, double dbl_val)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    lua_pushnumber(c->L, (lua_Number)dbl_val);
    return store_value(c);
}

int parse_string(void *ctx, const unsigned char *str_val, unsigned int str_len)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    lua_pushlstring(c->L, (const char*)str_val, str_len);
    return store_value(c);
}

int parse_start_map(void *ctx)
{
    return open_container((parse_ctx_t*)ctx, in_map);
}

int parse_map_key(void *ctx, const unsigned char *key, unsigned int len)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    if (c->env[c->curr_level] != in_map) {
        return 0;
    }
    if (c->start_of_container[c->curr_level]) {
        /* first key of this map: there is no previous pair to store */
        c->start_of_container[c->curr_level] = 0;
    } else {
        /* store the previous key/value pair */
        lua_rawset(c->L, -3);
    }
    /* push the new pair's key */
    lua_pushlstring(c->L, (const char*)key, len);
    return 1;
}

int parse_end_map(void *ctx)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    if (c->env[c->curr_level] != in_map) {
        return 0;
    }
    /* Store the last pair -- but only if the map had one. An empty map "{}"
     * has no pending key/value, and lua_rawset would corrupt the stack. */
    if (!c->start_of_container[c->curr_level]) {
        lua_rawset(c->L, -3);
    }
    return close_container(c);
}

int parse_start_array(void *ctx)
{
    return open_container((parse_ctx_t*)ctx, in_array);
}

int parse_end_array(void *ctx)
{
    parse_ctx_t *c = (parse_ctx_t*)ctx;
    if (c->env[c->curr_level] != in_array) {
        return 0;
    }
    return close_container(c);
}

/* Lua C function: parse_json(str) -> decoded value.
 * Raises a Lua error "parsing error: ..." when the text cannot be parsed. */
int parse_json(lua_State *L)
{
    static yajl_parser_config config = {
        .allowComments = 1,
        .checkUTF8 = 1
    };
    static yajl_callbacks callbacks = {
        .yajl_null = parse_null,
        .yajl_boolean = parse_boolean,
        .yajl_integer = parse_integer,
        .yajl_double = parse_double,
        .yajl_number = NULL,
        .yajl_string = parse_string,
        .yajl_start_map = parse_start_map,
        .yajl_map_key = parse_map_key,
        .yajl_end_map = parse_end_map,
        .yajl_start_array = parse_start_array,
        .yajl_end_array = parse_end_array
    };
    parse_ctx_t ctx;
    size_t json_len;
    const unsigned char *json_str =
        (const unsigned char*)luaL_checklstring(L, 1, &json_len);
    yajl_handle yajl;
    yajl_status status;

    ctx.L = L;
    ctx.curr_level = 0;
    ctx.env[0] = not_in_container;
    ctx.start_of_container[0] = 0;

    yajl = yajl_alloc(&callbacks, &config, NULL, &ctx);
    if (yajl == NULL) {
        return luaL_error(L, "parsing error: cannot allocate yajl parser");
    }

    status = yajl_parse(yajl, json_str, json_len);
    if (status == yajl_status_ok) {
        status = yajl_parse_complete(yajl);
    }

    if (status != yajl_status_ok) {
        unsigned char *err_msg = yajl_get_error(yajl, 1, json_str, json_len);
        /* Drop partially built values but keep the argument at index 1. */
        lua_settop(L, 1);
        lua_pushfstring(L, "parsing error: %s", (const char*)err_msg);
        yajl_free_error(yajl, err_msg);
        /* lua_error does not return, so release the parser first. */
        yajl_free(yajl);
        return lua_error(L);
    }

    yajl_free(yajl);
    return 1;
}
]=],
  {"parse_json"},
  {"yajl"}
)

local ntimes = 300000
local str = '{"a":1234,"b":2.34,"c":true,"d":"hello","e":[1,2,{"a":{"b":{"c":"d"}}}]}'

--- Decode `str` `ntimes` times and print the elapsed wall-clock seconds.
-- @tparam string label text printed before the elapsed time
-- @tparam function decode decoder called with the JSON string
local function bench(label, decode)
  local st = os.time()
  for _ = 1, ntimes do
    decode(str)
  end
  local ed = os.time()
  print(label, (ed - st))
end

bench("yajl elapsed = ", yajl.parse_json)
bench("json4lua elapsed = ", json.decode)
在我的 colinux 虚拟环境下用标准 lua 运行该程序时结果如下:
$ lua yajl.lua
yajl elapsed = 4
json4lua elapsed = 134
相比之下,通过 tcc 内联的 yajl 解析器解析相同的 json 串时,速度是 json4lua 的 33.5 倍!而换用 luajit-2.0 运行该程序时结果如下:
$ luajit yajl.lua
yajl elapsed = 2
json4lua elapsed = 52
json4lua 表现好了不少,但 yajl 的解析速度仍是 json4lua 的 26 倍。由此可见,在恰当的地方通过 tcc 内联 c 代码替代 lua 处理数据可以收到相当好的性能提升效果,且由于模块代码以源代码形式分发,对于没有外部依赖库的代码不会存在传统 Lua-C 模块那样的跨平台二进制接口兼容性问题。