稍微在吉里吉里2.28的源代码里找了下TJS2 VM的执行机制,主要着眼于dispatch loop的实现,并且找到了下面的代码:
kirikiri2\src\core\tjs2\tjsInterCodeExec.cpp/972:
// Interprets the VM code stored in CodeArea, starting at word offset `startip`,
// using one endless loop around a single switch on the current opcode.
//
// Parameters:
//   ra_org   - base pointer of the register area. Registers are accessed via
//              TJS_GET_VM_REG(ra, ...); ra[-1] is read as an object by several
//              opcodes (VM_INV, VM_CHKINV, VM_EVAL, VM_EEXP, VM_REGMEMBER).
//   startip  - offset (in code words) into CodeArea of the first instruction.
//   args,
//   numargs  - forwarded unchanged to the call helpers and to
//              ExecuteCodeInTryBlock.
//   result   - may be NULL; when non-NULL, VM_SRV copies a register into it.
//
// Returns the offset, relative to CodeArea, of the word following the VM_RET
// or VM_EXTRY instruction that ended execution.
//
// Exceptions: eTJSSilent is rethrown as is; eTJSScriptException and
// eTJSScriptError get a trace entry added and are rethrown; every other caught
// type is reported through DisplayExceptionGeneratedCode and handed to
// TJS_eTJSScriptError together with the failing code position.
tjs_int tTJSInterCodeContext::ExecuteCode(tTJSVariant *ra_org, tjs_int startip,
tTJSVariant **args, tjs_int numargs, tTJSVariant *result)
{
// execute VM codes
// codesave is declared outside the try block so that the catch handlers can
// read the address of the instruction that was being executed.
tjs_int32 *codesave;
try
{
tjs_int32 *code = codesave = CodeArea + startip;
// The stack tracer receives the address of codesave, not its current value.
if(TJSStackTracerEnabled()) TJSStackTracerSetCodePointer(CodeArea, &codesave);
tTJSVariant *ra = ra_org;
tTJSVariant *da = DataArea;
// Condition flag: written by VM_TT/VM_TF/VM_C*/VM_NF, read by
// VM_SETF/VM_SETNF/VM_JF/VM_JNF.
bool flag = false;
while(true)
{
// Remember the start of the current instruction before dispatching it.
codesave = code;
// In every case below, `code` is advanced by the instruction's total length
// in words (opcode + operands) unless the case jumps or returns.
switch(*code)
{
case VM_NOP:
code ++;
break;
// ---- register moves / clears ----
case VM_CONST:
// Destination is a register (ra), source is an entry of the data area (da).
TJS_GET_VM_REG(ra, code[1]).CopyRef(TJS_GET_VM_REG(da, code[2]));
code += 3;
break;
case VM_CP:
// Register-to-register copy.
TJS_GET_VM_REG(ra, code[1]).CopyRef(TJS_GET_VM_REG(ra, code[2]));
code += 3;
break;
case VM_CL:
TJS_GET_VM_REG(ra, code[1]).Clear();
code += 2;
break;
case VM_CCL:
ContinuousClear(ra, code);
code += 3;
break;
// ---- tests and comparisons: result goes into `flag` ----
case VM_TT:
flag = TJS_GET_VM_REG(ra, code[1]).operator bool();
code += 2;
break;
case VM_TF:
flag = !(TJS_GET_VM_REG(ra, code[1]).operator bool());
code += 2;
break;
case VM_CEQ:
flag = TJS_GET_VM_REG(ra, code[1]).NormalCompare(
TJS_GET_VM_REG(ra, code[2]));
code += 3;
break;
case VM_CDEQ:
flag = TJS_GET_VM_REG(ra, code[1]).DiscernCompare(
TJS_GET_VM_REG(ra, code[2]));
code += 3;
break;
// NOTE(review): VM_CLT calls GreaterThan and VM_CGT calls LittlerThan.
// Presumably this reflects the operand-order convention of those methods;
// confirm against tTJSVariant before touching it.
case VM_CLT:
flag = TJS_GET_VM_REG(ra, code[1]).GreaterThan(
TJS_GET_VM_REG(ra, code[2]));
code += 3;
break;
case VM_CGT:
flag = TJS_GET_VM_REG(ra, code[1]).LittlerThan(
TJS_GET_VM_REG(ra, code[2]));
code += 3;
break;
// ---- flag <-> register ----
case VM_SETF:
TJS_GET_VM_REG(ra, code[1]) = flag;
code += 2;
break;
case VM_SETNF:
TJS_GET_VM_REG(ra, code[1]) = !flag;
code += 2;
break;
case VM_LNOT:
TJS_GET_VM_REG(ra, code[1]).logicalnot();
code += 2;
break;
case VM_NF:
flag = !flag;
code ++;
break;
// ---- jumps ----
// TJS_ADD_VM_CODE_ADDR is defined outside this excerpt; presumably it moves
// `code` by the offset stored in code[1] -- TODO confirm.
case VM_JF:
// Taken when flag is true; otherwise fall through to the next instruction.
if(flag)
TJS_ADD_VM_CODE_ADDR(code, code[1]);
else
code += 2;
break;
case VM_JNF:
// Taken when flag is false; otherwise fall through to the next instruction.
if(!flag)
TJS_ADD_VM_CODE_ADDR(code, code[1]);
else
code += 2;
break;
case VM_JMP:
TJS_ADD_VM_CODE_ADDR(code, code[1]);
break;
// ---- increment / decrement: register form plus PD / PI / P property forms ----
case VM_INC:
TJS_GET_VM_REG(ra, code[1]).increment();
code += 2;
break;
case VM_INCPD:
OperatePropertyDirect0(ra, code, TJS_OP_INC);
code += 4;
break;
case VM_INCPI:
OperatePropertyIndirect0(ra, code, TJS_OP_INC);
code += 4;
break;
case VM_INCP:
OperateProperty0(ra, code, TJS_OP_INC);
code += 3;
break;
case VM_DEC:
TJS_GET_VM_REG(ra, code[1]).decrement();
code += 2;
break;
case VM_DECPD:
OperatePropertyDirect0(ra, code, TJS_OP_DEC);
code += 4;
break;
case VM_DECPI:
OperatePropertyIndirect0(ra, code, TJS_OP_DEC);
code += 4;
break;
case VM_DECP:
OperateProperty0(ra, code, TJS_OP_DEC);
code += 3;
break;
// TJS_DEF_VM_P(vmcode, rope) expands to four cases for one binary operator:
//   VM_<vmcode>    - applies tTJSVariant member `rope` register-to-register (3 words)
//   VM_<vmcode>PD  - OperatePropertyDirect   with TJS_OP_<vmcode>        (5 words)
//   VM_<vmcode>PI  - OperatePropertyIndirect with TJS_OP_<vmcode>        (5 words)
//   VM_<vmcode>P   - OperateProperty         with TJS_OP_<vmcode>        (4 words)
// The last `break` has no semicolon; each use site below supplies it.
#define TJS_DEF_VM_P(vmcode, rope) \
case VM_##vmcode: \
TJS_GET_VM_REG(ra, code[1]).rope(TJS_GET_VM_REG(ra, code[2])); \
code += 3; \
break; \
case VM_##vmcode##PD: \
OperatePropertyDirect(ra, code, TJS_OP_##vmcode); \
code += 5; \
break; \
case VM_##vmcode##PI: \
OperatePropertyIndirect(ra, code, TJS_OP_##vmcode); \
code += 5; \
break; \
case VM_##vmcode##P: \
OperateProperty(ra, code, TJS_OP_##vmcode); \
code += 4; \
break
TJS_DEF_VM_P(LOR, logicalorequal);
TJS_DEF_VM_P(LAND, logicalandequal);
TJS_DEF_VM_P(BOR, operator |=);
TJS_DEF_VM_P(BXOR, operator ^=);
TJS_DEF_VM_P(BAND, operator &=);
TJS_DEF_VM_P(SAR, operator >>=);
TJS_DEF_VM_P(SAL, operator <<=);
TJS_DEF_VM_P(SR, rbitshiftequal);
TJS_DEF_VM_P(ADD, operator +=);
TJS_DEF_VM_P(SUB, operator -=);
TJS_DEF_VM_P(MOD, operator %=);
TJS_DEF_VM_P(DIV, operator /=);
TJS_DEF_VM_P(IDIV, idivequal);
TJS_DEF_VM_P(MUL, operator *=);
#undef TJS_DEF_VM_P
// ---- unary operations applied in place to one register ----
case VM_BNOT:
TJS_GET_VM_REG(ra, code[1]).bitnot();
code += 2;
break;
case VM_ASC:
CharacterCodeOf(TJS_GET_VM_REG(ra, code[1]));
code += 2;
break;
case VM_CHR:
CharacterCodeFrom(TJS_GET_VM_REG(ra, code[1]));
code += 2;
break;
case VM_NUM:
TJS_GET_VM_REG(ra, code[1]).tonumber();
code += 2;
break;
case VM_CHS:
TJS_GET_VM_REG(ra, code[1]).changesign();
code += 2;
break;
case VM_INV:
// Invalidates the object held in the register, then overwrites that register
// with whether Invalidate() returned TJS_S_TRUE.
TJS_GET_VM_REG(ra, code[1]) =
(TJS_GET_VM_REG(ra,
code[1]).AsObjectClosureNoAddRef().Invalidate(0,
NULL, NULL, ra[-1].AsObjectNoAddRef()) == TJS_S_TRUE);
code += 2;
break;
case VM_CHKINV:
// Overwrites the register with TJSIsObjectValid() applied to the IsValid() result.
TJS_GET_VM_REG(ra, code[1]) =
TJSIsObjectValid(TJS_GET_VM_REG(ra,
code[1]).AsObjectClosureNoAddRef().IsValid(0,
NULL, NULL, ra[-1].AsObjectNoAddRef()));
code += 2;
break;
// ---- in-place type conversions ----
case VM_INT:
TJS_GET_VM_REG(ra, code[1]).ToInteger();
code += 2;
break;
case VM_REAL:
TJS_GET_VM_REG(ra, code[1]).ToReal();
code += 2;
break;
case VM_STR:
TJS_GET_VM_REG(ra, code[1]).ToString();
code += 2;
break;
case VM_OCTET:
TJS_GET_VM_REG(ra, code[1]).ToOctet();
code += 2;
break;
// ---- typeof ----
case VM_TYPEOF:
TypeOf(TJS_GET_VM_REG(ra, code[1]));
code += 2;
break;
case VM_TYPEOFD:
TypeOfMemberDirect(ra, code, TJS_MEMBERMUSTEXIST);
code += 4;
break;
case VM_TYPEOFI:
TypeOfMemberIndirect(ra, code, TJS_MEMBERMUSTEXIST);
code += 4;
break;
// ---- eval: VM_EVAL and VM_EEXP differ only in the last Eval() argument ----
// The second argument is NULL when TJSEvalOperatorIsOnGlobal is set,
// otherwise the object held in ra[-1].
case VM_EVAL:
Eval(TJS_GET_VM_REG(ra, code[1]),
TJSEvalOperatorIsOnGlobal ? NULL : ra[-1].AsObjectNoAddRef(),
true);
code += 2;
break;
case VM_EEXP:
Eval(TJS_GET_VM_REG(ra, code[1]),
TJSEvalOperatorIsOnGlobal ? NULL : ra[-1].AsObjectNoAddRef(),
false);
code += 2;
break;
case VM_CHKINS:
// Note the operand order: register code[2] is passed first, code[1] second.
InstanceOf(TJS_GET_VM_REG(ra, code[2]),
TJS_GET_VM_REG(ra, code[1]));
code += 3;
break;
// ---- calls: instruction length is variable, so each helper's return value
// is used as the number of words to advance ----
case VM_CALL:
case VM_NEW:
// VM_CALL and VM_NEW share one helper.
code += CallFunction(ra, code, args, numargs);
break;
case VM_CALLD:
code += CallFunctionDirect(ra, code, args, numargs);
break;
case VM_CALLI:
code += CallFunctionIndirect(ra, code, args, numargs);
break;
// ---- property get/set, "direct" forms; variants differ only in the flags ----
case VM_GPD:
GetPropertyDirect(ra, code, 0);
code += 4;
break;
case VM_GPDS:
GetPropertyDirect(ra, code, TJS_IGNOREPROP);
code += 4;
break;
case VM_SPD:
SetPropertyDirect(ra, code, 0);
code += 4;
break;
case VM_SPDE:
SetPropertyDirect(ra, code, TJS_MEMBERENSURE);
code += 4;
break;
case VM_SPDEH:
SetPropertyDirect(ra, code, TJS_MEMBERENSURE|TJS_HIDDENMEMBER);
code += 4;
break;
case VM_SPDS:
SetPropertyDirect(ra, code, TJS_MEMBERENSURE|TJS_IGNOREPROP);
code += 4;
break;
// ---- property get/set, "indirect" forms; variants differ only in the flags ----
case VM_GPI:
GetPropertyIndirect(ra, code, 0);
code += 4;
break;
case VM_GPIS:
GetPropertyIndirect(ra, code, TJS_IGNOREPROP);
code += 4;
break;
case VM_SPI:
SetPropertyIndirect(ra, code, 0);
code += 4;
break;
case VM_SPIE:
SetPropertyIndirect(ra, code, TJS_MEMBERENSURE);
code += 4;
break;
case VM_SPIS:
SetPropertyIndirect(ra, code, TJS_MEMBERENSURE|TJS_IGNOREPROP);
code += 4;
break;
case VM_GETP:
GetProperty(ra, code);
code += 3;
break;
case VM_SETP:
SetProperty(ra, code);
code += 3;
break;
// ---- member deletion ----
case VM_DELD:
DeleteMemberDirect(ra, code);
code += 4;
break;
case VM_DELI:
DeleteMemberIndirect(ra, code);
code += 4;
break;
// ---- return value / return ----
case VM_SRV:
// The copy is skipped when the caller passed no result slot.
if(result) result->CopyRef(TJS_GET_VM_REG(ra, code[1]));
code += 2;
break;
case VM_RET:
// Leaves the loop: returns the offset of the word after this opcode.
return code+1-CodeArea;
// ---- try blocks / exceptions ----
case VM_ENTRY:
// Runs the code that follows this 3-word instruction via
// ExecuteCodeInTryBlock, then resumes at the offset that call returns.
// code[1] is passed on as a code address computed relative to this
// instruction, code[2] as a register address.
code = CodeArea + ExecuteCodeInTryBlock(ra, code-CodeArea + 3, args,
numargs, result, TJS_FROM_VM_CODE_ADDR(code[1])+code-CodeArea,
TJS_FROM_VM_REG_ADDR(code[2]));
break;
case VM_EXTRY:
return code+1-CodeArea; // same as ret
case VM_THROW:
ThrowScriptException(TJS_GET_VM_REG(ra, code[1]),
Block, CodePosToSrcPos(code-CodeArea));
code += 2; // actually here not proceed...
break;
// ---- miscellaneous ----
case VM_CHGTHIS:
TJS_GET_VM_REG(ra, code[1]).ChangeClosureObjThis(
TJS_GET_VM_REG(ra, code[2]).AsObjectNoAddRef());
code += 3;
break;
case VM_GLOBAL:
TJS_GET_VM_REG(ra, code[1]) = Block->GetTJS()->GetGlobalNoAddRef();
code += 2;
break;
case VM_ADDCI:
AddClassInstanceInfo(ra, code);
code+=3;
break;
case VM_REGMEMBER:
RegisterObjectMember(ra[-1].AsObjectNoAddRef());
code ++;
break;
case VM_DEBUGGER:
TJSNativeDebuggerBreak();
code ++;
break;
default:
// Unknown opcode.
ThrowInvalidVMCode();
}
}
}
// In all handlers below, codesave-CodeArea is the offset of the instruction
// that was being executed when the exception was raised.
// NOTE(review): the first three handlers use `throw e;`, which throws a copy
// made from the handler's declared type; a bare `throw;` would rethrow the
// original exception object instead. Left unchanged here.
catch(eTJSSilent &e)
{
// Passed on without adding a trace entry or displaying anything.
throw e;
}
catch(eTJSScriptException &e)
{
e.AddTrace(this, codesave-CodeArea);
throw e;
}
catch(eTJSScriptError &e)
{
e.AddTrace(this, codesave-CodeArea);
throw e;
}
// Every remaining handler follows the same pattern: display the code around
// the failing position, then hand the message and position to
// TJS_eTJSScriptError (defined outside this excerpt; presumably it throws a
// script error -- TODO confirm).
catch(eTJS &e)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(e.GetMessage(), this, codesave-CodeArea);
}
catch(exception &e)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(e.what(), this, codesave-CodeArea);
}
catch(const wchar_t *text)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(text, this, codesave-CodeArea);
}
catch(const char *text)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(text, this, codesave-CodeArea);
}
#ifdef TJS_SUPPORT_VCL
// Handlers for VCL exception types; compiled only when TJS_SUPPORT_VCL is defined.
catch(const EAccessViolation &e)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(e.Message.c_str(), this, codesave-CodeArea);
}
catch(const Exception &e)
{
DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
TJS_eTJSScriptError(e.Message.c_str(), this, codesave-CodeArea);
}
#endif
// Only reachable if one of the handlers above finishes without throwing.
return codesave-CodeArea;
}
果然还是相当典型且直观的解释器。这种不断读入VM码(在每个case之后决定code的增量),并通过单一的switch语句来完成dispatch的方法,是基本解释器实现里最直观,但通常也是最慢的方式。
对于一个比较小的指令集(例如RISC),threaded code通常是更好的解决方案,因为在每个指令例程的最后添加跳转操作能减少现代CPU的跳转预测失误。其中又有indirect与direct这两大类threading。
Anton Ertl有篇关于threaded code的不错的文章。在《Virtual Machines: Versatile Platforms for Systems and Processes》一书中有更详细的解释。
TJS2 VM现有的实现有不少让人很想吐槽的地方,例如说基于引用计数的GC,还有那难以阅读的代码……那么大量的宏用起来真够难受的。
其实W.Dee氏之所以宁可直接开始实现吉里吉里3的Risse VM而不在现有的吉里吉里2的codebase上修改,恐怕也是因为这codebase太乱了吧。新的Risse VM已经有了不少实质性的改进,例如用Boehm GC来代替原本不太好的引用计数GC;将中间表示(IR)改进为SSA形式,等等。不过就这么把TJS2 VM扔掉也怪可惜的。想慢慢把TJS2 VM中可改进的地方挖出来,看看是否适合给予改进。如果能赶在吉里吉里3的Risse VM完成前对TJS2 VM做些改进,那还算有价值。
不过吉里吉里系列内的VM有个很紧的要求,那就是整个运行时的外表看起来要像一个解释器,即:内部实现是先将文本形式的脚本源代码编译为中间表示,然后再由VM执行(此处的VM又是一个真正的解释器)。这对编译的部分要求比较高,使一些耗时间的优化不太好进行。要是W.Dee氏肯接受真正完整的编译,再交由VM执行,会轻松很多。
(试想一下,写一个程序把Java Compiler与JVM包装起来,像解释脚本一样执行Java源文件。如果你用的是Sun的JDK,恭喜你,编译HelloWorld可能也要半分钟。而且JDK 1.6.0系列还经常诡异地出现NoClassDefFoundError,让我只好对1.6.0系列敬而远之)
附注:吉里吉里2的源代码基于GPL许可证发布