在我的decompiler中关闭JP to statement开关后,得到原始的未经过处理的跳转指令。
类似汇编中我们写的跳转。这就是高级语言和低级语言的差别。低阶语言更繁琐和趋近于机器处理过程。比如汇编中的寻址,基本就是cpu取数和处理数的一个过程。
P-code中,顺序结构倒好处理,直接把赋值,函数调用搞定也就没什么了,唯一需要处理的是这些跳转。
在汇编中,有许多种跳转,比如相等跳、不等跳等等。但是在高级语言如pb中,语言对bool有明确规定,if、while等的condition部分都必须是bool类型的表达式。这也是为什么在c++中把比较误写成=而不是==时编译器不会报错(赋值表达式的结果可以隐式转换为bool),而你在pb中的条件判定处写=,编译器就知道是逻辑意义上的判断,而不是赋值,因为pb没有其他值到bool的隐式转换。所以在pb中,我们只有三种跳转:JTP、JFP、JXP,分别是“为真跳”、“为假跳”、“绝对跳”。当然只知道这三种区别还不行,还没有足够的信息把它们反向成高级语言中的statement。我们在前期分析代码时,已经顺便把跳转指令的当前offset和目标地址(will to address)做了比较,区分出是“往前跳”(目标地址小于当前offset)还是“往后跳”(目标地址大于当前offset),这样后面就不必在长长的字符串中再繁琐地判定(效率低)。
我们编写了几种结构加以分析区别,以便还原,并找出其中的一些规律。因为现代编程习惯中,已经废弃了goto的使用,虽然它还是关键字,但是我们约定,我们写的statement都是成块的。这样有助于我们得到一个模糊的表述。
代码 _0012: _L_ (1) = 2 _J_F_F_0032
意义: 地址 logic expression JP
我们拿两个相近似的结构来对比:
(A)
_0000: ls_note = "if ... end if"
_0012: _L_ (1) = 2 _J_F_F_0032
_0024: ll_1 = 1
_0032:
(B)
_0052: ls_note = "if ... else ... end if"
_0064: _L_ (1) = 2 _J_F_F_0088
_0076: ll_1 = 3
_0084: _J_X_F_0096
_0088: ll_1 = 3
_0096:
在a结构中我们看到,可以这么描述我们即将编程的伪码:
如果存在logic(没有logic的话,那就只有goto语句产生的绝对跳转了),并且存在“往后跳”,并且遍历当前行到“跳转目标行”之间再无“logic”和“其他跳转指令”,那我们复活成一条“if ... then ... end if”语句。
在b结构中,我们描述为:
如果存在logic,并且存在“往后跳”,并且遍历当前行到“跳转目标行”之间存在“绝对往后跳”,那我们复活成“if ... then ...else ...end if”语句。
当然,这也存在嵌套问题,我想出于简化的设计思路,就没必要用递归。可以采用层层剥菜的方式,先处理内层,处理完后扫描一次即可。
就是必须把“最小的块”找到,然后先处理。
以上只是大致思路。还未具体实现,估计得耗时几天才能处理好。
//20090908:statement反向已经满意搞定,包括缩进显示,都搞好了。
// Sample decompiler output for lf_333 with the jumps folded back into statements.
// Each `ls_note = "..."` assignment names the construct that the rows after it exercise.
// The listing is reproduced as emitted; rows marked NOTE(review) look like decompiler
// artifacts and should be checked against the raw P-code.
//object name: lf_333.fun
// NOTE(review): the type is declared as inheriting from itself — presumably a placeholder
// written by the decompiler; confirm what the original object header was.
global type lf_333 from lf_333
end type
forward prototypes
global subroutine lf_333() throws exception
end prototypes
global subroutine lf_333() throws exception;
//variables list
long ll_1
long a
string ls_note
//global var: string gs_id5
//expression lines: 66
gs_id5 = ""
ls_note = "if ... end if"
if 1 = 2 then
ll_1 = 1
end if
ls_note = "if .do loop unitl. end if"
// NOTE(review): the next row is a bare condition with no `if ... then` around it and no
// matching `end if` further down — presumably an outer `if` that was not reconstructed.
1 = 2
ll_1 = 2
do
ll_1 = 8
if 1 = 2 then
ll_1 = 2
end if
// NOTE(review): the note above says "loop unitl" but the row below reads `loop while`.
loop while ll_1 = 0
ls_note = "if ... else ... end if"
if 1 = 2 then
ll_1 = 3
ll_1 = 3
ll_1 = 3
else
ll_1 = 3
ll_1 = 3
ll_1 = 3
end if
ls_note = "do while ... loop"
do while ll_1 = 0
ll_1 = 5
loop
ls_note = "do until ... loop"
do until ll_1 = 0
ll_1 = 6
loop
ls_note = "do ...loop while"
do
ll_1 = 7
loop while ll_1 = 0
ls_note = "do ...loop until"
do
ll_1 = 8
// NOTE(review): emitted as `loop while` although the note says "loop until"; the raw
// listing later in this file uses different jump kinds for the two loops (_J_T_B for
// "do ...loop while", _J_F_B for "do ...loop until"), so this row is presumably wrong.
loop while ll_1 = 0
ls_note = "if if if if if end if end if end if end if"
if a = 0 then
if a = 0 then
if a = 0 then
if a = 0 then
a = 0
end if
end if
end if
end if
ls_note = "choose case.. end choose"
// First choose: one plain value per case label.
choose case ll_1
case 1
ll_1 = 111
case 2
ll_1 = 111
case 3
ll_1 = 111
case 4
ll_1 = 111
case 5
ll_1 = 111
end choose
// Second choose: value lists, `to` ranges, an `is` relation, a nested loop inside a
// case body, and a label mixing all of these forms.
choose case ll_1
case 1
ll_1 = 222
case 2 , 3 , 4 , 5
ll_1 = 222
case 2 to 5
ll_1 = 222
case is >= 3
do while 1 = 2
ll_1 = 222
loop
ll_1 = 222
case (1 ) , 2 to 5 , 5
ll_1 = 222
end choose
// NOTE(review): the routine is opened as `global subroutine` but closed with
// `end function` — confirm which terminator the decompiler should emit.
end function
////////////////////////////////////////////
顺便写写关键字的缩进处理。
声明结构并初始化:
// Keyword table that drives the indenter below. strc_INDENTATION and
// MAX_KEY_INDENTATION are declared elsewhere; the values are positional and,
// judging by the .flag / .KeyWords / .cLen accesses in the loop, read as
//     flag, keyword, keyword length.
// Flags:
//   1  opener      - row at the current level, following rows one level deeper
//   2  closer      - one level shallower, starting with this row
//   3  divider     - this row alone one level shallower (else / case / catch ...),
//                    the level for the following rows is unchanged
//   4  choose      - row at the current level, following rows two levels deeper
//                    (one level for the `case` labels, one for their bodies)
//   5  end choose  - two levels shallower, starting with this row
// The scan stops at the first hit, so a keyword that is a prefix of another
// ("end" / "end choose", "else" / "elseif") must come AFTER the longer one;
// otherwise "end choose" is taken for a plain "end" and flag 5 is never used.
const strc_INDENTATION KEY_INDENTATION[MAX_KEY_INDENTATION] = {
	1,"if",2,
	3,"elseif",6,
	3,"else",4,
	5,"end choose",10,	// special: undoes the two levels added by "choose"
	2,"end",3,
	1,"for",3,
	2,"next",4,
	4,"choose",6,		// special: two levels, because `case` rows sit one level up
	3,"case",4,
	1,"while",5,
	1,"do",2,
	2,"loop",4,
	1,"try",3,
	3,"catch",5,
	3,"finally",7
};

// Re-indent the text in MemoProcess row by row, one tab per nesting level.
// Rows are expected to start at column 1 with the keyword (no leading blanks).
int iIndent0 = 0;	// nesting level that applies to the current row before its keyword is seen
short iChanged;		// flag of the keyword that opens the current row, 0 = none
astring asIndent;	// cached tab prefix, always iIndent0 tabs long
char chFirst;
ilines = MemoProcess->Lines->Count;
for(int i = 0;i < ilines;i++){
	AnsiString asLine = MemoProcess->Lines->Strings[i];
	if (asLine.IsEmpty()) {
		continue;	// AnsiString is 1-based: asLine[1] on an empty row is out of range
	}

	iChanged = 0;
	chFirst = asLine[1];
	// Cheap pre-filter: only rows starting with the first letter of some
	// keyword in the table are compared against the table at all.
	if ('i' == chFirst || 'e' == chFirst || 'f' == chFirst || 'n' == chFirst ||
		'c' == chFirst || 'w' == chFirst || 'd' == chFirst || 'l' == chFirst || 't' == chFirst)
	{
		for(int k = 0;k < MAX_KEY_INDENTATION;k++){
			const int iLen = KEY_INDENTATION[k].cLen;
			if (!(LeftStr(asLine,iLen) == KEY_INDENTATION[k].KeyWords)) {
				continue;
			}
			// Require a word boundary after the keyword so that identifiers
			// which merely start with one ("format", "next_id", "do_it")
			// do not change the indentation.
			if (asLine.Length() > iLen) {
				const char chNext = asLine[iLen + 1];
				if ('_' == chNext ||
					(chNext >= '0' && chNext <= '9') ||
					(chNext >= 'a' && chNext <= 'z') ||
					(chNext >= 'A' && chNext <= 'Z'))
				{
					continue;
				}
			}
			iChanged = KEY_INDENTATION[k].flag;
			break;	// first match wins
		}
	}

	int iRow = iIndent0;	// level this row is printed at
	int iAfter = iIndent0;	// level for the rows after this one
	switch (iChanged){
		case 1:	// opener: print this row, go one level deeper afterwards
			iAfter = iIndent0 + 1;
			break;
		case 2:	// closer: go one level up first, then print this row
			iRow = iAfter = (iIndent0 > 0) ? iIndent0 - 1 : 0;
			break;
		case 3:	// divider: only this row is printed one level up
			iRow = (iIndent0 > 0) ? iIndent0 - 1 : 0;
			break;
		case 4:	// choose: print this row, go two levels deeper afterwards
			iAfter = iIndent0 + 2;
			break;
		case 5:	// end choose: go two levels up first, then print this row
			iRow = iAfter = (iIndent0 > 1) ? iIndent0 - 2 : 0;
			break;
		default:	// no keyword: print at the current level
			break;
	}

	if (iRow > 0) {
		if (iRow == iIndent0) {
			MemoProcess->Lines->Strings[i] = asIndent + asLine;	// cached prefix fits
		} else {
			// '\t' is the tab character; the multi-character literal '/t'
			// would be truncated to a plain 't'.
			MemoProcess->Lines->Strings[i] = AnsiString::StringOfChar('\t',iRow) + asLine;
		}
	}
	if (iAfter != iIndent0) {
		iIndent0 = iAfter;
		asIndent = AnsiString::StringOfChar('\t',iIndent0);
	}
}
//附:
// Raw listing of lf_333 as produced with jump-to-statement folding switched off.
// Row format:  _<address>: [ _L_ <logic expression> ] [ <jump> ]
// Jump tokens as they appear below:  _J_<T|F|X>_<F|B>_<target address>
//   T / F / X - presumably jump-if-true / jump-if-false / unconditional (JTP / JFP / JXP)
//   F / B     - presumably forward / backward: every F target below is a higher
//               address than its own row, every B target a lower one
global subroutine lf_333() throws exception;
//variables list
long ll_1
long a
string ls_note
//exprssion lines: 42
_0000: ls_note = "if ... end if"
_0012: _L_ (1) = 2 _J_F_F_0032
_0024: ll_1 = 1
_0032: _L_ (1) = 2 _J_F_F_0052
_0044: ll_1 = 2
_0052: ls_note = "if ... else ... end if"
_0064: _L_ (1) = 2 _J_F_F_0088
_0076: ll_1 = 3
_0084: _J_X_F_0096
_0088: ll_1 = 3
_0096: ls_note = "if ... elseif ... end if"
_00A8: _L_ (1) = 2 _J_F_F_00CC
_00BA: ll_1 = 4
_00C8: _J_X_F_00EC
_00CC: _L_ (2) = 3 _J_F_F_00EC
_00DE: ll_1 = 4
_00EC: ls_note = "do while ... loop"
_00FE: _L_ (ll_1) = 0 _J_F_F_0120
_010E: ll_1 = 5
_011C: _J_X_B_00FE
_0120: ls_note = "do until ... loop"
_0132: _L_ (ll_1) = 0 _J_T_F_0154
_0142: ll_1 = 6
_0150: _J_X_B_0132
_0154: ls_note = "do ...loop while"
_0166: ll_1 = 7
_0174: _L_ (ll_1) = 0 _J_T_B_0166
_0184: ls_note = "do ...loop until"
_0196: ll_1 = 8
_01A4: _L_ (ll_1) = 0 _J_F_B_0196
_01B4: ls_note = "if if if if if end if end if end if end if"
// The four nested conditions below all jump to the same target (_0214).
_01C6: _L_ (a) = 0 _J_F_F_0214
_01D6: _L_ (a) = 0 _J_F_F_0214
_01E6: _L_ (a) = 0 _J_F_F_0214
_01F6: _L_ (a) = 0 _J_F_F_0214
_0206: a = 0
_0214: _END
end function