这时需要完成的工作是用C语言将汇编代码的行为进行模拟。一本MIPS指令集手册是必须的,可以通过下面的URL获得:
http://dkrizanc.web.wesleyan.edu/courses/231/07/MIPS_Vol2.pdf
手册名为:《MIPS32™ Architecture For Programmers Volume II: The MIPS32™ Instruction Set》
当然也有工具可以帮助我们完成这项工作:
但我们还是建议大家手动地完成这项工作,这样对算法的理解也会有帮助。这里还建议大家使用IDA里的图形视图(Graph view)来阅读汇编代码。
下面是我列出来的模拟代码,供大家参考:
/* Same typedefs as codecVerify.c, repeated so this listing compiles alone. */
typedef unsigned int  u32;
typedef unsigned char u8;

/*
 * Register-level C simulation of the decoder routine.
 *
 * Locals keep the names of the MIPS registers they stand for and the labels
 * keep the addresses of the basic blocks, so the listing can be read side by
 * side with the disassembly.
 *
 *   des  output buffer (the routine advances it as it writes)
 *   src  compressed input
 *   len  number of bytes the output is expected to hold
 */
void lzss_decoder(u8* des, u8* src, int len)
{
    u32 t0, t1;     /* t0: constant 1; t1: type byte, then copy counter */
    u32 a2, a3;     /* scratch */
    u8 *t2;         /* source of a back-reference copy */
    u8 *v1;         /* one past the last output byte */

    /*
     * The main loop is bottom-tested, so an empty output buffer has to be
     * rejected here as well; otherwise one token would still be decoded.
     */
    if (len <= 0)
    {
        return;
    }
    v1 = des + len;

    t0 = 1;
    t1 = *src;

loc_882ADEC:
    a2 = t1 & 3;
    src++;
    if (a2 != 0)
    {
        a2 = t1 & 1;
        if (a2 != t0)
        {
            /* 24-bit code word: type byte plus two more bytes. */
            a3 = *src;
            a2 = *(src + 1);
            a3 = a3 << 8;
            a2 = a2 << 16;
            a2 = a3 + a2;
            a3 = t1 + a2;
            a2 = (a3 >> 2) & 0x3ff;     /* length field: bits 2..11 */
            t1 = a2 + 3;
            a2 = a3 >> 12;              /* distance field: bits 12..23 */
            t2 = des - a2;
            src += 2;
        }
        else
        {
            /* 16-bit code word: type byte plus one more byte. */
            a2 = *src;
            a2 = a2 << 8;
            a3 = t1 + a2;
            a2 = (a3 >> 1) & 0x7;       /* length field: bits 1..3 */
            t1 = a2 + 3;
            a2 = a3 >> 4;               /* distance field: bits 4..15 */
            t2 = des - a2;
            src++;
        }

        /* Copy t1 bytes from the already decoded output. */
        a2 = t1;
        t1--;
        if (a2 > 0)
        {
loc_882AE90:
            a3 = *t2;
            a2 = t1;
            t1--;
            *des = (u8)a3;
            t2++;
            des++;
            if (a2 > 0)
            {
                goto loc_882AE90;
            }
        }
    }
    else
    {
        /* Literal run: (type >> 2) + 1 bytes follow verbatim. */
        a2 = t1 >> 2;
        a2++;
        t1 = a2 - 1;
        if (a2 > 0)
        {
loc_882AE08:
            a3 = *src;
            a2 = t1;
            t1--;
            *des = (u8)a3;
            des++;
            src++;
            if (a2 > 0)
            {
                goto loc_882AE08;
            }
        }
    }

    /* loc_882AEAC: keep decoding while the output is not full. */
    a2 = (des < v1) ? 1 : 0;
    if (a2 != 0)
    {
        t1 = *src;
        goto loc_882ADEC;
    }
    return;
}
恢复高级语言特征
下面的工作是恢复高级语言特征。
1. 压缩赋值语句的行数
2. 根据循环条件,将goto语句还原为while或for
/* Same typedefs as codecVerify.c, repeated so this listing compiles alone. */
typedef unsigned int  u32;
typedef unsigned char u8;

/*
 * Second stage of the restoration: the assignment chains are folded into
 * single expressions and the two inner copy loops are written as for loops.
 * The outer loop is still expressed with the original labels, and the
 * scratch variables still carry their register names.
 *
 *   des  output buffer (advanced as it is written)
 *   src  compressed input
 *   len  number of bytes the output is expected to hold
 */
void lzss_decoder(u8* des, u8* src, int len)
{
    u32 t1;             /* number of bytes to copy for the current token */
    u32 a2, a3;         /* scratch */
    u32 i;
    u8 *t2;             /* source of a back-reference copy */
    u8 *end, type;

    /* The loop is bottom-tested, so reject an empty output buffer too. */
    if (len <= 0)
    {
        return;
    }
    end = des + len;
    type = *src;

loc_882ADEC:
    src++;                              /* step past the type byte */
    if (0 == (type & 3))
    {
        /* Literal run. */
        t1 = (type >> 2) + 1;
        for (i = 0; i < t1; i++)
        {
            *des = *src;
            des++;
            src++;
        }
        goto loc_882AEAC;
    }
    else if (0 == (type & 1))
    {
        /* 24-bit code word; every shift is parenthesised because '+' binds
         * tighter than '<<'. */
        a2 = (u32)(*src << 8) + (u32)(*(src + 1) << 16);
        a3 = type + a2;
        t1 = ((a3 >> 2) & 0x3ff) + 3;   /* may exceed 255: must not go in a u8 */
        t2 = des - (a3 >> 12);
        src += 2;
    }
    else
    {
        /* 16-bit code word. */
        a2 = (u32)(*src << 8);
        a3 = a2 + type;
        t1 = ((a3 >> 1) & 0x7) + 3;
        t2 = des - (a3 >> 4);
        src++;
    }

    /* Copy the match byte by byte; source and destination may overlap. */
    for (i = 0; i < t1; i++)
    {
        *des = *t2;
        t2++;
        des++;
    }

loc_882AEAC:
    if (des < end)
    {
        type = *src;
        goto loc_882ADEC;
    }
    return;
}
下面要做的工作是看懂算法,为变量命名和调整代码结构,使其具有可读性。
最终的代码:
/* Same typedefs as codecVerify.c, repeated so this listing compiles alone. */
typedef unsigned int  u32;
typedef unsigned char u8;

/*
 * Decode an LZSS stream into dst.
 *
 * Every token starts with a type byte whose two low bits select its kind:
 *   xxxxxx00  literal run: (type >> 2) + 1 bytes follow verbatim
 *   xxxxxxx1  16-bit code word: bits 1..3  = length - 3, bits 4..15  = distance
 *   xxxxxx10  24-bit code word: bits 2..11 = length - 3, bits 12..23 = distance
 * A code word copies `length` bytes starting `distance` bytes behind the
 * current write position.
 *
 *   dst      output buffer with room for dst_len bytes
 *   src      compressed input
 *   dst_len  size of the decoded data; nothing is written when it is <= 0
 *
 * No more than dst_len bytes are ever written. Decoding stops early when a
 * code word points in front of the start of dst; the function has no way to
 * report that, so the remainder of dst is simply left untouched.
 */
void lzss_decoder(u8* dst, u8* src, int dst_len)
{
    u8 *start, *end, *offset, type;
    u32 len, distance, i;

    if (dst_len <= 0)
    {
        return;
    }
    start = dst;
    end = dst + dst_len;

    while (dst < end)
    {
        type = *src++;
        if (0 != (type & 0x3)) /* handle compressed data */
        {
            if (1 != (type & 0x1))
            {
                len = (u32)(type | (*src << 8) | (*(src + 1) << 16));
                distance = len >> 12;
                len = ((len >> 2) & 0x3ff) + 3;  /* right shift: field is bits 2..11 */
                src += 2;
            }
            else
            {
                len = (u32)(type | (*src << 8));
                distance = len >> 4;
                len = ((len >> 1) & 0x7) + 3;
                src++;
            }

            if (distance > (u32)(dst - start))
            {
                return;     /* corrupt stream: reference before the buffer */
            }
            offset = dst - distance;

            /* Byte-wise on purpose: the regions overlap when distance < len. */
            for (i = 0; i < len && dst < end; i++)
            {
                *dst++ = *offset++;
            }
        }
        else /* handle uncompressed data */
        {
            len = (u32)(type >> 2) + 1;
            for (i = 0; i < len && dst < end; i++)
            {
                *dst++ = *src++;
            }
        }
    }
}
我们还需要验证代码的正确性。这时上面dump的内存文件leaveLzssDecoder.bin就有用了。
/* codecVerify.c */
/*
 * Runs lzss_decoder() on the compressed data inside a PSP memory dump and
 * compares the result with the decoded data the game left in the same dump.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;

int load_psp_memfile(const char *path, u8 **buf);
void mem_check(u8* dst, u8* src, int len);
void lzss_decoder(u8* dst, u8* src, int dst_len);

/*
 * Turn PSP address b into a pointer into the dump loaded at a.
 * Assumes the first byte of the dump corresponds to address 0x08800000.
 */
#define ADDRMAP(a,b) ((a) + ((b) - 0x08800000))

int main(int argc, char * argv[])
{
    const int decoded_len = 0x0000010E;
    int ret = 0;
    u8 *pspmem = NULL;
    u8 *dst = NULL;

    (void)argc;
    (void)argv;

    ret = load_psp_memfile("leaveLzssDecoder.bin", &pspmem);
    if(0 != ret)
    {
        printf("Open file [%s] failed\n", "leaveLzssDecoder.bin");
        return ret;
    }

    /* calloc hands back zeroed memory, replacing malloc + memset. */
    dst = calloc((size_t)decoded_len, 1);
    if(NULL == dst)
    {
        free(pspmem);
        return -1;
    }

    //a0:0x08B39798 a1:0x08BE4800
    lzss_decoder(dst, ADDRMAP(pspmem, 0x08BE4800), decoded_len);
    mem_check(dst, ADDRMAP(pspmem, 0x08B39798), decoded_len);

    free(dst);
    free(pspmem);
    return ret;
}

/*
 * Read the whole file at path into a newly allocated buffer.
 *
 *   path  file to read
 *   buf   receives the buffer on success (the caller frees it) and is set
 *         to NULL on failure
 *
 * Returns 0 on success, -1 when the file cannot be opened, is empty, cannot
 * be read completely, or memory runs out.
 */
int load_psp_memfile(const char *path, u8 **buf)
{
    FILE* fp = NULL;
    u8 *data = NULL;
    long size;
    int ret = -1;

    *buf = NULL;

    fp = fopen(path, "rb");
    if(NULL == fp)
        return -1;

    if(0 != fseek(fp, 0, SEEK_END))
        goto out;
    size = ftell(fp);
    if(size <= 0)
        goto out;
    if(0 != fseek(fp, 0, SEEK_SET))
        goto out;

    data = malloc((size_t)size);
    if(NULL == data)
        goto out;
    if(1 != fread(data, (size_t)size, 1, fp))
        goto out;

    *buf = data;
    data = NULL;    /* ownership moved to the caller */
    ret = 0;

out:
    free(data);
    fclose(fp);
    return ret;
}
49:
50: void mem_check(u8* dst, u8* src, int len)
51: {
52: int i;
53: for(i=0; i54: {
55: if(*(dst+i) != *(src+i))56: {
57: printf("memory check failed/n offset[0x%x] dst[0x%x]!=src[0x%x]/n", i, *(dst+i), *(src+i));58: return;59: }
60: }
61: printf("memory check pass!/n");62: }
在这个阶段我发现我将
len = ((len>>2) & 0x3ff) + 3;
这句话误写成了
len = ((len<<2) & 0x3ff) + 3;
从汇编到C模拟,直到最终的验证,其中每个阶段都可能人为地引入错误,所以一旦发现就应及时更正,以免将错误放大到后面的阶段。
Lzss算法是一种匹配串的替换算法。其变种主要发生在下面几点:
1. 搜索窗的尺寸
2. 匹配串长度的下限和上限
3. 码字和未压缩数据的区分方式
4. 码字中偏移地址和字串长度的组织方式
WIKI指定的LZSS算法参考代码为Allegro
http://alleg.svn.sourceforge.net/viewvc/alleg/allegro/branches/4.2/src/lzss.c?revision=7522&view=markup
该变种算法中根据偏移地址的范围做了2种不同的编码方式。通过码字最低的2个bits来区分是否经过压缩以及编码的方式。仔细阅读代码可以获得下面的编码参数。
#define N 4095 /* largest back-reference distance: the distance field is 12 bits in both code-word forms */
#define F1 10 /* longest match in a 16-bit code word: 3-bit length field (0..7) + 3 */
#define F2 1026 /* longest match in a 24-bit code word: 10-bit length field (0..1023) + 3 */
#define THRESHOLD 2 /* only matches longer than this become code words, so the shortest encoded match is 3 */
有了上面的参数实现encoder也就比较简单了。这里需要指出的是,不同的encoder实现最终产生的压缩信息会不一样,我们不要求和原版一致,但求压缩率以及代码的执行效率。
因为loading.xb中只包含0x20这一种算法,所以我们在第一个文件压缩内容的偏移地址处设置读断点:0x26*4 = 0x98
0x08BE47C0 + 0x98 = 0x08BE4858
bpset 0x08BE4858 r |
跳过8个字节的长度信息,来到压缩数据部分:
bpset 0x08BE4860 r |
前一个断点因为在读取长度,经分析是算法函数的调用者,后一个是lzss_decoder函数的领空。
经过试验验证,0x20这种lzss和文件名列表部分的压缩采用同样的算法。