实验三:逆向工程实验 1.理解程序(控制语句、函数、返回值、堆栈结构)是如何运行的 2.掌握GDB调试工具 3.掌握objdump反汇编工具
实验介绍: 本实验设计为一个黑客拆解二进制炸弹的游戏。我们仅给黑客(同学)提供一个二进制可执行文件bomb_64和主函数所在的源程序bomb.c,不提供每个关卡的源代码。程序运行中有6个关卡(6个phase),每个关卡需要用户输入正确的字符串或数字才能通关,否则会引爆炸弹(打印出一条错误信息,并导致评分下降)! 要求同学运用GDB调试工具和objdump反汇编工具,通过分析汇编代码,找到在每个phase程序段中,引导程序跳转到“explode_bomb”程序段的地方,并分析其成功跳转的条件,以此为突破口寻找应该在命令行输入何种字符串来通关。
实验说明
•6个关卡,难度随关卡升级而提升; •通过解读汇编代码来推断其对应的函数结构(推断过程不唯一),某些关卡答案不唯一; •尽力而为,能通几关就几关。 •提示: ①第一关(知识点:string,函数调用,栈) ②第二关(知识点:循环语句,数组) ③第三关(知识点: switch语句) ④第四关(知识点:递归) ⑤第五关(知识点:字串变换,ascii转换,寻址) ⑥第六关(知识点:寻址)
(第1-5关,各15分。第6关10分。实验总结15分。)
实验步骤
1.输入反汇编命令查看汇编代码(保存在1.txt文件中) $ objdump -d bomb_64 > 1.txt 2.首先找到main函数,发现它调用了从phase1到phase6这六个函数。再找到phase1,代码如下:(举例分析) 0000000000400e70 400e70: 48 83 ec 08 sub $0x8,%rsp 400e74: be f8 1a 40 00 mov $0x401af8,%esi 400e79: e8 bf 03 00 00 callq 40123d <strings_not_equal> 400e7e: 85 c0 test %eax,%eax 400e80: 74 05 je 400e87 400e82: e8 b6 07 00 00 callq 40163d <explode_bomb> 400e87: 48 83 c4 08 add $0x8,%rsp 400e8b: c3 retq
实验过程及内容: 当前用户可能对bomb文件没有执行权限,建议先用ls查看文件权限,若没有执行权限,请用chmod +x bomb命令增加当前用户对bomb文件的执行权限。
注2: 若用命令./bomb执行该文件提示no such file时,请先用 su szu 切换到szu账户,然后输入命令sudo apt-get install lib32z1,等待下载安装完毕即可。
PHASE 1:
分析对 phase_1 函数对应的 Pseudo-code:
void phase_1(const char *str) { if (strings_not_equal(str, "…..") != 0) { explode_bomb(); } }
容易猜到我们应该找出 "...."中的值。
故,确认了PHASE1中进行了字符串比较操作,显示数据后发现相关内容。 0x401af8 "Science isn't about why, it's about why not?"
故正确答案为: Science isn't about why, it's about why not?
PHASE2:分析代码可知: 0000000000401743 read_six_numbers(): ;; char *enteredString ;; int array[6] 401743: 48 83 ec 18 sub $0x18,%rsp 401747: 48 89 f2 mov %rsi,%rdx 40174a: 48 8d 4e 04 lea 0x4(%rsi),%rcx 40174e: 48 8d 46 14 lea 0x14(%rsi),%rax ; $(rsp+8) := 0x14 + $rsi 401752: 48 89 44 24 08 mov %rax,0x8(%rsp) ; 401757: 48 8d 46 10 lea 0x10(%rsi),%rax 40175b: 48 89 04 24 mov %rax,(%rsp) 40175f: 4c 8d 4e 0c lea 0xc(%rsi),%r9 401763: 4c 8d 46 08 lea 0x8(%rsi),%r8 401767: be b2 1e 40 00 mov $0x401eb2,%esi 40176c: b8 00 00 00 00 mov $0x0,%eax ;; int *var1 = &arr[0]; ;; int *var2 = &arr[1]; ;; int *var3 = &arr[2]; ;; int *var4 = &arr[3]; ;; int *var5 = &arr[4]; ;; int *var6 = &arr[5]; ;; int ret = sscanf (str, "%d %d %d %d %d %d", var1, var2, var3, ...); 401771: e8 3a f3 ff ff callq 400ab0 <__isoc99_sscanf@plt> 401776: 83 f8 05 cmp $0x5,%eax ;; if (ret <= 5) explode_bomb(); /*No-Exit*/ 401779: 7f 05 jg 401780 40177b: e8 bd fe ff ff callq 40163d 401780: 48 83 c4 18 add $0x18,%rsp ;; return ret; 401784: c3 retq
可知read_six_numbers函数实际上就是
Phase_2 这一方法则是
循环判断读入的数字是否满足 arr[x] == arr[x+3](x = 0, 1, 2),即判断是否符合下列格式 X Y Z X Y Z 举例一组正确答案: 1 2 3 1 2 3
PHASE 3:0000000000400ef9 phase_3(): 400ef9: 48 83 ec 18 sub $0x18,%rsp 400efd: 48 8d 4c 24 08 lea 0x8(%rsp),%rcx 400f02: 48 8d 54 24 0c lea 0xc(%rsp),%rdx 400f07: be be 1e 40 00 mov $0x401ebe,%esi 400f0c: b8 00 00 00 00 mov $0x0,%eax ;; if( scanf ("%d %d", &var1, &var2) <= 1) explode_bomb(); 400f11: e8 9a fb ff ff callq 400ab0 <__isoc99_sscanf@plt> 400f16: 83 f8 01 cmp $0x1,%eax 400f19: 7f 05 jg 400f20 400f1b: e8 1d 07 00 00 callq 40163d ;; switch (var1) { 400f20: 83 7c 24 0c 07 cmpl $0x7,0xc(%rsp) 400f25: 77 3c ja 400f63 400f27: 8b 44 24 0c mov 0xc(%rsp),%eax 400f2b: ff 24 c5 60 1b 40 00 jmpq *0x401b60(,%rax,8) ;; --------- below is a jump table ---------- ;; case 1 : tmp = 0x217; break; ;; case 2 : tmp = 0xd6; break; ;; case 3 : tmp = 0x153; break; ;; case 4 : tmp = 0x77; break; ;; case 5 : tmp = 0x160; break; ;; case 6 : tmp = 0x397; break; ;; case 7 : tmp = 0x19c; break; 400f32: b8 17 02 00 00 mov $0x217,%eax 400f37: eb 3b jmp 400f74 400f39: b8 d6 00 00 00 mov $0xd6,%eax 400f3e: eb 34 jmp 400f74 400f40: b8 53 01 00 00 mov $0x153,%eax 400f45: eb 2d jmp 400f74 400f47: b8 77 00 00 00 mov $0x77,%eax 400f4c: eb 26 jmp 400f74 400f4e: b8 60 01 00 00 mov $0x160,%eax 400f53: eb 1f jmp 400f74 400f55: b8 97 03 00 00 mov $0x397,%eax 400f5a: eb 18 jmp 400f74 400f5c: b8 9c 01 00 00 mov $0x19c,%eax 400f61: eb 11 jmp 400f74 ;; default: explode_bomb(); /*no-exit*/ ;; } 400f63: e8 d5 06 00 00 callq 40163d 400f68: b8 00 00 00 00 mov $0x0,%eax 400f6d: eb 05 jmp 400f74 400f6f: b8 9e 03 00 00 mov $0x39e,%eax ;; if (var2 != tmp) explode_bomb(); ;; return tmp;
大致原理就是输入 sscanf(str, "%d %d", &ofst, &data); 二者有下述要求: 数组{0x217, 0xd6, 0x153, 0x77, 0x160, 0x397, 0x19c} 的第ofst项(从 1 开始计数)必须等于 data 故 1 535 2 214 3 343 4 119 5 352 6 919 7 412 每行均为可能的正确答案。
PHASE 4:思路就是 输入 x 使得 func4(x) = Fibonacci(x+1) = 0x37 = 55(约定 Fibonacci(1) = Fibonacci(2) = 1)
0000000000400fc1 phase_4(): ;; const char * str 400fc1: 48 83 ec 18 sub $0x18,%rsp 400fc5: 48 8d 54 24 0c lea 0xc(%rsp),%rdx 400fca: be c1 1e 40 00 mov $0x401ec1,%esi 400fcf: b8 00 00 00 00 mov $0x0,%eax ;; if(sscanf (str, "%d", &var1) != 1) explode_bomb(); 400fd4: e8 d7 fa ff ff callq 400ab0 <__isoc99_sscanf@plt> 400fd9: 83 f8 01 cmp $0x1,%eax 400fdc: 75 07 jne 400fe5 ;; if (var1 <= 0) explode_bomb(); 400fde: 83 7c 24 0c 00 cmpl $0x0,0xc(%rsp) 400fe3: 7f 05 jg 400fea 400fe5: e8 53 06 00 00 callq 40163d ;; int tmp; ;; if ( (tmp = func4 (var1)) != 0x37) explode_bomb(); ;; return tmp; 400fea: 8b 7c 24 0c mov 0xc(%rsp),%edi 400fee: e8 91 ff ff ff callq 400f84 400ff3: 83 f8 37 cmp $0x37,%eax 400ff6: 74 05 je 400ffd 400ff8: e8 40 06 00 00 callq 40163d 400ffd: 48 83 c4 18 add $0x18,%rsp 401001: c3 retq
func4(n) 为计算 Fibonacci 数列第 n+1 项的算法(func4(0) = func4(1) = 1,对应 Fibonacci(1) = Fibonacci(2) = 1)
0000000000400f84 func4(): ;; args : ;; int input 400f84: 48 89 5c 24 f0 mov %rbx,-0x10(%rsp) 400f89: 48 89 6c 24 f8 mov %rbp,-0x8(%rsp) 400f8e: 48 83 ec 18 sub $0x18,%rsp 400f92: 89 fb mov %edi,%ebx 400f94: b8 01 00 00 00 mov $0x1,%eax 400f99: 83 ff 01 cmp $0x1,%edi ;; if (input <= 1) return 1; 400f9c: 7e 14 jle 400fb2 ;; return func4(input - 2) + func4(input - 1) 400f9e: 8d 7b ff lea -0x1(%rbx),%edi 400fa1: e8 de ff ff ff callq 400f84 400fa6: 89 c5 mov %eax,%ebp 400fa8: 8d 7b fe lea -0x2(%rbx),%edi 400fab: e8 d4 ff ff ff callq 400f84 400fb0: 01 e8 add %ebp,%eax 400fb2: 48 8b 5c 24 08 mov 0x8(%rsp),%rbx 400fb7: 48 8b 6c 24 10 mov 0x10(%rsp),%rbp 400fbc: 48 83 c4 18 add $0x18,%rsp 400fc0: c3 retq
查表得 Fibonacci(10) = 55,而 func4(x) = Fibonacci(x+1),故 x + 1 = 10,x = 9。正确答案为 9
PHASE 5:
思路首先是输入两个数字 offset 和 data int array_3014[] = {10,2,14,7,8,12,15,11,0,4,1,13,3,9,6,5} ofst 必须满足 0-14 或 0-14 与前缀1的组合, data 必须满足 过程中 为 12次 array_3014[ofst & 0xf] 的和, 即 data = 15 + 6 + 14 + 2 + 1 + 10 + 0 + 8 + 4 + 9 + 13 + 11 = 93 P.S. 这里的0x401030 这一行是 Dead-Code (无影响代码)可以被理解为 NOP
这里的offset 有一定的特殊性,即要求在 循环十二次后, array_3014[offset] 等于15。 在12次循环之前,必须保证 offset, array_3014[offset] 都不为15, 并每次用 array_3014[offset] 来替换 offset.
类似一个指针表,array_3014 存储的是下一个节点的偏移,15即为退出。 经过反向计算,[6] = 15 -> [14] = 6 -> [2] = 14 -> [1] = 2 -> [10] = 1 -> [0] = 10 -> [8] = 0 -> [4] = 8 -> [9] = 4 -> [13] = 9 -> [11] = 13 -> [7] = 11 ## 暂时无用: -> [3] = 7 -> [12] = 3 -> [5] = 12 -> [15] = 5
解的样例 (0xXXXXXXX7)10, 93 由于只使用 offset 的低 4 位(ofst & 0xf),高 28 位可任取,这里共有 2^28 组解,这里只写出其中的一部分: 7 93 23 93 55 93 2147483639 93 (0x7ffffff7 93) -9 93 (最多个位为1的解 0xfffffff7 93)
PHASE 6:思路:链表排序问题 假设 node0/1/2/… 的类型为 Struct NODE{ int data; int sequence; NODE *next; } 我们输入的数据会被放入node0 中,我们现在所做的事情就是要算出能够满足下列性质的node0.data 的合适的值。 {. } 注意到 nodeX.next 总是 nodeX+1 的地址,我们可以利用这一性质,考虑 func6 的返回值应为nodeX, 这样判断的条件就是 nodeX+3.data 与 node0.data 之间的比较。
Fun6 函数: ;; void * fun6 (struct NODE * node, int arg2) { fun6(): ;; INT save = node.next ;; node.next = 0 40106f: 4c 8b 47 08 mov 0x8(%rdi),%r8 401073: 48 c7 47 08 00 00 00 movq $0x0,0x8(%rdi) 40107a: 00 ; ????? why is here a nop ????? ;; NODE *ret = node; ;; NODE *last = node; ;; NODE *current; 40107b: 48 89 f8 mov %rdi,%rax 40107e: 48 89 f9 mov %rdi,%rcx ;; if (save == 0) return node; ;; else goto BB3; 401081: 4d 85 c0 test %r8,%r8 401084: 75 40 jne LABEL_BB3 <0x4010c6> 401086: 48 89 f8 mov %rdi,%rax 401089: c3 retq
LABEL_BB2: ; INLINE_FUNC ;; while (true) { ;; last = current; ;; current = last.next; 40108a: 48 89 d1 mov %rdx,%rcx LABEL_ENTRY_2: 40108d: 48 8b 51 08 mov 0x8(%rcx),%rdx
;; if(current == NULL) { ;; node = last; ;; break; ;; } 401091: 48 85 d2 test %rdx,%rdx 401094: 74 09 je 40109f LABEL_BB4 ;; if (current.i0 > arg2) continue; ;; 401096: 39 32 cmp %esi,(%rdx) 401098: 7f f0 jg 40108a LABEL_BB2 ;; else node = last; break; ; ;; } 40109a: 48 89 cf mov %rcx,%rdi 40109d: eb 03 jmp 4010a2 LABEL_BB5
LABEL_BB4: 40109f: 48 89 cf mov %rcx,%rdi LABEL_BB5: MAIN_PART_INLINE_FUNC: ;;; if (current == node) { ;;; ret = save; ;;; } else { 4010a2: 48 39 d7 cmp %rdx,%rdi 4010a5: 74 06 je 4010ad LABEL_BB6 ;;; node.next = save; ;;; } 4010a7: 4c 89 47 08 mov %r8,0x8(%rdi) 4010ab: eb 03 jmp 4010b0 LABEL_BB7 LABEL_BB6: ; 4010ad: 4c 89 c0 mov %r8,%rax LABEL_BB7: ;;; last = save.next; 4010b0: 49 8b 48 08 mov 0x8(%r8),%rcx ;;; save.next = current 4010b4: 49 89 50 08 mov %rdx,0x8(%r8) ;;; if ( last == NULL ) return ret; 4010b8: 48 85 c9 test %rcx,%rcx 4010bb: 74 1a je 4010d7 ;;; save = last ;;; last = ret ;;; node = ret 4010bd: 49 89 c8 mov %rcx,%r8 4010c0: 48 89 c1 mov %rax,%rcx 4010c3: 48 89 c7 mov %rax,%rdi LABEL_BB3: ;;; BB3: /*save != 0*/ ;;; current = last; ;;; if (last == NULL) goto MAIN_PART; ;;; arg2 = save.i1 4010c6: 48 89 ca mov %rcx,%rdx 4010c9: 48 85 c9 test %rcx,%rcx 4010cc: 74 d4 je 4010a2 4010ce: 41 8b 30 mov (%r8),%esi ;;; if (current.i1 > arg2) { /*may be save.i1*/ ;;; goto LABEL_ENTRY_2; // INLINE FUNC1 ;;; } ;;; ;;; goto MAIN_PART // INLINE FUNC2 ;;; 4010d1: 39 31 cmp %esi,(%rcx) 4010d3: 7f b8 jg 40108d 4010d5: eb cb jmp 4010a2 LABEL_RET: 4010d7: f3 c3 repz retq
整理后得到 C 代码, 通读逻辑可知其代码操作为一排序函数, #include <stdio.h>
typedef int INT;
typedef long long INT64;

/*
 * List node layout as inferred from the fun6 disassembly: the sort key is
 * read at offset 0 and the link pointer at offset 8.
 */
typedef struct Node_t {
    int data;               /* key compared by fun6 */
    int seq;                /* second field; never read by fun6 */
    struct Node_t *next;    /* next node, NULL at the tail */
} NODE;

/*
 * fun6 - insertion-sort a singly linked list into descending `data` order.
 *
 * Structured rewrite of the disassembled routine.  The original register
 * roles map onto the locals below as:
 *   %rax -> sorted (head of the already-sorted prefix, the return value)
 *   %r8  -> rest   (next node waiting to be inserted)
 *   %rcx -> prev,  %rdx -> cur,  %esi -> key
 *
 * node: head of the list; must not be NULL (it is dereferenced
 *       unconditionally, exactly as the machine code does).
 * arg2: ignored on entry -- the machine code overwrites %esi with the key
 *       of the node being inserted before it is ever read.
 *
 * Returns the head of the re-linked list.  Nodes are re-linked in place;
 * nothing is allocated or freed.
 */
NODE *fun6(register NODE *node, register int arg2)
{
    NODE *sorted = node;
    NODE *rest = node->next;

    (void)arg2;

    /* Detach the head: it forms the initial one-element sorted list. */
    node->next = NULL;

    while (rest != NULL) {
        NODE *following = rest->next;
        const int key = rest->data;
        NODE *prev = NULL;
        NODE *cur = sorted;

        /* Skip every node whose key is strictly greater (the `jg` tests). */
        while (cur != NULL && cur->data > key) {
            prev = cur;
            cur = cur->next;
        }

        /* Splice `rest` in front of `cur`; a NULL prev means new head. */
        if (prev == NULL) {
            sorted = rest;
        } else {
            prev->next = rest;
        }
        rest->next = cur;

        rest = following;
    }

    return sorted;
}
|
|