一、hook定义
hook:改变程序执行流程的一种技术统称。
二、fishhook简介
它是Facebook开源的一个工具,用于在运行时动态修改已链接Mach-O文件中的符号绑定。它利用Mach-O文件的加载原理,通过修改懒加载(lazy)和非懒加载(non-lazy)两个符号指针表中的指针,达到hook C函数的目的。
fishhook
三、使用fishhook修改系统C函数实现
以NSLog函数为例
1.利用一个函数指针保存原NSLog函数的地址
// Function pointer that keeps the address of the original NSLog. Its address is
// handed to fishhook as the `replaced` field of the rebinding, and the
// replacement function calls through it to reach the original implementation.
static void (*sys_nslog)(NSString *format,...);
2.编写替换函数。效果是调用NSLog时会先执行我们的替换函数;在替换函数内部,一般还会根据需求手动调用原先的实现,即通过上一步保存的函数指针来调用。
/**
 * Replacement installed in place of NSLog.
 *
 * Formats the caller's message, appends the marker text "hook成功!", and logs
 * it through the saved original NSLog (sys_nslog). It then logs this
 * function's own name.
 *
 * @param format  printf-style format string, as passed to NSLog.
 * @param ...     arguments consumed by the specifiers in `format`.
 */
void myNSLog(NSString *format, ...) {
    // Expand the variadic arguments here. Forwarding only `format` to the
    // original NSLog would make every specifier in it read arguments that were
    // never passed, which is undefined behaviour.
    NSString *message = nil;
    if (format != nil) {
        va_list args;
        va_start(args, format);
        // NOTE(review): assumes the file is compiled with ARC; under manual
        // reference counting this object would need an explicit release.
        message = [[NSString alloc] initWithFormat:format arguments:args];
        va_end(args);
    }
    // Messaging nil returns nil, so a nil format still logs "(null)".
    message = [message stringByAppendingString:@"hook成功!"];
    // The finished message is passed as an argument, never as the format string.
    sys_nslog(@"%@", message);
    sys_nslog(@"%s",__func__);
}
3.重新绑定符号
// Describe the rebinding: the symbol to hook, the function that replaces it,
// and where the original implementation's address should be stored.
struct rebinding nslog_rebinding = {
    .name = "NSLog",
    .replacement = myNSLog,
    .replaced = (void *)&sys_nslog,
};
struct rebinding rebindings[] = {nslog_rebinding};
/**
 Rebind the symbols.
 @param rebindings      array of struct rebinding entries
 @param rebindings_nel  number of entries in the array
 @return an int status; it is not checked here
 */
rebind_symbols(rebindings, sizeof(rebindings) / sizeof(rebindings[0]));
Demo
四、符号查找过程梳理
引用fishhook给出的流程图:
下面以查找printf函数为例
分析用到的Mach-O文件:Mach-O File
1.Find entry with same index in indirect symbol table
第一阶段,从__DATA
段的__la_symbol_ptr
节开始,寻找索引(用于Dynamic Symbol Table
下的Indirect Symbols
)。也就是说两个表的索引是一样的,printf
函数在Lazy Symbol Pointers
下为第44条记录,对应在Indirect Symbols
下也为第44条记录。
这里自己想了一下,可以用公式计算:
- printf在Lazy Symbol Pointers中的Offset:0x241D8
- Lazy Symbol Pointers的起始Offset:0x24080
- Lazy Symbol Pointers中相邻两条记录相差:0x8
- 因此printf的索引:(0x241D8 - 0x24080) / 0x8 = 0x2B = 43(索引从0开始,即第44条记录)
- 切换到Indirect Symbols计算:0x2B * 0x4(相邻两条记录相差) + 0x2A4E0(起始Offset) = 0x2A58C(对应Indirect Symbols下的符号__printf)
2.Treat value as index into symbol table array
第二阶段,将Indirect Symbols
下的__printf符号对应的Data值换算成10进制,此处为将0x1A5转成10进制的421,跳到Symbols Table
下的Symbols
,找到第421条记录,对应着__printf,如下:
3.Look up string table entry by adding offset from symbol table entry to string table base
第三阶段,将symbol table
的符号偏移值(Data列的值)加上String Table
的基址,此处为0x577 + 0x2A6C0 = 0x2AC37,如下:
以上
五、fishhook源码分析
从我们常用的符号重绑定函数rebind_symbols
入口开始,prepend_rebindings
函数内部分配内存空间将需要重新绑定符号的结构体初始化成一条struct rebindings_entry
结构体的链表,通过retval
检查prepend_rebindings是否执行成功(返回值小于0时直接返回)。如果是第一次调用(即_rebindings_head->next为空),就注册image添加的回调,否则遍历所有已加载的image(模块),如下:
/*
 * Registers a set of rebindings and applies them to loaded images.
 *
 * The rebindings are first prepended to the list headed by _rebindings_head.
 * A negative result from that step is returned immediately.
 *
 * Returns the value produced by prepend_rebindings.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head entry with no successor means this was the first call: register the
  // image-added callback (per the original note, it is also invoked for the
  // images that already exist).
  if (_rebindings_head->next == NULL) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so just walk the images
  // that are currently loaded.
  const uint32_t image_count = _dyld_image_count();
  uint32_t image = 0;
  while (image < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(image),
                              _dyld_get_image_vmaddr_slide(image));
    image++;
  }
  return status;
}
最后,都会调用rebind_symbols_for_image
函数对某个模块中的符号表指针进行替换。rebind_symbols_for_image
内部获取__LINKEDIT、符号表、间接跳转表、字符串表在内存中的真实位置,然后调用perform_rebinding_with_section
分别对懒加载表和非懒加载表进行替换,如下:
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
if (dladdr(header, &info) == 0) {
return;
}
segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL;
struct symtab_command* symtab_cmd = NULL;
struct dysymtab_command* dysymtab_cmd = NULL;
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
//获取__LINKEDIT
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
//获取符号表
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
//获取动态符号表
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}
// Find base symbol/string table addresses
//就是获取machoheader
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
//符号表在内存的位置
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
//字符串表在内存的位置
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
//动态符号表间接跳转表在内存的位置
// Get indirect symbol table (array of uint32_t indices into symbol table)
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
cur = (uintptr_t)header + sizeof(mach_header_t);
//遍历Load Commands下的每个加载指令
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
//分别绑定懒加载和非懒加载表(__DATA段)
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}
关键函数perform_rebinding_with_section
,参数传进了结构体数组,内存中节、ASLR、符号表、字符串表、间接符号表的地址,主要工作如下:
- 1.获取懒加载表或非懒加载表在间接符号表中的位置
- 2.遍历section,找到对应位置在间接跳转表中所对应的符号表下标
- 3.根据符号表的下标,获取符号表中对应的符号字符串
- 4.遍历rebindings链表,判断符号是否匹配或是否已被替换,如果没有,则进行替换操作
如下:
/*
 * Rebinds the pointers of one symbol pointer section.
 *
 * For each pointer slot in the section, the matching entry of the indirect
 * symbol table gives an index into the symbol table, whose n_strx gives the
 * offset of the symbol's name in the string table. The name, minus its first
 * character, is compared against every rebinding in the list. On the first
 * match the slot is overwritten with the replacement, after the previous
 * pointer is stored through `replaced` when that is non-NULL and the slot does
 * not already hold the replacement.
 *
 * rebindings:      head of the rebindings list.
 * section:         the symbol pointer section to process.
 * slide:           slide value added to the section's recorded address.
 * symtab:          symbol table in memory.
 * strtab:          string table in memory.
 * indirect_symtab: indirect symbol table in memory.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 is this section's starting offset in the indirect symbol table.
  uint32_t *slot_symtab_indices = indirect_symtab + section->reserved1;
  // The section's pointer slots in memory.
  void **slots = (void **)((uintptr_t)slide + section->addr);
  const size_t slot_count = section->size / sizeof(void *);

  for (uint32_t slot = 0; slot < slot_count; slot++) {
    // Index into the symbol table for this slot.
    const uint32_t symtab_index = slot_symtab_indices[slot];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    printf("%s\n",symbol_name);
    // Names shorter than two characters have nothing left to compare once the
    // first character is skipped.
    if (symbol_name[0] == '\0' || symbol_name[1] == '\0') {
      continue;
    }

    // Search the list; stop at the first rebinding whose name matches.
    int rebound = 0;
    struct rebindings_entry *entry = rebindings;
    while (entry != NULL && !rebound) {
      for (uint32_t k = 0; k < entry->rebindings_nel; k++) {
        struct rebinding *candidate = &entry->rebindings[k];
        if (strcmp(symbol_name + 1, candidate->name) != 0) {
          continue;
        }
        // Save the current pointer only when the caller asked for it and the
        // slot does not already point at the replacement.
        if (candidate->replaced != NULL && slots[slot] != candidate->replacement) {
          *(candidate->replaced) = slots[slot];
        }
        slots[slot] = candidate->replacement;
        rebound = 1;
        break;
      }
      entry = entry->next;
    }
  }
}
六、小结
了解fishhook的原理,并联想dyld的加载流程,可以加深对Mach-O文件的认识。符号查找过程主要与以下几个表有关,分别是Lazy Symbol Pointers
、Indirect Symbols
、Symbols
、String Table
。笔记内容大部分是个人理解,有些地方可能写得不太清晰,如有错误请指出。
七、参考
fishhook源码分析
《iOS应用逆向与安全》