15 - fishhook源码解析

fishhook使用

struct rebinding reb;
reb.name="NSLog";
reb.replaced = (void *)&sys_NSLog;
reb.replacement = my_NSLog;

struct rebinding rebs[] = {reb};
rebind_symbols(rebs, 1);

使用说明:

  1. 定义一个rebinding结构体
    1. rebinding结构体中name参数表示原函数名称
    2. rebinding结构体中replaced参数是一个指针,指向用于保存原函数地址的变量(hook之后可通过该变量调用原函数)
    3. rebinding结构体中replacement参数表示新的函数地址
  2. 调用rebind_symbols函数,用新函数的地址替换懒加载及非懒加载符号指针表中原函数的地址。

fishhook原理

rebind_symbols函数

/*
 * Registers `rebindings_nel` rebindings and applies them to loaded images.
 *
 * The new rebindings are first added to the global _rebindings_head list via
 * prepend_rebindings; a negative result from that call is returned unchanged
 * and nothing else happens.
 *
 * If the list then holds a single entry (this is the first registration),
 * _rebind_symbols_for_image is installed as the dyld add-image callback.
 * Otherwise every image currently known to dyld is walked and rebound
 * explicitly.
 *
 * Returns the value produced by prepend_rebindings.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // First registration: the list head has no successor yet.
  if (!_rebindings_head->next) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later registrations: walk the images dyld already reports.
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t image = 0; image < image_count; image++) {
    const struct mach_header *image_header = _dyld_get_image_header(image);
    intptr_t image_slide = _dyld_get_image_vmaddr_slide(image);
    _rebind_symbols_for_image(image_header, image_slide);
  }
  return status;
}

分析:
1、prepend_rebindings函数:每一次调用rebind_symbols,都会将rebindings数组以头插法的形式添加到_rebindings_head链表中。
2、根据_rebindings_head->next是否为空判断rebind_symbols函数是否是第一次调用。
若是第一次调用,则调用_dyld_register_func_for_add_image注册监听方法。那么已经被dyld加载的image会立刻进入回调,若没被dyld加载的image会在dyld装载的时候触发回调。
若不是第一次调用,则遍历已经加载的image,逐个进行hook。

_rebind_symbols_for_image函数

/*
 * Per-image callback with the (header, slide) signature that rebind_symbols
 * hands to _dyld_register_func_for_add_image. It forwards to
 * rebind_symbols_for_image, supplying the global _rebindings_head list.
 */
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
    rebind_symbols_for_image(_rebindings_head, header, slide);
}

static void rebind_symbols_for_image(struct rebindings_entry *rebindings, const struct mach_header *header, intptr_t slide) {
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;

  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);

  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
        //寻找到data段
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
        
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
          //找懒加载表
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
          //非懒加载表
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}

分析:
1、进程可通过dladdr()获取与给定address最接近的符号的信息。dladdr()可确定指定的address是否位于构成进程地址空间的某个加载模块(可执行文件或共享库)内:如果该地址位于某个加载模块的映射基址与该模块映射的最高虚拟地址之间(包括两端),则认为该地址在该加载模块的范围内。如果某个加载模块符合这个条件,则会搜索其动态符号表,以查找与指定的address最接近的符号。在这里,fishhook以image的header作为address调用dladdr();若返回0,则直接返回,不对该image做任何处理。

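2、从Mach-O中获取符号表地址、字符串表地址以及动态符号表(间接符号表)地址
代码中先计算基址:linkedit_base = slide(ASLR偏移) + __LINKEDIT段的vmaddr - __LINKEDIT段的fileoff
符号表地址(代码中所用符号的信息) = linkedit_base + 符号表偏移量
字符串表地址(存储所有符号名称的字符串) = linkedit_base + 字符串表偏移量
动态符号表地址(存储各符号在符号表中的索引) = linkedit_base + 动态符号表偏移量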
其中:
符号表偏移量: 在Load Commands中找到LC_SYMTAB,其中symoff就是符号表偏移量
字符串表偏移量: 在Load Commands中找到LC_SYMTAB,其中stroff就是字符串表偏移量
动态符号表偏移量: 在Load Commands中找到LC_DYSYMTAB,其中indirectsymoff就是动态符号表偏移量

3、遍历所有的Load Commands,定位至__DATA段和__DATA_CONST段,并遍历这两个段的所有section,找到section type分别为S_LAZY_SYMBOL_POINTERS和S_NON_LAZY_SYMBOL_POINTERS的section,将section信息和符号表地址、字符串表地址以及动态符号表地址一起作为perform_rebinding_with_section函数的入参。

perform_rebinding_with_section函数

/*
 * Applies the rebindings list to one symbol pointer section.
 *
 * rebindings:      head of the list of requested rebindings.
 * section:         the section to patch; `reserved1` is used as the index
 *                  of its first entry in indirect_symtab, and `addr` plus
 *                  `slide` as the address of its array of pointer slots.
 * symtab/strtab:   symbol table and string table used to resolve names.
 * indirect_symtab: table mapping pointer slots to symtab indices.
 *
 * Entry i of the indirect index array and slot i of the pointer array are
 * treated as describing the same symbol. Slots whose index is one of the
 * INDIRECT_SYMBOL_ABS / INDIRECT_SYMBOL_LOCAL markers are skipped, as are
 * symbols whose name is shorter than two characters.
 *
 * The name is compared without its first character (presumably the leading
 * '_' of C symbols -- confirm against the toolchain). On the first match in
 * list order, the current slot value is saved through `replaced` when that
 * pointer is non-NULL and the slot does not already hold the replacement;
 * the slot is then overwritten with `replacement` and the search for that
 * slot stops.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings, section_t *section, intptr_t slide, nlist_t *symtab, char *strtab, uint32_t *indirect_symtab) {
  uint32_t *indirect_indices = indirect_symtab + section->reserved1;
  void **pointer_slots = (void **)((uintptr_t)slide + section->addr);

  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    const uint32_t symtab_index = indirect_indices[i];
    const int is_abs_or_local =
        symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == INDIRECT_SYMBOL_ABS ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS);
    if (is_abs_or_local) {
      continue;
    }

    const char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    // Names with fewer than two characters can never match a rebinding.
    if (symbol_name[0] == '\0' || symbol_name[1] == '\0') {
      continue;
    }

    int rebound = 0;
    struct rebindings_entry *entry = rebindings;
    while (entry != NULL && !rebound) {
      for (uint j = 0; j < entry->rebindings_nel; j++) {
        struct rebinding *candidate = &entry->rebindings[j];
        if (strcmp(&symbol_name[1], candidate->name) != 0) {
          continue;
        }
        // Hand the current implementation back to the caller, unless the
        // slot already points at the replacement.
        if (candidate->replaced != NULL &&
            pointer_slots[i] != candidate->replacement) {
          *(candidate->replaced) = pointer_slots[i];
        }
        pointer_slots[i] = candidate->replacement;
        rebound = 1;
        break;
      }
      entry = entry->next;
    }
  }
}

分析:接下来的分析以S_LAZY_SYMBOL_POINTERS类型的section为例,S_NON_LAZY_SYMBOL_POINTERS类型的section与之类似。

1、从section中获取reserved1,表示懒加载符号位于动态符号表的起始索引值,并使用indirect_symbol_indices指针指向动态符号中懒加载符号的起始位置。

2、从section中获取addr,加上slide后得到indirect_symbol_bindings,它指向一个数组,数组中的内容是所有懒加载函数的实现地址。在该数组中,懒加载函数实现地址的顺序与动态符号表中懒加载符号的顺序是一致的。例如:若懒加载函数实现地址数组中第一个为NSLog的地址,那么动态符号表的第reserved1个就是NSLog的符号。

3、遍历section中所有符号,使用indirect_symbol_indices取下标方法得到符号在动态符号表中的值(symtab_index),这个值就是符号在符号表中的索引号

4、以symtab_index作为下标,访问符号表,可以得到符号的名称。

5、遍历rebindings_entry链表,对比HOOK函数的名称与当前定位的符号表中符号的名称。若一致,则将indirect_symbol_bindings中符号对应的函数实现地址修改成rebindings_entry链表中replacement指定的地址。并将原函数地址赋值给replaced

至此,函数HOOK完成。当调用被HOOK的函数时,从indirect_symbol_bindings中查到的函数实现地址已经是替换后的新函数地址了。

总结

  • 【第一步】通过接口设置需要HOOK的函数名称,以及需要替换的函数地址。
  • 【第二步】从Mach-O中得到符号表、字符表、动态符号表的地址
  • 【第三步】从section中获取懒加载符号在动态符号表中的起始索引值。
  • 【第四步】从第三步的得到的起始索引值开始遍历动态符号表,得到符号位于符号表的索引值。
  • 【第五步】从符号表中获取符号位于字符串表的偏移值。
  • 【第六步】从字符串表中获取符号名称,并与设置HOOK函数的名称进行对比,若一致,则修改符号对应的函数实现地址为replacement指定的地址,并将原函数地址赋值给replaced

流程图如下:

16214137877191.jpg

你可能感兴趣的:(15 - fishhook源码解析)