开始文章之前, 按照惯例先把Fishhook文档中的图片放在最前面。
其中的四个表格值得我们特别注意:
- Lazy Symbol Pointer Table (延迟符号指针表)
- Indirect Symbol Table (间接符号表)
- Symbol Table (符号表)
- String Table (字符串表)
/* One node of a singly linked list; each node holds one batch of rebindings. */
struct rebindings_entry {
/* Array of rebinding descriptors owned by this node. */
struct rebinding *rebindings;
/* Number of elements in the rebindings array. */
size_t rebindings_nel;
/* Next node in the list; traversal stops when this is NULL. */
struct rebindings_entry *next;};
/* File-scope list head; _rebind_symbols_for_image passes it to rebind_symbols_for_image. */
static struct rebindings_entry *_rebindings_head;
/*
 * Copies `nel` rebinding descriptors into a freshly allocated list node and
 * pushes that node onto the front of the list at *rebindings_head.
 *
 * rebindings_head  address of the list head pointer; updated on success
 * rebindings       array of descriptors to copy (the caller keeps ownership)
 * nel              number of elements in `rebindings`
 *
 * Returns 0 on success. Returns -1, leaving the list untouched, when the
 * requested byte count does not fit in size_t or when an allocation fails.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  /*
   * Refuse element counts whose byte size would wrap around: a wrapped
   * product would yield an undersized buffer that memcpy then overruns.
   */
  if (nel > (size_t)-1 / sizeof(struct rebinding)) {
    return -1;
  }
  const size_t copy_bytes = sizeof(struct rebinding) * nel;

  struct rebindings_entry *new_entry = malloc(sizeof *new_entry);
  if (!new_entry) {
    return -1;
  }
  new_entry->rebindings = malloc(copy_bytes);
  if (!new_entry->rebindings) {
    /* Do not leak the node when the array allocation fails. */
    free(new_entry);
    return -1;
  }
  memcpy(new_entry->rebindings, rebindings, copy_bytes);
  new_entry->rebindings_nel = nel;

  /* Link in at the front so the newest batch is visited first. */
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}
复制代码
先介绍最重要的一个库 dlfcn:
int dladdr(const void* addr, Dl_info* info);复制代码
The dladdr() function queries dyld (the dynamic linker) for information
about the image containing the address addr. The information is returned
in the structure specified by info. The structure contains at least the
following members:
const char* dli_fname The pathname of the shared object containing
the address.
void* dli_fbase The base address (mach_header) at which the
image is mapped into the address space of the
calling process.
const char* dli_sname The name of the nearest run-time symbol with a
value less than or equal to addr.
void* dli_saddr The value of the symbol returned in dli_sname.
The dladdr() function is available only in dynamically linked programs.复制代码
const struct mach_header* _dyld_get_image_header(uint32_t image_index)
returns a pointer to the mach header of the image indexed by image_index.
If image_index is out of range, NULL is returned.复制代码
intptr_t _dyld_get_image_vmaddr_slide(uint32_t image_index)
returns the virtual memory address slide amount of the image
indexed by image_index. If image_index is out of range zero is returned.复制代码
/*
* The mach header appears at the very beginning of the object file; it
* is the same for both 32-bit and 64-bit architectures.
*/
struct mach_header {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier (CPU type) */
cpu_subtype_t cpusubtype; /* machine specifier (machine type) */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
};复制代码
/*
 * 64-bit counterpart of struct mach_header: the same seven fields followed
 * by one additional reserved 32-bit word.
 */
struct mach_header_64 {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
uint32_t reserved; /* reserved */
};复制代码
/*
* The load commands directly follow the mach_header. The total size of all
* of the commands is given by the sizeofcmds field in the mach_header. All
* load commands must have as their first two fields cmd and cmdsize. The cmd
* field is filled in with a constant for that command type. Each command type
* has a structure specifically for it. The cmdsize field is the size in bytes
* of the particular load command structure plus anything that follows it that
* is a part of the load command (i.e. section structures, strings, etc.). To
* advance to the next load command the cmdsize can be added to the offset or
* pointer of the current load command. The cmdsize for 32-bit architectures
* MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple
* of 8 bytes (these are forever the maximum alignment of any load commands).
* The padded bytes must be zero. All tables in the object file must also
* follow these rules so the file can be memory mapped. Otherwise the pointers
* to these tables will not work well or at all on some machines. With all
* padding zeroed like objects will compare byte for byte.
*/
/*
load command 紧跟在 mach_header 之后。所有 load command 的总大小由 mach_header 中的 sizeofcmds 字段给出。
所有的 load command 都必须以 cmd 和 cmdsize 作为前两个字段。cmd 字段是表示该命令类型的常量值,每种命令类型都有
它们各自对应的结构体。cmdsize 字段是该加载命令结构体的大小(以字节为单位),再加上其后属于该加载命令的所有内容(例如 section 结构体、字符串等)。
要前进到下一个加载命令,可以把 cmdsize 加到当前加载命令的偏移量或指针上。
32 位体系结构的 cmdsize 必须是 4 字节的倍数,64 位体系结构的 cmdsize 必须是 8 字节的倍数(这永远是任何加载命令的最大对齐)。
填充字节必须为零。目标文件中的所有表也必须遵循这些规则,以便文件可以进行内存映射,
否则指向这些表的指针在某些机器上将无法正常工作,甚至完全无法工作。所有填充都归零后,相同的目标文件可以逐字节比较相等。
*/
/*
 * Common prefix shared by every load command: the command type followed by
 * the total size of the command in bytes.
 *
 * Both fields are fixed 32-bit values. They are declared as uint32_t rather
 * than unsigned long because unsigned long is 8 bytes on LP64 targets, which
 * would make this struct 16 bytes and misread every load command.
 */
struct load_command {
    uint32_t cmd;     /* type of load command */
    uint32_t cmdsize; /* total size of command in bytes */
};
/* Constants for the cmd field of all load commands, the type */
#define LC_SEGMENT 0x1 /* segment of this file to be mapped */
#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */
#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */
#define LC_THREAD 0x4 /* thread */
#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */
#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */
#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */
#define LC_IDENT 0x8 /* object identification info (obsolete) */
#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */
#define LC_PREPAGE 0xa /* prepage command (internal use) */
#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */
#define LC_LOAD_DYLIB 0xc /* load a dynamically linked shared library */
#define LC_ID_DYLIB 0xd /* dynamically linked shared lib identification */
#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */
#define LC_ID_DYLINKER 0xf /* dynamic linker identification */
#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamically */
/* linked shared library */复制代码
/*
* The segment load command indicates that a part of this file is to be
* mapped into the task's address space. The size of this segment in memory,
* vmsize, maybe equal to or larger than the amount to map from this file,
* filesize. The file is mapped starting at fileoff to the beginning of
* the segment in memory, vmaddr. The rest of the memory of the segment,
* if any, is allocated zero fill on demand. The segment's maximum virtual
* memory protection and initial virtual memory protection are specified
* by the maxprot and initprot fields. If the segment has sections then the
* section structures directly follow the segment command and their size is
* reflected in cmdsize.
*/
/*
 * Segment load command for 32-bit images.
 *
 * Every integer field is a fixed 32-bit value, so the fields are declared as
 * uint32_t. unsigned long is 8 bytes on LP64 targets and would shift every
 * field after segname, breaking code that parses a 32-bit image there.
 */
struct segment_command { /* for 32-bit architectures */
    uint32_t  cmd;         /* LC_SEGMENT */
    uint32_t  cmdsize;     /* includes sizeof section structs */
    char      segname[16]; /* segment name, e.g. __TEXT, __DATA, __LINKEDIT */
    uint32_t  vmaddr;      /* memory address of this segment */
    uint32_t  vmsize;      /* memory size of this segment */
    uint32_t  fileoff;     /* file offset of this segment */
    uint32_t  filesize;    /* amount to map from the file */
    vm_prot_t maxprot;     /* maximum VM protection */
    vm_prot_t initprot;    /* initial VM protection */
    uint32_t  nsects;      /* number of sections in segment */
    uint32_t  flags;       /* flags */
};
/*
 * The 64-bit segment load command indicates that a part of this file is to be
 * mapped into a 64-bit task's address space. If the 64-bit segment has
 * sections then section_64 structures directly follow the 64-bit segment
 * command and their size is reflected in cmdsize.
 *
 * Compared with struct segment_command, the vmaddr, vmsize, fileoff and
 * filesize fields are widened to 64 bits; the remaining fields are 32-bit.
 */
struct segment_command_64 { /* for 64-bit architectures */
uint32_t cmd; /* LC_SEGMENT_64 */
uint32_t cmdsize; /* includes sizeof section_64 structs */
char segname[16]; /* segment name, e.g. __TEXT, __DATA, __LINKEDIT */
uint64_t vmaddr; /* memory address of this segment */
uint64_t vmsize; /* memory size of this segment */
uint64_t fileoff; /* file offset of this segment */
uint64_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment */
uint32_t flags; /* flags */
};
复制代码
/*
 * LC_SYMTAB load command: locates the symbol table and the string table.
 *
 * All fields are fixed 32-bit values; uint32_t keeps the struct at its
 * 24-byte on-disk size on LP64 targets, where unsigned long is 8 bytes.
 */
struct symtab_command {
    uint32_t cmd;     /* LC_SYMTAB */
    uint32_t cmdsize; /* sizeof(struct symtab_command) */
    uint32_t symoff;  /* symbol table offset */
    uint32_t nsyms;   /* number of symbol table entries */
    uint32_t stroff;  /* string table offset */
    uint32_t strsize; /* string table size in bytes */
};
复制代码
/*
 * LC_DYSYMTAB load command: describes the dynamic symbol table information,
 * including the location and length of the indirect symbol table.
 *
 * Every field is a fixed 32-bit value; uint32_t keeps the struct at its
 * 80-byte on-disk size on LP64 targets, where unsigned long is 8 bytes.
 */
struct dysymtab_command {
    uint32_t cmd;     /* LC_DYSYMTAB */
    uint32_t cmdsize; /* sizeof(struct dysymtab_command) */

    /*
     * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
     * are grouped into the following three groups:
     *    local symbols (further grouped by the module they are from)
     *    defined external symbols (further grouped by the module they are from)
     *    undefined symbols
     *
     * The local symbols are used only for debugging.  The dynamic binding
     * process may have to use them to indicate to the debugger the local
     * symbols for a module that is being bound.
     *
     * The last two groups are used by the dynamic binding process to do the
     * binding (indirectly through the module table and the reference symbol
     * table when this is a dynamically linked shared library file).
     */
    uint32_t ilocalsym;  /* index to local symbols */
    uint32_t nlocalsym;  /* number of local symbols */

    uint32_t iextdefsym; /* index to externally defined symbols */
    uint32_t nextdefsym; /* number of externally defined symbols */

    uint32_t iundefsym;  /* index to undefined symbols */
    uint32_t nundefsym;  /* number of undefined symbols */

    /*
     * For the dynamic binding process to find which module a symbol
     * is defined in the table of contents is used (analogous to the ranlib
     * structure in an archive) which maps defined external symbols to modules
     * they are defined in.  This exists only in a dynamically linked shared
     * library file.  For executable and object modules the defined external
     * symbols are sorted by name and are used as the table of contents.
     */
    uint32_t tocoff; /* file offset to table of contents */
    uint32_t ntoc;   /* number of entries in table of contents */

    /*
     * To support dynamic binding of "modules" (whole object files) the symbol
     * table must reflect the modules that the file was created from.  This is
     * done by having a module table that has indexes and counts into the merged
     * tables for each module.  The module structure that these two entries
     * refer to is described below.  This exists only in a dynamically linked
     * shared library file.  For executable and object modules the file only
     * contains one module so everything in the file belongs to the module.
     */
    uint32_t modtaboff; /* file offset to module table */
    uint32_t nmodtab;   /* number of module table entries */

    /*
     * To support dynamic module binding the module structure for each module
     * indicates the external references (defined and undefined) each module
     * makes.  For each module there is an offset and a count into the
     * reference symbol table for the symbols that the module references.
     * This exists only in a dynamically linked shared library file.  For
     * executable and object modules the defined external symbols and the
     * undefined external symbols indicate the external references.
     */
    uint32_t extrefsymoff; /* offset to referenced symbol table */
    uint32_t nextrefsyms;  /* number of referenced symbol table entries */

    /*
     * The sections that contain "symbol pointers" and "routine stubs" have
     * indexes and (implied counts based on the size of the section and fixed
     * size of the entry) into the "indirect symbol" table for each pointer
     * and stub.  For every section of these two types the index into the
     * indirect symbol table is stored in the section header in the field
     * reserved1.  An indirect symbol table entry is simply a 32bit index into
     * the symbol table to the symbol that the pointer or stub is referring to.
     * The indirect symbol table is ordered to match the entries in the section.
     */
    uint32_t indirectsymoff; /* file offset to the indirect symbol table */
    uint32_t nindirectsyms;  /* number of indirect symbol table entries */

    /*
     * To support relocating an individual module in a library file quickly the
     * external relocation entries for each module in the library need to be
     * accessed efficiently.  Since the relocation entries can't be accessed
     * through the section headers for a library file they are separated into
     * groups of local and external entries further grouped by module.  In this
     * case the presence of this load command whose extreloff, nextrel,
     * locreloff and nlocrel fields are non-zero indicates that the relocation
     * entries of non-merged sections are not referenced through the section
     * structures (and the reloff and nreloc fields in the section headers are
     * set to zero).
     *
     * Since the relocation entries are not accessed through the section headers
     * this requires the r_address field to be something other than a section
     * offset to identify the item to be relocated.  In this case r_address is
     * set to the offset from the vmaddr of the first LC_SEGMENT command.
     *
     * The relocation entries are grouped by module and the module table
     * entries have indexes and counts into them for the group of external
     * relocation entries for that module.
     *
     * For sections that are merged across modules there must not be any
     * remaining external relocation entries for them (for merged sections
     * remaining relocation entries must be local).
     */
    uint32_t extreloff; /* offset to external relocation entries */
    uint32_t nextrel;   /* number of external relocation entries */

    /*
     * All the local relocation entries are grouped together (they are not
     * grouped by their module since they are only used if the object is moved
     * from its statically link edited address).
     */
    uint32_t locreloff; /* offset to local relocation entries */
    uint32_t nlocrel;   /* number of local relocation entries */
};
/*
* A segment is made up of zero or more sections. Non-MH_OBJECT files have
* all of their segments with the proper sections in each, and padded to the
* specified segment alignment when produced by the link editor. The first
* segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header
* and load commands of the object file before it's first section. The zero
* fill sections are always last in their segment (in all formats). This
* allows the zeroed segment padding to be mapped into memory where zero fill
* sections might be. The gigabyte zero fill sections, those with the section
* type S_GB_ZEROFILL, can only be in a segment with sections of this type.
* These segments are then placed after all other segments.
*
* The MH_OBJECT format has all of it's sections in one segment for
* compactness. There is no padding to a specified segment boundary and the
* mach_header and load commands are not part of the segment.
*
* Sections with the same section name, sectname, going into the same segment,
* segname, are combined by the link editor. The resulting section is aligned
* to the maximum alignment of the combined sections and is the new section's
* alignment. The combined sections are aligned to their original alignment in
* the combined section. Any padded bytes to get the specified alignment are
* zeroed.
*
* The format of the relocation entries referenced by the reloff and nreloc
* fields of the section structure for mach object files is described in the
* header file <reloc.h>.
*/
/*
 * Section header for 32-bit images; these follow a struct segment_command.
 *
 * Every integer field is a fixed 32-bit value; uint32_t keeps the struct at
 * its 68-byte on-disk size on LP64 targets, where unsigned long is 8 bytes.
 */
struct section { /* for 32-bit architectures */
    char     sectname[16]; /* name of this section */
    char     segname[16];  /* segment this section goes in */
    uint32_t addr;         /* memory address of this section */
    uint32_t size;         /* size in bytes of this section */
    uint32_t offset;       /* file offset of this section */
    uint32_t align;        /* section alignment (power of 2) */
    uint32_t reloff;       /* file offset of relocation entries */
    uint32_t nreloc;       /* number of relocation entries */
    uint32_t flags;        /* flags (section type and attributes) */
    uint32_t reserved1;    /* reserved (for offset or index) */
    uint32_t reserved2;    /* reserved (for count or sizeof) */
};
/*
 * Section header for 64-bit images. Compared with struct section, addr and
 * size are 64-bit and a third reserved word is appended.
 */
struct section_64 { /* for 64-bit architectures */
char sectname[16]; /* name of this section */
char segname[16]; /* segment this section goes in */
uint64_t addr; /* memory address of this section */
uint64_t size; /* size in bytes of this section */
uint32_t offset; /* file offset of this section */
uint32_t align; /* section alignment (power of 2) */
uint32_t reloff; /* file offset of relocation entries */
uint32_t nreloc; /* number of relocation entries */
uint32_t flags; /* flags (section type and attributes)*/
uint32_t reserved1; /* reserved (for offset or index) */
uint32_t reserved2; /* reserved (for count or sizeof) */
uint32_t reserved3; /* reserved */
};复制代码
ASLR
ASLR 是 Address Space Layout Randomization 的缩写,这个概念在业界由来已久,并非苹果原创。由于 vmaddr (虚拟地址) 是链接器链接的时候写入 Mach-O 文件的,对于一个程序来说是静态不变的,因此给黑客攻击带来了便利,iOS 4.3 以后引入了 ASLR,给每个镜像在 vmaddr 的基础上再加一个随机的偏移量 slide,因此每段数据的真实的虚拟地址是 vmaddr + slide。
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
if (dladdr(header, &info) == 0) {
return;
}
segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL;
struct symtab_command* symtab_cmd = NULL;
struct dysymtab_command* dysymtab_cmd = NULL;
// 获取Load Command的起始位置
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
// 便利每一个Command来获取几个表格的位置
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
//在LC_SEGMENT 中遍历寻找__LINKEDIT的 Section
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
//遍历寻找LC_SYMTAB
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
//遍历寻找LC_DYSYMTAB
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
/* 检测必要的数据结构是否都存在
* LC_SYMTAB这个LoadCommand主要提供了两个信息
* Symbol Table的偏移量与Symbol Table中元素的个数
* String Table的偏移量与String Table的长度
* LC_DYSYMTAB提供了动态符号表的位移和元素个数,还有一些其他的表格索引
* LC_SEGMENT.__LINKEDIT 含有为动态链接库使用的原始数据
*/
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment || !dysymtab_cmd->nindirectsyms) {
return;
}
// 找到__LINKEDIT段的头地址
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
// 获取符号表的真实地址
// 符号表的地址 = 基址 + 符号表偏移量
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// 获取字符表的真实地址
// 字符串表的地址 = 基址 + 字符串表偏移量
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// 获取间接符号表的真实地址
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 && strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}
/*
 * Two-argument wrapper around rebind_symbols_for_image that supplies the
 * file-scope list _rebindings_head as the rebindings to apply.
 * NOTE(review): presumably registered as a per-image callback elsewhere in
 * the file; the registration is not visible here, so confirm against the caller.
 */
static void _rebind_symbols_for_image(const struct mach_header *header,
intptr_t slide) {
rebind_symbols_for_image(_rebindings_head, header, slide);
}复制代码
/*
 * Applies the rebinding list to one symbol-pointer section.
 *
 * For each pointer slot in the section, the symbol name is looked up through
 * the indirect symbol table, the symbol table and the string table. The
 * first rebinding whose name matches has its replacement written into the
 * slot; the previous slot value is first saved through `replaced` when that
 * pointer is non-NULL and the slot does not already hold the replacement.
 *
 * rebindings       list of rebinding batches, searched front to back
 * section          the symbol-pointer section to patch
 * slide            offset added to the section's link-time address
 * symtab           symbol table of the image
 * strtab           string table of the image
 * indirect_symtab  indirect symbol table of the image
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  /* reserved1 is this section's starting index in the indirect symbol table. */
  uint32_t *section_indices = indirect_symtab + section->reserved1;
  void **pointer_slots = (void **)((uintptr_t)slide + section->addr);

  for (uint32_t slot = 0; slot < section->size / sizeof(void *); slot++) {
    const uint32_t sym_idx = section_indices[slot];

    /* Entries marked absolute and/or local have no symbol table entry. */
    const bool is_special = sym_idx == INDIRECT_SYMBOL_ABS ||
                            sym_idx == INDIRECT_SYMBOL_LOCAL ||
                            sym_idx == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS);
    if (is_special) {
      continue;
    }

    char *raw_name = strtab + symtab[sym_idx].n_un.n_strx;

    /* Names shorter than two characters can never match; skip the search. */
    if (raw_name[0] == '\0' || raw_name[1] == '\0') {
      continue;
    }
    /* Comparison ignores the first character (presumably the leading '_'). */
    const char *bare_name = raw_name + 1;

    bool rebound = false;
    struct rebindings_entry *entry = rebindings;
    while (entry != NULL && !rebound) {
      for (uint32_t k = 0; k < entry->rebindings_nel; k++) {
        struct rebinding *candidate = &entry->rebindings[k];
        if (strcmp(bare_name, candidate->name) != 0) {
          continue;
        }
        /* Hand the current target back to the caller before overwriting it. */
        if (candidate->replaced != NULL && pointer_slots[slot] != candidate->replacement) {
          *(candidate->replaced) = pointer_slots[slot];
        }
        pointer_slots[slot] = candidate->replacement;
        rebound = true;
        break;
      }
      entry = entry->next;
    }
  }
}