hook基础库函数可以实现多种功能,比如:
1.malloc/free,内存监控;
2.pthread_create,线程泄漏;
3.open/close,fd泄漏;
有助于程序的稳定性检测。
本文记录hook方法和原理,持续补充
glibc malloc hook,plt hook(xhook)
原理:glibc提供__malloc_hook
, __realloc_hook
, __free_hook
可以实现hook自定义malloc/free函数
__malloc_initialize_hook
是弱变量,malloc初始化的时候会调用1次,赋值指向hook指针初始化函数my_init_hook
,__malloc_hook
指向自定义的malloc函数my_malloc_hook
,在调用malloc
的时候,实际会调用到my_malloc_hook
my_malloc_hook
中,还原__malloc_hook
,再调用malloc
,真正分配内存,最后__malloc_hook
指向my_malloc_hook
,下次调用malloc
时,再次进入my_malloc_hook
。__malloc_hook = old_malloc_hook;
result = malloc(size);
old_malloc_hook = __malloc_hook;
__malloc_hook = my_malloc_hook;
free和realloc同理。
man __malloc_initialize_hook,直接查看源码和例子
定义:
#include <malloc.h>
void *(*__malloc_hook)(size_t size, const void *caller);
void *(*__realloc_hook)(void *ptr, size_t size, const void *caller);
void *(*__memalign_hook)(size_t alignment, size_t size,
const void *caller);
void (*__free_hook)(void *ptr, const void *caller);
void (*__malloc_initialize_hook)(void);
void (*__after_morecore_hook)(void);
例子:
#include <stdio.h>
#include <malloc.h>
/* Prototypes for our hooks. */
static void my_init_hook(void);
static void *my_malloc_hook(size_t, const void *);
/* Variables to save original hooks. */
/* Holds the value __malloc_hook had before my_malloc_hook was installed, so
 * that it can be put back while the real malloc() runs. */
static void *(*old_malloc_hook)(size_t, const void *);
/* Override initializing hook from the C library. */
/* NOTE: the malloc_hook(3) notes state that __malloc_initialize_hook has been
 * removed from the glibc API from version 2.24 onwards. */
void (*__malloc_initialize_hook) (void) = my_init_hook;
/*
 * Hook-installation routine that is assigned to __malloc_initialize_hook.
 * Prints the current value of __malloc_hook, saves it in old_malloc_hook,
 * then points __malloc_hook at my_malloc_hook.
 */
static void
my_init_hook(void)
{
/* Printed before the swap, so the value shown is the hook that was in place on entry. */
printf("in my_init_hook __malloc_hook:%p \n", __malloc_hook);
/* Remember the previous hook so my_malloc_hook can restore it temporarily. */
old_malloc_hook = __malloc_hook;
/* From here on, calls routed through __malloc_hook reach my_malloc_hook. */
__malloc_hook = my_malloc_hook;
}
/*
 * Replacement allocator installed through __malloc_hook.
 *
 * size   - number of bytes requested by the caller of malloc().
 * caller - address passed in by the hook mechanism; it is only printed.
 *
 * Returns whatever the underlying malloc() returned for `size`.
 *
 * The hook is uninstalled for the duration of the nested malloc() call so
 * that the call does not recurse back into this function, and is reinstalled
 * before returning. While it is uninstalled, a malloc() issued by another
 * thread bypasses this function; the sequence is therefore not thread-safe.
 */
static void *
my_malloc_hook(size_t size, const void *caller)
{
    void *result;

    /* Restore all old hooks */
    __malloc_hook = old_malloc_hook;
    /* Call recursively */
    result = malloc(size);
    /* Save underlying hooks */
    old_malloc_hook = __malloc_hook;
    /* printf() might call malloc(), so protect it too. */
    /* %zu prints the full size_t; the previous (unsigned int) cast with %u
     * truncated requests larger than UINT_MAX on 64-bit targets. */
    printf("malloc(%zu) called from %p returns %p\n",
           size, caller, result);
    /* Restore our own hooks */
    __malloc_hook = my_malloc_hook;
    return result;
}
/*
 * Demo driver: performs two malloc() calls and one C++ `new int`.
 * The sample output that follows in the article shows all three reaching
 * the malloc hook (sizes 10, 20 and 4). The allocations are intentionally
 * left unused and unfreed; the program exits right after.
 */
int main(){
    int *ten_bytes = (int *)malloc(10);
    int *twenty_bytes = (int *)malloc(20);
    int *single_int = new int;
    return 0;
}
结果:
malloc(10) called from 0x5647f6d77b07 returns 0x5647f7e07280
malloc(20) called from 0x5647f6d77b15 returns 0x5647f7e072a0
malloc(4) called from 0x7fbabdb4d258 returns 0x5647f7e072c0
缺点:
my_malloc_hook
,真正分配内存时,__malloc_hook
直接指向原来的malloc
,这时其他线程malloc会直接调用真正的malloc
,不会进去my_malloc_hook
。my_malloc_hook
中加锁也无法解决多线程调用问题。NOTES
The use of these hook functions is not safe in multithreaded programs, and they are now deprecated. From glibc 2.24 onwards, the __malloc_initialize_hook variable has been removed from the API. Programmers should instead preempt calls to the relevant functions by defining and exporting functions such as “malloc” and “free”.
PLT(Procedure Linkage Table) hook 是基于动态链接实现的hook。
参考爱奇艺开源的xhook,针对ELF格式。
https://github.com/iqiyi/xHook
https://github.com/iqiyi/xHook/blob/master/docs/overview/android_plt_hook_overview.zh-CN.md
qiyi的文档很清楚地描述了通过直接改so的.rel.plt(重定位section),实现hook malloc到自定义的函数。
从文档中malloc的例子可以看出,plt hook的关键在于找到需要hook函数的重定位地址:so在调用该函数时,会跳转到该地址中保存的函数地址。同时plt hook是基于动态链接实现的,不能hook elf的内部函数(即不经过PLT的调用)。
下面将根据xhook的部分代码,详细描述找到symbol的重定位地址。
最好了解elf的相关知识,我自己先补了下《程序员的自我修养》
ubuntu 64位,为了方便直接在PC上测试,qiyi的例子是在android上。PLT hook的优势在于不像inline_hook那样直接修改汇编指令,只要是elf格式的都能支持,与不同架构处理器的指令无关。
testso.cpp
#include <stdio.h>
#include <stdlib.h>
#include "testso.h"
/*
 * Test routine built into libtestso.so.
 * Prints a begin marker, performs two malloc() calls (10 and 20 bytes) and
 * one `new int[10]`, then prints an end marker. The allocations are never
 * used or released; they exist only to generate allocator calls.
 */
void sotest_malloc(){
    printf("===%s begin===\n", __func__);
    int *small_buf = (int *)malloc(10);
    int *large_buf = (int *)malloc(20);
    int *int_array = new int[10];
    printf("===%s end===\n", __func__);
}
编译得到libtestso.so
。通过readelf
可以看到elf文件信息
readelf -a libtestso.so
首先可以找到.rela.plt
section,malloc的offset=0x201020
, 通过直接修改(libtestso.so基地址)+0x201020
的值就能hook到定义的函数。
Relocation section '.rela.plt' at offset 0x568 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000201018 000100000007 R_X86_64_JUMP_SLO 0000000000000000 printf@GLIBC_2.2.5 + 0
000000201020 000300000007 R_X86_64_JUMP_SLO 0000000000000000 malloc@GLIBC_2.2.5 + 0
000000201028 000700000007 R_X86_64_JUMP_SLO 0000000000000000 _Znwm@GLIBCXX_3.4 + 0
目前hook malloc的关键在于怎么找到0x201020
这个值。
通过maps,得到libtestso.so的基地址=7f7b4cdf9000
cat /proc/[pid]/maps
......
7f7b4cbef000-7f7b4cdef000 ---p 001e7000 08:03 4723484 /lib/x86_64-linux-gnu/libc-2.27.so
7f7b4cdef000-7f7b4cdf3000 r--p 001e7000 08:03 4723484 /lib/x86_64-linux-gnu/libc-2.27.so
7f7b4cdf3000-7f7b4cdf5000 rw-p 001eb000 08:03 4723484 /lib/x86_64-linux-gnu/libc-2.27.so
7f7b4cdf5000-7f7b4cdf9000 rw-p 00000000 00:00 0
7f7b4cdf9000-7f7b4cdfa000 r-xp 00000000 08:08 4212888 /home/fengqian/workspaces/code/hookTest/plt_hook/build/libtestso.so
7f7b4cdfa000-7f7b4cff9000 ---p 00001000 08:08 4212888 /home/fengqian/workspaces/code/hookTest/plt_hook/build/libtestso.so
7f7b4cff9000-7f7b4cffa000 r--p 00000000 08:08 4212888 /home/fengqian/workspaces/code/hookTest/plt_hook/build/libtestso.so
7f7b4cffa000-7f7b4cffb000 rw-p 00001000 08:08 4212888 /home/fengqian/workspaces/code/hookTest/plt_hook/build/libtestso.so
7f7b4cffb000-7f7b4d015000 r-xp 00000000 08:03 4723617 /lib/x86_64-linux-gnu/libpthread-2.27.so
7f7b4d015000-7f7b4d214000 ---p 0001a000 08:03 4723617 /lib/x86_64-linux-gnu/libpthread-2.27.so
7f7b4d214000-7f7b4d215000 r--p 00019000 08:03 4723617 /lib/x86_64-linux-gnu/libpthread-2.27.so
......
代码中打开/proc/self/maps,查找
if(NULL == (fp = fopen("/proc/self/maps", "r"))) return;
while(fgets(line, sizeof(line), fp))
{
......
}
xhook里的主要函数
int xh_elf_init(xh_elf_t *self, uintptr_t base_addr, const char *pathname)
ELF Header
和Program Headers
//ELF Header
self->ehdr = (ElfW(Ehdr) *)base_addr;
//Program Headers
self->phdr = (ElfW(Phdr) *)(base_addr + self->ehdr->e_phoff); //segmentation fault sometimes
Elf64_Ehdr->e_phoff,Elf64_Ehdr->e_phnum,分别为Program Headers的偏移和个数。
对照readelf:
e_phoff = 64
e_phnum = 7
ELF Header:
Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
Class: ELF64
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: DYN (Shared object file)
Machine: Advanced Micro Devices X86-64
Version: 0x1
Entry point address: 0x620
Start of program headers: 64 (bytes into file)
Start of section headers: 9344 (bytes into file)
Flags: 0x0
Size of this header: 64 (bytes)
Size of program headers: 56 (bytes)
Number of program headers: 7
Size of section headers: 64 (bytes)
Number of section headers: 33
Section header string table index: 32
/*
 * ELF64 file header, as declared in elf.h.
 * e_phoff and e_phnum give the file offset and entry count of the program
 * header table; e_shoff and e_shnum do the same for the section header table.
 */
typedef struct
{
unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
Elf64_Half e_type; /* Object file type */
Elf64_Half e_machine; /* Architecture */
Elf64_Word e_version; /* Object file version */
Elf64_Addr e_entry; /* Entry point virtual address */
Elf64_Off e_phoff; /* Program header table file offset */
Elf64_Off e_shoff; /* Section header table file offset */
Elf64_Word e_flags; /* Processor-specific flags */
Elf64_Half e_ehsize; /* ELF header size in bytes */
Elf64_Half e_phentsize; /* Program header table entry size */
Elf64_Half e_phnum; /* Program header table entry count */
Elf64_Half e_shentsize; /* Section header table entry size */
Elf64_Half e_shnum; /* Section header table entry count */
Elf64_Half e_shstrndx; /* Section header string table index */
} Elf64_Ehdr;
找到类型为PT_LOAD
,且offset=0
的segment,计算基地址bias_addr(即加载偏移:bias_addr = base_addr - 该segment的p_vaddr)
计算基地址的精确方法是什么?
。dynamic segment
ElfW(Phdr) *dhdr = xh_elf_get_first_segment_by_type(self, PT_DYNAMIC);
self->dyn = (ElfW(Dyn) *)(self->bias_addr + dhdr->p_vaddr);
self->dyn_sz = dhdr->p_memsz;
找到Program Headers中PT_DYNAMIC
segment,得到PT_DYNAMIC
的地址,根据readelf
dhdr->p_vaddr = 200e00
dyn = 基地址+200e00
dyn_sz = 0x1e0 = 480
/*
 * ELF64 program header entry, as declared in elf.h; one entry describes one
 * segment (type, flags, file offset, virtual address, sizes, alignment).
 */
typedef struct
{
Elf64_Word p_type; /* Segment type */
Elf64_Word p_flags; /* Segment flags */
Elf64_Off p_offset; /* Segment file offset */
Elf64_Addr p_vaddr; /* Segment virtual address */
Elf64_Addr p_paddr; /* Segment physical address */
Elf64_Xword p_filesz; /* Segment size in file */
Elf64_Xword p_memsz; /* Segment size in memory */
Elf64_Xword p_align; /* Segment alignment */
} Elf64_Phdr;
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x000000000000083c 0x000000000000083c R E 0x200000
LOAD 0x0000000000000df0 0x0000000000200df0 0x0000000000200df0
0x0000000000000248 0x0000000000000250 RW 0x200000
DYNAMIC 0x0000000000000e00 0x0000000000200e00 0x0000000000200e00
0x00000000000001e0 0x00000000000001e0 RW 0x8
NOTE 0x00000000000001c8 0x00000000000001c8 0x00000000000001c8
0x0000000000000024 0x0000000000000024 R 0x4
GNU_EH_FRAME 0x0000000000000798 0x0000000000000798 0x0000000000000798
0x0000000000000024 0x0000000000000024 R 0x4
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x10
GNU_RELRO 0x0000000000000df0 0x0000000000200df0 0x0000000000200df0
0x0000000000000210 0x0000000000000210 R 0x1
Dynamic结构体和elf内容如下,参考elf.h
DT_STRTAB:Address of string table, .dynstr
DT_SYMTAB:Address of symbol table,.dynsym
DT_PLTREL:Type of reloc in PLT
DT_JMPREL:Address of PLT relocs, rel.plt
DT_GNU_HASH:GNU-style hash table
DT_HASH:Address of symbol hash table
DT_REL/DT_RELA:Address of Rel/Rela relocs, rel.dyn
解析完Dynamic section
后需要的信息都准备好了。dyn结构体和readelf如下:
/*
 * ELF64 dynamic section entry, as declared in elf.h.
 * d_tag selects the entry type; the union holds the entry's payload, read
 * either as an integer (d_val) or as an address (d_ptr).
 */
typedef struct
{
Elf64_Sxword d_tag; /* Dynamic entry type */
union
{
Elf64_Xword d_val; /* Integer value */
Elf64_Addr d_ptr; /* Address value */
} d_un;
} Elf64_Dyn;
Dynamic section at offset 0xe00 contains 26 entries:
Tag Type Name/Value
0x0000000000000001 (NEEDED) Shared library: [libstdc++.so.6]
0x0000000000000001 (NEEDED) Shared library: [libc.so.6]
0x000000000000000e (SONAME) Library soname: [libtestso.so]
0x000000000000000c (INIT) 0x5b0
0x000000000000000d (FINI) 0x760
0x0000000000000019 (INIT_ARRAY) 0x200df0
0x000000000000001b (INIT_ARRAYSZ) 8 (bytes)
0x000000000000001a (FINI_ARRAY) 0x200df8
0x000000000000001c (FINI_ARRAYSZ) 8 (bytes)
0x000000006ffffef5 (GNU_HASH) 0x1f0
0x0000000000000005 (STRTAB) 0x380
0x0000000000000006 (SYMTAB) 0x230
0x000000000000000a (STRSZ) 222 (bytes)
0x000000000000000b (SYMENT) 24 (bytes)
0x0000000000000003 (PLTGOT) 0x201000
0x0000000000000002 (PLTRELSZ) 72 (bytes)
0x0000000000000014 (PLTREL) RELA
0x0000000000000017 (JMPREL) 0x568
0x0000000000000007 (RELA) 0x4c0
0x0000000000000008 (RELASZ) 168 (bytes)
0x0000000000000009 (RELAENT) 24 (bytes)
0x000000006ffffffe (VERNEED) 0x480
0x000000006fffffff (VERNEEDNUM) 2
0x000000006ffffff0 (VERSYM) 0x45e
0x000000006ffffff9 (RELACOUNT) 3
0x0000000000000000 (NULL) 0x0
注:
DT_STRTAB,DT_SYMTAB,DT_JMPREL,DT_REL/DT_RELA,DT_GNU_HASH
这些值从readelf看应该是偏移地址,xhook中代码,也是按偏移地址写的:
case DT_STRTAB:
{
self->strtab = (const char *)(self->bias_addr + dyn->d_un.d_ptr);
if((ElfW(Addr))(self->strtab) < self->base_addr) return XH_ERRNO_FORMAT;
break;
}
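但我在ubuntu跑时,这几个值是绝对地址,如DT_STRTAB,val=7f03a081d380
,380
就是readelf看到的值。这里可能是平台编译的差异?是不是哪里的section可以判断是绝对地址还是偏移地址?原因暂时未知,值是绝对地址挺奇怪的。补充:这更可能是动态链接器的差异而不是编译差异——glibc的ld.so在加载so时,会把内存中.dynamic里DT_STRTAB、DT_SYMTAB、DT_JMPREL等条目的d_ptr原地加上加载基址;readelf读取的是磁盘上的文件,所以看到的仍是偏移。Android的linker不会改写这些值,所以xhook按偏移地址处理。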
代码先改为如下:
case DT_STRTAB:
{
//dyn->d_un.d_ptr 是绝对地址??
if (dyn->d_un.d_ptr > self->bias_addr)
self->strtab = (const char *)(dyn->d_un.d_ptr);
else
self->strtab = (const char *)(self->bias_addr + dyn->d_un.d_ptr);
if((ElfW(Addr))(self->strtab) < self->base_addr) return XH_ERRNO_FORMAT;
break;
}
先看.dynsym
,malloc的index=3
,对应.rela.plt中info的高32位(即符号索引,ELF64_R_SYM(info)=3,readelf显示的前4位为0003)
,只需要找到malloc在dynsym中的index,那么.rela.plt
中info index为3的offset
就为需要找的malloc重定位地址。
Symbol table '.dynsym' contains 14 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND printf@GLIBC_2.2.5 (2)
2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND malloc@GLIBC_2.2.5 (2)
4: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_deregisterTMCloneTab
5: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_registerTMCloneTable
6: 0000000000000000 0 FUNC WEAK DEFAULT UND __cxa_finalize@GLIBC_2.2.5 (2)
7: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _Znwm@GLIBCXX_3.4 (3)
8: 0000000000201040 0 NOTYPE GLOBAL DEFAULT 23 _end
9: 0000000000201038 0 NOTYPE GLOBAL DEFAULT 22 _edata
10: 00000000000006fa 101 FUNC GLOBAL DEFAULT 12 _Z13sotest_mallocv
11: 0000000000201038 0 NOTYPE GLOBAL DEFAULT 23 __bss_start
12: 00000000000005b0 0 FUNC GLOBAL DEFAULT 9 _init
13: 0000000000000760 0 FUNC GLOBAL DEFAULT 13 _fini
Relocation section '.rela.plt' at offset 0x568 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000201018 000100000007 R_X86_64_JUMP_SLO 0000000000000000 printf@GLIBC_2.2.5 + 0
000000201020 000300000007 R_X86_64_JUMP_SLO 0000000000000000 malloc@GLIBC_2.2.5 + 0
000000201028 000700000007 R_X86_64_JUMP_SLO 0000000000000000 _Znwm@GLIBCXX_3.4 + 0
找symbol index涉及到DT_HASH
和DT_GNU_HASH
2种
//find symbol index by symbol name
if(0 != (r = xh_elf_find_symidx_by_name(self, symbol, &symidx))) return 0;
主要通过strtab
和symtab
得到符号字符串,对比是否需要找的symbol。
const char *symname = self->strtab + self->symtab[i].st_name;
确定symbol的index后,在.rela.plt
中根据info的index就可以找到malloc的Offset
,最后去修改这个Offset地址的值,改为自定义的函数就实现了malloc hook。
xh_elf_plain_reloc_iterator_init(&plain_iter, self->relplt, self->relplt_sz, self->is_use_rela);
while(NULL != (rel_common = xh_elf_plain_reloc_iterator_next(&plain_iter)))
{
if(0 != (r = xh_elf_find_and_replace_func(self,
(self->is_use_rela ? ".rela.plt" : ".rel.plt"), 1,
symbol, new_func, old_func,
symidx, rel_common, &found))) return r;
if(found) break;
}
https://github.com/qianfeng0/HookSample/tree/master/plt_hook
//testso.cpp
/*
 * Test routine built into libtestso.so.
 * Prints a begin marker, performs two malloc() calls (10 and 20 bytes) and
 * one `new int[10]`, then prints an end marker. The allocations are never
 * used or released; they exist only to generate allocator calls.
 */
void sotest_malloc(){
    printf("===%s begin===\n", __func__);
    int *small_buf = (int *)malloc(10);
    int *large_buf = (int *)malloc(20);
    int *int_array = new int[10];
    printf("===%s end===\n", __func__);
}
//main.cpp
/*
 * Logging allocator used as the malloc replacement in the PLT-hook demo
 * (presumably the function that hook() redirects the library's malloc to;
 * confirm against the hook() implementation).
 *
 * size - number of bytes requested.
 *
 * Prints the requested size, then forwards the request to malloc() and
 * returns its result unchanged (NULL on allocation failure).
 */
void *my_malloc(size_t size)
{
    void *block;

    printf("%zu bytes memory are allocated by libtest.so\n", size);
    block = malloc(size);
    return block;
}
/*
 * PLT-hook demo driver: requests a hook on "malloc" for libtestso.so, then
 * calls the library's sotest_malloc() so its allocations can be observed.
 */
int main()
{
/* Hook first, so the allocations made inside sotest_malloc() below are the ones reported in the sample output. */
hook("libtestso.so", "malloc");
sotest_malloc();
return 0;
}
结果:
10 bytes memory are allocated by libtest.so
20 bytes memory are allocated by libtest.so
上面代码只hook了libtestso.so的malloc,libtestso.so在调用new的时候实际是调用到libstdc++.so
,libstdc++.so
再调用malloc
,需要hook libstdc++.so
才能监测到new
。加上:
hook("libstdc++.so", "malloc");
结果:
10 bytes memory are allocated by libtest.so
20 bytes memory are allocated by libtest.so
40 bytes memory are allocated by libtest.so