此前《ELF文件——栈回溯》一文中只描述了通过exidx进行栈回溯的过程,本文将描述根据eh_frame进行栈回溯的原理及过程。
dwarf的核心是一张表格,该表格根据函数的压栈过程获取,以一个函数的汇编代码为例:
0000000000023c80 <_dl_start>:
_dl_start():
/usr/src/debug/glibc/2.31+gitAUTOINC+f84949f1c4-r0/git/csu/init-first.c:96
23c80: a9bf7bfd stp x29, x30, [sp, #-16]!
23c84: 910003fd mov x29, sp
/usr/src/debug/glibc/2.31+gitAUTOINC+f84949f1c4-r0/git/csu/init-first.c:97
23c88: 94000024 bl 23d18 <abort>
其对应的表格为(readelf -wF):
00000050 0000000000000014 00000054 FDE cie=00000000 pc=0000000000023c80..0000000000023c8c
LOC CFA x29 ra
0000000000023c80 sp+0 u u
0000000000023c84 sp+16 c-16 c-8
表格中的原始数据格式为(readelf -wf):
00000050 0000000000000014 00000054 FDE cie=00000000 pc=0000000000023c80..0000000000023c8c
DW_CFA_advance_loc: 4 to 0000000000023c84 //ip地址
DW_CFA_def_cfa_offset: 16 //sp偏移16
DW_CFA_offset: r29 (x29) at cfa-16
DW_CFA_offset: r30 (x30) at cfa-8
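根据汇编对照表格可获得如下解释:在23c80处(函数入口,尚未压栈)CFA=sp+0,x29和ra(x30)均未保存(u);执行完 stp x29, x30, [sp, #-16]! 之后(自23c84起)sp下移了16字节,因此CFA=sp+16,x29保存在CFA-16处,返回地址ra(x30)保存在CFA-8处。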
上述表格通过CIE(common information entry)和FDE(frame description entry)来描述,CIE和FDE都存储在eh_frame section中;为了加快FDE和CIE的查找,还存在一个eh_frame_hdr section,它们各自的描述如下:
Encoding | Field |
---|---|
unsigned char | version |
unsigned char | eh_frame_ptr_enc |
unsigned char | fde_count_enc |
unsigned char | table_enc |
encoded | eh_frame_ptr |
encoded | fde_count |
binary search table | |
各成员说明如下: |
eh_frame_hdr在libunwind库中的声明如下:
/* Fixed-size prefix of the .eh_frame_hdr section as declared by libunwind.
   Only the four one-byte fields and the first encoded pointer are members;
   the remaining, variable-length part is described in the comment below.  */
struct __attribute__((packed)) dwarf_eh_frame_hdr
{
unsigned char version;
unsigned char eh_frame_ptr_enc; // encoding used for the eh_frame member below
unsigned char fde_count_enc; // encoding used for fde_count
unsigned char table_enc; // encoding used for the search-table entries
Elf_W (Addr) eh_frame; // points to the .eh_frame section
/* The rest of the header is variable-length and consists of the
following members:
(a variable-length tail that records the contents of the search table)
encoded_t fde_count;
struct
{
encoded_t start_ip; // first address covered by this FDE; stored as an offset relative to the eh_frame_hdr address, not as a mapped address
encoded_t fde_addr; // address of the FDE
}
binary_search_table[fde_count]; */
};
Field | Description |
---|---|
Length | Required |
Extended Length | Optional |
CIE Pointer | Required |
PC begin | Required |
PC Range | Required |
Augmentation Data Length | Optional |
Augmentation Data | Optional |
Call Frame Instructions | Required |
Padding |
FDE中各成员的长度不定,因此不能以固定格式的结构体对其进行描述,而是需要根据编码规则/标志读取每个成员对应的值,各成员的含义说明如下:
Field | Description |
---|---|
Length | Required |
Extended Length | Optional |
CIE ID | Required |
version | Required |
Augmentation String | Required |
Code Alignment Factor | Required |
Data Alignment Factor | Required |
Return Address Register | Required |
Augmentation Data Length | Optional |
Augmentation Data | Optional |
Initial Instructions | Optional |
Padding | |
各成员说明如下: |
注意:
对于没有fde table的elf文件而言,可转向debug_frame去搜索符合条件的fde,只不过此时需要从debug_frame开始逐行对每个fde进行解析,直到找到目标fde。而拥有fde table的elf文件则可以直接根据搜索表定位到目标fde的地址,直接解析该fde即可。
/*src/dwarf/Gparser.c*/
/* Step the cursor to the caller's frame: first work out where the
   registers of the current frame were saved, then apply that register
   state to the cursor.  A negative result from the lookup is returned
   unchanged; otherwise the result of apply_reg_state() is returned.  */
HIDDEN int
dwarf_step (struct dwarf_cursor *c)
{
  dwarf_state_record_t state;
  int status;

  /* [1.1] locate the register state for the current frame */
  status = find_reg_state (c, &state);
  if (status < 0)
    return status;

  /* [1.2] apply it to move the cursor one frame up */
  return apply_reg_state (c, &state.rs_current);
}
[1.1] 获取寄存器状态,根据FDE填充寄存器
/* Find the saved locations.

   Fills SR->rs_current with the register state describing the frame the
   cursor C currently points at.  A state found through
   get_rs_cache()/rs_lookup() is copied out directly; otherwise the proc
   info is fetched, a state record is built from it and, when a cache is
   available, the result is stored in a new cache slot.

   Returns 0 on success, or the negative value produced by
   fetch_proc_info()/create_state_record_for().  */
static int
find_reg_state (struct dwarf_cursor *c, dwarf_state_record_t *sr)
{
dwarf_reg_state_t *rs;
struct dwarf_rs_cache *cache;
int ret = 0;
intrmask_t saved_mask;
/* Cache hit: both a cache and a matching entry exist. */
if ((cache = get_rs_cache(c->as, &saved_mask)) &&
(rs = rs_lookup(cache, c)))
{
/* update hint; no locking needed: single-word writes are atomic */
unsigned short index = rs - cache->buckets;
c->use_prev_instr = ! cache->links[index].signal_frame;
memcpy (&sr->rs_current, rs, sizeof (*rs));
}
else
{
/* Cache miss (or no cache): look up the unwind info for c->ip. */
ret = fetch_proc_info (c, c->ip); //[1.1.1]
/* Keep the current value unless the lookup below succeeds. */
int next_use_prev_instr = c->use_prev_instr;
if (ret >= 0)
{
/* Update use_prev_instr for the next frame. */
assert(c->pi.unwind_info);
struct dwarf_cie_info *dci = c->pi.unwind_info;
next_use_prev_instr = ! dci->signal_frame;
ret = create_state_record_for (c, sr, c->ip);//[1.1.2]
}
/* The unwind info is released whether or not the lookup succeeded. */
put_unwind_info (c, &c->pi);
c->use_prev_instr = next_use_prev_instr;
if (cache && ret >= 0)
{
/* Store the freshly computed state in a new cache slot. */
rs = rs_new (cache, c);
cache->links[rs - cache->buckets].hint = 0;
memcpy(rs, &sr->rs_current, sizeof(*rs));
}
}
unsigned short index = -1;
if (cache)
{
put_rs_cache (c->as, cache, &saved_mask);
if (rs)
{
/* Record this slot as the hint of the previously used slot and
remember it as the new "previous" slot. */
index = rs - cache->buckets;
c->hint = cache->links[index].hint;
cache->links[c->prev_rs].hint = index + 1;
c->prev_rs = index;
}
}
if (ret < 0)
return ret;
/* Only reached with ret >= 0; index was set from rs above. */
if (cache)
tdep_reuse_frame (c, cache->links[index].signal_frame);
return 0;
}
[1.1.1] 获取FDE
/* Fill c->pi with the proc info (including unwind info) that covers IP.

   The dynamically registered info is consulted first; only when that
   reports -UNW_ENOINFO is tdep_find_proc_info() tried.  Returns the
   negative error from tdep_find_proc_info(), -UNW_ENOINFO when the
   resulting format is not one of DYNAMIC / TABLE / REMOTE_TABLE, and
   otherwise the value returned by the lookup that was used.  */
static int
fetch_proc_info (struct dwarf_cursor *c, unw_word_t ip)
{
int ret, dynamic = 1;
/* The 'ip' can point either to the previous or next instruction
depending on what type of frame we have: normal call or a place
to resume execution (e.g. after signal frame).
For a normal call frame we need to back up so we point within the
call itself; this is important because a) the call might be the
very last instruction of the function and the edge of the FDE,
and b) so that run_cfi_program() runs locations up to the call
but not more.
For signal frame, we need to do the exact opposite and look
up using the current 'ip' value. That is where execution will
continue, and it's important we get this right, as 'ip' could be
right at the function entry and hence FDE edge, or at instruction
that manipulates CFA (push/pop). */
if (c->use_prev_instr)
--ip;
memset (&c->pi, 0, sizeof (c->pi));
/* Search the dynamically registered unwind info first; if nothing is
found there, fall back to tdep_find_proc_info() below. */
/* check dynamic info first --- it overrides everything else */
ret = unwi_find_dynamic_proc_info (c->as, ip, &c->pi, 1,
c->as_arg);
if (ret == -UNW_ENOINFO)
{
dynamic = 0;
if ((ret = tdep_find_proc_info (c, ip, 1)) < 0)
return ret;
}
/* Reject any unwind-info format other than the three handled here. */
if (c->pi.format != UNW_INFO_FORMAT_DYNAMIC
&& c->pi.format != UNW_INFO_FORMAT_TABLE
&& c->pi.format != UNW_INFO_FORMAT_REMOTE_TABLE)
return -UNW_ENOINFO;
c->pi_valid = 1;
c->pi_is_dynamic = dynamic;
/* Let system/machine-dependent code determine frame-specific attributes. */
if (ret >= 0)
tdep_fetch_frame (c, ip, 1);
return ret;
}
/* Dispatch the dynamic proc-info lookup according to the address space:
   the local address space (unwinding the current process) is served by
   local_find_proc_info(), every other address space by
   remote_find_proc_info().  The callee's return value is passed through.  */
HIDDEN int
unwi_find_dynamic_proc_info (unw_addr_space_t as, unw_word_t ip,
                             unw_proc_info_t *pi, int need_unwind_info,
                             void *arg)
{
  if (as != unw_local_addr_space)
    return remote_find_proc_info (as, ip, pi, need_unwind_info, arg);

  return local_find_proc_info (as, ip, pi, need_unwind_info, arg);
}
/* Walk the list returned by _U_dyn_info_list_addr() and, for the first
   entry whose [start_ip, end_ip) range contains IP, extract its proc info
   into PI.  Returns -UNW_ENOINFO when no entry covers IP or (in builds
   without UNW_LOCAL_ONLY) when the weak symbol _U_dyn_info_list_addr is
   not available.  */
static inline int
local_find_proc_info (unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
                      int need_unwind_info, void *arg)
{
  unw_dyn_info_list_t *registry;
  unw_dyn_info_t *entry;

  /* Without a registration list there is nothing to search.  */
#ifndef UNW_LOCAL_ONLY
# pragma weak _U_dyn_info_list_addr
  if (!_U_dyn_info_list_addr)
    return -UNW_ENOINFO;
#endif

  registry = (unw_dyn_info_list_t *) (uintptr_t) _U_dyn_info_list_addr ();

  entry = registry->first;
  while (entry)
    {
      if (entry->start_ip <= ip && ip < entry->end_ip)
        return unwi_extract_dynamic_proc_info (as, ip, pi, entry,
                                               need_unwind_info, arg);
      entry = entry->next;
    }

  return -UNW_ENOINFO;
}
/* The walkthrough below uses the ARM architecture with local unwinding as
   its example; these macros come from /include/tdep-arm/libunwind_i.h.

   With UNW_LOCAL_ONLY the ARM implementations are called directly;
   otherwise the call goes through the accessor callbacks stored in the
   address space.  */
#ifdef UNW_LOCAL_ONLY
# define tdep_find_proc_info(c,ip,n) \
arm_find_proc_info((c)->as, (ip), &(c)->pi, (n), \
(c)->as_arg)
# define tdep_put_unwind_info(as,pi,arg) \
arm_put_unwind_info((as), (pi), (arg))
#else
# define tdep_find_proc_info(c,ip,n) \
(*(c)->as->acc.find_proc_info)((c)->as, (ip), &(c)->pi, (n), \
(c)->as_arg)
# define tdep_put_unwind_info(as,pi,arg) \
(*(as)->acc.put_unwind_info)((as), (pi), (arg))
#endif
/*/src/arm/Gex_tables.c*/
/* Find the proc info covering IP on ARM.

   The DWARF tables are tried first when that method is enabled.  If that
   did not succeed and the EXIDX method is enabled, the loaded objects are
   scanned with arm_phdr_cb() and the resulting exidx table is searched.
   Returns the result of whichever search ran last, -UNW_ENOINFO when the
   EXIDX scan found no table, or -1 when neither method was attempted.  */
HIDDEN int
arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
                    unw_proc_info_t *pi, int need_unwind_info, void *arg)
{
  intrmask_t saved_mask;
  int ret = -1;

  Debug (14, "looking for IP=0x%lx\n", (long) ip);

  /* DWARF-based lookup.  */
  if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF))
    ret = dwarf_find_proc_info (as, ip, pi, need_unwind_info, arg);

  /* Done if DWARF succeeded or EXIDX is not to be tried.  */
  if (ret >= 0 || !UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
    return ret;

  /* EXIDX-based lookup.  */
  struct arm_cb_data cb_data;

  memset (&cb_data, 0, sizeof (cb_data));
  cb_data.ip = ip;
  cb_data.pi = pi;
  cb_data.di.format = -1;

  SIGPROCMASK (SIG_SETMASK, &unwi_full_mask, &saved_mask);
  ret = dl_iterate_phdr (arm_phdr_cb, &cb_data);
  SIGPROCMASK (SIG_SETMASK, &saved_mask, NULL);

  /* The callback signals success by filling in di.format.  */
  if (cb_data.di.format == -1)
    return -UNW_ENOINFO;

  return arm_search_unwind_table (as, ip, &cb_data.di, pi,
                                  need_unwind_info, arg);
}
/*/src/dwarf*/
/* Find the DWARF proc info covering IP in the current process.

   dl_iterate_phdr() visits every loaded ELF object (the executable and
   all shared libraries) and calls dwarf_callback() on each one.  When the
   callback already resolved a single FDE the answer is in *PI; otherwise
   the .eh_frame_hdr search table is consulted and, if that yields
   -UNW_ENOINFO, the .debug_frame table.  Returns 0 or the search result on
   success paths and -UNW_ENOINFO when nothing was found.  */
HIDDEN int
dwarf_find_proc_info (unw_addr_space_t as, unw_word_t ip,
                      unw_proc_info_t *pi, int need_unwind_info, void *arg)
{
  struct dwarf_callback_data cb_data;
  intrmask_t saved_mask;
  int ret;

  Debug (14, "looking for IP=0x%lx\n", (long) ip);

  memset (&cb_data, 0, sizeof (cb_data));
  cb_data.ip = ip;
  cb_data.pi = pi;
  cb_data.need_unwind_info = need_unwind_info;
  cb_data.di.format = -1;
  cb_data.di_debug.format = -1;

  /* Signals are blocked while the loaded-object list is walked.  */
  SIGPROCMASK (SIG_SETMASK, &unwi_full_mask, &saved_mask);
  ret = dl_iterate_phdr (dwarf_callback, &cb_data);
  SIGPROCMASK (SIG_SETMASK, &saved_mask, NULL);

  /* No object claimed the address.  */
  if (ret <= 0)
    return -UNW_ENOINFO;

  /* The callback already stored the result in *pi.  */
  if (cb_data.single_fde)
    return 0;

  /* Use the FDE search table when the callback found one.  */
  if (cb_data.di.format == -1)
    ret = -UNW_ENOINFO;
  else
    ret = dwarf_search_unwind_table_int (as, ip, &cb_data.di,
                                         pi, need_unwind_info, arg);

  /* Not in the search table: try .debug_frame if it is available.  */
  if (ret == -UNW_ENOINFO && cb_data.di_debug.format != -1)
    ret = dwarf_search_unwind_table_int (as, ip, &cb_data.di_debug, pi,
                                         need_unwind_info, arg);

  return ret;
}
/*/src/dwarf/Gfind_proc_info-lsb.c*/
/* ptr is a pointer to a dwarf_callback_data structure and, on entry,
member ip contains the instruction-pointer we're looking
for.

dl_iterate_phdr() callback, invoked once per loaded object.  Returns 0
when this object does not cover the ip (or its header has an unexpected
version), a negative value on a malformed dl_phdr_info or a read error,
and otherwise the "found" indicator computed below.  */
HIDDEN int
dwarf_callback (struct dl_phdr_info *info, size_t size, void *ptr)
{
struct dwarf_callback_data *cb_data = ptr;
unw_dyn_info_t *di = &cb_data->di;
const Elf_W(Phdr) *phdr, *p_eh_hdr, *p_dynamic, *p_text;
unw_word_t addr, eh_frame_start, eh_frame_end, fde_count, ip;
Elf_W(Addr) load_base, max_load_addr = 0;
int ret, need_unwind_info = cb_data->need_unwind_info;
unw_proc_info_t *pi = cb_data->pi;
struct dwarf_eh_frame_hdr *hdr = NULL;
unw_accessors_t *a;
long n;
int found = 0;
struct dwarf_eh_frame_hdr synth_eh_frame_hdr;
#ifdef CONFIG_DEBUG_FRAME
unw_word_t start, end;
#endif /* CONFIG_DEBUG_FRAME*/
/* The target ip to locate. */
ip = cb_data->ip;
/* Make sure struct dl_phdr_info is at least as big as we need. */
if (size < offsetof (struct dl_phdr_info, dlpi_phnum)
+ sizeof (info->dlpi_phnum))
return -1;
Debug (15, "checking %s, base=0x%lx)\n",
info->dlpi_name, (long) info->dlpi_addr);
phdr = info->dlpi_phdr; // program header table of this object
load_base = info->dlpi_addr; // load address of this object
p_text = NULL; // program header of the segment containing ip
p_eh_hdr = NULL; // program header of the eh_frame_hdr segment
p_dynamic = NULL; // program header of the dynamic segment
/* See if PC falls into one of the loaded segments. Find the
eh-header segment at the same time. */
for (n = info->dlpi_phnum; --n >= 0; phdr++) // walk every program header of this object
{
if (phdr->p_type == PT_LOAD) // a loadable segment
{
Elf_W(Addr) vaddr = phdr->p_vaddr + load_base;
if (ip >= vaddr && ip < vaddr + phdr->p_memsz) // ip lies inside this segment, so treat it as the text segment
p_text = phdr; // remember the text segment header
if (vaddr + phdr->p_filesz > max_load_addr) // track the highest file-backed address loaded so far
max_load_addr = vaddr + phdr->p_filesz;
}
else if (phdr->p_type == PT_GNU_EH_FRAME)
p_eh_hdr = phdr; // remember the eh_frame_hdr program header
else if (phdr->p_type == PT_DYNAMIC)
p_dynamic = phdr; // remember the dynamic-segment program header
}
if (!p_text) // no segment contains ip: the address is not in this object
return 0;
if (p_eh_hdr) // eh_frame_hdr exists: compute its mapped address
{
hdr = (struct dwarf_eh_frame_hdr *) (p_eh_hdr->p_vaddr + load_base);
}
else
{ // no eh_frame_hdr: try to synthesize one from the .eh_frame section
Elf_W (Addr) eh_frame;
Debug (1, "no .eh_frame_hdr section found\n");
eh_frame = dwarf_find_eh_frame_section (info);
if (eh_frame)
{
Debug (1, "using synthetic .eh_frame_hdr section for %s\n",
info->dlpi_name);
/* The synthetic header has no fde_count and no search table, which
forces the linear-search path below. */
synth_eh_frame_hdr.version = DW_EH_VERSION;
synth_eh_frame_hdr.eh_frame_ptr_enc = DW_EH_PE_absptr |
((sizeof(Elf_W (Addr)) == 4) ? DW_EH_PE_udata4 : DW_EH_PE_udata8);
synth_eh_frame_hdr.fde_count_enc = DW_EH_PE_omit;
synth_eh_frame_hdr.table_enc = DW_EH_PE_omit;
synth_eh_frame_hdr.eh_frame = eh_frame;
hdr = &synth_eh_frame_hdr;
}
}
if (hdr) // a (real or synthetic) eh_frame_hdr is available
{
if (p_dynamic) // the object has a dynamic segment
{
/* For dynamicly linked executables and shared libraries,
DT_PLTGOT is the value that data-relative addresses are
relative to for that object. We call this the "gp". */
Elf_W(Dyn) *dyn = (Elf_W(Dyn) *)(p_dynamic->p_vaddr + load_base);
for (; dyn->d_tag != DT_NULL; ++dyn)
if (dyn->d_tag == DT_PLTGOT) // DT_PLTGOT entry found: use its value as gp
{
/* Assume that _DYNAMIC is writable and GLIBC has
relocated it (true for x86 at least). */
di->gp = dyn->d_un.d_ptr;
break;
}
}
else
/* Otherwise this is a static executable with no _DYNAMIC. Assume
that data-relative addresses are relative to 0, i.e.,
absolute. */
di->gp = 0;
pi->gp = di->gp; // copy gp into the proc info as well
if (hdr->version != DW_EH_VERSION)
{
Debug (1, "table `%s' has unexpected version %d\n",
info->dlpi_name, hdr->version);
return 0;
}
a = unw_get_accessors_int (unw_local_addr_space); // accessors of the local address space
addr = (unw_word_t) (uintptr_t) (&hdr->eh_frame); // address of the first encoded (variable-length) field
/* (Optionally) read eh_frame_ptr: */ // yields eh_frame_start; addr is passed by pointer so the reader can advance it
if ((ret = dwarf_read_encoded_pointer (unw_local_addr_space, a,
&addr, hdr->eh_frame_ptr_enc, pi,
&eh_frame_start, NULL)) < 0)
return ret;
/* (Optionally) read fde_count: */ // yields fde_count
if ((ret = dwarf_read_encoded_pointer (unw_local_addr_space, a,
&addr, hdr->fde_count_enc, pi,
&fde_count, NULL)) < 0)
return ret;
/* Without a usable FDE search table, .eh_frame has to be scanned linearly. */
if (hdr->table_enc != (DW_EH_PE_datarel | DW_EH_PE_sdata4))
{
/* If there is no search table or it has an unsupported
encoding, fall back on linear search. */
if (hdr->table_enc == DW_EH_PE_omit)
Debug (4, "table `%s' lacks search table; doing linear search\n",
info->dlpi_name);
else
Debug (4, "table `%s' has encoding 0x%x; doing linear search\n",
info->dlpi_name, hdr->table_enc);
eh_frame_end = max_load_addr; /* XXX can we do better? */
if (hdr->fde_count_enc == DW_EH_PE_omit)
fde_count = ~0UL;
if (hdr->eh_frame_ptr_enc == DW_EH_PE_omit)
abort ();
Debug (1, "eh_frame_start = %lx eh_frame_end = %lx\n",
eh_frame_start, eh_frame_end);
/* XXX we know how to build a local binary search table for
.debug_frame, so we could do that here too. */
found = linear_search (unw_local_addr_space, ip,
eh_frame_start, eh_frame_end, fde_count,
pi, need_unwind_info, NULL);
if (found != 1)
found = 0;
else
cb_data->single_fde = 1;
}
else // a search table exists: only describe it in di here; the caller performs the lookup
{
di->format = UNW_INFO_FORMAT_REMOTE_TABLE;
di->start_ip = p_text->p_vaddr + load_base; // start address of the text segment
di->end_ip = p_text->p_vaddr + load_base + p_text->p_memsz; // end address of the text segment
di->u.rti.name_ptr = (unw_word_t) (uintptr_t) info->dlpi_name; // name of the object
di->u.rti.table_data = addr; // address of the search table
assert (sizeof (struct table_entry) % sizeof (unw_word_t) == 0);
/* table_len is expressed in units of unw_word_t. */
di->u.rti.table_len = (fde_count * sizeof (struct table_entry)
/ sizeof (unw_word_t));
/* For the binary-search table in the eh_frame_hdr, data-relative
means relative to the start of that section... */
di->u.rti.segbase = (unw_word_t) (uintptr_t) hdr; // base for relative addresses is the eh_frame_hdr address
// i.e. the addresses stored in the table entries are all relative to segbase
found = 1;
Debug (15, "found table `%s': segbase=0x%lx, len=%lu, gp=0x%lx, "
"table_data=0x%lx\n", (char *) (uintptr_t) di->u.rti.name_ptr,
(long) di->u.rti.segbase, (long) di->u.rti.table_len,
(long) di->gp, (long) di->u.rti.table_data);
}
}
#ifdef CONFIG_DEBUG_FRAME
/* Find the start/end of the described region by parsing the phdr_info
structure. */
start = (unw_word_t) -1;
end = 0;
/* Compute the lowest start and highest end over all PT_LOAD segments. */
for (n = 0; n < info->dlpi_phnum; n++)
{
if (info->dlpi_phdr[n].p_type == PT_LOAD)
{
unw_word_t seg_start = info->dlpi_addr + info->dlpi_phdr[n].p_vaddr;
unw_word_t seg_end = seg_start + info->dlpi_phdr[n].p_memsz;
if (seg_start < start)
start = seg_start;
if (seg_end > end)
end = seg_end;
}
}
// Per the original note: reads the debug frame section from the file and fills in the table (di_debug)
found = dwarf_find_debug_frame (found, &cb_data->di_debug, ip,
info->dlpi_addr, info->dlpi_name, start,
end);
#endif /* CONFIG_DEBUG_FRAME */
return found;
}
/* dwarf_search_unwind_table_int is another name for
   dwarf_search_unwind_table: declared through ALIAS() on non-clang
   compilers, and as a plain macro when building with clang.  */
#ifndef __clang__
static ALIAS(dwarf_search_unwind_table) int
dwarf_search_unwind_table_int (unw_addr_space_t as,
unw_word_t ip,
unw_dyn_info_t *di,
unw_proc_info_t *pi,
int need_unwind_info, void *arg);
#else
#define dwarf_search_unwind_table_int dwarf_search_unwind_table
#endif
/* Look up the FDE covering IP in the search table described by DI and
   extract its proc info into PI.

   DI is either a remote-style table (the .eh_frame_hdr binary search
   table) or UNW_INFO_FORMAT_TABLE (an index built for .debug_frame).
   IP must lie inside [di->start_ip, di->end_ip).

   Returns 0 on success, -UNW_ENOINFO when no table entry or no matching
   FDE covers IP, or the negative error returned by the lookup/FDE
   parsing helpers.  */
int
dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
                           unw_dyn_info_t *di, unw_proc_info_t *pi,
                           int need_unwind_info, void *arg)
{
  const struct table_entry *e = NULL, *table;
  unw_word_t ip_base = 0, segbase = 0, last_ip, fde_addr;
  unw_accessors_t *a;
#ifndef UNW_LOCAL_ONLY
  struct table_entry ent;
#endif
  int ret;
  unw_word_t debug_frame_base;
  size_t table_len;

#ifdef UNW_REMOTE_ONLY
  assert (is_remote_table(di->format));
#else
  assert (is_remote_table(di->format)
          || di->format == UNW_INFO_FORMAT_TABLE);
#endif
  assert (ip >= di->start_ip && ip < di->end_ip);

  /* Search the .eh_frame_hdr table.  */
  if (is_remote_table(di->format))
    {
      table = (const struct table_entry *) (uintptr_t) di->u.rti.table_data;
      /* rti.table_len counts unw_word_t units; table_len is in bytes.  */
      table_len = di->u.rti.table_len * sizeof (unw_word_t);
      debug_frame_base = 0;
    }
  else /* Search the .debug_frame index.  */
    {
      assert(di->format == UNW_INFO_FORMAT_TABLE);
#ifndef UNW_REMOTE_ONLY
      struct unw_debug_frame_list *fdesc = (void *) di->u.ti.table_data;

      /* UNW_INFO_FORMAT_TABLE (i.e. .debug_frame) is read from local address
         space. Both the index and the unwind tables live in local memory, but
         the address space to check for properties like the address size and
         endianness is the target one. */
      as = unw_local_addr_space;
      table = fdesc->index;
      /* Also a byte count.  */
      table_len = fdesc->index_size * sizeof (struct table_entry);
      debug_frame_base = (uintptr_t) fdesc->debug_frame;
#endif
    }

  a = unw_get_accessors_int (as);

  segbase = di->u.rti.segbase;
  if (di->format == UNW_INFO_FORMAT_IP_OFFSET) {
    ip_base = di->start_ip;
  } else {
    ip_base = segbase;
  }

#ifndef UNW_REMOTE_ONLY
  if (as == unw_local_addr_space)
    {
      /* Table entries store addresses relative to ip_base.  */
      e = lookup (table, table_len, ip - ip_base);
      /* table_len is a byte count, so it has to be converted to an entry
         count before it can index TABLE; otherwise the bound lies far past
         the real end and e[1] is read out of bounds for the last entry.  */
      if (e && &e[1] < &table[table_len / sizeof (struct table_entry)])
        /* Start address of the next table entry.  */
        last_ip = e[1].start_ip_offset + ip_base;
      else
        last_ip = di->end_ip;
    }
  else
#endif
    {
#ifndef UNW_LOCAL_ONLY
      int32_t last_ip_offset = di->end_ip - ip_base;
      segbase = di->u.rti.segbase;
      if ((ret = remote_lookup (as, (uintptr_t) table, table_len,
                                ip - ip_base, &ent, &last_ip_offset, arg)) < 0)
        return ret;
      if (ret)
        {
          e = &ent;
          last_ip = last_ip_offset + ip_base;
        }
      else
        e = NULL;       /* no info found */
#endif
    }

  if (!e)
    {
      Debug (1, "IP %lx inside range %lx-%lx, but no explicit unwind info found\n",
             (long) ip, (long) di->start_ip, (long) di->end_ip);
      /* IP is inside this table's range, but there is no explicit
         unwind info. */
      return -UNW_ENOINFO;
    }

  Debug (15, "ip=0x%lx, start_ip=0x%lx\n",
         (long) ip, (long) (e->start_ip_offset));

  /* The FDE offset is relative to .debug_frame when that is the source,
     and to segbase otherwise.  */
  if (debug_frame_base)
    fde_addr = e->fde_offset + debug_frame_base;
  else
    fde_addr = e->fde_offset + segbase;

  Debug (1, "e->fde_offset = %lx, segbase = %lx, debug_frame_base = %lx, "
            "fde_addr = %lx\n", (long) e->fde_offset, (long) segbase,
            (long) debug_frame_base, (long) fde_addr);

  /* Parse the FDE found above into the proc info.  */
  if ((ret = dwarf_extract_proc_info_from_fde (as, a, &fde_addr, pi,
                                               debug_frame_base ?
                                               debug_frame_base : segbase,
                                               need_unwind_info,
                                               debug_frame_base != 0, arg)) < 0)
    return ret;

  /* .debug_frame uses an absolute encoding that does not know about any
     shared library relocation. */
  if (di->format == UNW_INFO_FORMAT_TABLE)
    {
      pi->start_ip += segbase;
      pi->end_ip += segbase;
      pi->flags = UNW_PI_FLAG_DEBUG_FRAME;
    }

#if defined(NEED_LAST_IP)
  pi->last_ip = last_ip;
#else
  (void)last_ip;
#endif

  /* The table entry only gives a candidate; confirm the FDE covers IP.  */
  if (ip < pi->start_ip || ip >= pi->end_ip)
    return -UNW_ENOINFO;

  return 0;
}
/* Extract proc-info from the FDE starting at address ADDR.
Pass BASE as zero for eh_frame behaviour, or a pointer to
debug_frame base for debug_frame behaviour.

On return *ADDRP has been advanced to the end of the entry.  Returns 0 on
success (also when the entry turns out to be a CIE, which is skipped),
-UNW_ENOINFO for a zero-length entry, -UNW_ENOMEM when the unwind-info
allocation fails, or the negative error of a failed read/parse.  */
HIDDEN int
dwarf_extract_proc_info_from_fde (unw_addr_space_t as, unw_accessors_t *a,
unw_word_t *addrp, unw_proc_info_t *pi,
unw_word_t base,
int need_unwind_info, int is_debug_frame,
void *arg)
{
unw_word_t fde_end_addr, cie_addr, cie_offset_addr, aug_end_addr = 0;
unw_word_t start_ip, ip_range, aug_size, addr = *addrp;
int ret, ip_range_encoding;
struct dwarf_cie_info dci;
uint64_t u64val;
uint32_t u32val;
Debug (12, "FDE @ 0x%lx\n", (long) addr);
memset (&dci, 0, sizeof (dci));
/* Read the first FDE member, the length.  A value other than 0xffffffff
is the length of the FDE itself; 0xffffffff means the following 64 bits
hold the length and the entry uses the 64-bit format. */
if ((ret = dwarf_readu32 (as, a, &addr, &u32val, arg)) < 0)
return ret;
if (u32val != 0xffffffff)
{
int32_t cie_offset = 0;
/* In some configurations, an FDE with a 0 length indicates the
end of the FDE-table. */
if (u32val == 0)
return -UNW_ENOINFO;
/* the FDE is in the 32-bit DWARF format */
*addrp = fde_end_addr = addr + u32val; // end address of this FDE entry
cie_offset_addr = addr;
// read the CIE offset
if ((ret = dwarf_reads32 (as, a, &addr, &cie_offset, arg)) < 0)
return ret;
if (is_cie_id (cie_offset, is_debug_frame))
/* ignore CIEs (happens during linear searches) */
return 0;
if (is_debug_frame) // the CIE address is derived differently for .debug_frame and .eh_frame
cie_addr = base + cie_offset;
else
/* DWARF says that the CIE_pointer in the FDE is a
.debug_frame-relative offset, but the GCC-generated .eh_frame
sections instead store a "pcrelative" offset, which is just
as fine as it's self-contained. */
cie_addr = cie_offset_addr - cie_offset;
}
else // 64-bit FDE entry
{
int64_t cie_offset = 0;
/* the FDE is in the 64-bit DWARF format */
if ((ret = dwarf_readu64 (as, a, &addr, &u64val, arg)) < 0)
return ret;
*addrp = fde_end_addr = addr + u64val;
cie_offset_addr = addr;
if ((ret = dwarf_reads64 (as, a, &addr, &cie_offset, arg)) < 0)
return ret;
if (is_cie_id (cie_offset, is_debug_frame))
/* ignore CIEs (happens during linear searches) */
return 0;
if (is_debug_frame)
cie_addr = base + cie_offset;
else
/* DWARF says that the CIE_pointer in the FDE is a
.debug_frame-relative offset, but the GCC-generated .eh_frame
sections instead store a "pcrelative" offset, which is just
as fine as it's self-contained. */
cie_addr = (unw_word_t) ((uint64_t) cie_offset_addr - cie_offset);
}
Debug (15, "looking for CIE at address %lx\n", (long) cie_addr);
// read and parse the CIE at cie_addr into the dci structure
if ((ret = parse_cie (as, a, cie_addr, pi, &dci, is_debug_frame, arg)) < 0)
return ret;
/* IP-range has same encoding as FDE pointers, except that it's
always an absolute value: */
ip_range_encoding = dci.fde_encoding & DW_EH_PE_FORMAT_MASK;
/* Read the start IP and the IP range using the encoding obtained from the
CIE; the FDE's call frame instructions are not interpreted here. */
if ((ret = dwarf_read_encoded_pointer (as, a, &addr, dci.fde_encoding,
pi, &start_ip, arg)) < 0
|| (ret = dwarf_read_encoded_pointer (as, a, &addr, ip_range_encoding,
pi, &ip_range, arg)) < 0)
return ret;
pi->start_ip = start_ip;
pi->end_ip = start_ip + ip_range;
pi->handler = dci.handler;
if (dci.sized_augmentation)
{
/* The augmentation data is prefixed by its ULEB128 length. */
if ((ret = dwarf_read_uleb128 (as, a, &addr, &aug_size, arg)) < 0)
return ret;
aug_end_addr = addr + aug_size;
}
if ((ret = dwarf_read_encoded_pointer (as, a, &addr, dci.lsda_encoding,
pi, &pi->lsda, arg)) < 0)
return ret;
Debug (15, "FDE covers IP 0x%lx-0x%lx, LSDA=0x%lx\n",
(long) pi->start_ip, (long) pi->end_ip, (long) pi->lsda);
// keep the parsed CIE/FDE information only when the caller asked for it
if (need_unwind_info)
{
pi->format = UNW_INFO_FORMAT_TABLE;
pi->unwind_info_size = sizeof (dci);
pi->unwind_info = mempool_alloc (&dwarf_cie_info_pool);
if (!pi->unwind_info)
return -UNW_ENOMEM;
if (dci.have_abi_marker)
{
if ((ret = dwarf_readu16 (as, a, &addr, &dci.abi, arg)) < 0
|| (ret = dwarf_readu16 (as, a, &addr, &dci.tag, arg)) < 0)
return ret;
Debug (13, "Found ABI marker = (abi=%u, tag=%u)\n",
dci.abi, dci.tag);
}
/* The FDE instructions start after the augmentation data when its size
is known, otherwise at the current read position. */
if (dci.sized_augmentation)
dci.fde_instr_start = aug_end_addr;
else
dci.fde_instr_start = addr;
dci.fde_instr_end = fde_end_addr;
memcpy (pi->unwind_info, &dci, sizeof (dci));
}
return 0;
}
[1.1.2] 解析FDE并填充寄存器
/* Build the state record SR for IP according to the unwind-info format
   stored in c->pi.  Table formats first run the CIE instructions
   (setup_fde) and then the FDE instructions (parse_fde); the dynamic
   format is handled by parse_dynamic().  Any other format yields
   -UNW_EINVAL.  */
static int
create_state_record_for (struct dwarf_cursor *c, dwarf_state_record_t *sr,
                         unw_word_t ip)
{
  int status;

  if (c->pi.format == UNW_INFO_FORMAT_TABLE
      || c->pi.format == UNW_INFO_FORMAT_REMOTE_TABLE)
    {
      /* Interpret the CIE instructions first ...  */
      status = setup_fde(c, sr);
      if (status < 0)
        return status;
      /* ... then the FDE instructions.  */
      return parse_fde (c, ip, sr);
    }

  if (c->pi.format == UNW_INFO_FORMAT_DYNAMIC)
    return parse_dynamic (c, ip, sr);

  Debug (1, "Unexpected unwind-info format %d\n", c->pi.format);
  return -UNW_EINVAL;
}
/* Prepare SR for parsing an FDE: clear it, mark every register column as
   DWARF_WHERE_SAME, take the return-address column from the CIE and run
   the CIE's instructions.  The state reached afterwards is saved as
   sr->rs_initial.  Returns 0, or the negative error from
   run_cfi_program().  */
static inline int
setup_fde (struct dwarf_cursor *c, dwarf_state_record_t *sr)
{
  struct dwarf_cie_info *cie;
  dwarf_stackable_reg_state_t *state_stack = NULL;
  unw_word_t insn_addr;
  unw_word_t loc = 0;           /* the CIE program starts at location 0 */
  int reg, status;

  assert (c->pi_valid);

  memset (sr, 0, sizeof (*sr));
  reg = 0;
  while (reg < DWARF_NUM_PRESERVED_REGS + 2)
    {
      set_reg (sr, reg, DWARF_WHERE_SAME, 0);
      ++reg;
    }

  cie = c->pi.unwind_info;
  sr->rs_current.ret_addr_column = cie->ret_addr_column;

  /* Start of the CIE instructions.  */
  insn_addr = cie->cie_instr_start;
  status = run_cfi_program (c, sr, &loc, ~(unw_word_t) 0, &insn_addr,
                            cie->cie_instr_end,
                            &state_stack, cie);
  empty_rstate_stack(&state_stack);
  if (status < 0)
    return status;

  memcpy (&sr->rs_initial, &sr->rs_current, sizeof (sr->rs_initial));
  return 0;
}
/* Run the FDE's call frame instructions, starting at the first address
   the FDE covers, up to the target location derived from IP.  Returns 0,
   or the negative error from run_cfi_program().  */
static inline int
parse_fde (struct dwarf_cursor *c, unw_word_t ip, dwarf_state_record_t *sr)
{
  struct dwarf_cie_info *cie = c->pi.unwind_info;
  dwarf_stackable_reg_state_t *state_stack = NULL;
  unw_word_t insn_addr = cie->fde_instr_start;  /* first FDE instruction */
  unw_word_t loc = c->pi.start_ip;              /* first address covered */
  int status;

  /* Process up to current `ip` for signal frame and `ip - 1` for normal call frame
     See `c->use_prev_instr` use in `fetch_proc_info` for details. */
  status = run_cfi_program (c, sr, &loc, ip - c->use_prev_instr,
                            &insn_addr, cie->fde_instr_end,
                            &state_stack, cie);
  empty_rstate_stack(&state_stack);

  return status < 0 ? status : 0;
}
[1.2] 根据解析FDE后得到的register state推算下一级frame的寄存器值,这个就是对照指令码表进行解析即可
/* Apply the register state RS to cursor C, moving it to the caller's
   frame: compute the new CFA, derive the location of every preserved
   register, then load the new ip from the return-address column.

   Returns 1 when a new ip was read, 0 when the return-address location is
   null (c->ip is set to 0), -UNW_EBADFRAME when the CFA expression yields a
   register location or when neither ip nor cfa changed, or the negative
   error of a failed register read / expression evaluation.  */
static int
apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
{
unw_word_t regnum, addr, cfa, ip;
unw_word_t prev_ip, prev_cfa;
unw_addr_space_t as;
dwarf_loc_t cfa_loc;
unw_accessors_t *a;
int i, ret;
void *arg;
/* Remember the starting point to detect a step that makes no progress. */
prev_ip = c->ip;
prev_cfa = c->cfa;
as = c->as;
arg = c->as_arg;
a = unw_get_accessors_int (as);
/* Evaluate the CFA first, because it may be referred to by other
expressions. */
if (rs->reg.where[DWARF_CFA_REG_COLUMN] == DWARF_WHERE_REG)
{
/* CFA is equal to [reg] + offset: */
/* As a special-case, if the stack-pointer is the CFA and the
stack-pointer wasn't saved, popping the CFA implicitly pops
the stack-pointer as well. */
if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == UNW_TDEP_SP)
&& (UNW_TDEP_SP < ARRAY_SIZE(rs->reg.val))
&& (rs->reg.where[UNW_TDEP_SP] == DWARF_WHERE_SAME))
cfa = c->cfa;
else
{
regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]);
if ((ret = unw_get_reg ((unw_cursor_t *) c, regnum, &cfa)) < 0)
return ret;
}
cfa += rs->reg.val[DWARF_CFA_OFF_COLUMN];
}
else
{
/* CFA is equal to EXPR: */
assert (rs->reg.where[DWARF_CFA_REG_COLUMN] == DWARF_WHERE_EXPR);
addr = rs->reg.val[DWARF_CFA_REG_COLUMN];
if ((ret = eval_location_expr (c, as, a, addr, &cfa_loc, arg)) < 0)
return ret;
/* the returned location better be a memory location... */
if (DWARF_IS_REG_LOC (cfa_loc))
return -UNW_EBADFRAME;
cfa = DWARF_GET_LOC (cfa_loc);
}
/* Work on a copy so c->loc is only replaced once every register has been
resolved without error. */
dwarf_loc_t new_loc[DWARF_NUM_PRESERVED_REGS];
memcpy(new_loc, c->loc, sizeof(new_loc));
for (i = 0; i < DWARF_NUM_PRESERVED_REGS; ++i)
{
switch ((dwarf_where_t) rs->reg.where[i])
{
case DWARF_WHERE_UNDEF:
/* No location recorded for this register. */
new_loc[i] = DWARF_NULL_LOC;
break;
case DWARF_WHERE_SAME:
/* Location carried over unchanged from the current frame. */
break;
case DWARF_WHERE_CFAREL:
/* Saved in memory at CFA + offset. */
new_loc[i] = DWARF_MEM_LOC (c, cfa + rs->reg.val[i]);
break;
case DWARF_WHERE_REG:
/* Saved in another register. */
new_loc[i] = DWARF_REG_LOC (c, dwarf_to_unw_regnum (rs->reg.val[i]));
break;
case DWARF_WHERE_EXPR:
/* Location given by an expression. */
addr = rs->reg.val[i];
if ((ret = eval_location_expr (c, as, a, addr, new_loc + i, arg)) < 0)
return ret;
break;
case DWARF_WHERE_VAL_EXPR:
/* The expression result is turned into a value location. */
addr = rs->reg.val[i];
if ((ret = eval_location_expr (c, as, a, addr, new_loc + i, arg)) < 0)
return ret;
new_loc[i] = DWARF_VAL_LOC (c, DWARF_GET_LOC (new_loc[i]));
break;
}
}
memcpy(c->loc, new_loc, sizeof(new_loc));
c->cfa = cfa;
/* DWARF spec says undefined return address location means end of stack. */
if (DWARF_IS_NULL_LOC (c->loc[rs->ret_addr_column]))
{
c->ip = 0;
ret = 0;
}
else
{
/* Load the caller's ip from the return-address location. */
ret = dwarf_get (c, c->loc[rs->ret_addr_column], &ip);
if (ret < 0)
return ret;
c->ip = ip;
ret = 1;
}
/* XXX: check for ip to be code_aligned */
if (c->ip == prev_ip && c->cfa == prev_cfa)
{
Dprintf ("%s: ip and cfa unchanged; stopping here (ip=0x%lx)\n",
__FUNCTION__, (long) c->ip);
return -UNW_EBADFRAME;
}
if (c->stash_frames)
tdep_stash_frame (c, rs);
return ret;
}
动态库原文件:libc-2.31.so
通过readelf -S libc-2.31.so得到eh_frame_hdr和eh_frame section的位置:
[17] .eh_frame_hdr PROGBITS 00000000001293e8 001293e8
0000000000005944 0000000000000000 A 0 0 4
[18] .eh_frame PROGBITS 000000000012ed30 0012ed30
0000000000022234 0000000000000000 A 0 0 8
查看eh_frame_hdr的二进制内容:
根据前文eh_frame_hdr的结构说明,地址1293e8处解析得各成员变量值如下:
version: 01 //byte
eh_frame_ptr_enc:1b //byte
fde_count_enc:03 //byte
table_enc:3b //byte
eh_frame_ptr:1293ec + 5944 = 12ed30 //与readelf读取出来的一致
fde_count: b27
binary search table:
------------------------
start_ip: ffefa898 + 1293e8(eh_frame_hdr首地址) = 23c80
fde_addr: 5998 + 1293e8 = 12ed80
------------------------
start_ip: ffefa8a4 + 1293e8(eh_frame_hdr首地址) = 23c8c
fde_addr: 5a94 + 1293e8 = 12ef7c
------------------------
.......
第一条FDE覆盖的首地址为23c80,其FDE的位置处于0x12ed80处,其二进制文件内容如下:
FDE各成员变量的值如下:
Length:0x14
CIE Pointer:0x54 //则CIE addr为 12ed84 - 54 = 12ed30
由于FDE内的其他成员变量依赖于CIE的编码,所以需要首先解析CIE的内容:
CIE各成员变量的值如下:
Length:0x10
CIE ID:00000000
version:0x01
Augmentation String: 7a 52 00 //对应右侧的zR字符串
Code Alignment Factor:0x04
Data Alignment Factor: 0x78
Return Address Register: 0x1e
Augmentation Data Length:0x01
Augmentation Data:0x1b //为fde encoding,
Initial Instructions: 0c 1f 00 //总长度为0x10
根据CIE的内容,得FDE的各成员变量解析如下:
Length:0x14
CIE Pointer:0x54 //则CIE addr为 12ed84 - 54 = 12ed30
PC begin:0xffef4ef8 + 0x12ed88 = 0x23c80 //12ed88为当前地址
PC Range:0x0c //pc range encoding = fde encoding & 0x0f = 0x0b
Augmentation Data Length:0x00
Call Frame Instructions:41 0e 10 9d 02 9e 01//总长度为0x14
指令码的解析可以参考代码,与readelf -wf libc-2.31.so读取的eh_frame对比如下,可以发现和最后一条是一致的。
Contents of the .eh_frame section:
00000000 0000000000000010 00000000 CIE
Version: 1
Augmentation: "zR"
Code alignment factor: 4
Data alignment factor: -8
Return address column: 30
Augmentation data: 1b
DW_CFA_def_cfa: r31 (sp) ofs 0
00000014 0000000000000010 00000018 FDE cie=00000000 pc=0000000000024040..0000000000024044
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
00000028 0000000000000024 0000002c FDE cie=00000000 pc=0000000000024048..00000000000240e8
DW_CFA_advance_loc: 4 to 000000000002404c
DW_CFA_def_cfa_offset: 48
DW_CFA_offset: r29 (x29) at cfa-48
DW_CFA_offset: r30 (x30) at cfa-40
DW_CFA_advance_loc: 16 to 000000000002405c
DW_CFA_offset: r19 (x19) at cfa-32
DW_CFA_offset: r20 (x20) at cfa-24
DW_CFA_advance_loc: 80 to 00000000000240ac
DW_CFA_remember_state
DW_CFA_restore: r30 (x30)
DW_CFA_restore: r29 (x29)
DW_CFA_restore: r19 (x19)
DW_CFA_restore: r20 (x20)
DW_CFA_def_cfa_offset: 0
DW_CFA_advance_loc: 4 to 00000000000240b0
DW_CFA_restore_state
DW_CFA_nop
00000050 0000000000000014 00000054 FDE cie=00000000 pc=0000000000023c80..0000000000023c8c
DW_CFA_advance_loc: 4 to 0000000000023c84
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r29 (x29) at cfa-16
DW_CFA_offset: r30 (x30) at cfa-8