http://blog.chinaunix.net/uid-725631-id-253186.html
ld.so分析1
1.入口
elf/rtld.c中
#ifdef RTLD_START
RTLD_START
#else
# error "sysdeps/MACHINE/dl-machine.h fails to define RTLD_START"
#endif
该宏定义在sysdeps/i386/dl-machine.h
#define RTLD_START asm ("\n\
.text\n\
.align 16\n\
0: movl (%esp), %ebx\n\
ret\n\
.align 16\n\
.globl _start\n\ ld.so入口
.globl _dl_start_user\n\
_start:\n\
# Note that _dl_start gets the parameter in %eax.\n\
movl %esp, %eax\n\ 当前esp值作为参数传递给_dl_start,_dl_start函数原型是static Elf32_Addr __attribute__ ((__used__)) __attribute__ ((regparm (3), stdcall)) _dl_start (void *arg)
call _dl_start\n\ //调用_dl_start,完成动态链接,返回用户入口地址,_dl_start自己平栈
_dl_start_user:\n\
# Save the user entry point address in %edi.\n\
movl %eax, %edi\n\ 保存用户程序入口地址
# Point %ebx at the GOT.\n\
call 0b\n\ //等价于 call 1f;1:pop %ebx;addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx;获取GOT地址,存入%ebx
addl $_GLOBAL_OFFSET_TABLE_, %ebx\n\//%ebx指向本条指令地址,加上GOT相对于本指令的偏移,即得到GOT地址
# Store the highest stack address\n\
/*
00000020 0000950a R_386_GOTPC 00000000 _GLOBAL_OFFSET_TABLE_
00000026 00009603 R_386_GOT32 00000000 __libc_stack_end
0000002e 00009709 R_386_GOTOFF 00000004 _dl_skip_args
*/
movl __libc_stack_end@GOT(%ebx), %eax\n\ __libc_stack_end是GLOBAL变量,存在于GOT中
movl %esp, (%eax)\n\ 存入esp
# See if we were run as a command with the executable file\n\
# name as an extra leading argument.\n\
movl _dl_skip_args@GOTOFF(%ebx), %eax\n\ _dl_skip_args是LOCAL变量,不存在于GOT中
# Pop the original argument count.\n\
popl %edx\n\弹出原始参数个数
# Adjust the stack pointer to skip _dl_skip_args words.\n\
leal (%esp,%eax,4), %esp\n\//跳过需要skip的参数,这些参数被ld.so处理了
# Subtract _dl_skip_args from argc.\n\
subl %eax, %edx\n\减掉
# Push argc back on the stack.\n\
push %edx\n\//重新压回
# The special initializer gets called with the stack just\n\
# as the application's entry point will see it; it can\n\
# switch stacks if it moves these contents over.\n\
" RTLD_START_SPECIAL_INIT "\n\ 空宏
# Load the parameters again.\n\
# (eax, edx, ecx, *--esp) = (_dl_loaded, argc, argv, envp)\n\
//为_dl_init准备参数,_dl_init的原型是
//void __attribute__((regparm(3),stdcall)) _dl_init (struct link_map *main_map, int argc, char **argv, char **env)
movl _rtld_local@GOTOFF(%ebx), %eax\n\//取ld.so的_rtld_local入%eax
leal 8(%esp,%edx,4), %esi\n\ esi指向envp
leal 4(%esp), %ecx\n\ ecx指向argv
pushl %esi\n\ 第四个参数使用堆栈传递
# Call the function to run the initializers.\n\
call _dl_init_internal@PLT\n\调用_dl_init
# Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
leal _dl_fini@GOTOFF(%ebx), %edx\n\ 取_dl_fini入edx,传给user
# Jump to the user's entry point.\n\
jmp *%edi\n\
.previous\n\
");
结合crt1.o的分析,大家就很清楚控制和参数是如何被传递的。
注意扩展属性regparm(3)表示传参数时前三个参数使用%eax,%edx,%ecx寄存器,后面的参数仍然使用堆栈传
stdcall属性表示函数自己平栈,除非使用了可变参数(仍然由调用者平栈)
2.内核传递给ld.so的参数在堆栈中的形式如下
position content size (bytes) + comment
------------------------------------------------------------------------
stack pointer -> [ argc = number of args ] 4
[ argv[0] (pointer) ] 4 (program name)
[ argv[1] (pointer) ] 4
[ argv[..] (pointer) ] 4 * x
[ argv[n - 1] (pointer) ] 4
[ argv[n] (pointer) ] 4 (= NULL)
[ envp[0] (pointer) ] 4
[ envp[1] (pointer) ] 4
[ envp[..] (pointer) ] 4
[ envp[term] (pointer) ] 4 (= NULL)
[ auxv[0] AT_PHDR (Elf32_auxv_t) ] 8
[ auxv[1] AT_PHENT (Elf32_auxv_t) ] 8
[ auxv[2] AT_PHNUM (Elf32_auxv_t) ] 8
[ auxv[3] AT_BASE (Elf32_auxv_t) ] 8
[ auxv[4] AT_FLAGS (Elf32_auxv_t) ] 8
[ auxv[5] AT_ENTRY (Elf32_auxv_t) ] 8
[ auxv[6] AT_UID (Elf32_auxv_t) ] 8
[ auxv[7] AT_EUID (Elf32_auxv_t) ] 8
[ auxv[8] AT_GID (Elf32_auxv_t) ] 8
[ auxv[9] AT_EGID (Elf32_auxv_t) ] 8
[ auxv[10] AT_HWCAP (Elf32_auxv_t) ] 8
[ auxv[11] AT_PAGESZ (Elf32_auxv_t) ] 8
[ auxv[12] AT_CLKTCK (Elf32_auxv_t) ] 8
[ auxv[13] AT_PLATFORM (Elf32_auxv_t) ] 8
[ auxv[14] (Elf32_auxv_t) ] 8 (= AT_NULL vector)
[ padding ] 0 - 15
[ padding ] 16
[ padding ] 0 - 15
[k_platform] 0 - 65
[ argument ASCIIZ strings ] >= 0
[ environment ASCIIZ str. ] >= 0
[filename] >=0
(0xbffffffc) [ end marker ] 4 (= NULL)
(0xc0000000) < top of stack > 0 (virtual)
ld.so分析2
内核是如何执行程序的,本分析基于内核版本2.4.0
1.用户空间接口
man execve显示如下的函数原型
execve - execute program
SYNOPSIS
#include <unistd.h>
int execve(const char *filename, char *const argv [], char *const
envp[]);
2.glibc中实现
在glibc中,execve对应的文件是
sysdeps/unix/sysv/linux/execve.c
/* Execute FILE with argument vector ARGV and environment ENVP.
   If the weak hook __pthread_kill_other_threads_np is present it is
   called first; the execve system call is then issued through
   INLINE_SYSCALL and its result is returned.  */
int
__execve (file, argv, envp)
const char *file;
char *const argv[];
char *const envp[];
{
/* If this is a threaded application kill all other threads. */
if (__pthread_kill_other_threads_np)
__pthread_kill_other_threads_np ();
#if __BOUNDED_POINTERS__ // per the article, this macro is not defined in the analyzed build
{
/* Bounded-pointer variant: build alloca'd copies of ARGV and ENVP whose
   entries have been passed through CHECK_STRING, then make the call.  */
char *const *v;
int i;
char *__unbounded *__unbounded ubp_argv;
char *__unbounded *__unbounded ubp_envp;
char *__unbounded *__unbounded ubp_v;
/* Walk to the terminating null entry of ARGV.  */
for (v = argv; *v; v++)
;
/* Entry count including the terminating null.  */
i = v - argv + 1;
ubp_argv = (char *__unbounded *__unbounded) alloca (sizeof (*ubp_argv) * i);
for (v = argv, ubp_v = ubp_argv; --i; v++, ubp_v++)
*ubp_v = CHECK_STRING (*v);
*ubp_v = 0;
/* Same procedure for ENVP.  */
for (v = envp; *v; v++)
;
i = v - envp + 1;
ubp_envp = (char *__unbounded *__unbounded) alloca (sizeof (*ubp_envp) * i);
for (v = envp, ubp_v = ubp_envp; --i; v++, ubp_v++)
*ubp_v = CHECK_STRING (*v);
*ubp_v = 0;
return INLINE_SYSCALL (execve, 3, CHECK_STRING (file), ubp_argv, ubp_envp);
}
#else
return INLINE_SYSCALL (execve, 3, file, argv, envp);// so this is the line that is actually compiled (per the article)
#endif
}
INLINE_SYSCALL的定义在
sysdeps/unix/sysv/linux/i386/sysdeps.h
/* INLINE_SYSCALL (name, nr, args...)
   Statement expression that performs system call NAME with NR arguments.
   LOADARGS_<nr> / RESTOREARGS_<nr> are emitted around the trap; the
   syscall number __NR_<name> is moved into %eax and `int $0x80' is
   executed.  The value left in %eax is read into `resultvar'.
   If resultvar >= 0xfffff001 the call is treated as failed: errno is set
   to -resultvar and the expression evaluates to (int) 0xffffffff;
   otherwise it evaluates to (int) resultvar.  */
#define INLINE_SYSCALL(name, nr, args...) \
({ \
unsigned int resultvar; \
asm volatile ( \
LOADARGS_##nr \
"movl %1, %%eax\n\t" \
"int $0x80\n\t" \
RESTOREARGS_##nr \
: "=a" (resultvar) \
: "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc"); \
if (resultvar >= 0xfffff001) \
{ \
__set_errno (-resultvar); \
resultvar = 0xffffffff; \
} \
(int) resultvar; })
3.手工展开看看
({
unsigned int resultvar;
asm volatile (
LOADARGS_3
"movl %1, %%eax\n\t"
"int $0x80\n\t"
RESTOREARGS_3
: "=a" (resultvar)
: "i" (__NR_execve) ASMFMT_3(args) : "memory", "cc");
if (resultvar >= 0xfffff001)
{
__set_errno (-resultvar);
resultvar = 0xffffffff;
}
(int) resultvar; })
其中__NR_execve是execve的系统调用号,为11,定义在头文件unistd.h中
这其中又涉及到三个宏
#define LOADARGS_1 \
"bpushl .L__X'%k2, %k2\n\t" \
"bmovl .L__X'%k2, %k2\n\t"
#define LOADARGS_3 LOADARGS_1
#define RESTOREARGS_1 \
"bpopl .L__X'%k2, %k2\n\t"
#define RESTOREARGS_3 RESTOREARGS_1
#define ASMFMT_3(arg1, arg2, arg3) \
, "aCD" (arg1), "c" (arg2), "d" (arg3)
展开
({
unsigned int resultvar;
asm volatile (
"bpushl .L__X'%k2, %k2\n\t"
"bmovl .L__X'%k2, %k2\n\t"
"movl %1, %%eax\n\t"
"int $0x80\n\t"
"bpopl .L__X'%k2, %k2\n\t"
: "=a" (resultvar)
: "i" (11) , "aCD" (arg1), "c" (arg2), "d" (arg3) : "memory", "cc");
if (resultvar >= 0xfffff001)
{
__set_errno (-resultvar);
resultvar = 0xffffffff;
}
(int) resultvar; })
这里又涉及到三个asm宏,bpushl,bmovl,bpopl
定义如下(也在该文件sysdeps.h中)
/* Assembler-level helpers used by LOADARGS_* / RESTOREARGS_*.
   Each register gets an assembler symbol .L__X'<reg> with a class value:
   1 for %ebx, 2 for %ecx and %edx, 3 for every other register.
   `.if EXPR' is taken when EXPR is non-zero, so `.if 1 - \name' means
   "class is not 1" and the nested `.if 2 - \name' means "class is not 2".
     bpushl name reg : class 1 -> nothing; class 2 -> xchgl reg,%ebx;
                       otherwise -> pushl %ebx
     bpopl  name reg : class 1 -> nothing; class 2 -> xchgl reg,%ebx;
                       otherwise -> popl %ebx
     bmovl  name reg : class 1 or 2 -> nothing; otherwise -> movl reg,%ebx  */
asm (".L__X'%ebx = 1\n\t"
".L__X'%ecx = 2\n\t"
".L__X'%edx = 2\n\t"
".L__X'%eax = 3\n\t"
".L__X'%esi = 3\n\t"
".L__X'%edi = 3\n\t"
".L__X'%ebp = 3\n\t"
".L__X'%esp = 3\n\t"
".macro bpushl name reg\n\t"
".if 1 - \\name\n\t"
".if 2 - \\name\n\t"
"pushl %ebx\n\t"
".else\n\t"
"xchgl \\reg, %ebx\n\t"
".endif\n\t"
".endif\n\t"
".endm\n\t"
".macro bpopl name reg\n\t"
".if 1 - \\name\n\t"
".if 2 - \\name\n\t"
"popl %ebx\n\t"
".else\n\t"
"xchgl \\reg, %ebx\n\t"
".endif\n\t"
".endif\n\t"
".endm\n\t"
".macro bmovl name reg\n\t"
".if 1 - \\name\n\t"
".if 2 - \\name\n\t"
"movl \\reg, %ebx\n\t"
".endif\n\t"
".endif\n\t"
".endm\n\t");
根据约束条件
%eax分配给resultvar
%ecx分配给argv
%edx分配给envp
则约束条件"aCD"中,a(%eax)已分配,C无效,因此分配%edi给file
手工展开
mov file,%edi
mov argv,%ecx
mov envp,%edx
bpushl .L__X'%edi, %edi
bmovl .L__X'%edi, %edi
movl 11, %%eax
int $0x80
bpopl .L__X'%edi, %edi
手工展开
mov file,%edi
mov argv,%ecx
mov envp,%edx
.if 1 - .L_X'%edi
.if 2 - .L_X'%edi
pushl %ebx
.else
xchgl %edi, %ebx
.endif
.endif
.if 1 - .L_X'%edi
.if 2 - .L_X'%edi
movl %edi, %ebx
.endif
.endif
movl 11, %%eax
int $0x80
.if 1 - .L_X'%edi
.if 2 - .L_X'%edi
popl %ebx
.else
xchgl %edi, %ebx
.endif
.endif
由于L__X'%edi = 3,展开
mov file,%edi
mov argv,%ecx
mov envp,%edx
.if 1 - 3
.if 2 - 3
pushl %ebx
.else
xchgl %edi, %ebx
.endif
.endif
.if 1 - 3
.if 2 - 3
movl %edi, %ebx
.endif
.endif
movl 11, %%eax
int $0x80
.if 1 - 3
.if 2 - 3
popl %ebx
.else
xchgl %edi, %ebx
.endif
.endif
.if为真的条件是不等于0,展开
mov file,%edi
mov argv,%ecx
mov envp,%edx
pushl %ebx
movl %edi, %ebx
movl 11, %%eax
int $0x80
popl %ebx
最终编译结果是
mov 0x8(%ebp),%edi
mov 0xc(%ebp),%ecx
mov 0x10(%ebp),%edx
push %ebx
mov %edi,%ebx
mov $0xb,%eax
int $0x80
pop %ebx
正好一致
系统调用传参使用%ebx,%ecx,%edx,%esi,%edi这五个寄存器,因此最多只能传五个参数.
4.返回值的处理
# define __set_errno(val) (*__errno_location ()) = (val)
if (resultvar >= 0xfffff001)//如果返回值>=0xfffff001,则出错
{
__set_errno (-resultvar);// 预处理时被替换成(*__errno_location ()) = (-resultvar);设置errno为-resultvar
resultvar = 0xffffffff; //-1
}
__errno_location的定义是
sysdeps/generic/errno-loc.c
/* Return the address of errno.  The __set_errno macro quoted above
   dereferences this pointer to store the error code.  */
int * __errno_location (void)
{
return &errno;
}
5.也可使用如下宏生成调用系统调用execve的代码
linux/include/asm-i386/unistd.h
/*
 * _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)
 * Expands to the definition of a function `type name(type1 arg1,
 * type2 arg2, type3 arg3)' that issues `int $0x80' with __NR_<name>
 * in %eax (constraint "0" ties it to the "=a" output) and the three
 * arguments, cast to long, in %ebx, %ecx and %edx.  The raw value
 * left in %eax is handed to __syscall_return(type, __res), which is
 * defined elsewhere.
 */
#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
type name(type1 arg1,type2 arg2,type3 arg3) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
"d" ((long)(arg3))); \
__syscall_return(type,__res); \
}
例如
_syscall3(int,execve,const char *,file,char *const,argv[],char *const,envp[])
能生成和glibc相似的代码
6.sys_execve
linux/arch/i386/kernel/process.c
/*
* sys_execve() executes a new program.
*/
asmlinkage int sys_execve(struct pt_regs regs)
{
int error;
char * filename;
filename = getname((char *) regs.ebx);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
//do_execve成功替换掉执行影像后,在返回到用户空间时,执行权才交给新的影像
error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s);
if (error == 0)
current->ptrace &= ~PT_DTRACE;//取消单步跟踪
putname(filename);
out:
return error;
}
7.do_execve(sys_execve->do_execve)
fs/exec.c
/*
 * sys_execve() executes a new program.
 *
 * do_execve() opens the executable, initialises a linux_binprm, counts
 * argv/envp, copies the file name, the environment strings and the
 * argument strings (in that order) into the argument pages, and then
 * hands over to search_binary_handler().  A non-negative result from the
 * handler is returned directly; on any failure after prepare_binprm()
 * the file reference and the argument pages are released.
 */
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
struct linux_binprm bprm;
struct file *file;
int retval;
int i;
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
return retval;
bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);// start just below the top of the argument area, reserving one pointer-sized word (article: at most 32 pages; the last word holds NULL)
memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));// clear the page pointer array
bprm.file = file;
bprm.filename = filename;
bprm.sh_bang = 0;
bprm.loader = 0;
bprm.exec = 0;
// count the entries of the null-terminated argv array
if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
allow_write_access(file);
fput(file);
return bprm.argc;
}
// count the entries of the envp array
if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
allow_write_access(file);
fput(file);
return bprm.envc;
}
retval = prepare_binprm(&bprm);
if (retval < 0)
goto out;
retval = copy_strings_kernel(1, &bprm.filename, &bprm);// copy the file name
if (retval < 0)
goto out;
bprm.exec = bprm.p;
retval = copy_strings(bprm.envc, envp, &bprm);// copy the envp strings
if (retval < 0)
goto out;
retval = copy_strings(bprm.argc, argv, &bprm);// copy the argv strings
if (retval < 0)
goto out;
retval = search_binary_handler(&bprm,regs);
if (retval >= 0)
/* execve success */
return retval;
out:
/* Something went wrong, return the inode and free the argument pages*/
allow_write_access(bprm.file);
if (bprm.file)
fput(bprm.file);
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
struct page * page = bprm.page[i];
if (page)
__free_page(page);
}
return retval;
}
8.copy_strings(sys_execve->do_execve->copy_strings)
fs/exec.c
/*
 * 'copy_strings()' copies argument/envelope strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 *
 * The argc strings of argv are processed from the last to the first.
 * Each one is placed immediately below the current bprm->p, which is
 * lowered by the string length.  Pages of bprm->page[] are allocated on
 * demand.  Returns 0 on success, -EFAULT on a bad user pointer or empty
 * length, -E2BIG when the remaining space is too small, -ENOMEM when a
 * page cannot be allocated.
 */
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
{
while (argc-- > 0) {// note the post-decrement of argc
char *str;
int len;
unsigned long pos;
// argc was already decremented above, so argv+argc is the current string
if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p)))
return -EFAULT;
if (bprm->p < len) // not enough room left
return -E2BIG;
bprm->p -= len;// strings are laid out from the end toward the beginning
/* XXX: add architecture specific overflow check here. */
pos = bprm->p;
while (len > 0) {
char *kaddr;
int i, new, err;
struct page *page;
int offset, bytes_to_copy;
offset = pos % PAGE_SIZE;// offset within the page
i = pos/PAGE_SIZE;// page index
page = bprm->page[i];
new = 0;
if (!page) {
page = alloc_page(GFP_HIGHUSER);
bprm->page[i] = page;
if (!page)
return -ENOMEM;
new = 1;
}
kaddr = kmap(page);
if (new && offset)// fresh page and offset > 0: zero [0, offset)
memset(kaddr, 0, offset);
bytes_to_copy = PAGE_SIZE - offset;
if (bytes_to_copy > len) {
bytes_to_copy = len;
if (new)// fresh page: zero [offset+len, PAGE_SIZE)
memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
}
err = copy_from_user(kaddr + offset, str, bytes_to_copy);
kunmap(page);
if (err)
return -EFAULT;
pos += bytes_to_copy;// the string may continue into the next page
str += bytes_to_copy;
len -= bytes_to_copy;
}
}
return 0;
}
执行到这里bprm->p内存空间布局如下
[ argument ASCIIZ strings ] >= 0
[ environment ASCIIZ str. ] >= 0
[filename]
(0xbffffffc) [ end marker ] 4 (= NULL)
(0xc0000000) < top of stack > 0 (virtual)
写一个程序验证一下
系统redhat7.2
[root@proxy ~]# uname -a
Linux proxy 2.4.7-10smp #1 SMP Thu Sep 6 17:09:31 EDT 2001 i686 unknown
[root@proxy ~]#
[root@proxy ~]# cat 1.c
#include <stdio.h>
#include <ctype.h>

/*
 * Dump the initial process stack of a 32-bit Linux 2.4 process.
 *
 * Prints argc and the addresses of argv and envp, then walks every byte
 * from argv up to 0xc0000000 (the top of the user stack on that system),
 * printing printable bytes as characters and all others as "\<hex>".
 * The hard-coded upper bound only makes sense on the i386 layout the
 * article describes; on other systems this loop reads unmapped memory.
 */
int main(int argc,char * argv[],char * envp[])
{
	unsigned char * p;

	/* %p requires a void * argument. */
	printf("%d,%p,%p\n",argc,(void *)argv,(void *)envp);
	p=(unsigned char *)argv;
	for(;p<(unsigned char *)0xc0000000;p++)
		if(isprint(*p))
			printf("%c",*p);
		else
			printf("\\%x",*p);
	return 0;
}
[root@proxy ~]# ./a.out
1,0xbffffb04,0xbffffb0c
\3\fc\ff\bf\0\0\0\0\b\fc\ff\bf\15\fc\ff\bf$\fc\ff\bf<\fc\ff\bf^\fc\ff\bfj\fc\ff\bft\fc\ff\bf7\fe\ff\bfV\fe\ff\bfp\fe\ff\bf\85\fe\ff\bf\9c\fe\ff\bf\a7\fe\ff\bf\b4\fe\ff\bf\bc\fe\ff\bf\cc\fe\ff\bf\da\fe\ff\bf\e8\fe\ff\bf\f9\fe\ff\bf\7\ff\ff\bf\12\ff\ff\bf\1d\ff\ff\bfI\ff\ff\bf|\ff\ff\bf\d7\ff\ff\bf\ea\ff\ff\bf\0\0\0\0\10\0\0\0\ff\fb\83\3\6\0\0\0\0\10\0\0\11\0\0\0d\0\0\0\3\0\0\04\80\4\8\4\0\0\0 \0\0\0\5\0\0\0\6\0\0\0\7\0\0\0\0\0\0@\8\0\0\0\0\0\0\0\9\0\0\0\90\83\4\8\b\0\0\0\0\0\0\0\c\0\0\0\0\0\0\0\d\0\0\0\0\0\0\0\e\0\0\0\0\0\0\0\f\0\0\0\fe\fb\ff\bf\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0i686\0./a.out\0PWD=/root\0HOSTNAME=proxy\0QTDIR=/usr/lib/qt-2.3.1\0LESSOPEN=|/usr/bin/lesspipe.sh %s\0KDEDIR=/usr\0USER=root\0LS_COLORS=no=00:fi=00:di=01;34:ln=01;36:pi=40;33:so=01;35:bd=40;33;01:cd=40;33;01:or=01;05;37;41:mi=01;05;37;41:ex=01;32:*.cmd=01;32:*.exe=01;32:*.com=01;32:*.btm=01;32:*.bat=01;32:*.sh=01;32:*.csh=01;32:*.tar=01;31:*.tgz=01;31:*.arj=01;31:*.taz=01;31:*.lzh=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.gz=01;31:*.bz2=01;31:*.bz=01;31:*.tz=01;31:*.rpm=01;31:*.cpio=01;31:*.jpg=01;35:*.gif=01;35:*.bmp=01;35:*.xbm=01;35:*.xpm=01;35:*.png=01;35:*.tif=01;35:\0MACHTYPE=i386-redhat-linux-gnu\0MAIL=/var/spool/mail/root\0INPUTRC=/etc/inputrc\0BASH_ENV=/root/.bashrc\0LANG=en_US\0LOGNAME=root\0SHLVL=1\0SHELL=/bin/bash\0USERNAME=root\0HOSTTYPE=i386\0OSTYPE=linux-gnu\0HISTSIZE=1000\0HOME=/root\0TERM=linux\0SSH_AUTH_SOCK=/tmp/ssh-XXi40Qtw/agent.23262\0SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass\0PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/root/bin\0SSH_TTY=/dev/pts/0\0_=./a.out\0./a.out\0\0\0\0\0
9.search_binary_handler(sys_execve->do_execve->search_binary_handler)
fs/exec.c
/*
 * cycle the list of binary formats handler, until one recognizes the image
 *
 * Makes up to two passes over the `formats' list, calling each entry's
 * load_binary handler.  A non-negative handler result ends the search
 * successfully.  A failure other than -ENOEXEC ends it with that error.
 * With CONFIG_KMOD, if the first pass yields only -ENOEXEC and the first
 * four bytes of bprm->buf are not all printable, a module named
 * "binfmt-%04x" is requested before the second pass.
 */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
int try,retval=0;
struct linux_binfmt *fmt;
#ifdef __alpha__
/* handle /sbin/loader.. */
{
struct exec * eh = (struct exec *) bprm->buf;
if (!bprm->loader && eh->fh.f_magic == 0x183 &&
(eh->fh.f_flags & 0x3000) == 0x3000)
{
char * dynloader[] = { "/sbin/loader" };
struct file * file;
unsigned long loader;
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = NULL;
loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
file = open_exec(dynloader[0]);
retval = PTR_ERR(file);
if (IS_ERR(file))
return retval;
bprm->file = file;
bprm->loader = loader;
retval = prepare_binprm(bprm);
if (retval<0)
return retval;
/* should call search_binary_handler recursively here,
but it does not matter */
}
}
#endif
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
for (fmt = formats ; fmt ; fmt = fmt->next) {
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
if (!fn)
continue;
if (!try_inc_mod_count(fmt->module))
continue;
read_unlock(&binfmt_lock);
retval = fn(bprm, regs);// call this format's load_binary handler
if (retval >= 0) {// the handler accepted the image
put_binfmt(fmt);
allow_write_access(bprm->file);// re-allow write access to the file
if (bprm->file)
fput(bprm->file);
bprm->file = NULL;
current->did_exec = 1;// record on the task that an exec has taken place
return retval;
}
read_lock(&binfmt_lock);
put_binfmt(fmt);
if (retval != -ENOEXEC)
break;
if (!bprm->file) {
read_unlock(&binfmt_lock);
return retval;
}
}
read_unlock(&binfmt_lock);
if (retval != -ENOEXEC) {
break;
#ifdef CONFIG_KMOD
}else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
char modname[20];
if (printable(bprm->buf[0]) &&
printable(bprm->buf[1]) &&
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC: give up when the first four bytes are all printable */
sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
request_module(modname);
#endif
}
}
return retval;
}
elf文件的相关处理结构在fs/binfmt_elf.c中
/* Register the ELF format descriptor elf_format via register_binfmt()
   and return its result. */
static int __init init_elf_binfmt(void)
{
return register_binfmt(&elf_format);
}
/* ELF binary-format descriptor passed to register_binfmt().
   Positional initializers; the article identifies load_elf_binary as the
   load_binary handler invoked by search_binary_handler(). */
static struct linux_binfmt elf_format = {
NULL, THIS_MODULE, load_elf_binary, load_elf_library, elf_core_dump, ELF_EXEC_PAGESIZE
};
因此elf的load_binary函数是load_elf_binary
ld.so分析3
内核中load_elf_binary如何执行
1.load_elf_binary
fs/binfmt_elf.c
/*
* These are the functions used to load ELF style executables and shared
* libraries. There is no binary dependent code anywhere else.
*/
#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias;
int load_addr_set = 0;
char * elf_interpreter = NULL;
unsigned int interpreter_type = INTERPRETER_NONE;
unsigned char ibcs2_interpreter = 0;
mm_segment_t old_fs;
unsigned long error;
struct elf_phdr * elf_ppnt, *elf_phdata;
unsigned long elf_bss, k, elf_brk;
int elf_exec_fileno;
int retval, size, i;
unsigned long elf_entry, interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
struct exec interp_ex;
char passed_fileno[6];
/* Get the exec-header */
elf_ex = *((struct elfhdr *) bprm->buf);
retval = -ENOEXEC;
/* First of all, some simple consistency checks */
//检查magic
if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out;
//既非可执行文件又非动态链接库,动态链接库也可直接执行
if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&elf_ex))//体系结构检查
goto out;
if (!bprm->file->f_op||!bprm->file->f_op->mmap)//不能mmap,error
goto out;
/* Now read in all of the header information */
retval = -ENOMEM;
/*
typedef struct elf32_hdr{
unsigned char e_ident[EI_NIDENT];
Elf32_Half e_type;
Elf32_Half e_machine;
Elf32_Word e_version;
Elf32_Addr e_entry;
Elf32_Off e_phoff;
Elf32_Off e_shoff;
Elf32_Word e_flags;
Elf32_Half e_ehsize;
Elf32_Half e_phentsize;
Elf32_Half e_phnum;
Elf32_Half e_shentsize;
Elf32_Half e_shnum;
Elf32_Half e_shstrndx;
} Elf32_Ehdr;
*/
//e_phentsize 该成员保存着在文件的程序头表(program header table)
//中一个入口的大小(以字节计数)。所有的入口都是同样的大小。
//e_phnum 该成员保存着在程序头表中入口的个数。因此,e_phentsize和e_phnum
//的乘积就是表的大小(以字节计数).假如没有程序头表(program header table),
//e_phnum变量为0。
size = elf_ex.e_phentsize * elf_ex.e_phnum;
if (size > 65536)
goto out;
elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
//读入 program headers
retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);
if (retval < 0)
goto out_free_ph;
retval = get_unused_fd();
if (retval < 0)
goto out_free_ph;
get_file(bprm->file);
//保存原始打开文件
fd_install(elf_exec_fileno = retval, bprm->file);//flush old exec不会关闭
elf_ppnt = elf_phdata;//program headers
elf_bss = 0;
elf_brk = 0;
start_code = ~0UL;// -1
end_code = 0;
start_data = 0;
end_data = 0;
for (i = 0; i < elf_ex.e_phnum; i++) {//处理每一个program headers,寻找PT_INTERP
if (elf_ppnt->p_type == PT_INTERP) {
retval = -EINVAL;
if (elf_interpreter)//已经有interpreter
goto out_free_dentry;
/* This is the program interpreter used for
* shared libraries - for now assume that this
* is an a.out format binary
*/
/*
typedef struct elf32_phdr{
Elf32_Word p_type;
Elf32_Off p_offset;
Elf32_Addr p_vaddr;
Elf32_Addr p_paddr;
Elf32_Word p_filesz;
Elf32_Word p_memsz;
Elf32_Word p_flags;
Elf32_Word p_align;
} Elf32_Phdr;
*/
retval = -ENOMEM;
elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_file;
retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);//读入interp
if (retval < 0)
goto out_free_interp;
/* If the program interpreter is one of these two,
* then assume an iBCS2 image. Otherwise assume
* a native linux image.
redhat 7.2 中是 /lib/ld-linux.so.2
*/
if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
ibcs2_interpreter = 1;
#if 0
printk("Using ELF interpreter %s\n", elf_interpreter);
#endif
#ifdef __sparc__
if (ibcs2_interpreter) {
unsigned long old_pers = current->personality;
struct exec_domain *old_domain = current->exec_domain;
struct exec_domain *new_domain;
struct fs_struct *old_fs = current->fs, *new_fs;
get_exec_domain(old_domain);
atomic_inc(&old_fs->count);
set_personality(PER_SVR4);
interpreter = open_exec(elf_interpreter);
new_domain = current->exec_domain;
new_fs = current->fs;
current->personality = old_pers;
current->exec_domain = old_domain;
current->fs = old_fs;
put_exec_domain(new_domain);
put_fs_struct(new_fs);
} else
#endif
{
interpreter = open_exec(elf_interpreter);//打开/lib/ld-linux.so.2
}
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp;
retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);//读入头部
if (retval < 0)
goto out_free_dentry;
/* Get the exec headers */
interp_ex = *((struct exec *) bprm->buf);//可能是a.out
interp_elf_ex = *((struct elfhdr *) bprm->buf);//可能是elf
}
elf_ppnt++;
}
/* Some simple consistency checks for the interpreter */
if (elf_interpreter) {//有interp,执行/lib/ld-linux.so.2时,没有,或静态链接的可执行文件也没有
interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
/* Now figure out which format our binary is */
if ((N_MAGIC(interp_ex) != OMAGIC) &&
(N_MAGIC(interp_ex) != ZMAGIC) &&
(N_MAGIC(interp_ex) != QMAGIC))
interpreter_type = INTERPRETER_ELF;//是interp elf
if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
interpreter_type &= ~INTERPRETER_ELF;//是interp aout
retval = -ELIBBAD;
if (!interpreter_type)
goto out_free_dentry;
/* Make sure only one type was selected */
if ((interpreter_type & INTERPRETER_ELF) &&
interpreter_type != INTERPRETER_ELF) {
printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
interpreter_type = INTERPRETER_ELF;
}
}
/* OK, we are done with that, now set up the arg stuff,
and then start this sucker up */
if (!bprm->sh_bang) {
char * passed_p;
if (interpreter_type == INTERPRETER_AOUT) {//a.out
sprintf(passed_fileno, "%d", elf_exec_fileno);//原始打开文件号
passed_p = passed_fileno;
if (elf_interpreter) {//interp文件名
retval = copy_strings_kernel(1,&passed_p,bprm);
if (retval)
goto out_free_dentry;
bprm->argc++;//打开文件号作为参数
}
}
}
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);//清除旧的执行影像
if (retval)
goto out_free_dentry;
/* OK, This is the point of no return */
current->mm->start_data = 0;
current->mm->end_data = 0;
current->mm->end_code = 0;
current->mm->mmap = NULL;
current->flags &= ~PF_FORKNOEXEC;
elf_entry = (unsigned long) elf_ex.e_entry;//原文件代码入口
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY(elf_ex, ibcs2_interpreter);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
current->mm->rss = 0;
setup_arg_pages(bprm); /* XXX: check error */
2.setup_arg_pages
load_elf_binary->setup_arg_pages
fs/exec.c
/*
 * Attach the argument pages collected in bprm->page[] to the current
 * process as its initial stack.
 *
 * bprm->p, bprm->loader (when non-zero) and bprm->exec are offsets within
 * the MAX_ARG_PAGES-page argument area; they are converted to user
 * addresses by adding stack_base.  A vm_area_struct covering
 * [page-aligned bprm->p, STACK_TOP) is inserted into current->mm, and
 * every allocated argument page is installed at its address with
 * put_dirty_page().
 *
 * Returns 0 on success, -ENOMEM if the vm_area_struct cannot be allocated.
 */
int setup_arg_pages(struct linux_binprm *bprm)
{
	unsigned long stack_base;
	struct vm_area_struct *mpnt;
	int i;

	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;	/* lowest address of the argument area */
	bprm->p += stack_base;				/* offset -> user address */
	if (bprm->loader)
		bprm->loader += stack_base;
	bprm->exec += stack_base;

	/* vm_area_struct describing the stack region */
	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!mpnt)
		return -ENOMEM;

	down(&current->mm->mmap_sem);
	{
		mpnt->vm_mm = current->mm;
		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;	/* round down to a page boundary */
		mpnt->vm_end = STACK_TOP;
		mpnt->vm_page_prot = PAGE_COPY;
		mpnt->vm_flags = VM_STACK_FLAGS;
		mpnt->vm_ops = NULL;
		mpnt->vm_pgoff = 0;
		mpnt->vm_file = NULL;
		mpnt->vm_private_data = (void *) 0;
		insert_vm_struct(current->mm, mpnt);
		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
	}

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page *page = bprm->page[i];
		if (page) {
			/* ownership of the page moves from bprm to the address space */
			bprm->page[i] = NULL;
			current->mm->rss++;	/* one more resident page */
			put_dirty_page(current,page,stack_base);	/* install the page at stack_base */
		}
		stack_base += PAGE_SIZE;
	}
	up(&current->mm->mmap_sem);

	return 0;
}
3.返回到load_elf_binary
current->mm->start_stack = bprm->p;
/* Try and get dynamic programs out of the way of the default mmap
base, as well as whatever program they might try to exec. This
is because the brk will follow the loader, and is not movable. */
//普通可执行文件load_bias=0;动态链接库load_bias=0x8000 0000,即2G处(单独执行时,给ld-linux.so.2让路)
/*
例如/lib/ld-2.3.2.so执行时的maps如下
[root@mail /proc/30019]# cat maps
80000000-80015000 r-xp 00000000 08:01 272070 /lib/ld-2.3.2.so
80015000-80016000 rw-p 00014000 08:01 272070 /lib/ld-2.3.2.so
bfffe000-c0000000 rwxp fffff000 00:00 0
/lib/libc-2.3.2.so执行时的maps如下
[root@mail /proc/30097]# cat /proc/14541/maps
40000000-40015000 r-xp 00000000 08:01 272070 /lib/ld-2.3.2.so
40015000-40016000 rw-p 00014000 08:01 272070 /lib/ld-2.3.2.so
80000000-80133000 r-xp 00000000 08:01 272077 /lib/libc-2.3.2.so
80133000-80137000 rw-p 00132000 08:01 272077 /lib/libc-2.3.2.so
80137000-80139000 rwxp 00000000 00:00 0
bfffe000-c0000000 rwxp fffff000 00:00 0
*/
load_bias = ELF_PAGESTART(elf_ex.e_type==ET_DYN ? ELF_ET_DYN_BASE : 0);
/* Now we do a little grungy work by mmaping the ELF image into
the correct location in memory. At this point, we assume that
the image should be loaded at fixed address, not at a variable
address. */
old_fs = get_fs();
set_fs(get_ds());
for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {
//处理每一个program headers
/*
typedef struct elf32_phdr{
Elf32_Word p_type;
Elf32_Off p_offset;//该成员给出了该段的驻留位置相对于文件开始处的偏移。
Elf32_Addr p_vaddr;//该成员给出了该段在内存中的首字节地址。(连接器推荐的加载基址)
Elf32_Addr p_paddr;
Elf32_Word p_filesz;//该成员给出了文件映像中该段的字节数;它可能是 0 。
Elf32_Word p_memsz;//该成员给出了内存映像中该段的字节数;它可能是 0 。
Elf32_Word p_flags;//该成员给出了和该段相关的标志。定义的标志值如下所述。
Elf32_Word p_align;
} Elf32_Phdr;
*/
int elf_prot = 0, elf_flags;
unsigned long vaddr;
if (elf_ppnt->p_type != PT_LOAD)//必须是PT_LOAD
continue;
if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
vaddr = elf_ppnt->p_vaddr;
if (elf_ex.e_type == ET_EXEC || load_addr_set) {
//是可执行文件或者起始加载地址已设置
elf_flags |= MAP_FIXED;
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
//不判断出错 ???
/*
elf文件有两种视图,一种节表,是从程序编译连接的角度看。一种是程序头,是从程序执行的角度看。
举例看看这两种视图的关系
[root@mail /proc/30097]# readelf -l /bin/ls
Elf file type is EXEC (Executable file)
Entry point 0x8049690
There are 7 program headers, starting at offset 52
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4
INTERP 0x000114 0x08048114 0x08048114 0x00013 0x00013 R 0x1
[Requesting program interpreter: /lib/ld-linux.so.2]
LOAD 0x000000 0x08048000 0x08048000 0x0fa98 0x0fa98 R E 0x1000
LOAD 0x010000 0x08058000 0x08058000 0x00348 0x006c8 RW 0x1000
DYNAMIC 0x010114 0x08058114 0x08058114 0x000d0 0x000d0 RW 0x4
NOTE 0x000128 0x08048128 0x08048128 0x00020 0x00020 R 0x4
GNU_EH_FRAME 0x00f960 0x08057960 0x08057960 0x0002c 0x0002c R 0x4
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata
.eh_frame_hdr .eh_frame
03 .data .dynamic .ctors .dtors .jcr .got .bss
04 .dynamic
05 .note.ABI-tag
06 .eh_frame_hdr
注意下面的Section to Segment mapping,说明了每个程序头包含了哪些节。我们关心的是代码段程序头和数据段程序头。
LOAD 0x000000 0x08048000 0x08048000 0x0fa98 0x0fa98 R E 0x1000
LOAD 0x010000 0x08058000 0x08058000 0x00348 0x006c8 RW 0x1000
分别对应
02 .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata
.eh_frame_hdr .eh_frame
03 .data .dynamic .ctors .dtors .jcr .got .bss
代码段开始文件地址是0,开始虚拟地址是0x8048000,文件大小是0xfa98,内存大小是0xfa98,flag是可读可执行,对齐大小是4k
数据段开始文件地址是0x10000,开始虚拟地址0x8058000,文件大小是0x348,内存大小是0x6c8,flag是可读可写,对齐大小是4k
下面列出节表
[zws@mail /proc/1]$readelf -S /bin/ls
There are 26 section headers, starting at offset 0x10444:
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 08048114 000114 000013 00 A 0 0 1
[ 2] .note.ABI-tag NOTE 08048128 000128 000020 00 A 0 0 4
[ 3] .hash HASH 08048148 000148 00028c 04 A 4 0 4
[ 4] .dynsym DYNSYM 080483d4 0003d4 0005e0 10 A 5 1 4
[ 5] .dynstr STRTAB 080489b4 0009b4 0003ea 00 A 0 0 1
[ 6] .gnu.version VERSYM 08048d9e 000d9e 0000bc 02 A 4 0 2
[ 7] .gnu.version_r VERNEED 08048e5c 000e5c 000070 00 A 5 1 4
[ 8] .rel.dyn REL 08048ecc 000ecc 000028 08 A 4 0 4
[ 9] .rel.plt REL 08048ef4 000ef4 000278 08 A 4 11 4
[10] .init PROGBITS 0804916c 00116c 000017 00 AX 0 0 4
[11] .plt PROGBITS 08049184 001184 000500 04 AX 0 0 4
[12] .text PROGBITS 08049690 001690 00ab4c 00 AX 0 0 16
[13] .fini PROGBITS 080541dc 00c1dc 00001b 00 AX 0 0 4
[14] .rodata PROGBITS 08054200 00c200 003760 00 A 0 0 32
[15] .eh_frame_hdr PROGBITS 08057960 00f960 00002c 00 A 0 0 4
[16] .eh_frame PROGBITS 0805798c 00f98c 00010c 00 A 0 0 4
[17] .data PROGBITS 08058000 010000 000114 00 WA 0 0 32
[18] .dynamic DYNAMIC 08058114 010114 0000d0 08 WA 5 0 4
[19] .ctors PROGBITS 080581e4 0101e4 000008 00 WA 0 0 4
[20] .dtors PROGBITS 080581ec 0101ec 000008 00 WA 0 0 4
[21] .jcr PROGBITS 080581f4 0101f4 000004 00 WA 0 0 4
[22] .got PROGBITS 080581f8 0101f8 000150 04 WA 0 0 4
[23] .bss NOBITS 08058360 010360 000368 00 WA 0 0 32
[24] .gnu_debuglink PROGBITS 00000000 010360 000010 00 0 0 4
[25] .shstrtab STRTAB 00000000 010370 0000d2 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
从开始文件地址和内存大小可看出
代码段包含的节从[0]到[16],数据段包含的节从[17]到[23],和前面的显示正好一致。
由于内存映射以页为单位,映射起始地址向下对齐到页边界,映射大小向上对齐到页边界,因此在进行内存映射的时候,
代码段映射关系是虚拟地址[0x8048000,0x8048000+0x10000)->文件偏移[0,0x10000)
数据段映射关系是虚拟地址[0x8058000,0x8058000+0x1000)->文件偏移[0x10000,0x10000+0x1000)
/bin/ls的文件大小是小于0x11000的,数据的映射超出了,不过没有关系,超出的部分会被当做零页分配。可见这个文件都被映
射了。显然0x8048000就是这个文件镜像加载的起始地址,这个地址后面有用。
我还注意到[23].bss节type是NOBITS,说明文件中没有对应内容,这从[24].gnu_debuglink的文件偏移和[23].bss的文件偏移相
等侧面证明。但是它却是有大小的,0x368,Flag是WA,说明可写且需要分配。
bss是未初始化节,程序中的未初始化变量都放在这个节中,由于未初始化变量的值默认都为0,因此也就不在文件中为其分配空
间了。但是到了内存中就不同了,必须为其分配空间,且清0。这一点后面还会谈到。
还有就是程序中的常量被放在.rodata节中,常量只能读,不能写,代码段是可读,可执行,因此.rodata节被插入在代码段中,没
有创建额外的程序头了。
可执行文件一般从0x08000000(128M)开始编址(例如上面/bin/ls的加载基址是0x08048000),可执行文件加载时不重定位。
动态链接库一般从0开始编址.动态链接库加载时重定位,重定位地址从0x40000000(1G)开始。
映射的时候,代码段只读映射该页,而数据段COW映射该页.因此虽然上面代码段和数据段是连续的,但是页属性是不同的。
代码段和数据段在文件中的映射可能有重叠,例如
[zws@mail ~]$ readelf -l /lib/libc-2.3.2.so
Elf file type is DYN (Shared object file)
Entry point 0x159d0
There are 7 program headers, starting at offset 52
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000034 0x00000034 0x00000034 0x000e0 0x000e0 R E 0x4
INTERP 0x1312f0 0x001312f0 0x001312f0 0x00013 0x00013 R 0x1
[Requesting program interpreter: /lib/ld-linux.so.2]
LOAD 0x000000 0x00000000 0x00000000 0x132804 0x132804 R E 0x1000
LOAD 0x132820 0x00133820 0x00133820 0x02c90 0x056c4 RW 0x1000
DYNAMIC 0x1350d4 0x001360d4 0x001360d4 0x000d8 0x000d8 RW 0x4
NOTE 0x000114 0x00000114 0x00000114 0x00020 0x00020 R 0x4
GNU_EH_FRAME 0x131304 0x00131304 0x00131304 0x0032c 0x0032c R 0x4
假设实际加载地址是x
代码段映射关系是虚拟地址[x+0,x+0+0x133000)->文件偏移[0,0x133000)
数据段映射关系是虚拟地址[x+0x133000,x+0x133000+0x6000)->文件偏移[0x132000,0x132000+0x4000)+零页[x+0x137000,
x+0x139000)(页内偏移0x820+文件大小0x2c90=0x34b0,向上对齐为0x4000,与前面maps中80133000-80137000一致)
文件偏移有一页重叠,但这种重叠不会引起冲突和访问错误.
总之,ELF文件的节和程序头关系紧密,其中暗藏玄机,值得细细揣摩。
*/
4.计算start_code,end_code等
if (!load_addr_set) {
load_addr_set = 1;
//load_addr 计算整个镜像加载基址
/*
load_bias load_addr
ET_EXEC 0x00000000 0x08048000 /bin/ls
ET_DYN 0x80000000 0x80000000 /lib/ld-linux.so.2
*/
load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);//链接器推荐的镜像加载基址,对于可执
行文件就是该地址,对于动态链接库,一般是0
if (elf_ex.e_type == ET_DYN) {//load_bias是0x80000000
load_bias += error -//动态链接库实际加载地址-内核推荐的加载地址=加载偏移(一般
是0)
ELF_PAGESTART(load_bias + vaddr);
load_addr += error;//一般load_addr==error
}
}
k = elf_ppnt->p_vaddr;//通常代码段和数据段紧挨在一起,代码段在前,数据段在后
if (k < start_code) start_code = k;//start_code 的初值为0xffffffff,定位代码段开始地址
if (start_data < k) start_data = k;//start_data初值为0,定位数据段开始
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;//file size,不是mem size
if (k > elf_bss)//elf_bss初值为0
elf_bss = k;//计算bss起始地址
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;//代码段结束地址
if (end_data < k)
end_data = k;//数据段结束地址
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;//mem size
if (k > elf_brk)
elf_brk = k;
//[elf_bss,elf_brk)之间是bss section
}
set_fs(old_fs);
//全部重定位
elf_entry += load_bias;
elf_bss += load_bias;//bss未初始化段开始
elf_brk += load_bias;//brk动态内存分配起始地址
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
if (elf_interpreter) {
//elf_entry被覆盖
if (interpreter_type == INTERPRETER_AOUT)
elf_entry = load_aout_interp(&interp_ex,
interpreter);
else
elf_entry = load_elf_interp(&interp_elf_ex,
interpreter,
&interp_load_addr);
5.load_elf_interp
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
is only provided so that we can read a.out libraries that have
an ELF header */
static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
struct file * interpreter,
unsigned long *interp_load_addr)
{
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
int retval, i, size;
/* First of all, some simple consistency checks */
if (interp_elf_ex->e_type != ET_EXEC &&
interp_elf_ex->e_type != ET_DYN)
goto out;
if (!elf_check_arch(interp_elf_ex))
goto out;
if (!interpreter->f_op || !interpreter->f_op->mmap)
goto out;
/*
* If the size of this structure has changed, then punt, since
* we will be doing the wrong thing.
*/
if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
goto out;
/* Now read in all of the header information */
size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
if (size > ELF_MIN_ALIGN)
goto out;
elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
//读入interp的program header
retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
error = retval;
if (retval < 0)
goto out_close;
eppnt = elf_phdata;
for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
int elf_prot = 0;
unsigned long vaddr = 0;
unsigned long k, map_addr;
if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
/*
readelf -l /lib/ld-linux.so.2
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x000000 0x00000000 0x00000000 0x15229 0x15229 R E 0x1000
LOAD 0x015240 0x00016240 0x00016240 0x00300 0x00758 RW 0x1000
DYNAMIC 0x015490 0x00016490 0x00016490 0x000b0 0x000b0 RW 0x4
load_addr + vaddr=0,mmap将从1G处开始处映射
*/
map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
//只计算ET_DYN
load_addr = map_addr - ELF_PAGESTART(vaddr);
load_addr_set = 1;
}
/*
* Find the end of the file mapping for this phdr, and keep
* track of the largest address we see for this.
*/
k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
if (k > elf_bss)//elf bss
elf_bss = k;
/*
* Do the same thing for the memory mapping - between
* elf_bss and last_bss is the bss section.
[elf_bss,last_bss)是bss section
*/
k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
if (k > last_bss)//last bss
last_bss = k;
}
}
/* Now use mmap to map the library into memory. */
/*
* Now fill out the bss section. First pad the last page up
* to the page boundary, and then perform a mmap to make sure
* that there are zero-mapped pages up to and including the
* last bss page.
*/
padzero(elf_bss);//清bss
elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
/* Map the last of the bss segment */
if (last_bss > elf_bss)//未映射到文件的部分,分配0页
do_brk(elf_bss, last_bss - elf_bss);//匿名映射[addr,addr+len)
*interp_load_addr = load_addr;//镜像加载基址
error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;//入口地址
out_close:
kfree(elf_phdata);
out:
return error;
}
6.返回load_elf_binary
allow_write_access(interpreter);
fput(interpreter);
kfree(elf_interpreter);
if (elf_entry == ~0UL) {
printk(KERN_ERR "Unable to load interpreter\n");
kfree(elf_phdata);
send_sig(SIGSEGV, current, 0);
return 0;
}
}
kfree(elf_phdata);
if (interpreter_type != INTERPRETER_AOUT)
sys_close(elf_exec_fileno);//ELF不需要
set_binfmt(&elf_format);
compute_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
//注意bprm->p被更新,指向argc地址
bprm->p = (unsigned long)
create_elf_tables((char *)bprm->p,
bprm->argc,
bprm->envc,
(interpreter_type == INTERPRETER_ELF ? &elf_ex : NULL),
load_addr, load_bias,
interp_load_addr,
(interpreter_type == INTERPRETER_AOUT ? 0 : 1));
7.create_elf_tables
static elf_addr_t *
create_elf_tables(char *p, int argc, int envc,
struct elfhdr * exec,
unsigned long load_addr,
unsigned long load_bias,
unsigned long interp_load_addr, int ibcs)
{
elf_caddr_t *argv;
elf_caddr_t *envp;
elf_addr_t *sp, *csp;
char *k_platform, *u_platform;
long hwcap;
size_t platform_len = 0;
/*
* Get hold of platform and hardware capabilities masks for
* the machine we are running on. In some cases (Sparc),
* this info is impossible to get, in others (i386) it is
* merely difficult.
*/
hwcap = ELF_HWCAP;//CPU特性描述字
k_platform = ELF_PLATFORM;//CPU类型名 例如i686
//p指向argv字符串首地址 即下面的0xbffffc07处
/*
0xbffffc02: "i686"
0xbffffc07: "/root/3/88"
0xbffffc12: "PWD=/root"
0xbffffc1c: "HOSTNAME=proxy"
0xbffffc2b: "QTDIR=/usr/lib/qt-2.3.1"
0xbffffc43: "LESSOPEN=|/usr/bin/lesspipe.sh %s"
0xbffffc65: "KDEDIR=/usr"
*/
if (k_platform) {//一般不为空
platform_len = strlen(k_platform) + 1;
u_platform = p - platform_len;
__copy_to_user(u_platform, k_platform, platform_len);//i686
} else
u_platform = p;
/*
* Force 16 byte _final_ alignment here for generality.
* Leave an extra 16 bytes free so that on the PowerPC we
* can move the aux table up to start on a 16-byte boundary.
*/
//向低地址方向对齐到16字节边界,再减16字节
//较新的内核使用一个随机数,因此布局有所不同
sp = (elf_addr_t *)((~15UL & (unsigned long)(u_platform)) - 16UL);
csp = sp;
//DLINFO -> 动态链接信息?? 这些信息是为ld.so准备的,ld.so需要用到
csp -= ((exec ? DLINFO_ITEMS*2 : 4) + (k_platform ? 2 : 0));//DLINFO_ITEMS*2 + 2,DLINFO_ITEMS定义为13
csp -= envc+1;
csp -= argc+1;
//ibcs 0->a.out 1->elf
csp -= (!ibcs ? 3 : 1);// 1-> argc /* argc itself */
if ((unsigned long)csp & 15UL)//不与16字节边界对齐
//下移sp,使argc对齐到16字节边界
sp -= ((unsigned long)csp & 15UL) / sizeof(*sp);
/*
内存布局如下
position content size (bytes) + comment
------------------------------------------------------------------------
stack pointer -> [ argc = number of args ] 4
[ argv[0] (pointer) ] 4 (program name)
[ argv[1] (pointer) ] 4
[ argv[..] (pointer) ] 4 * x
[ argv[n - 1] (pointer) ] 4
[ argv[n] (pointer) ] 4 (= NULL)
[ envp[0] (pointer) ] 4
[ envp[1] (pointer) ] 4
[ envp[..] (pointer) ] 4
[ envp[term] (pointer) ] 4 (= NULL)
[ auxv[0] AT_PHDR (Elf32_auxv_t) ] 8
[ auxv[1] AT_PHENT (Elf32_auxv_t) ] 8
[ auxv[2] AT_PHNUM (Elf32_auxv_t) ] 8
[ auxv[3] AT_BASE (Elf32_auxv_t) ] 8
[ auxv[4] AT_FLAGS (Elf32_auxv_t) ] 8
[ auxv[5] AT_ENTRY (Elf32_auxv_t) ] 8
[ auxv[6] AT_UID (Elf32_auxv_t) ] 8
[ auxv[7] AT_EUID (Elf32_auxv_t) ] 8
[ auxv[8] AT_GID (Elf32_auxv_t) ] 8
[ auxv[9] AT_EGID (Elf32_auxv_t) ] 8
[ auxv[10] AT_HWCAP (Elf32_auxv_t) ] 8
[ auxv[11] AT_PAGESZ (Elf32_auxv_t) ] 8
[ auxv[12] AT_CLKTCK (Elf32_auxv_t) ] 8
[ auxv[13] AT_PLATFORM (Elf32_auxv_t) ] 8
[ auxv[14] (Elf32_auxv_t) ] 8 (= AT_NULL vector)
[ padding ] 0 - 15
[ padding ] 16
[ padding ] 0 - 15
[k_platform] 0 - 65
[ argument ASCIIZ strings ] >= 0
[ environment ASCIIZ str. ] >= 0
[filename] >=0
(0xbffffffc) [ end marker ] 4 (= NULL)
(0xc0000000) < top of stack > 0 (virtual)
*/
/*
* Put the ELF interpreter info on the stack
*/
#define NEW_AUX_ENT(nr, id, val) \
__put_user ((id), sp+(nr*2)); \
__put_user ((val), sp+(nr*2+1)); \
//开始存放辅助向量
sp -= 2;
NEW_AUX_ENT(0, AT_NULL, 0);//end of vector
if (k_platform) {
sp -= 2;
NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform);
}
sp -= 3*2;
NEW_AUX_ENT(0, AT_HWCAP, hwcap);
NEW_AUX_ENT(1, AT_PAGESZ, ELF_EXEC_PAGESIZE);// 4096
NEW_AUX_ENT(2, AT_CLKTCK, CLOCKS_PER_SEC);// 100
if (exec) {//elf interp
sp -= 10*2;
NEW_AUX_ENT(0, AT_PHDR, load_addr + exec->e_phoff);
NEW_AUX_ENT(1, AT_PHENT, sizeof (struct elf_phdr));
NEW_AUX_ENT(2, AT_PHNUM, exec->e_phnum);
NEW_AUX_ENT(3, AT_BASE, interp_load_addr);//interp加载基址,如果就是/lib/ld-linux.so.2或静态链
接可执行文件,则为0
NEW_AUX_ENT(4, AT_FLAGS, 0);
NEW_AUX_ENT(5, AT_ENTRY, load_bias + exec->e_entry);//原程序入口
NEW_AUX_ENT(6, AT_UID, (elf_addr_t) current->uid);
NEW_AUX_ENT(7, AT_EUID, (elf_addr_t) current->euid);
NEW_AUX_ENT(8, AT_GID, (elf_addr_t) current->gid);
NEW_AUX_ENT(9, AT_EGID, (elf_addr_t) current->egid);
}
#undef NEW_AUX_ENT
sp -= envc+1;
envp = (elf_caddr_t *) sp;
sp -= argc+1;
argv = (elf_caddr_t *) sp;
if (!ibcs) {//a.out
__put_user((elf_addr_t)(unsigned long) envp,--sp);
__put_user((elf_addr_t)(unsigned long) argv,--sp);
}
//处理argv数组
__put_user((elf_addr_t)argc,--sp);//argc入栈
current->mm->arg_start = (unsigned long) p;//arg_start
while (argc-->0) {
__put_user((elf_caddr_t)(unsigned long)p,argv++);
p += strlen_user(p);//计算下一个字符串的长度,更新p
}
__put_user(NULL, argv);
//处理envp数组
current->mm->arg_end = current->mm->env_start = (unsigned long) p;
while (envc-->0) {
__put_user((elf_caddr_t)(unsigned long)p,envp++);
p += strlen_user(p);
}
__put_user(NULL, envp);
current->mm->env_end = (unsigned long) p;
return sp;//返回argc地址
}
8.返回load_elf_binary
/* N.B. passed_fileno might not be initialized? */
if (interpreter_type == INTERPRETER_AOUT)
current->mm->arg_start += strlen(passed_fileno) + 1;//多了passed_fileno参数
current->mm->start_brk = current->mm->brk = elf_brk;//动态分配内存起始地址
current->mm->end_code = end_code;
current->mm->start_code = start_code;
current->mm->start_data = start_data;
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
/* Calling set_brk effectively mmaps the pages that we need
* for the bss and break sections
*/
set_brk(elf_bss, elf_brk);//elf_bss上取整
9.set_brk
static void set_brk(unsigned long start, unsigned long end)
{
start = ELF_PAGEALIGN(start);//上取整到页边界
end = ELF_PAGEALIGN(end);
if (end <= start)
return;
do_brk(start, end - start);
}
10.返回load_elf_binary
padzero(elf_bss);//对最后一映射文件的页中的bss清零
#if 0
printk("(start_brk) %lx\n" , (long) current->mm->start_brk);
printk("(end_code) %lx\n" , (long) current->mm->end_code);
printk("(start_code) %lx\n" , (long) current->mm->start_code);
printk("(start_data) %lx\n" , (long) current->mm->start_data);
printk("(end_data) %lx\n" , (long) current->mm->end_data);
printk("(start_stack) %lx\n" , (long) current->mm->start_stack);
printk("(brk) %lx\n" , (long) current->mm->brk);
#endif
if ( current->personality == PER_SVR4 )
{
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
/* N.B. Shouldn't the size here be PAGE_SIZE?? */
down(&current->mm->mmap_sem);
error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
up(&current->mm->mmap_sem);
}
#ifdef ELF_PLAT_INIT
/*
* The ABI may specify that certain registers be set up in special
* ways (on i386 %edx is the address of a DT_FINI function, for
* example. This macro performs whatever initialization to
* the regs structure is required.
*/
ELF_PLAT_INIT(regs);//清空所有寄存器
#endif
/*
#define ELF_PLAT_INIT(_r) do { \
_r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
_r->esi = 0; _r->edi = 0; _r->ebp = 0; \
_r->eax = 0; \
} while (0)
*/
start_thread(regs, elf_entry, bprm->p);//一般此处的elf_entry是interp的entry
11.start_thread
//清fs,gs
#define start_thread(regs, new_eip, new_esp) do { \
__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
set_fs(USER_DS); \
regs->xds = __USER_DS; \
regs->xes = __USER_DS; \
regs->xss = __USER_DS; \
regs->xcs = __USER_CS; \
regs->eip = new_eip;/*设置eip,一般指向ld-linux.so.2的入口*/ \
regs->esp = new_esp;/*设置esp,指向argc地址*/ \
} while (0)
12.返回load_elf_binary
if (current->ptrace & PT_PTRACED)
send_sig(SIGTRAP, current, 0);//如果进程被调试,通知父进程
retval = 0;
out:
return retval;
/* error cleanup */
out_free_dentry:
allow_write_access(interpreter);
fput(interpreter);
out_free_interp:
if (elf_interpreter)
kfree(elf_interpreter);
out_free_file:
sys_close(elf_exec_fileno);
out_free_ph:
kfree(elf_phdata);
goto out;
}
ld.so分析4 PIC,GOT和PLT
1.PIC
PIC就是Position Independent Code(位置无关代码).那么何谓位置无关代码?
如果代码不需要被重定位,那么这种代码就是位置无关的。
我们要区分位置无关代码和可重入代码(Reentrant Code)的不同,两者是无关的概念,不能混淆。
例如
int f()
{
return 1;
}
[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
movl $1, %eax
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
这是PIC也是RC
char * f()
{
return "a";
}
[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.section .rodata
.LC0:
.string "a"
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
movl $.LC0, %eax
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
这不是PIC但是是RC
int f()
{
static int a=0;
a++;
return a;
}
[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.data
.align 4
.type a.0,@object
.size a.0,4
a.0:
.long 0
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
incl a.0
movl a.0, %eax
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
两者都不是
将上面的汇编语言改成PIC
[zws@mail ~]$cat x.s
.file "x.c"
.data
.align 4
.type a.0,@object
.size a.0,4
a.0:
.long 0
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
pushl %ebx
call .L2
.L2:
popl %ebx// %ebx中为当前指令的地址
subl $.L2-a.0, %ebx//该指令地址-相对a.0的偏移,即为a.0的地址,在%ebx中
incl (%ebx)
movl (%ebx), %eax
popl %ebx
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
现在这个代码是PIC而不是RC
综上,PIC代码中不能引用绝对地址,否则需要重定位.(上面的 subl .L2-a.0, %ebx不算引用,因为gas会计算偏移,最终形成的指令中存放的是数)
RC代码不能使用共享变量,否则需要锁。
位置无关代码有什么优点?多个可执行程序运行这样的代码(例如动态链接库)时,虽然加载地址可能不一样,但是该代码不需要重定位,也就是不
需要修改代码,那么多个可执行程序就能共享一个代码副本,从而节省内存。缺点是占用一个寄存器计算地址,代码长度增加一点,执行时间增加
一点。
2.GOT
GOT是GLOBAL OFFSET TABLE(全局偏移表).
我们称一个可执行文件或动态链接库为一个模块.
一个模块中的数据或函数只允许被自己访问,称为本地局部数据或本地局部函数.例如static 类型的变量或函数就是这种类型.
一个模块中的数据或函数不但允许被自己访问,也允许外部访问,称为本地全局数据或本地全局函数.例如没有static修饰的变量或函数就是这种类
型.
相应地一个模块引用另一个模块中的数据或函数,则称为外部全局数据或外部全局函数.例如使用extern修饰的类型的变量或函数就是这种类型
.
局部肯定是本地的,外部一定是全局的。
GOT有如下六种功能:
>>为本地访问本地局部数据(静态变量或常量)访问提供PIC支持。
>>为本地访问本地全局数据访问提供PIC支持(配合.got节)
>>为本地访问外部全局数据访问提供PIC支持(配合.got节)
>>为本地访问本地全局函数调用提供PIC支持(配合.plt节和.got.plt节)
>>为本地访问外部全局函数调用提供PIC支持(配合.plt节和.got.plt节)
>>为动态链接提供支持(配合.rel.dyn节,.rel.plt节,.got节,.got.plt节)
由于函数调用使用的都是相对寻址,且本地局部函数地址已知,因此本地访问本地局部函数调用不需要GOT支持.
(1)为本地访问本地局部数据(静态变量或常量)访问提供PIC支持。
[zws@mail ~]$cat x.c
static int a=0;
int f()
{
a++;
return a;
}
[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.data
.align 4
.type a,@object
.size a,4
a:
.long 0
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
pushl %ebx
call .L2
.L2:
popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
incl a@GOTOFF(%ebx)
movl a@GOTOFF(%ebx), %eax
popl %ebx
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$readelf -a x.o
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: REL (Relocatable file)
Machine: Intel 80386
Version: 0x1
Entry point address: 0x0
Start of program headers: 0 (bytes into file)
Start of section headers: 196 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 0 (bytes)
Number of program headers: 0
Size of section headers: 40 (bytes)
Number of section headers: 9
Section header string table index: 6
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .text PROGBITS 00000000 000034 00001f 00 AX 0 0 4
[ 2] .rel.text REL 00000000 0002dc 000018 08 7 1 4
[ 3] .data PROGBITS 00000000 000054 000004 00 WA 0 0 4
[ 4] .bss NOBITS 00000000 000058 000000 00 WA 0 0 4
[ 5] .comment PROGBITS 00000000 000058 000033 00 0 0 1
[ 6] .shstrtab STRTAB 00000000 00008b 000039 00 0 0 1
[ 7] .symtab SYMTAB 00000000 00022c 000090 10 8 7 4
[ 8] .strtab STRTAB 00000000 0002bc 00001f 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
There are no section groups in this file.
There are no program headers in this file.
Relocation section '.rel.text' at offset 0x2dc contains 3 entries:
Offset Info Type Sym.Value Sym. Name
0000000c 0000080a R_386_GOTPC 00000000 _GLOBAL_OFFSET_TABLE_
00000012 00000309 R_386_GOTOFF 00000000 .data
00000018 00000309 R_386_GOTOFF 00000000 .data
There are no unwind sections in this file.
Symbol table '.symtab' contains 9 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 00000000 0 NOTYPE LOCAL DEFAULT UND
1: 00000000 0 FILE LOCAL DEFAULT ABS x.c
2: 00000000 0 SECTION LOCAL DEFAULT 1
3: 00000000 0 SECTION LOCAL DEFAULT 3
4: 00000000 0 SECTION LOCAL DEFAULT 4
5: 00000000 4 OBJECT LOCAL DEFAULT 3 a
6: 00000000 0 SECTION LOCAL DEFAULT 5
7: 00000000 31 FUNC GLOBAL DEFAULT 1 f
8: 00000000 0 NOTYPE GLOBAL DEFAULT UND _GLOBAL_OFFSET_TABLE_
No version information found in this file.
[zws@mail ~]$ objdump -d x.o
x.o: file format elf32-i386
Disassembly of section .text:
00000000 <f>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 53 push %ebx
4: e8 00 00 00 00 call 9
9: 5b pop %ebx
a: 81 c3 03 00 00 00 add $0x3,%ebx
10: ff 83 00 00 00 00 incl 0x0(%ebx)
16: 8b 83 00 00 00 00 mov 0x0(%ebx),%eax
1c: 5b pop %ebx
1d: c9 leave
1e: c3 ret
分析
call .L2,将下一条指令地址压栈
popl %ebx,将本指令地址弹出到%ebx寄存器
addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
$说明这个操作数是立即数,_GLOBAL_OFFSET_TABLE_,特殊符号,gas能够识别,并为该操作数生成R_386_GOTPC重定位类型.例如上面的
0000000c 0000080a R_386_GOTPC 00000000 _GLOBAL_OFFSET_TABLE_
地址0000000c指向指令 a: 81 c3 03 00 00 00 add $0x3,%ebx的源操作数部分[03 00 00 00]
ld链接时,检查重定位表,发现包含R_386_GOTPC重定位项,创建.got和.got.plt节,.got节存放全局数据地址,.got.plt存放全局函数地址,GOT地
址是.got.plt的地址(ld也可以合并这两个节成一个.got节),并计算地址GOT和地址0000000c的差值,加入0000000c处的值并写入,这就是R_386_GO
TPC重定位的内容。
上面的[.-.L2]意思是计算当前指令地址和.L2地址之差,即popl %ebx指令长度,应该是1.但是为何最终的指令却是add $0x3,%ebx呢?操作数3是如
何计算出来的呢?
这是因为重定位R_386_GOTPC项时计算的是该操作数与GOT的差值,而不是该条指令与GOT的差值.因此需要计算该操作数的偏移,即指令
a: 81 c3 03 00 00 00 add $0x3,%ebx
地址0000000a加上81 c3这两字节操作码长度,形成最终地址0000000c.
相应的%ebx存放的应该是该操作数的加载地址,即(popl %ebx指令地址)+(popl %ebx指令长度)+(add
$0x3,%ebx指令操作码长度)=%ebx+1+2=add $0x3,%ebx
然而指令
addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
并没有明确指出再加上addl的操作码长度,其实这是gas替我们隐含计算了.gas分析该指令的操作数,碰到是立即数,且含有符号_GLOBAL_OFFS
ET_TABLE_,会在形成最终的操作时,自动加上操作码长度,得到我们想要的结果。
a++生成的指令是 incl a@GOTOFF(%ebx)生成的机器指令是
10: ff 83 00 00 00 00 incl 0x0(%ebx)
重定位项是
00000012 00000309 R_386_GOTOFF 00000000 .data
在连接时重定位类型R_386_GOTOFF执行的操作是计算该符号与GOT的偏移,并加入重定位处(GOTOFF即GOT OFFSET).
可见a@GOTOFF会指示gas生成R_386_GOTOFF重定位项,比较适合只被自己使用的变量。
在符号表中
5: 00000000 4 OBJECT LOCAL DEFAULT 3 a
a的bind类型是local.
(2)为本地访问本地全局数据访问提供PIC支持(配合.got节)
[zws@mail ~]$cat x.c
int a=0;
int f()
{
a++;
return a;
}
[zws@mail ~]$cat x.s
.file "x.c"
.globl a
.data
.align 4
.type a,@object
.size a,4
a:
.long 0
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
pushl %ebx
call .L2
.L2:
popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
movl a@GOT(%ebx), %eax
incl (%eax)
movl a@GOT(%ebx), %eax
movl (%eax), %eax
popl %ebx
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$readelf -a x.o
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: REL (Relocatable file)
Machine: Intel 80386
Version: 0x1
Entry point address: 0x0
Start of program headers: 0 (bytes into file)
Start of section headers: 200 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 0 (bytes)
Number of program headers: 0
Size of section headers: 40 (bytes)
Number of section headers: 9
Section header string table index: 6
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .text PROGBITS 00000000 000034 000023 00 AX 0 0 4
[ 2] .rel.text REL 00000000 0002e0 000018 08 7 1 4
[ 3] .data PROGBITS 00000000 000058 000004 00 WA 0 0 4
[ 4] .bss NOBITS 00000000 00005c 000000 00 WA 0 0 4
[ 5] .comment PROGBITS 00000000 00005c 000033 00 0 0 1
[ 6] .shstrtab STRTAB 00000000 00008f 000039 00 0 0 1
[ 7] .symtab SYMTAB 00000000 000230 000090 10 8 6 4
[ 8] .strtab STRTAB 00000000 0002c0 00001f 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
There are no section groups in this file.
There are no program headers in this file.
Relocation section '.rel.text' at offset 0x2e0 contains 3 entries:
Offset Info Type Sym.Value Sym. Name
0000000c 0000080a R_386_GOTPC 00000000 _GLOBAL_OFFSET_TABLE_
00000012 00000603 R_386_GOT32 00000000 a
0000001a 00000603 R_386_GOT32 00000000 a
There are no unwind sections in this file.
Symbol table '.symtab' contains 9 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 00000000 0 NOTYPE LOCAL DEFAULT UND
1: 00000000 0 FILE LOCAL DEFAULT ABS x.c
2: 00000000 0 SECTION LOCAL DEFAULT 1
3: 00000000 0 SECTION LOCAL DEFAULT 3
4: 00000000 0 SECTION LOCAL DEFAULT 4
5: 00000000 0 SECTION LOCAL DEFAULT 5
6: 00000000 4 OBJECT GLOBAL DEFAULT 3 a
7: 00000000 35 FUNC GLOBAL DEFAULT 1 f
8: 00000000 0 NOTYPE GLOBAL DEFAULT UND _GLOBAL_OFFSET_TABLE_
No version information found in this file.
[zws@mail ~]$
和前面的唯一差别就是对变量的访问方式.由a@GOTOFF变成a@GOT,重定位方式也从R_386_GOTOFF变成R_386_GOT32.
a@GOT的访问方式是,将变量a的地址值存入.got节,访问a时,先根据GOT计算存放变量a的地址值在.got中的地址,然后取该地址值,即为变量a的
地址,用一条指令就能实现
movl a@GOT(%ebx), %eax
然后就可以对该变量执行操作了
例如a++生成的指令是 incl (%eax),对该地址处的值增一。
在符号表中
6: 00000000 4 OBJECT GLOBAL DEFAULT 3 a
a的bind类型是GLOBAL.
再写一个y.c
[zws@mail ~]$cat y.c
void f();
int main()
{
f();
return 0;
}
[zws@mail ~]$gcc y.c x.o
分析生成的可执行文件a.out可发现
ld在处理R_386_GOT32时,将该符号的地址x存入.got节,并记录其在.got
节中的地址y,然后计算y相对于GOT偏移,存入该符号所有的R_386_GOT32类型重定位地址处。最后在目标文件中为该符号生成R_386_GLOB_D
AT类型重定位项例如
readelf -a a.out
[20] .got PROGBITS 080494e0 0004e0 000008 04 WA 0 0 4
[21] .got.plt PROGBITS 080494e8 0004e8 000010 04 WA 0 0 4
080494e4 00000406 R_386_GLOB_DAT 08049504 a(显然地址080494e4在.got中)
R_386_GLOB_DAT类型执行的操作是,将模块加载地址加入该重定位处.这样变量的地址就确定了,可以通过y来访问,而且不需要对代码重定位。
如果该变量被其他模块访问(例如动态链接库中的变量被可执行文件访问或动态链接库中的变量被其他动态链接库访问),则执行动态链接时,只
需要将该变量所在的地址x存入引用模块的.got节y处,就能实现共享且PIC.
其实本地访问本地全局数据访问也可以使用GOTOFF方式(例如本例的x.c).想一想为什么不这样做?从指导ld的方面去想。
(3)为本地访问外部全局数据访问提供PIC支持(配合.got节)
[zws@mail ~]$cat x.c
extern int a;
int f()
{
a++;
return a;
}
[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
pushl %ebx
call .L2
.L2:
popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
movl a@GOT(%ebx), %eax
incl (%eax)
movl a@GOT(%ebx), %eax
movl (%eax), %eax
popl %ebx
leave
ret
.Lfe1:
.size f,.Lfe1-f
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$gcc -shared x.s -o libx.so
readelf 查看一下是否和上面分析的一致
(4)为本地访问本地全局函数调用提供PIC支持(配合.plt节和.got.plt节)
[zws@mail ~]$cat x.c
void f()
{
}
void g()
{
f();
}
[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
.file "x.c"
.text
.globl f
.type f,@function
f:
pushl %ebp
movl %esp, %ebp
leave
ret
.Lfe1:
.size f,.Lfe1-f
.globl g
.type g,@function
g:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $4, %esp
call .L3
.L3:
popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-.L3], %ebx
call f@PLT
addl $4, %esp
popl %ebx
leave
ret
.Lfe2:
.size g,.Lfe2-g
.ident "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$as x.s -o x.o
[zws@mail ~]$readelf -r x.o
Relocation section '.rel.text' at offset 0x2dc contains 2 entries:
Offset Info Type Sym.Value Sym. Name
00000014 0000080a R_386_GOTPC 00000000 _GLOBAL_OFFSET_TABLE_
00000019 00000604 R_386_PLT32 00000000 f
本地调用本地全局函数生成的代码是 call f@PLT
gas为call f@PLT生成的重定位项是R_386_PLT32 ,指导ld生成.plt节。
[zws@mail ~]$gcc -shared x.o -o libx.so
[zws@mail ~]$readelf -r libx.so
Relocation section '.rel.dyn' at offset 0x22c contains 5 entries:
Offset Info Type Sym.Value Sym. Name
00001500 00000008 R_386_RELATIVE
00001504 00000008 R_386_RELATIVE
000014dc 00000106 R_386_GLOB_DAT 00000000 __gmon_start__
000014e0 00000206 R_386_GLOB_DAT 00000000 _Jv_RegisterClasses
000014e4 00000806 R_386_GLOB_DAT 00000000 __cxa_finalize
Relocation section '.rel.plt' at offset 0x254 contains 3 entries:
Offset Info Type Sym.Value Sym. Name
000014f4 00000207 R_386_JUMP_SLOT 00000000 _Jv_RegisterClasses
000014f8 00000607 R_386_JUMP_SLOT 00000390 f
000014fc 00000807 R_386_JUMP_SLOT 00000000 __cxa_finalize
[zws@mail ~]$objdump -d libx.so
Disassembly of section .plt:
00000284 <_Jv_RegisterClasses@plt-0x10>:
284: ff b3 04 00 00 00 pushl 0x4(%ebx)
28a: ff a3 08 00 00 00 jmp *0x8(%ebx)
290: 00 00 add %al,(%eax)
...
00000294 <_Jv_RegisterClasses@plt>:
294: ff a3 0c 00 00 00 jmp *0xc(%ebx)
29a: 68 00 00 00 00 push $0x0
29f: e9 e0 ff ff ff jmp 284 <_init+0x18>
000002a4 <f@plt>:
2a4: ff a3 10 00 00 00 jmp *0x10(%ebx)
2aa: 68 08 00 00 00 push $0x8
2af: e9 d0 ff ff ff jmp 284 <_init+0x18>
000002b4 <__cxa_finalize@plt>:
2b4: ff a3 14 00 00 00 jmp *0x14(%ebx)
2ba: 68 10 00 00 00 push $0x10
2bf: e9 c0 ff ff ff jmp 284 <_init+0x18>
。。。。。。。。。。。。。
00000390 <f>:
390: 55 push %ebp
391: 89 e5 mov %esp,%ebp
393: c9 leave
394: c3 ret
00000395 <g>:
395: 55 push %ebp
396: 89 e5 mov %esp,%ebp
398: 53 push %ebx
399: 83 ec 04 sub $0x4,%esp
39c: e8 00 00 00 00 call 3a1
3a1: 5b pop %ebx
3a2: 81 c3 47 11 00 00 add $0x1147,%ebx
3a8: e8 f7 fe ff ff call 2a4
3ad: 83 c4 04 add $0x4,%esp
3b0: 5b pop %ebx
3b1: c9 leave
3b2: c3 ret
3b3: 90 nop
至于这里的涉及到的原理看下面,这里的%ebx存放的是本模块的GOT地址
(5)为本地访问外部全局函数调用提供PIC支持(配合.plt节和.got.plt节)
[zws@mail ~]$cat x.c
int a=0;
int f()
{
a++;
return a;
}
[zws@mail ~]$gcc -fPIC -shared x.c -o libx.so
[zws@mail ~]$cat y.c
void f();
int main()
{
f();
return 0;
}
[zws@mail ~]$gcc y.c libx.so
[zws@mail ~]$objdump -d a.out
看看外部全局函数调用使用什么方式
080483e8 <main>:
80483e8: 55 push %ebp
80483e9: 89 e5 mov %esp,%ebp
80483eb: 83 ec 08 sub $0x8,%esp
80483ee: 83 e4 f0 and $0xfffffff0,%esp
80483f1: b8 00 00 00 00 mov $0x0,%eax
80483f6: 29 c4 sub %eax,%esp
80483f8: e8 2b ff ff ff call 8048328
80483fd: b8 00 00 00 00 mov $0x0,%eax
8048402: c9 leave
8048403: c3 ret
call 8048328,这个地址在.plt节中
Disassembly of section .plt:
08048308 <__libc_start_main@plt-0x10>:
8048308: ff 35 a0 95 04 08 pushl 0x80495a0
804830e: ff 25 a4 95 04 08 jmp *0x80495a4
8048314: 00 00 add %al,(%eax)
...
08048318 <__libc_start_main@plt>:
8048318: ff 25 a8 95 04 08 jmp *0x80495a8
804831e: 68 00 00 00 00 push $0x0
8048323: e9 e0 ff ff ff jmp 8048308 <_init+0x18>
08048328 <f@plt>:
8048328: ff 25 ac 95 04 08 jmp *0x80495ac
804832e: 68 08 00 00 00 push $0x8
8048333: e9 d0 ff ff ff jmp 8048308 <_init+0x18>
jmp *0x80495ac,这个地址在.got.plt节中
[zws@mail ~]$objdump -sj .got.plt a.out
a.out: file format elf32-i386
Contents of section .got.plt:
804959c c8940408 00000000 00000000 1e830408 ................
80495ac 2e830408 ....
该地址处的值是0804832e,就是前面jmp *0x80495ac的下一条指令地址
push $0x8,压入立即数8,其实是f的重定位项的在.rel.plt节中偏移(一个重定位项占8字节)
Relocation section '.rel.plt' at offset 0x2e0 contains 2 entries:
Offset Info Type Sym.Value Sym. Name
080495a8 00000407 R_386_JUMP_SLOT 00000000 __libc_start_main
080495ac 00000807 R_386_JUMP_SLOT 00000000 f
该f符号的重定位偏移是080495ac(就是在前面的.got.plt节中),类型是R_386_JUMP_SLOT.这样动态连接时,查找到f的地址后,写入080495ac处.
这样下次调用f时,就会直接跳到f的真实地址。
push $0x8的下一条指令是jmp 8048308,8048308处的指令是
8048308: ff 35 a0 95 04 08 pushl 0x80495a0
804830e: ff 25 a4 95 04 08 jmp *0x80495a4
第一条pushl 0x80495a0,将0x80495a0地址处的值压栈。0x80495a0在.got.plt中
[zws@mail ~]$objdump -sj .got.plt a.out
a.out: file format elf32-i386
Contents of section .got.plt:
804959c c8940408 00000000 00000000 1e830408 ................
80495ac 2e830408 ....
.got.plt的前三项是有特殊意义的,他们都是地址,在执行动态连接时要用到.第0项080494c8是.dynamic节地址.第1项是本模块的link_map地址,这
里是0,动态连接时会存入真实地址,第2项是_dl_runtime_resolve的地址,动态链接时存入.
将本模块的link_map地址压栈后,jmp *0x80495a4, 显然是跳到_dl_runtime_resolve中,执行链接f任务,_dl_runtime_resolve解析到f地址后,
会存入80495ac处,并将该地址替换栈上的返回地址,这样,_dl_runtime_resolve返回时,直接返回到f中,并执行f.而下次再执行f时就不需要这么
麻烦了。
这种在需要执行时才进行符号链接是所谓的lazy方式动态链接,还有一种就是模块加载时一次性为所有的符号进行链接,无论用不用得到,所谓的
now方式动态链接。
综上.got节存放的都是被本地引用的本地全局数据(没有被本地引用的不会出现)和外部全局数据,.got.plt前三项特殊,后面都是被本地引用的本地全
局函数(没有被本地引用的不会出现)和外部全局函数地址..plt存放过程链接信息(procedure link
table)..rel.dyn重定位.got(类型为R_386_GLOB_DAT的项),.rel.plt重定位.got.plt.
ld.so分析5 _dl_start
对于不关心的地方,我们都//或/**/注释掉
1._dl_start中的变量声明
static Elf32_Addr //我们假设是i386 32位平台,ElfW(Addr)被宏扩展为Elf32_Addr
//ElfW(Addr)
//__attribute_used__ internal_function
//__attribute__ ((__used__)) __attribute__ ((regparm (3), stdcall))
_dl_start (void *arg)//arg参数值argc地址
{
//#ifdef DONT_USE_BOOTSTRAP_MAP
# define bootstrap_map GL(dl_rtld_map)
//#else
// struct dl_start_final_info info;
//# define bootstrap_map info.l
//#endif
//#if USE_TLS || (!DONT_USE_BOOTSTRAP_MAP && !HAVE_BUILTIN_MEMSET)
// size_t cnt;
//#endif
//#ifdef USE_TLS
// ElfW(Ehdr) *ehdr;
// ElfW(Phdr) *phdr;
// dtv_t initdtv[3];
//#endif
宏GL定义如下
# define GL(name) _rtld_local._##name
展开
#define bootstrap_map _rtld_local._dl_rtld_map
_rtld_local是什么呢?
查看rtld.c的预处理文件可发现如下定义
struct rtld_global _rtld_global =
{
# 1 "../sysdeps/unix/sysv/linux/i386/dl-procinfo.c" 1
# 47 "../sysdeps/unix/sysv/linux/i386/dl-procinfo.c"
._dl_x86_cap_flags
= {
"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
"cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov",
"pat", "pse36", "pn", "clflush", "20", "dts", "acpi", "mmx",
"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "amd3d"
}
,
._dl_x86_platforms
= {
"i386", "i486", "i586", "i686"
}
,
# 92 "rtld.c" 2
._dl_debug_fd = 2,
._dl_dynamic_weak = 1,
._dl_lazy = 1,
._dl_fpu_control = 0x037f,
._dl_correct_cache_id = 3,
._dl_hwcap_mask = HWCAP_IMPORTANT,
._dl_load_lock = {{0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, { 0, 0 }}}
};
extern struct rtld_global _rtld_local __attribute__ ((visibility ("hidden")));
extern __typeof (_rtld_global) _rtld_local __attribute__ ((alias ("_rtld_global")));;
结构rtld_global的内容就不贴出来了,大家自己查吧
这里指出,_rtld_local是_rtld_global的别名.查看ld.so的符号表也能例证
[zws@mail ~/glibc-2.3/build/elf]$readelf -s ld.so|grep _rtld
332: 00012140 980 OBJECT LOCAL HIDDEN 14 _rtld_local
462: 00012140 980 OBJECT GLOBAL DEFAULT 14 _rtld_global
_rtld_local._dl_rtld_map的类型是struct link_map.这个类型非常重要,是动态链接的核心数据结构
注意这里的HIDDEN属性,这个属性保证访问_rtld_local使用_rtld_local@GOTOFF而不是_rtld_local@GOT,
从而_rtld_local不需要重定位,这个一定很重要
2._dl_start中的动态链接内联函数
/* This #define produces dynamic linking inline functions for
bootstrap relocation instead of general-purpose relocation. */
#define RTLD_BOOTSTRAP
#define RESOLVE_MAP(sym, version, flags) \
((*(sym))->st_shndx == SHN_UNDEF ? 0 : &bootstrap_map)
#define RESOLVE(sym, version, flags) \
((*(sym))->st_shndx == SHN_UNDEF ? 0 : bootstrap_map.l_addr)
#include "dynamic-link.h"
这里先定义了三个宏,然后包含dynamic-link.h头文件,里面定义了几个动态链接需要用到的宏或函数。
这些宏或函数用到了前面定义的三个宏,因此,根据这三个宏定义的不同,动态链接宏或函数的功能会有所不同,
前面的注释也说明了这一点。至于这些动态链接宏或函数的功能,后面涉及到的时候再分析。
3.获取ld.so的加载基址
if (HP_TIMING_INLINE && HP_TIMING_AVAIL)
//#ifdef DONT_USE_BOOTSTRAP_MAP
HP_TIMING_NOW (start_time);//获得开始时间
//#else
// HP_TIMING_NOW (info.start_time);
//#endif
/* Partly clean the `bootstrap_map' structure up. 部分清空bootstrap_map结构. Don't use
`memset' since it might not be built in or inlined and we cannot
不使用memset是因为它不是内建的或内联函数,我们现在还不能调用.
make function calls at this point. Use '__builtin_memset' if we
如果有效的话,使用__builtin_memset
know it is available. We do not have to clear the memory if we
如果不必使用临时bootstrap_map则不需要清0
do not have to use the temporary bootstrap_map. Global variables
全局变量缺省初始化为0
are initialized to zero by default. */
/*
#ifndef DONT_USE_BOOTSTRAP_MAP
# ifdef HAVE_BUILTIN_MEMSET
__builtin_memset (bootstrap_map.l_info, '\0', sizeof (bootstrap_map.l_info));
# else
for (cnt = 0;
cnt < sizeof (bootstrap_map.l_info) / sizeof (bootstrap_map.l_info[0]);
++cnt)
bootstrap_map.l_info[cnt] = 0;
# endif
#endif
*/
/* Figure out the run-time load address of the dynamic linker itself. */
bootstrap_map.l_addr = elf_machine_load_address ();// 加载地址 _rtld_local._dl_rtld_map.l_addr = elf_machine_load_address ();
/* Read our own dynamic section and fill in the info array. */
bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic ();//动态节地址
elf_get_dynamic_info (&bootstrap_map);//取动态信息
4.elf_machine_dynamic和elf_machine_load_address (sysdeps/i386/dl-machine.h)
/* Return the link-time address of _DYNAMIC. Conveniently, this is the
first element of the GOT, a special entry that is never relocated.
Takes no arguments; the result is simply the value stored in
_GLOBAL_OFFSET_TABLE_[0]. */
static inline Elf32_Addr //__attribute__ ((unused, const))
elf_machine_dynamic (void)
{
/* This produces a GOTOFF reloc that resolves to zero at link time, so in
fact just loads from the GOT register directly. By doing it without
an asm we can let the compiler choose any register. */
extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
return _GLOBAL_OFFSET_TABLE_[0];
}
/* Return the run-time load address of the shared object.
The result is the run-time address of _DYNAMIC minus the link-time
address of _DYNAMIC as returned by elf_machine_dynamic (). */
static inline Elf32_Addr //__attribute__ ((unused))
elf_machine_load_address (void)
{
/* Compute the difference between the runtime address of _DYNAMIC as seen
by a GOTOFF reference, and the link-time address found in the special
unrelocated first GOT entry. */
/* `bygotoff' is a C-level name bound to the assembler symbol _DYNAMIC;
the attribute_hidden qualifier is commented out in this excerpt. */
extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC");// attribute_hidden;
return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
}
有点晦涩难懂,看看汇编代码
bootstrap_map.l_addr = elf_machine_load_address ();
生成的汇编代码如下
movl _GLOBAL_OFFSET_TABLE_@GOTOFF(%ebx), %edx//取GOT[0],即ld.so的dynamic节被ld静态链接时安排的地址
leal _DYNAMIC@GOTOFF(%ebx), %eax//取dynamic节运行时加载到内存中的地址
subl %edx, %eax//dynamic的地址-got[0],即得镜像加载基址
movl %eax, 456+_rtld_local@GOTOFF(%ebx)//该地址存入l_addr
C代码和汇编代码对照着看,就能明白一二。
5.elf_get_dynamic_info (dynamic-link.h)
/* Read the dynamic section at DYN and fill in INFO with indices DT_*.

L is the link map to process.  The function walks the dynamic entries
starting at l->l_ld until the DT_NULL terminator and, for every tag that
falls into one of the recognized ranges, stores a pointer to that entry
in l->l_info[].  If l->l_addr is nonzero, the pointer-valued entries
listed below are then rebased in place by adding l->l_addr.  Finally some
consistency checks are asserted, DT_FLAGS / DT_FLAGS_1 are copied into
l->l_flags / l->l_flags_1, and DT_RPATH is dropped when DT_RUNPATH exists.
Returns nothing; returns early when l->l_ld is NULL.

Lines of the form "//#if ..." are preprocessor conditionals that have been
commented out in this excerpt. */
static inline void //__attribute__ ((unused, always_inline))
elf_get_dynamic_info (struct link_map *l)
{
ElfW(Dyn) *dyn = l->l_ld;
ElfW(Dyn) **info;
//#ifndef RTLD_BOOTSTRAP
if (dyn == NULL)
return;
//#endif
/*
Sample dynamic section of ld.so, shown decoded and as a raw hex dump:
[zws@mail elf]$ readelf -d ld.so
Dynamic section at offset 0x12000 contains 18 entries:
Tag Type Name/Value
0x0000000e (SONAME) Library soname: [ld-linux.so.2]
0x00000004 (HASH) 0x94
0x00000005 (STRTAB) 0x48c
0x00000006 (SYMTAB) 0x1dc
0x0000000a (STRSZ) 719 (bytes)
0x0000000b (SYMENT) 16 (bytes)
0x00000003 (PLTGOT) 0x120e8
0x00000002 (PLTRELSZ) 72 (bytes)
0x00000014 (PLTREL) REL
0x00000017 (JMPREL) 0x8c8
0x00000011 (REL) 0x858
0x00000012 (RELSZ) 112 (bytes)
0x00000013 (RELENT) 8 (bytes)
0x6ffffffc (VERDEF) 0x7b4
0x6ffffffd (VERDEFNUM) 5
0x6ffffff0 (VERSYM) 0x75c
0x6ffffffa (RELCOUNT) 5
0x00000000 (NULL) 0x0
[zws@mail elf]$ readelf -x 11 ld.so
Hex dump of section '.dynamic':
0x00012000 0e000000 95020000 04000000 94000000 ................
0x00012010 05000000 8c040000 06000000 dc010000 ................
0x00012020 0a000000 cf020000 0b000000 10000000 ................
0x00012030 03000000 e8200100 02000000 48000000 ..... ......H...
0x00012040 14000000 11000000 17000000 c8080000 ................
0x00012050 11000000 58080000 12000000 70000000 ....X.......p...
0x00012060 13000000 08000000 fcffff6f b4070000 ...........o....
0x00012070 fdffff6f 05000000 f0ffff6f 5c070000 ...o.......o\...
0x00012080 faffff6f 05000000 00000000 00000000 ...o............
0x00012090 00000000 00000000 00000000 00000000 ................
0x000120a0 00000000 00000000 00000000 00000000 ................
*/
info = l->l_info;// array that receives the pointers to the dynamic entries
while (dyn->d_tag != DT_NULL)// walk the entries up to the DT_NULL terminator
{
if (dyn->d_tag < DT_NUM)// standard tags; per the article DT_NUM is 34, i.e. indices [0,33]
info[dyn->d_tag] = dyn;
else if (dyn->d_tag >= DT_LOPROC &&
dyn->d_tag < DT_LOPROC + DT_THISPROCNUM)// per the article DT_THISPROCNUM is 0, so the range [0x70000000,0x70000000) is empty
info[dyn->d_tag - DT_LOPROC + DT_NUM] = dyn;
else if ((Elf32_Word) DT_VERSIONTAGIDX (dyn->d_tag) < DT_VERSIONTAGNUM)// per the article: 16 tags, [0x6ffffff0,0x6fffffff] -> indices [49,34]
info[VERSYMIDX (dyn->d_tag)] = dyn;
else if ((Elf32_Word) DT_EXTRATAGIDX (dyn->d_tag) < DT_EXTRANUM)// per the article: 3 tags, [0x7ffffffd,0x7fffffff]
info[DT_EXTRATAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
+ DT_VERSIONTAGNUM] = dyn;
else if ((Elf32_Word) DT_VALTAGIDX (dyn->d_tag) < DT_VALNUM)// per the article: 12 tags, [0x6ffffdf4,0x6ffffdff]
info[DT_VALTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
+ DT_VERSIONTAGNUM + DT_EXTRANUM] = dyn;
else if ((Elf32_Word) DT_ADDRTAGIDX (dyn->d_tag) < DT_ADDRNUM)// per the article: 10 tags, [0x6ffffef6,0x6ffffeff]
info[DT_ADDRTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
+ DT_VERSIONTAGNUM + DT_EXTRANUM + DT_VALNUM] = dyn;
++dyn;
}
//#ifndef DL_RO_DYN_SECTION
/* Don't adjust .dynamic unnecessarily. */
if (l->l_addr != 0)// nonzero load address
{
// Rebase the pointer-valued entries in place by the load address.
ElfW(Addr) l_addr = l->l_addr;
if (info[DT_HASH] != NULL)
info[DT_HASH]->d_un.d_ptr += l_addr;
if (info[DT_PLTGOT] != NULL)
info[DT_PLTGOT]->d_un.d_ptr += l_addr;
if (info[DT_STRTAB] != NULL)
info[DT_STRTAB]->d_un.d_ptr += l_addr;
if (info[DT_SYMTAB] != NULL)
info[DT_SYMTAB]->d_un.d_ptr += l_addr;
//# if ! ELF_MACHINE_NO_RELA
if (info[DT_RELA] != NULL)
info[DT_RELA]->d_un.d_ptr += l_addr;
//# endif
//# if ! ELF_MACHINE_NO_REL
if (info[DT_REL] != NULL)
info[DT_REL]->d_un.d_ptr += l_addr;
//# endif
if (info[DT_JMPREL] != NULL)
info[DT_JMPREL]->d_un.d_ptr += l_addr;
if (info[VERSYMIDX (DT_VERSYM)] != NULL)
info[VERSYMIDX (DT_VERSYM)]->d_un.d_ptr += l_addr;
}
//#endif
/* DT_PLTREL, when present, must name either REL or RELA. */
if (info[DT_PLTREL] != NULL)
{
//#if ELF_MACHINE_NO_RELA
// assert (info[DT_PLTREL]->d_un.d_val == DT_REL);
//#elif ELF_MACHINE_NO_REL
// assert (info[DT_PLTREL]->d_un.d_val == DT_RELA);
//#else
assert (info[DT_PLTREL]->d_un.d_val == DT_REL
|| info[DT_PLTREL]->d_un.d_val == DT_RELA);
//#endif
}
/* The declared entry sizes must match the in-memory structure sizes. */
//#if ! ELF_MACHINE_NO_RELA
if (info[DT_RELA] != NULL)
assert (info[DT_RELAENT]->d_un.d_val == sizeof (ElfW(Rela)));
//# endif
//# if ! ELF_MACHINE_NO_REL
if (info[DT_REL] != NULL)
assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel)));
//#endif
if (info[DT_FLAGS] != NULL)
{
/* Flags are used. Translate to the old form where available.
Since these l_info entries are only tested for NULL pointers it
is ok if they point to the DT_FLAGS entry. */
l->l_flags = info[DT_FLAGS]->d_un.d_val;
//#ifdef RTLD_BOOTSTRAP
/* These three flags must not be set for ld.so. */
// assert ((l->l_flags & (DF_SYMBOLIC | DF_TEXTREL | DF_BIND_NOW)) == 0);
//#else
if (l->l_flags & DF_SYMBOLIC)
info[DT_SYMBOLIC] = info[DT_FLAGS];
if (l->l_flags & DF_TEXTREL)
info[DT_TEXTREL] = info[DT_FLAGS];
if (l->l_flags & DF_BIND_NOW)
info[DT_BIND_NOW] = info[DT_FLAGS];
//#endif
}
if (info[VERSYMIDX (DT_FLAGS_1)] != NULL)
l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
//#ifdef RTLD_BOOTSTRAP
/* The dynamic linker should have none of these set. */
// assert (info[DT_RUNPATH] == NULL);
// assert (info[DT_RPATH] == NULL);
//#else
if (info[DT_RUNPATH] != NULL)
/* If both RUNPATH and RPATH are given, the latter is ignored. */
info[DT_RPATH] = NULL;
//#endif
}
6._dl_start执行自我重定位
/*
#if USE_TLS
# if !defined HAVE___THREAD && !defined DONT_USE_BOOTSTRAP_MAP
/* Signal that we have not found TLS data so far. * /
bootstrap_map.l_tls_modid = 0;
# endif
/* Get the dynamic linker's own program header. First we need the ELF
file header. The `_begin' symbol created by the linker script points
to it. When we have something like GOTOFF relocs, we can use a plain
reference to find the runtime address. Without that, we have to rely
on the `l_addr' value, which is not the value we want when prelinked. * /
#ifdef DONT_USE_BOOTSTRAP_MAP
ehdr = (ElfW(Ehdr) *) &_begin;
#else
ehdr = (ElfW(Ehdr) *) bootstrap_map.l_addr;
#endif
phdr = (ElfW(Phdr) *) ((ElfW(Addr)) ehdr + ehdr->e_phoff);
for (cnt = 0; cnt < ehdr->e_phnum; ++cnt)
if (phdr[cnt].p_type == PT_TLS)
{
void *tlsblock;
size_t max_align = MAX (TLS_INIT_TCB_ALIGN, phdr[cnt].p_align);
char *p;
bootstrap_map.l_tls_blocksize = phdr[cnt].p_memsz;
bootstrap_map.l_tls_align = phdr[cnt].p_align;
assert (bootstrap_map.l_tls_blocksize != 0);
bootstrap_map.l_tls_initimage_size = phdr[cnt].p_filesz;
bootstrap_map.l_tls_initimage = (void *) (bootstrap_map.l_addr
+ phdr[cnt].p_vaddr);
/* We can now allocate the initial TLS block. This can happen
on the stack. We'll get the final memory later when we
know all about the various objects loaded at startup
time. * /
# if TLS_TCB_AT_TP
tlsblock = alloca (roundup (bootstrap_map.l_tls_blocksize,
TLS_INIT_TCB_ALIGN)
+ TLS_INIT_TCB_SIZE
+ max_align);
# elif TLS_DTV_AT_TP
tlsblock = alloca (roundup (TLS_INIT_TCB_SIZE,
bootstrap_map.l_tls_align)
+ bootstrap_map.l_tls_blocksize
+ max_align);
# else
/* In case a model with a different layout for the TCB and DTV
is defined add another #elif here and in the following #ifs. * /
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
/* Align the TLS block. * /
tlsblock = (void *) (((uintptr_t) tlsblock + max_align - 1)
& ~(max_align - 1));
/* Initialize the dtv. [0] is the length, [1] the generation
counter. * /
initdtv[0].counter = 1;
initdtv[1].counter = 0;
/* Initialize the TLS block. * /
# if TLS_TCB_AT_TP
initdtv[2].pointer = tlsblock;
# elif TLS_DTV_AT_TP
bootstrap_map.l_tls_offset = roundup (TLS_INIT_TCB_SIZE,
bootstrap_map.l_tls_align);
initdtv[2].pointer = (char *) tlsblock + bootstrap_map.l_tls_offset;
# else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
p = __mempcpy (initdtv[2].pointer, bootstrap_map.l_tls_initimage,
bootstrap_map.l_tls_initimage_size);
# ifdef HAVE_BUILTIN_MEMSET
__builtin_memset (p, '\0', (bootstrap_map.l_tls_blocksize
- bootstrap_map.l_tls_initimage_size));
# else
{
size_t remaining = (bootstrap_map.l_tls_blocksize
- bootstrap_map.l_tls_initimage_size);
while (remaining-- > 0)
*p++ = '\0';
}
#endif
/* Install the pointer to the dtv. * /
/* Initialize the thread pointer. * /
# if TLS_TCB_AT_TP
bootstrap_map.l_tls_offset
= roundup (bootstrap_map.l_tls_blocksize, TLS_INIT_TCB_ALIGN);
INSTALL_DTV ((char *) tlsblock + bootstrap_map.l_tls_offset,
initdtv);
if (TLS_INIT_TP ((char *) tlsblock + bootstrap_map.l_tls_offset, 0)
!= 0)
_dl_fatal_printf ("cannot setup thread-local storage\n");
# elif TLS_DTV_AT_TP
INSTALL_DTV (tlsblock, initdtv);
if (TLS_INIT_TP (tlsblock, 0) != 0)
_dl_fatal_printf ("cannot setup thread-local storage\n");
# else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
/* So far this is module number one. * /
bootstrap_map.l_tls_modid = 1;
/* The TP got initialized. * /
bootstrap_map.l_tls_tp_initialized = 1;
/* There can only be one PT_TLS entry. * /
break;
}
#endif /* use TLS * /
*/
//#ifdef ELF_MACHINE_BEFORE_RTLD_RELOC
// ELF_MACHINE_BEFORE_RTLD_RELOC (bootstrap_map.l_info);
//#endif
if (bootstrap_map.l_addr || ! bootstrap_map.l_info[VALIDX(DT_GNU_PRELINKED)])
{
/* Relocate ourselves so we can do normal function calls and 自我重定位,以便能够使用GOT调用函数和访问数据
data access using the global offset table. */
ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0);
}
7._dl_start->ELF_DYNAMIC_RELOCATE (dynamic-link.h)
/* Relocate MAP: first call elf_machine_runtime_setup (MAP, LAZY,
CONSIDER_PROFILE), then run the REL and RELA relocation passes with the
laziness value that call returned.

This can't just be an inline function because GCC is too dumb
to inline functions containing inlines themselves. */
# define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile) \
do { \
int edr_lazy = elf_machine_runtime_setup ((map), (lazy), \
(consider_profile)); \
ELF_DYNAMIC_DO_REL ((map), edr_lazy); \
ELF_DYNAMIC_DO_RELA ((map), edr_lazy); \
} while (0)
8._dl_start->ELF_DYNAMIC_RELOCATE ->elf_machine_runtime_setup(sysdeps/i386/dl-machine.h)
/* Set up the loaded object described by L so its unrelocated PLT
entries will jump to the on-demand fixup code in dl-runtime.c.

L is the link map of the object, LAZY is nonzero when PLT relocations are
to be bound lazily, and PROFILE is nonzero when profiling is requested.
The GOT is only touched when L has a DT_JMPREL entry and LAZY is nonzero.
Returns LAZY unchanged. */
static inline int //__attribute__ ((unused))
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word);// attribute_hidden;
extern void _dl_runtime_profile (Elf32_Word);// attribute_hidden;
if (l->l_info[DT_JMPREL] && lazy)// object has PLT relocations and lazy binding is requested
{
/* The GOT entries for functions in the PLT have not yet been filled
in. Their initial contents will arrange when called to push an
offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
and then jump to _GLOBAL_OFFSET_TABLE[2]. */
got = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);// address recorded in DT_PLTGOT
/* If a library is prelinked but we have to relocate anyway,
we have to be able to undo the prelinking of .got.plt.
The prelinker saved us here address of .plt + 0x16. */
/*
Sample .got.plt of an executable:
[zws@mail elf]$ readelf -x 21 a.out
Hex dump of section '.got.plt':
0x080494e8 1c940408 00000000 00000000 5e820408 ............^...
0x080494f8 6e820408 n...
The first word holds the address of the .dynamic section,
the second word holds the link_map address,
the third word holds the address of _dl_runtime_resolve.
*/
if (got[1])
{
l->l_mach.plt = got[1] + l->l_addr;
l->l_mach.gotplt = (Elf32_Addr) &got[3];
}
got[1] = (Elf32_Addr) l; /* Identify this shared object: store this module's link_map. */
/* The got[2] entry contains the address of a function which gets
called to get the address of a so far unresolved function and
jump to it. The profiling extension of the dynamic linker allows
to intercept the calls to collect information. In this case we
don't store the address in the GOT so that all future calls also
end in this function. */
if (__builtin_expect (profile, 0))
{
got[2] = (Elf32_Addr) &_dl_runtime_profile;
if (_dl_name_match_p (GL(dl_profile), l))
/* This is the object we are looking for. Say that we really
want profiling and the timers are started. */
GL(dl_profile_map) = l;
}
else
/* This function will get called to fix up the GOT entry indicated by
the offset on the stack, and then jump to the resolved address. */
got[2] = (Elf32_Addr) &_dl_runtime_resolve;// install the resolver entry point
}
return lazy;
}
前面传给lazy参数值为0,因此直接返回0,接下来的两个宏定义如下,注意lazy==0
/* Process the REL-type relocations of MAP by instantiating the generic
driver _ELF_DYNAMIC_DO_RELOC with the DT_REL tag family, the
elf_dynamic_do_rel worker and _ELF_CHECK_REL as the test_rel argument. */
#define ELF_DYNAMIC_DO_REL(map,lazy) _ELF_DYNAMIC_DO_RELOC (REL, rel, map, lazy, _ELF_CHECK_REL)
/* Expands to nothing in this configuration, so no RELA pass is performed. */
#define ELF_DYNAMIC_DO_RELA(map,lazy)
9._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC(elf/dynamic-link.h)
处理.rel.dyn和.rel.plt重定位节
/* Generic relocation driver.  RELOC is the upper-case tag stem (REL or
RELA) pasted into DT_##RELOC / DT_##RELOC##SZ, reloc is the lower-case stem
pasted into elf_dynamic_do_##reloc, MAP is the link map, DO_LAZY requests
lazy PLT processing and TEST_REL enables the check that DT_PLTREL names
this relocation type.  It gathers up to two address ranges (the DT_REL/RELA
table and the DT_JMPREL table) and hands them to elf_dynamic_do_##reloc;
when ELF_DURING_STARTUP is nonzero only ranges[0] is processed, non-lazily.
The numeric examples in the comments below are the article's ld.so values. */
# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, do_lazy, test_rel) \
do { \
struct { ElfW(Addr) start, size; int lazy; } ranges[2]; \
ranges[0].lazy = 0; \
ranges[0].size = ranges[1].size = 0; \
ranges[0].start = 0; \
\
if ((map)->l_info[DT_##RELOC]) /* DT_REL: is there a .rel.dyn section? e.g. 0x00000011 (REL) 0x858 */ \
{ \
ranges[0].start = D_PTR ((map), l_info[DT_##RELOC]);/* section address */ \
ranges[0].size = (map)->l_info[DT_##RELOC##SZ]->d_un.d_val;/* section size, e.g. 0x00000012 (RELSZ) 112 (bytes) */ \
} \
if ((map)->l_info[DT_PLTREL]/* is there a .rel.plt
section? e.g. 0x00000014 (PLTREL) REL */ \
&& (!test_rel/* per the article test_rel is 0 here */ || (map)->l_info[DT_PLTREL]->d_un.d_val == DT_##RELOC/* is the value DT_REL? */)) \
{ \
ElfW(Addr) start = D_PTR ((map), l_info[DT_JMPREL]); /* address of .rel.plt, e.g. 0x00000017 (JMPREL) 0x8c8 */ \
\
if (! ELF_DURING_STARTUP /* per the article this macro is defined as 1 here */ \
&& ((do_lazy)/* do_lazy is 0 in the bootstrap call */ \
/* This test does not only detect whether the relocation \
sections are in the right order, it also checks whether \
there is a DT_REL/DT_RELA section. */ \
|| ranges[0].start + ranges[0].size != start))/* .rel.dyn and .rel.plt are not contiguous */ \
{ \
ranges[1].start = start; \
ranges[1].size = (map)->l_info[DT_PLTRELSZ]->d_un.d_val; \
ranges[1].lazy = (do_lazy); \
} \
else \
{ \
/* Combine processing the sections. The bootstrap case takes this branch. */ \
assert (ranges[0].start + ranges[0].size == start); /* the two tables must be adjacent */ \
ranges[0].size += (map)->l_info[DT_PLTRELSZ]->d_un.d_val;/* merge the sizes, e.g. 0x00000002 (PLTRELSZ) 72 (bytes) */ \
} \
} \
\
if (ELF_DURING_STARTUP) /* 1 in the bootstrap case per the article */ \
elf_dynamic_do_##reloc ((map), ranges[0].start, ranges[0].size, 0); /* i.e. calls elf_dynamic_do_rel */ \
else \
{ \
int ranges_index; \
for (ranges_index = 0; ranges_index < 2; ++ranges_index) \
elf_dynamic_do_##reloc ((map), \
ranges[ranges_index].start, \
ranges[ranges_index].size, \
ranges[ranges_index].lazy); \
} \
} while (0)
看看ld.so的重定位信息
[zws@mail ~/glibc-2.3/build/elf]$readelf -r ld.so
Relocation section '.rel.dyn' at offset 0x858 contains 14 entries:
Offset Info Type Sym.Value Sym. Name
000120c0 00000008 R_386_RELATIVE
000120c8 00000008 R_386_RELATIVE
000120d8 00000008 R_386_RELATIVE
000120dc 00000008 R_386_RELATIVE
000120e0 00000008 R_386_RELATIVE
000120b0 00000106 R_386_GLOB_DAT 000126d0 __libc_internal_tsd_se
000120b4 00000206 R_386_GLOB_DAT 00012140 _rtld_global
000120b8 00000606 R_386_GLOB_DAT 00000000 __pthread_mutex_lock
000120bc 00000706 R_386_GLOB_DAT 000126d4 __libc_stack_end
000120c4 00000a06 R_386_GLOB_DAT 00000000 __pthread_mutex_init
000120cc 00001106 R_386_GLOB_DAT 000126e4 __libc_internal_tsd_ge
000120d0 00001306 R_386_GLOB_DAT 00000000 __pthread_mutex_unlock
000120d4 00001806 R_386_GLOB_DAT 00000000 __pthread_mutex_destro
000120e4 00002606 R_386_GLOB_DAT 000126f8 _r_debug
Relocation section '.rel.plt' at offset 0x8c8 contains 9 entries:
Offset Info Type Sym.Value Sym. Name
000120f4 00000607 R_386_JUMP_SLOT 00000000 __pthread_mutex_lock
000120f8 00000907 R_386_JUMP_SLOT 0000bdc4 __libc_memalign
000120fc 00000a07 R_386_JUMP_SLOT 00000000 __pthread_mutex_init
00012100 00000b07 R_386_JUMP_SLOT 0000bea0 malloc
00012104 00001207 R_386_JUMP_SLOT 0000bec2 calloc
00012108 00001307 R_386_JUMP_SLOT 00000000 __pthread_mutex_unlock
0001210c 00001807 R_386_JUMP_SLOT 00000000 __pthread_mutex_destro
00012110 00001b07 R_386_JUMP_SLOT 0000bf25 realloc
00012114 00002907 R_386_JUMP_SLOT 0000beff free
[zws@mail ~/glibc-2.3/build/elf]$
10._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel(elf/dynamic-link.h)
执行实质的重定位操作
/* Perform the relocations in MAP on the running program image as specified
by RELTAG, SZTAG. If LAZY is nonzero, this is the first pass on PLT
relocations; they should be set up to call _dl_runtime_resolve, rather
than fully resolved now. */
static inline void
elf_dynamic_do_rel (struct link_map *map,
ElfW(Addr) reladdr, ElfW(Addr) relsize,
int lazy)
{
const ElfW(Rel) *r = (const void *) reladdr;
const ElfW(Rel) *end = (const void *) (reladdr + relsize);
ElfW(Addr) l_addr = map->l_addr;
/*
#if (!defined DO_RELA || !defined ELF_MACHINE_PLT_REL) && !defined RTLD_BOOTSTRAP
/* We never bind lazily during ld.so bootstrap. Unfortunately gcc is
not clever enough to see through all the function calls to realize
that. * /
if (lazy)
{
/* Doing lazy PLT relocations; they need very little info. * /
for (; r < end; ++r)
elf_machine_lazy_rel (map, l_addr, r);
}
else
#endif
*/
{
const ElfW(Sym) *const symtab =
(const void *) D_PTR (map, l_info[DT_SYMTAB]);//取符号表
ElfW(Word) nrelative = (map->l_info[RELCOUNT_IDX] == NULL
? 0 : map->l_info[RELCOUNT_IDX]->d_un.d_val);//R_386_RELATIVE重定位项个数 0x6ffffffa (RELCOUNT) 5
const ElfW(Rel) *relative = r;// 0x00000011 (REL) 0x858
r = r + MIN (nrelative, relsize / sizeof (ElfW(Rel)));
/*
#ifndef RTLD_BOOTSTRAP
/* This is defined in rtld.c, but nowhere in the static libc.a; make
the reference weak so static programs can still link. This
declaration cannot be done when compiling rtld.c (i.e. #ifdef
RTLD_BOOTSTRAP) because rtld.c contains the common defn for
_dl_rtld_map, which is incompatible with a weak decl in the same
file. * /
# ifndef SHARED
weak_extern (GL(dl_rtld_map));
# endif
if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. * /
# if !defined DO_RELA || defined ELF_MACHINE_REL_RELATIVE
/* Rela platforms get the offset from r_addend and this must
be copied in the relocation address. Therefore we can skip
the relative relocations only if this is for rel
relocations or rela relocations if they are computed as
memory_loc += l_addr... * /
if (l_addr != 0)
# else
/* ...or we know the object has been prelinked. * /
if (l_addr != 0 || ! map->l_info[VALIDX(DT_GNU_PRELINKED)])
# endif
#endif
*/
for (; relative < r; ++relative)
DO_ELF_MACHINE_REL_RELATIVE (map, l_addr, relative);//先处理前面的相对重定位
11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->DO_ELF_MACHINE_REL_RELATIVE (elf/do-rel.h)
重定位R_386_RELATIVE重定位项
/* Apply one relative relocation: forwards L_ADDR and the relocation entry
RELATIVE to elf_machine_rel_relative, passing L_ADDR + RELATIVE->r_offset
as the address to patch.  The MAP argument is not used by the expansion. */
# define DO_ELF_MACHINE_REL_RELATIVE(map, l_addr, relative) \
elf_machine_rel_relative (l_addr, relative, \
(void *) (l_addr + relative->r_offset))
11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->DO_ELF_MACHINE_REL_RELATIVE-> elf_machine_rel_relative (sysdeps/i386/dl-machine.h)
/* Apply a single R_386_RELATIVE relocation.
   L_ADDR is the load address of the object, RELOC the relocation entry
   (only its type is inspected) and RELOC_ADDR the word to patch.  The
   word at RELOC_ADDR is replaced by its current value plus L_ADDR.  */
static inline void
elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc,
			  Elf32_Addr *const reloc_addr)
{
  Elf32_Addr rebased;

  /* Callers only hand relative relocations to this helper.  */
  assert (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE);

  rebased = *reloc_addr + l_addr;
  *reloc_addr = rebased;
}
12._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel(elf/dynamic-link.h)
//#ifdef RTLD_BOOTSTRAP
/* The dynamic linker always uses versioning. */
assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL);//动态链接器总是使用版本信息
//#else
// if (map->l_info[VERSYMIDX (DT_VERSYM)])
//#endif
{
const ElfW(Half) *const version =
(const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);//0x6ffffff0 (VERSYM) 0x75c
for (; r < end; ++r)
{
ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
&map->l_versions[ndx],
(void *) (l_addr + r->r_offset));
/*等价于
Elf32_Half ndx = version[((r->r_info) >> 8)] & 0x7fff;
elf_machine_rel (map, r, &symtab[((r->r_info) >> 8)],
&map->l_versions[ndx],
(void *) (l_addr + r->r_offset));
*/
}
}
/*
#ifndef RTLD_BOOTSTRAP
else
for (; r < end; ++r)
elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
(void *) (l_addr + r->r_offset));
#endif
*/
}
}
ld.so的版本符号表是
[zws@mail ~/glibc-2.3/build/elf]$objdump -sj .gnu.version ld.so
ld.so: file format elf32-i386
Contents of section .gnu.version:
075c 00000500 05000500 05000500 00000500 ................
076c 05000200 00000200 05000300 05000500 ................
077c 05000500 02000000 05000500 05000500 ................
078c 00000200 05000200 05000500 05000500 ................
079c 05000500 05000300 05000500 02000500 ................
07ac 04000200 0500 ......
typedef uint16_t Elf32_Half;
map->l_versions其实为空,不过elf_machine_rel 中没有用到
11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->elf_machine_rel (sysdeps/i386/dl-machine.h)
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
static inline void
elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
const Elf32_Sym *sym, const struct r_found_version *version,
Elf32_Addr *const reloc_addr)
{
const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
/*
#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC
if (__builtin_expect (r_type == R_386_RELATIVE, 0))
{
# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC
/* This is defined in rtld.c, but nowhere in the static libc.a;
make the reference weak so static programs can still link.
This declaration cannot be done when compiling rtld.c
(i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the
common defn for _dl_rtld_map, which is incompatible with a
weak decl in the same file. * /
# ifndef SHARED
weak_extern (_dl_rtld_map);
# endif
if (map != &GL(dl_rtld_map)) /* Already done in rtld itself. * /
# endif
*reloc_addr += map->l_addr;
}
# ifndef RTLD_BOOTSTRAP
else if (__builtin_expect (r_type == R_386_NONE, 0))
return;
# endif
else
#endif
*/
{
const Elf32_Sym *const refsym = sym;
//#if defined USE_TLS && !defined RTLD_BOOTSTRAP
// struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
// Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
//#else
Elf32_Addr value = RESOLVE (&sym, version, r_type);//等价于Elf32_Addr value = ((*(&sym))->st_shndx == 0 ? 0 : _rtld_local._dl_rtld_map.l_addr);
//# ifndef RTLD_BOOTSTRAP
// if (sym != NULL)
//# endif
value += sym->st_value;//加上sym->st_value中的值
//#endif
switch (r_type)
{
case R_386_GLOB_DAT: //ld.so中只有这两个
case R_386_JMP_SLOT:
*reloc_addr = value;
break;
一路返回到_dl_start中,就完成重定位了。
大家想一想如何保证到现在还没有用到重定位的数据?
通过全部使用inline函数或宏,且只使用_rtld_local(vis为hidden)和局部变量来保证.
12.返回_dl_start,完成动态链接
/* Please note that we don't allow profiling of this object and
therefore need not test whether we have to allocate the array
for the relocation results (as done in dl-reloc.c). */
/* Now life is sane; we can call functions and access global data.
Set up to use the operating system facilities, and find out from
the operating system's program loader where to find the program
header table in core. Put the rest of _dl_start into a separate
将_dl_start中剩下的工作放在独立的函数中,这样编译器就不会将需要
function, that way the compiler cannot put accesses to the GOT
访问GOT的操作放在ELF_DYNAMIC_RELOCATE之前
before ELF_DYNAMIC_RELOCATE. */
{
//#ifdef DONT_USE_BOOTSTRAP_MAP
ElfW(Addr) entry = _dl_start_final (arg);//完成动态链接,返回可执行文件入口
//#else
// ElfW(Addr) entry = _dl_start_final (arg, &info);
//#endif
//#ifndef ELF_MACHINE_START_ADDRESS
# define ELF_MACHINE_START_ADDRESS(map, start) (start)
//#endif
return ELF_MACHINE_START_ADDRESS (GL(dl_loaded), entry);//等价于return entry;
}
}
ld.so 分析之6 _dl_start_final
1.setup hash
/* This is the second half of _dl_start (below). It can be inlined safely
这是_dl_start的第二部分.
under DONT_USE_BOOTSTRAP_MAP, where it is careful not to make any GOT
在DONT_USE_BOOTSTRAP_MAP下它能被安全内联,在DONT_USE_BOOTSTRAP_MAP下不允许引用GOT
references. When the tools don't permit us to avoid using a GOT entry
for _dl_rtld_global (no attribute_hidden support), we must make sure
this function is not inlined (see below).
当工具无法让我们避免通过GOT项访问_dl_rtld_global(即不支持attribute_hidden)时,我们必须确保该函数不被内联(见下文)
*/
//#ifdef DONT_USE_BOOTSTRAP_MAP
static inline Elf32_Addr//
//ElfW(Addr) __attribute__ ((always_inline)) 总是内联
_dl_start_final (void *arg)
//#else
//static ElfW(Addr) __attribute__ ((noinline))
//_dl_start_final (void *arg, struct dl_start_final_info *info)
//#endif
{
ElfW(Addr) start_addr;
if (HP_TIMING_AVAIL)// 1
{
/* If it hasn't happen yet record the startup time. */
// if (! HP_TIMING_INLINE)// 1
// HP_TIMING_NOW (start_time);
//#if !defined DONT_USE_BOOTSTRAP_MAP && !defined HP_TIMING_NONAVAIL
// else
// start_time = info->start_time;
//#endif
/* Initialize the timing functions. */
HP_TIMING_DIFF_INIT ();
}
/* Transfer data about ourselves to the permanent link_map structure. */
/*
#ifndef DONT_USE_BOOTSTRAP_MAP
GL(dl_rtld_map).l_addr = info->l.l_addr;
GL(dl_rtld_map).l_ld = info->l.l_ld;
memcpy (GL(dl_rtld_map).l_info, info->l.l_info,
sizeof GL(dl_rtld_map).l_info);
GL(dl_rtld_map).l_mach = info->l.l_mach;
#endif
*/
_dl_setup_hash (&GL(dl_rtld_map));// _dl_setup_hash (&_rtld_local._dl_rtld_map);
GL(dl_rtld_map).l_opencount = 1;
2._dl_start_final->_dl_setup_hash
/* Cache the location of MAP's hash table.

   The table referenced by DT_HASH starts with two words, the bucket
   count and the chain count, followed by the bucket array and then the
   chain array.  The bucket count and the start of both arrays are stored
   in MAP.  Nothing is done when MAP has no DT_HASH entry.  */
void
//internal_function
_dl_setup_hash (struct link_map *map)
{
  Elf_Symndx *table;		/* start of the raw hash section */
  Elf_Symndx *buckets;		/* first word after the two-word header */
  Elf_Symndx nchain;

  if (!map->l_info[DT_HASH])
    return;

  table = (void *) D_PTR (map, l_info[DT_HASH]);

  map->l_nbuckets = table[0];
  nchain = table[1];		/* read from the header but not stored */

  buckets = table + 2;
  map->l_buckets = buckets;
  map->l_chain = buckets + map->l_nbuckets;
}
ld.so hash表的内容是
[zws@mail ~/glibc-2.3/build/elf]$ objdump -sj .hash ld.so
ld.so: file format elf32-i386
Contents of section .hash:
0094 25000000 2b000000 0d000000 21000000 %...+.......!...
00a4 28000000 00000000 06000000 22000000 (..........."...
00b4 00000000 00000000 00000000 08000000 ................
00c4 1e000000 00000000 1a000000 23000000 ............#...
00d4 26000000 0e000000 1d000000 17000000 &...............
00e4 25000000 24000000 00000000 13000000 %...$...........
00f4 00000000 0b000000 18000000 14000000 ................
0104 27000000 1b000000 00000000 15000000 '...............
0114 00000000 29000000 1c000000 00000000 ....)...........
0124 0c000000 2a000000 19000000 00000000 ....*...........
0134 00000000 00000000 00000000 00000000 ................
0144 00000000 00000000 00000000 00000000 ................
0154 00000000 00000000 00000000 00000000 ................
0164 05000000 02000000 03000000 07000000 ................
0174 00000000 10000000 00000000 00000000 ................
0184 00000000 00000000 00000000 00000000 ................
0194 00000000 00000000 00000000 11000000 ................
01a4 01000000 0a000000 00000000 0f000000 ................
01b4 00000000 09000000 20000000 04000000 ........ .......
01c4 00000000 16000000 12000000 00000000 ................
01d4 00000000 1f000000 ........
hash表的作用是加快链接速度。当在动态链接库中查找是否有需要被外部链接的函数时,
如果直接线性搜索库的动态符号表且表比较大,速度很慢。采用散列的方法查找就比较好。
这里l_nbuckets值为0x25=37,nchain 值为0x2b=43,l_buckets存放散列表入口,l_chain用于将散列值相同的符号连接成单链表。
nchain其实就是动态符号数。同一个桶中的符号通过l_chain串成单链表:该桶第一个符号的索引值A存放在l_buckets[hash % l_nbuckets]中,下一个符号的索引值B存放在l_chain[A]中,再下一个存放在l_chain[B]中,依此类推,直到取到0(STN_UNDEF)为止。
问题1.hash表大小是如何计算的?
由binutils 2.18 的bfd/elflink.c文件中compute_bucket_count 计算
/* Array used to determine the number of hash table buckets to use
based on the number of symbols there are. If there are fewer than
3 symbols we use 1 bucket, fewer than 17 symbols we use 3 buckets,
fewer than 37 we use 17 buckets, and so forth. We never use more
than 32771 buckets.
The trailing 0 is a sentinel that marks the end of the table. */
static const size_t elf_buckets[] =
{
1, 3, 17, 37, 67, 97, 131, 197, 263, 521, 1031, 2053, 4099, 8209,
16411, 32771, 0
};
/* Compute bucket count for hashing table. We do not use a static set
   of possible tables sizes anymore. Instead we determine for all
   possible reasonable sizes of the table the outcome (i.e., the
   number of collisions etc) and choose the best solution. The
   weighting functions are not too simple to allow the table to grow
   without bounds. Instead one of the weighting factors is the size.
   Therefore the result is always a good payoff between few collisions
   (= short chain lengths) and table size.

   Only the fixed-table fallback is reproduced in this excerpt: it
   returns the largest elf_buckets[] entry whose successor is still
   greater than NSYMS (the last entry if NSYMS exceeds them all), raised
   to at least 2 when GNU_HASH is nonzero.  INFO, HASHCODES and the
   locals dynsymcount and amt are not used by the fallback; presumably
   the omitted optimizing search uses them.  */
static size_t
compute_bucket_count (struct bfd_link_info *info, unsigned long int *hashcodes,
		      unsigned long int nsyms, int gnu_hash)
{
  size_t dynsymcount = elf_hash_table (info)->dynsymcount;
  size_t best_size = 0;
  unsigned long int i;
  bfd_size_type amt;

  /* We have a problem here. The following code to optimize the table
     size requires an integer type with more the 32 bits. If
     BFD_HOST_U_64_BIT is set we know about such a type. */
#ifdef BFD_HOST_U_64_BIT
  /* The optimizing search that needs a 64-bit integer type is omitted
     from this excerpt.  */
#endif /* defined (BFD_HOST_U_64_BIT) */
    {
      /* This is the fallback solution if no 64bit type is available or if we
	 are not supposed to spend much time on optimizations. We select the
	 bucket count using a fixed set of numbers. */
      for (i = 0; elf_buckets[i] != 0; i++)	/* scan the size table */
	{
	  best_size = elf_buckets[i];		/* candidate bucket count */
	  if (nsyms < elf_buckets[i + 1])	/* next size would be too big */
	    break;
	}
      if (gnu_hash && best_size < 2)
	best_size = 2;
    }

  return best_size;
}
由于本例中符号数是43,根据计算得桶大小是37,和前面的桶值相等。
问题2:符号的hash值如何计算?
同样在elflink.c中,由bfd_elf_hash计算
/* Standard ELF hash function. Do not change this function; you will
   cause invalid hash tables to be generated.

   NAMEARG is a NUL-terminated symbol name, processed as unsigned bytes.
   Returns the hash value reduced to its low 32 bits.  */
unsigned long
bfd_elf_hash (const char *namearg)
{
  const unsigned char *cursor = (const unsigned char *) namearg;
  unsigned long hash = 0;

  for (; *cursor != '\0'; ++cursor)
    {
      unsigned long top;

      hash = (hash << 4) + *cursor;
      top = hash & 0xf0000000;
      if (top != 0)
	/* Fold the top nibble into bits 4-7 and clear it.  Clearing by
	   XOR is equivalent to the `h &= ~g' of the ELF ABI because
	   every bit of TOP is set in HASH.  */
	hash ^= top ^ (top >> 24);
    }

  /* unsigned long may be wider than 32 bits.  */
  return hash & 0xffffffff;
}
一个符号的hash值%桶大小即得其在hash表中的索引.
问题3.动态符号表的内容是什么?
动态符号表中存放的是全局符号,包括本地和外地。
本地提供给其他模块使用,本地符号的Ndx是数字,指出该符号所在的节。外地符号的Ndx是UND,需要动态链接。
例如
readelf -s ld.so
Symbol table '.dynsym' contains 43 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 00000000 0 NOTYPE LOCAL DEFAULT UND
1: 000126d0 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_set@@GLIBC_PRIVATE
2: 00012140 980 OBJECT GLOBAL DEFAULT 14 _rtld_global@@GLIBC_PRIVATE
3: 00009f6b 44 FUNC GLOBAL DEFAULT 9 _dl_debug_printf@@GLIBC_PRIVATE
4: 0000a372 1066 FUNC GLOBAL DEFAULT 9 _dl_check_map_versions@@GLIBC_PRIVATE
5: 00006808 757 FUNC GLOBAL DEFAULT 9 _dl_lookup_versioned_symb@@GLIBC_PRIVATE
6: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_lock
7: 000126d4 4 OBJECT GLOBAL DEFAULT 15 __libc_stack_end@@GLIBC_PRIVATE
8: 000096e0 307 FUNC GLOBAL DEFAULT 9 _dl_init@@GLIBC_PRIVATE
9: 0000bdc4 220 FUNC WEAK DEFAULT 9 __libc_memalign@@GLIBC_2.0
10: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_init
11: 0000bea0 34 FUNC WEAK DEFAULT 9 malloc@@GLIBC_2.0
12: 00006189 605 FUNC GLOBAL DEFAULT 9 _dl_lookup_symbol_skip@@GLIBC_PRIVATE
13: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.1
14: 000063e6 1058 FUNC GLOBAL DEFAULT 9 _dl_lookup_versioned_symb@@GLIBC_PRIVATE
15: 00012514 4 OBJECT GLOBAL DEFAULT 14 __libc_enable_secure@@GLIBC_PRIVATE
16: 00005ec4 709 FUNC GLOBAL DEFAULT 9 _dl_lookup_symbol@@GLIBC_PRIVATE
17: 000126e4 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_get@@GLIBC_PRIVATE
18: 0000bec2 61 FUNC WEAK DEFAULT 9 calloc@@GLIBC_2.0
19: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_unlock
20: 000126ec 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_addre@@GLIBC_PRIVATE
21: 00009b86 5 FUNC GLOBAL DEFAULT 9 _dl_debug_state@@GLIBC_PRIVATE
22: 00012120 4 OBJECT GLOBAL DEFAULT 14 _dl_argv@@GLIBC_PRIVATE
23: 000030db 272 FUNC GLOBAL DEFAULT 9 _dl_dst_substitute@@GLIBC_PRIVATE
24: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_destroy
25: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.0
26: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_PRIVATE
27: 0000bf25 139 FUNC WEAK DEFAULT 9 realloc@@GLIBC_2.0
28: 0000b43c 373 FUNC GLOBAL DEFAULT 9 _dl_get_origin@@GLIBC_PRIVATE
29: 0000a868 1981 FUNC GLOBAL DEFAULT 9 _dl_start_profile@@GLIBC_PRIVATE
30: 00007ca0 1072 FUNC GLOBAL DEFAULT 9 _dl_relocate_object@@GLIBC_PRIVATE
31: 00003056 133 FUNC GLOBAL DEFAULT 9 _dl_dst_count@@GLIBC_PRIVATE
32: 00012124 4 OBJECT GLOBAL DEFAULT 14 _dl_starting_up@@GLIBC_PRIVATE
33: 00005b89 75 FUNC GLOBAL DEFAULT 9 _dl_unload_cache@@GLIBC_PRIVATE
34: 0000f460 14 OBJECT GLOBAL DEFAULT 10 _dl_out_of_memory@@GLIBC_PRIVATE
35: 0000b025 562 FUNC GLOBAL DEFAULT 9 _dl_mcount@@GLIBC_2.1
36: 00004b84 1917 FUNC GLOBAL DEFAULT 9 _dl_map_object@@GLIBC_PRIVATE
37: 000091cc 433 FUNC GLOBAL DEFAULT 9 _dl_signal_error@@GLIBC_PRIVATE
38: 000126f8 20 OBJECT GLOBAL DEFAULT 15 _r_debug@@GLIBC_2.0
39: 0000940d 318 FUNC GLOBAL DEFAULT 9 _dl_catch_error@@GLIBC_PRIVATE
40: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.3
41: 0000beff 38 FUNC WEAK DEFAULT 9 free@@GLIBC_2.0
42: 00008255 3291 FUNC GLOBAL DEFAULT 9 _dl_map_object_deps@@GLIBC_PRIVATE
符号表的第一个符号总是被保留的.因此实际可用的符号数是42
上面是通过节表来显示动态符号表的,我们也可以通过动态节来显示动态符号表
[zws@mail ~/glibc-2.3/build/elf]$readelf -Ds ld.so
Symbol table for image:
Num Buc: Value Size Type Bind Vis Ndx Name
13 0: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.1
5 0: 00006808 757 FUNC GLOBAL DEFAULT 9 _dl_lookup_versioned_symbol_skip
33 1: 00005b89 75 FUNC GLOBAL DEFAULT 9 _dl_unload_cache
40 2: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.3
6 4: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_lock
34 5: 0000f460 14 OBJECT GLOBAL DEFAULT 10 _dl_out_of_memory
9 5: 0000bdc4 220 FUNC WEAK DEFAULT 9 __libc_memalign
8 9: 000096e0 307 FUNC GLOBAL DEFAULT 9 _dl_init
30 10: 00007ca0 1072 FUNC GLOBAL DEFAULT 9 _dl_relocate_object
10 10: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_init
26 12: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_PRIVATE
35 13: 0000b025 562 FUNC GLOBAL DEFAULT 9 _dl_mcount
32 13: 00012124 4 OBJECT GLOBAL DEFAULT 14 _dl_starting_up
15 13: 00012514 4 OBJECT GLOBAL DEFAULT 14 __libc_enable_secure
3 13: 00009f6b 44 FUNC GLOBAL DEFAULT 9 _dl_debug_printf
38 14: 000126f8 20 OBJECT GLOBAL DEFAULT 15 _r_debug
22 14: 00012120 4 OBJECT GLOBAL DEFAULT 14 _dl_argv
14 15: 000063e6 1058 FUNC GLOBAL DEFAULT 9 _dl_lookup_versioned_symbol
2 15: 00012140 980 OBJECT GLOBAL DEFAULT 14 _rtld_global
29 16: 0000a868 1981 FUNC GLOBAL DEFAULT 9 _dl_start_profile
1 16: 000126d0 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_set
23 17: 000030db 272 FUNC GLOBAL DEFAULT 9 _dl_dst_substitute
37 18: 000091cc 433 FUNC GLOBAL DEFAULT 9 _dl_signal_error
36 19: 00004b84 1917 FUNC GLOBAL DEFAULT 9 _dl_map_object
4 19: 0000a372 1066 FUNC GLOBAL DEFAULT 9 _dl_check_map_versions
19 21: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_unlock
11 23: 0000bea0 34 FUNC WEAK DEFAULT 9 malloc
24 24: 00000000 0 NOTYPE WEAK DEFAULT UND __pthread_mutex_destroy
20 25: 000126ec 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_address
39 26: 0000940d 318 FUNC GLOBAL DEFAULT 9 _dl_catch_error
18 26: 0000bec2 61 FUNC WEAK DEFAULT 9 calloc
16 26: 00005ec4 709 FUNC GLOBAL DEFAULT 9 _dl_lookup_symbol
7 26: 000126d4 4 OBJECT GLOBAL DEFAULT 15 __libc_stack_end
27 27: 0000bf25 139 FUNC WEAK DEFAULT 9 realloc
21 29: 00009b86 5 FUNC GLOBAL DEFAULT 9 _dl_debug_state
41 31: 0000beff 38 FUNC WEAK DEFAULT 9 free
28 32: 0000b43c 373 FUNC GLOBAL DEFAULT 9 _dl_get_origin
17 32: 000126e4 4 OBJECT GLOBAL DEFAULT 15 __libc_internal_tsd_get
12 34: 00006189 605 FUNC GLOBAL DEFAULT 9 _dl_lookup_symbol_skip
42 35: 00008255 3291 FUNC GLOBAL DEFAULT 9 _dl_map_object_deps
31 35: 00003056 133 FUNC GLOBAL DEFAULT 9 _dl_dst_count
25 36: 00000000 0 OBJECT GLOBAL DEFAULT ABS GLIBC_2.0
这里Num列显示该符号在符号表中的索引,Buc列显示该符号在Hash表中的索引,索引值相同的符号按照先后顺序显示.
可见索引为0的保留符号是不进入hash表的
可用如下命令显示各种长度桶的直方图,用于分析散列效果.
[zws@mail ~/glibc-2.3/build/elf]$readelf -I ld.so
Histogram for bucket list length (total of 37 buckets):
Length Number % of total Coverage
0 10 ( 27.0%) //长度为0的桶占总桶的27%
1 16 ( 43.2%) 38.1% //长度为1的桶占总桶的43.2%,其总符号数占总符号的38.1%
2 9 ( 24.3%) 81.0% //....
3 0 ( 0.0%) 81.0%
4 2 ( 5.4%) 100.0%
length为桶长,number为相同桶长的个数,% of total为相同桶长的个数占总桶的百分比,coverage为桶长不超过该长度的所有桶中的符号数
占总符号数的累计百分比(例如长度为2的一行:(16×1+9×2)/42=81.0%)。
问题4:.dynsym节和.symtab节的联系和区别?
[zws@mail ~/glibc-2.3/build/elf]$readelf -S ld.so
There are 30 section headers, starting at offset 0x96078:
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .hash HASH 00000094 000094 000148 04 A 2 0 4
[ 2] .dynsym DYNSYM 000001dc 0001dc 0002b0 10 A 3 1 4
[ 3] .dynstr STRTAB 0000048c 00048c 0002cf 00 A 0 0 1
[ 4] .gnu.version VERSYM 0000075c 00075c 000056 02 A 2 0 2
[ 5] .gnu.version_d VERDEF 000007b4 0007b4 0000a4 00 A 3 5 4
[ 6] .rel.dyn REL 00000858 000858 000070 08 A 2 0 4
[ 7] .rel.plt REL 000008c8 0008c8 000048 08 A 2 8 4
[ 8] .plt PROGBITS 00000910 000910 0000a0 04 AX 0 0 4
[ 9] .text PROGBITS 000009b0 0009b0 00e6ce 00 AX 0 0 16
[10] .rodata PROGBITS 0000f080 00f080 002e60 00 A 0 0 32
[11] .dynamic DYNAMIC 00012000 012000 0000b0 08 WA 3 0 4
[12] .got PROGBITS 000120b0 0120b0 000038 04 WA 0 0 4
[13] .got.plt PROGBITS 000120e8 0120e8 000030 04 WA 0 0 4
[14] .data PROGBITS 00012120 012120 000408 00 WA 0 0 32
[15] .bss NOBITS 00012540 012528 0001cc 00 WA 0 0 32
[16] .stab PROGBITS 00000000 012528 0004f8 0c 17 0 4
[17] .stabstr STRTAB 00000000 012a20 000276 00 0 0 1
[18] .comment PROGBITS 00000000 012c96 0009f6 00 0 0 1
[19] .debug_aranges PROGBITS 00000000 01368c 0005e0 00 0 0 1
[20] .debug_pubnames PROGBITS 00000000 013c6c 000bd9 00 0 0 1
[21] .debug_info PROGBITS 00000000 014845 06722d 00 0 0 1
[22] .debug_abbrev PROGBITS 00000000 07ba72 006978 00 0 0 1
[23] .debug_line PROGBITS 00000000 0823ea 009e0e 00 0 0 1
[24] .debug_frame PROGBITS 00000000 08c1f8 001934 00 0 0 4
[25] .debug_str PROGBITS 00000000 08db2c 0083e3 01 MS 0 0 1
[26] .gnu.warning.llse PROGBITS 00000000 095f20 00003f 00 0 0 32
[27] .shstrtab STRTAB 00000000 095f5f 000118 00 0 0 1
[28] .symtab SYMTAB 00000000 096528 001f70 10 29 461 4
[29] .strtab STRTAB 00000000 098498 00174a 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
strip ld.so后发现
[zws@mail ~/glibc-2.3/build/elf]$readelf -S ldx.so
There are 19 section headers, starting at offset 0x12ffc:
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .hash HASH 00000094 000094 000148 04 A 2 0 4
[ 2] .dynsym DYNSYM 000001dc 0001dc 0002b0 10 A 3 1 4
[ 3] .dynstr STRTAB 0000048c 00048c 0002cf 00 A 0 0 1
[ 4] .gnu.version VERSYM 0000075c 00075c 000056 02 A 2 0 2
[ 5] .gnu.version_d VERDEF 000007b4 0007b4 0000a4 00 A 3 5 4
[ 6] .rel.dyn REL 00000858 000858 000070 08 A 2 0 4
[ 7] .rel.plt REL 000008c8 0008c8 000048 08 A 2 8 4
[ 8] .plt PROGBITS 00000910 000910 0000a0 04 AX 0 0 4
[ 9] .text PROGBITS 000009b0 0009b0 00e6ce 00 AX 0 0 16
[10] .rodata PROGBITS 0000f080 00f080 002e60 00 A 0 0 32
[11] .dynamic DYNAMIC 00012000 012000 0000b0 08 WA 3 0 4
[12] .got PROGBITS 000120b0 0120b0 000038 04 WA 0 0 4
[13] .got.plt PROGBITS 000120e8 0120e8 000030 04 WA 0 0 4
[14] .data PROGBITS 00012120 012120 000408 00 WA 0 0 32
[15] .bss NOBITS 00012540 012528 0001cc 00 WA 0 0 32
[16] .comment PROGBITS 00000000 012528 0009f6 00 0 0 1
[17] .gnu.warning.llse PROGBITS 00000000 012f20 00003f 00 0 0 32
[18] .shstrtab STRTAB 00000000 012f5f 00009c 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
少了11个节,分别是
[16] .stab PROGBITS 00000000 012528 0004f8 0c 17 0 4
[17] .stabstr STRTAB 00000000 012a20 000276 00 0 0 1
[19] .debug_aranges PROGBITS 00000000 01368c 0005e0 00 0 0 1
[20] .debug_pubnames PROGBITS 00000000 013c6c 000bd9 00 0 0 1
[21] .debug_info PROGBITS 00000000 014845 06722d 00 0 0 1
[22] .debug_abbrev PROGBITS 00000000 07ba72 006978 00 0 0 1
[23] .debug_line PROGBITS 00000000 0823ea 009e0e 00 0 0 1
[24] .debug_frame PROGBITS 00000000 08c1f8 001934 00 0 0 4
[25] .debug_str PROGBITS 00000000 08db2c 0083e3 01 MS 0 0 1
[28] .symtab SYMTAB 00000000 096528 001f70 10 29 461 4
[29] .strtab STRTAB 00000000 098498 00174a 00 0 0 1
前面9个都是调试信息节。后面一个是.symtab,一个是.symtab的字符串表节.strtab,.symtab的Lk值是29,指向.strtab节.
说明这些信息都不是执行程序必须的。执行程序用的符号表是.dynsym节,以及为其服务的.hash和.dynstr.
.dynsym和.symtab分别占用文件空间,两者磁盘空间不相干。在内容上,.dynsym是.symtab的子集。.dynsym用于执行时动态链接,
.symtab由objdump等分析程序使用.这里也体现了ELF的执行和存储两种视图。
两者侧重点不同,但是相通。链接程序将所有执行时需要的节安排在一起,并安排在最前面。这样程序加载的时候,基址就在文件头处。
在.hash节前面的是ELF文件头,然后是程序头。
执行时不需要的节放在后面,准确说是.bss后。.bss不占用文件空间,但是占用内存空间。
因此前面的
[16] .comment PROGBITS 00000000 012528 0009f6 00 0 0 1
[17] .gnu.warning.llse PROGBITS 00000000 012f20 00003f 00 0 0 32
[18] .shstrtab STRTAB 00000000 012f5f 00009c 00 0 0 1
也可以删掉
还有节表本身,被安排在最后,也可以删掉,不会影响程序的执行。
我们可以作如下实验,编写一个简单的hello world程序,编译执行。
使用readelf -S hello,找到.bss节,假设文件偏移是x.
使用python脚本将其截断。
# Cut the "hello" binary off at byte offset x, where x is the file offset
# of the .bss section as reported by `readelf -S hello`.
# The file is opened in binary mode because an ELF image is not text, and
# the `with` block guarantees it is closed (the bare `f.close` never ran).
with open("hello", "r+b") as f:
    f.seek(x)
    f.truncate()
再执行一下hello,看是否还是可执行.
3.设置map_start和map_end
GL(dl_rtld_map).l_map_start = (ElfW(Addr)) _begin;// 0
GL(dl_rtld_map).l_map_end = (ElfW(Addr)) _end;// bss 最后
/* Copy the TLS related data if necessary. */
/*#if USE_TLS && !defined DONT_USE_BOOTSTRAP_MAP
//# ifdef HAVE___THREAD
// assert (info->l.l_tls_modid != 0);
//# else
if (info->l.l_tls_modid != 0)
//# endif
{
GL(dl_rtld_map).l_tls_blocksize = info->l.l_tls_blocksize;
GL(dl_rtld_map).l_tls_align = info->l.l_tls_align;
GL(dl_rtld_map).l_tls_initimage_size = info->l.l_tls_initimage_size;
GL(dl_rtld_map).l_tls_initimage = info->l.l_tls_initimage;
GL(dl_rtld_map).l_tls_offset = info->l.l_tls_offset;
GL(dl_rtld_map).l_tls_modid = 1;
GL(dl_rtld_map).l_tls_tp_initialized
= info->l.l_tls_tp_initialized;
}
#endif
*/
//#if HP_TIMING_AVAIL
HP_TIMING_NOW (GL(dl_cpuclock_offset));
//#endif
查看ld.so的符号表
405: 00000000 0 NOTYPE LOCAL DEFAULT ABS _begin
417: 0001270c 0 NOTYPE LOCAL DEFAULT ABS _end
对比节表可发现
_end正好指向bss结尾(是地址而不是文件偏移)
4._dl_sysdep_start
/* Call the OS-dependent function to set up life so we can do things like
调用操作系统相关函数,建立操作环境,这样就能执行文件访问等操作
file access. It will call `dl_main' (below) to do all the real work
of the dynamic linker, and then unwind our frame and run the user
这将会调用dl_main完成所有的动态链接工作
entry point on the same stack we entered on.
最后退出并执行用户入口
*/
start_addr = _dl_sysdep_start (arg, &dl_main);//传递dl_main函数,返回用户入口地址
//#ifndef HP_TIMING_NONAVAIL
if (HP_TIMING_AVAIL)// 1
{
hp_timing_t end_time;
/* Get the current time. */
HP_TIMING_NOW (end_time);//记录end_time
/* Compute the difference. */
HP_TIMING_DIFF (rtld_total_time, start_time, end_time);//计算耗时
}
//#endif
if (__builtin_expect (GL(dl_debug_mask) & DL_DEBUG_STATISTICS, 0))
print_statistics ();//如果需要,输出统计信息
return start_addr;
}
输出的统计信息如下
[zws@mail ~/glibc-2.3/build/elf]$ LD_DEBUG=statistics ls /proc/slabinfo
30251:
30251: runtime linker statistics:
30251: total startup time in dynamic loader: 1141112 clock cycles
30251: time needed for relocation: 498188 clock cycles (43.6%)
30251: number of relocations: 103
30251: number of relocations from cache: 5
30251: time needed to load objects: 377760 clock cycles (33.1%)
/proc/slabinfo
30251:
30251: runtime linker statistics:
30251: final number of relocations: 156
30251: final number of relocations from cache: 5
[zws@mail ~/glibc-2.3/build/elf]$
ld.so分析之7 _dl_sysdep_start
(sysdeps/generic/dl-sysdep.c)
1.获取内核传递过来的信息
Elf32_Addr//ElfW(Addr)
_dl_sysdep_start (void **start_argptr,
void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum,
ElfW(Addr) *user_entry))
{
const ElfW(Phdr) *phdr = NULL;
ElfW(Word) phnum = 0;
ElfW(Addr) user_entry;
ElfW(auxv_t) *av;
uid_t uid = 0;
gid_t gid = 0;
//#ifdef HAVE_AUX_XID
//# define set_seen(tag) (tag) /* Evaluate for the side effects. */
//#else
unsigned int seen = 0;
# define M(type) (1 << (type))
# define set_seen(tag) seen |= M ((tag)->a_type)
//#endif
DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, INTUSE(_dl_argv), _environ,
_dl_auxv);
2.DL_FIND_ARG_COMPONENTS
/* Decode the start-up stack block that COOKIE points at.
   COOKIE addresses the argc word.  ARGV is the pointer array one long
   past it.  ENVP begins after ARGV's ARGC entries plus one more slot.
   AUXP is set to the slot that follows the first NULL entry found while
   scanning forward from ENVP.  */
# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
do { \
void **_tmp; \
(argc) = *(long int *) cookie; \
(argv) = (char **) ((long int *) cookie + 1); \
(envp) = (argv) + (argc) + 1; \
for (_tmp = (void **) (envp); *_tmp; ++_tmp) \
continue; \
(auxp) = (void *) ++_tmp; \
} while (0)
start_argptr指向argc在栈上的地址,因此这个宏的目的很简单,取得argc,argv,envp,auxp变量值。现在再次把内核传递信息贴出
注意前面的实参
start_argptr, 是局部变量
_dl_argc, 有hidden属性
INTUSE(_dl_argv),即_dl_argv_internal 有hidden属性,是_dl_argv的alias,_dl_argv是全局变量
_environ, 有hidden属性,但是最终的符号属性是
307: 00012524 4 OBJECT LOCAL DEFAULT 14 _environ
有点不一样,不知道什么原因?
_dl_auxv 是局部变量
因此会使用GOTOFF访问他们而不需要重定位
/*
内存布局如下
position content size (bytes) + comment
------------------------------------------------------------------------
stack pointer -> [ argc = number of args ] 4
[ argv[0] (pointer) ] 4 (program name)
[ argv[1] (pointer) ] 4
[ argv[..] (pointer) ] 4 * x
[ argv[n - 1] (pointer) ] 4
[ argv[n] (pointer) ] 4 (= NULL)
[ envp[0] (pointer) ] 4
[ envp[1] (pointer) ] 4
[ envp[..] (pointer) ] 4
[ envp[term] (pointer) ] 4 (= NULL)
[ auxv[0] AT_PHDR (Elf32_auxv_t) ] 8
[ auxv[1] AT_PHENT (Elf32_auxv_t) ] 8
[ auxv[2] AT_PHNUM (Elf32_auxv_t) ] 8
[ auxv[3] AT_BASE (Elf32_auxv_t) ] 8
[ auxv[4] AT_FLAGS (Elf32_auxv_t) ] 8
[ auxv[5] AT_ENTRY (Elf32_auxv_t) ] 8
[ auxv[6] AT_UID (Elf32_auxv_t) ] 8
[ auxv[7] AT_EUID (Elf32_auxv_t) ] 8
[ auxv[8] AT_GID (Elf32_auxv_t) ] 8
[ auxv[9] AT_EGID (Elf32_auxv_t) ] 8
[ auxv[10] AT_HWCAP (Elf32_auxv_t) ] 8
[ auxv[11] AT_PAGESZ (Elf32_auxv_t) ] 8
[ auxv[12] AT_CLKTCK (Elf32_auxv_t) ] 8
[ auxv[13] AT_PLATFORM (Elf32_auxv_t) ] 8
[ auxv[14] (Elf32_auxv_t) ] 8 (= AT_NULL vector)
[ padding ] 0 - 15
[ padding ] 16
[ padding ] 0 - 15
[k_platform] 0 - 65
[ argument ASCIIZ strings ] >= 0
[ environment ASCIIZ str. ] >= 0
[filename] >=0
(0xbffffffc) [ end marker ] 4 (= NULL)
(0xc0000000) < top of stack > 0 (virtual)
*/
3.解析auxv
user_entry = (ElfW(Addr)) ENTRY_POINT;//_start,默认为ld.so的_start,_start有属性hidden
GL(dl_platform) = NULL; /* Default to nothing known about the platform. */
for (av = _dl_auxv; av->a_type != AT_NULL; set_seen (av++))
switch (av->a_type)
{
case AT_PHDR:
phdr = av->a_un.a_ptr;
break;
case AT_PHNUM:
phnum = av->a_un.a_val;
break;
case AT_PAGESZ:
GL(dl_pagesize) = av->a_un.a_val;//4k
break;
case AT_ENTRY:
user_entry = av->a_un.a_val;//用户入口
break;
//#ifdef NEED_DL_BASE_ADDR
// case AT_BASE:
// _dl_base_addr = av->a_un.a_val;
// break;
//#endif
case AT_UID:
case AT_EUID:
uid ^= av->a_un.a_val;//等价于uid=0^AT_UID^AT_EUID=AT_UID^AT_EUID,即判断AT_UID和AT_EUID是否相等
break;
case AT_GID:
case AT_EGID:
gid ^= av->a_un.a_val;//同理判断AT_GID和AT_EGID是否相等
break;
case AT_PLATFORM:
GL(dl_platform) = av->a_un.a_ptr;
break;
case AT_HWCAP:
GL(dl_hwcap) = av->a_un.a_val;
break;
case AT_CLKTCK:
GL(dl_clktck) = av->a_un.a_val;
break;
case AT_FPUCW:
GL(dl_fpu_control) = av->a_un.a_val;
break;
}
//#ifdef DL_SYSDEP_OSCHECK
DL_SYSDEP_OSCHECK (dl_fatal);//编译时为空
//#endif
/* Fill in the values we have not gotten from the kernel through the
auxiliary vector. */
//#ifndef HAVE_AUX_XID
/* SEE (UID, var, uid): if no AT_<UID> entry was recorded in `seen' while
   walking the auxiliary vector, XOR the value returned by __get<uid> ()
   into VAR instead, so the ID is obtained by an explicit call.  */
# define SEE(UID, var, uid) \
if ((seen & M (AT_##UID)) == 0) var ^= __get##uid ()
SEE (UID, uid, uid);//if ((seen & (1 << (AT_UID))) == 0) uid ^= __getuid ();
SEE (EUID, uid, euid);//if ((seen & (1 << (AT_EUID))) == 0) uid ^= __geteuid ();
SEE (GID, gid, gid);//if ((seen & (1 << (AT_GID))) == 0) gid ^= __getgid ();
SEE (EGID, gid, egid);//if ((seen & (1 << (AT_EGID))) == 0) gid ^= __getegid ();
//#endif
/* If one of the two pairs of IDs does not match this is a setuid
如果两对id中有一个不等,则这是一个setuid和setgid程序
or setgid run. */
INTUSE(__libc_enable_secure) = uid | gid;
//#ifndef HAVE_AUX_PAGESIZE
if (GL(dl_pagesize) == 0)
GL(dl_pagesize) = __getpagesize ();
//#endif
//#ifdef DL_SYSDEP_INIT
DL_SYSDEP_INIT;
//#endif
//#ifdef DL_PLATFORM_INIT
DL_PLATFORM_INIT;
//#endif
4.DL_SYSDEP_INIT(sysdeps/unix/sysv/linux/dl-sysdep.c)
#define DL_SYSDEP_INIT frob_brk ()
/* Body of DL_SYSDEP_INIT: issues a single __brk (0) and ignores the
   return value.  */
static inline void
frob_brk (void)
{
__brk (0); /* Initialize the break, i.e. fetch the starting brk address. */
}
5.DL_SYSDEP_INIT->frob_brk ->__brk(sysdeps/unix/sysv/linux/i386/brk.c)
/* This must be initialized data because commons can't have aliases. */
void *__curbrk = 0;
/* Old braindamage in GCC's crtstuff.c requires this symbol in an attempt
to work around different old braindamage in the old Linux ELF dynamic
linker. */
weak_alias (__curbrk, ___brk_addr)
/* Ask the kernel to set the program break to ADDR via the brk system
   call and record the value the kernel reports in __curbrk.
   Returns 0 on success.  If the reported break is below ADDR, sets errno
   to ENOMEM and returns -1.  */
int
__brk (void *addr)
{
void *__unbounded newbrk, *__unbounded scratch;
asm ("movl %%ebx, %1\n" /* Save %ebx (the GOT address, per the walkthrough) in scratch register. */
"movl %3, %%ebx\n" /* Put ADDR in %ebx to be syscall arg. */
"int $0x80 # %2\n" /* Perform the system call. */
"movl %1, %%ebx\n" /* Restore %ebx from scratch register. */
: "=a" (newbrk), "=r" (scratch)
: "0" (SYS_ify (brk)), "g" (__ptrvalue (addr)));// SYS_ify (brk) stands for __NR_brk, i.e. the sys_brk system call
__curbrk = newbrk;// remember the break value returned in %eax
if (newbrk < addr)
{
__set_errno (ENOMEM);
return -1;
}
return 0;
}
weak_alias (__brk, brk)//brk是__brk的alias且weak
6.DL_SYSDEP_INIT->frob_brk ->__brk->sys_brk(内核中 2.4.0)
asmlinkage unsigned long sys_brk(unsigned long brk)
{
unsigned long rlim, retval;
unsigned long newbrk, oldbrk;
struct mm_struct *mm = current->mm;
down(&mm->mmap_sem);
if (brk < mm->end_code)
goto out;
由于前面的调用参数是0,因此直接out
out:
retval = mm->brk;
up(&mm->mmap_sem);
return retval;
}
返回mm->brk,由于到目前为止还没有调用sys_brk,应该返回的是mm->start_brk,即紧随bss后的地址.
验证
[zws@mail ~/glibc-2.3/build/elf]$strace -e brk ls
brk(0) = 0x80586c8
...
[zws@mail ~]$readelf -S /bin/ls
[23] .bss NOBITS 08058360 010360 000368 00 WA 0 0 32
0x8058360+0x368=0x80586c8
7.DL_PLATFORM_INIT(sysdeps/i386/dl-machine.h)
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
static inline void //__attribute__ ((unused))
dl_platform_init (void)
{
  /* Nothing to normalise when no platform string was supplied.  */
  if (GL(dl_platform) == NULL)
    return;

  /* An empty platform string would disturb later users; treat it the
     same as "no platform".  */
  if (*GL(dl_platform) == '\0')
    GL(dl_platform) = NULL;
}
8.调用__sbrk
/* Determine the length of the platform name. */
if (GL(dl_platform) != NULL)
GL(dl_platformlen) = strlen (GL(dl_platform));
if (__sbrk (0) == &_end)
/* The dynamic linker was run as a program, and so the initial break
动态链接器本身直接运行,所以起始break就紧随bss,在&_end处
starts just after our bss, at &_end. The malloc in dl-minimal.c
will consume the rest of this page, so tell the kernel to move the
在dl-minimal.c中的malloc将消耗掉该页剩下部分,所以告诉内核移动break跳过该部分
break up that far. When the user program examines its break, it
will see this new value and not clobber our data.
当用户程序检查它的break,它将会看到新值,而不会破坏我们的数据.
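不太明白这里的意思?(注:当ld.so被当作程序直接运行时,初始break就在ld.so自己的&_end处;而dl-minimal.c中的malloc会使用从&_end到该页末尾的内存。如果不先把break上移到页边界,用户程序之后从break处申请的内存就会与这部分数据重叠,从而破坏ld.so的数据。)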
*/
__sbrk (GL(dl_pagesize) - ((&_end - (void *) 0) & (GL(dl_pagesize) - 1)));
9.__sbrk(sysdeps/generic/sbrk.c)
/* Extend the process's data space by INCREMENT.
根据INCREMENT扩展进程数据空间
If INCREMENT is negative, shrink data space by - INCREMENT.
如果INCREMENT是负数,缩减数据空间INCREMENT大小
Return start of new space allocated, or -1 for errors.
返回新分配空间的起始地址
*/
void *
__sbrk (intptr_t increment)
{
  void *previous;

  /* When this copy is not part of the dynamic library, or the library
     was loaded dynamically into a statically linked program, refresh
     __curbrk from the kernel so that separate instances of __brk and
     __sbrk can share the heap.  In the ld.so start-up path traced by
     this walkthrough __curbrk was already set by the earlier __brk (0)
     and __libc_multiple_libcs is 0, so the refresh is skipped.  */
  if ((__curbrk == NULL || __libc_multiple_libcs) && __brk (0) < 0)
    return (void *) -1;

  /* A zero increment is a pure query: the current break is returned
     without touching the kernel.  Otherwise move the break to
     previous + increment.  */
  previous = __curbrk;
  if (increment != 0 && __brk (previous + increment) < 0)
    return (void *) -1;

  return previous;
}
10.返回_dl_sysdep_start
/* If this is a SUID program we make sure that FDs 0, 1, and 2 are
allocated. If necessary we are doing it ourself. If it is not
如果是SUID程序,确保FD 0,1,2都被分配,如果必须,我们自己分配它们。
possible we stop the program.
否则停止程序
*/
if (__builtin_expect (INTUSE(__libc_enable_secure), 0))
__libc_check_standard_fds ();
11.__libc_check_standard_fds (sysdeps/generic/check_fds.c)
/* Run check_one_fd on each of the three standard file descriptors,
   passing the open mode to use for it together with O_NOFOLLOW.  */
void
__libc_check_standard_fds (void)
{
/* This is really paranoid but some people actually are. If /dev/null
should happen to be a symlink to somewhere else and not the device
commonly known as "/dev/null" we bail out. We can detect this with
the O_NOFOLLOW flag for open() but only on some system.
*/
//#ifndef O_NOFOLLOW // already defined on this system, as 0400000
//# define O_NOFOLLOW 0
//#endif
/* Check all three standard file descriptors. */
check_one_fd (STDIN_FILENO, O_RDONLY | O_NOFOLLOW);
check_one_fd (STDOUT_FILENO, O_RDWR | O_NOFOLLOW);
check_one_fd (STDERR_FILENO, O_RDWR | O_NOFOLLOW);
}
12.__libc_check_standard_fds->check_one_fd (sysdeps/generic/check_fds.c)
/* Should other OSes (e.g., Hurd) have different versions which can
be written in a better way? */
/* Make sure descriptor FD is allocated.  If fcntl (F_GETFD) fails with
   EBADF, open _PATH_DEVNULL with MODE and verify that the new descriptor
   is FD itself and refers to a character device (and, where the
   DEV_NULL_* macros exist, to the null device).  If any check fails,
   execute ABORT_INSTRUCTION in an endless loop.  */
static void
check_one_fd (int fd, int mode)
{
if (__builtin_expect (__libc_fcntl (fd, F_GETFD), 0) == -1
&& errno == EBADF)// this descriptor is not open
{
struct stat64 st;
/* Something is wrong with this descriptor, it's probably not
opened. Open /dev/null so that the SUID program we are
about to start does not accidentally use this descriptor. */
int nullfd = __libc_open (_PATH_DEVNULL, mode);
/* We are very paranoid here. With all means we try to ensure
that we are actually opening the /dev/null device and nothing
else.
Note that the following code assumes that STDIN_FILENO,
STDOUT_FILENO, STDERR_FILENO are the three lowest file
descriptor numbers, in this order. */
if (__builtin_expect (nullfd != fd, 0)// the descriptor we got is not the one we wanted
|| __builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) != 0// fd cannot be stat'ed
|| __builtin_expect (S_ISCHR (st.st_mode), 1) == 0// fd is not a character device
#if defined DEV_NULL_MAJOR && defined DEV_NULL_MINOR
|| st.st_rdev != makedev (DEV_NULL_MAJOR, DEV_NULL_MINOR)// the device is not the null device
#endif
)
/* We cannot even give an error message here since it would
run into the same problems.
*/
while (1)
/* Try for ever and ever. */
ABORT_INSTRUCTION;//asm ("hlt");
}
}
13.一切都准备好了,调用dl_main
(*dl_main) (phdr, phnum, &user_entry);
return user_entry;
}
ld.so分析8 dl_main->process_envvars处理环境变量
dl_main函数是ld.so的真实主体,很大很复杂,想读懂它必须选择一条主线或情景.我们就看hello world程序如何被动态链接的吧。
1.准备例子
hello.c
/* Minimal dynamically linked example used in the walkthrough below.
   The header name after #include had been lost, which made the file
   uncompilable; printf is declared in <stdio.h>.  */
#include <stdio.h>

int main(void)
{
    printf("Hello World!\n");
    return 0;
}
gcc hello.c -o hello
显示完整的elf信息
[zws@mail ~/glibc-2.3/build/elf]$readelf -a hello
ELF Header:
Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
Class: ELF32
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: EXEC (Executable file)
Machine: Intel 80386
Version: 0x1
Entry point address: 0x8048278
Start of program headers: 52 (bytes into file)
Start of section headers: 7460 (bytes into file)
Flags: 0x0
Size of this header: 52 (bytes)
Size of program headers: 32 (bytes)
Number of program headers: 6
Size of section headers: 40 (bytes)
Number of section headers: 35
Section header string table index: 32
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 080480f4 0000f4 000013 00 A 0 0 1
[ 2] .note.ABI-tag NOTE 08048108 000108 000020 00 A 0 0 4
[ 3] .hash HASH 08048128 000128 000028 04 A 4 0 4
[ 4] .dynsym DYNSYM 08048150 000150 000050 10 A 5 1 4
[ 5] .dynstr STRTAB 080481a0 0001a0 00004c 00 A 0 0 1
[ 6] .gnu.version VERSYM 080481ec 0001ec 00000a 02 A 4 0 2
[ 7] .gnu.version_r VERNEED 080481f8 0001f8 000020 00 A 5 1 4
[ 8] .rel.dyn REL 08048218 000218 000008 08 A 4 0 4
[ 9] .rel.plt REL 08048220 000220 000010 08 A 4 11 4
[10] .init PROGBITS 08048230 000230 000017 00 AX 0 0 4
[11] .plt PROGBITS 08048248 000248 000030 04 AX 0 0 4
[12] .text PROGBITS 08048278 000278 000160 00 AX 0 0 4
[13] .fini PROGBITS 080483d8 0003d8 00001b 00 AX 0 0 4
[14] .rodata PROGBITS 080483f4 0003f4 000016 00 A 0 0 4
[15] .eh_frame PROGBITS 0804840c 00040c 000004 00 A 0 0 4
[16] .ctors PROGBITS 08049410 000410 000008 00 WA 0 0 4
[17] .dtors PROGBITS 08049418 000418 000008 00 WA 0 0 4
[18] .jcr PROGBITS 08049420 000420 000004 00 WA 0 0 4
[19] .dynamic DYNAMIC 08049424 000424 0000c8 08 WA 5 0 4
[20] .got PROGBITS 080494ec 0004ec 000004 04 WA 0 0 4
[21] .got.plt PROGBITS 080494f0 0004f0 000014 04 WA 0 0 4
[22] .data PROGBITS 08049504 000504 00000c 00 WA 0 0 4
[23] .bss NOBITS 08049510 000510 000004 00 WA 0 0 4
[24] .comment PROGBITS 00000000 000510 000132 00 0 0 1
[25] .debug_aranges PROGBITS 00000000 000648 000078 00 0 0 8
[26] .debug_pubnames PROGBITS 00000000 0006c0 000025 00 0 0 1
[27] .debug_info PROGBITS 00000000 0006e5 000a84 00 0 0 1
[28] .debug_abbrev PROGBITS 00000000 001169 000138 00 0 0 1
[29] .debug_line PROGBITS 00000000 0012a1 00027c 00 0 0 1
[30] .debug_frame PROGBITS 00000000 001520 000014 00 0 0 4
[31] .debug_str PROGBITS 00000000 001534 0006ba 01 MS 0 0 1
[32] .shstrtab STRTAB 00000000 001bee 000134 00 0 0 1
[33] .symtab SYMTAB 00000000 00229c 0006a0 10 34 88 4
[34] .strtab STRTAB 00000000 00293c 0003ee 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)
There are no section groups in this file.
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000034 0x08048034 0x08048034 0x000c0 0x000c0 R E 0x4
INTERP 0x0000f4 0x080480f4 0x080480f4 0x00013 0x00013 R 0x1
[Requesting program interpreter: /lib/ld-linux.so.2]
LOAD 0x000000 0x08048000 0x08048000 0x00410 0x00410 R E 0x1000
LOAD 0x000410 0x08049410 0x08049410 0x00100 0x00104 RW 0x1000
DYNAMIC 0x000424 0x08049424 0x08049424 0x000c8 0x000c8 RW 0x4
NOTE 0x000108 0x08048108 0x08048108 0x00020 0x00020 R 0x4
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame
03 .ctors .dtors .jcr .dynamic .got .got.plt .data .bss
04 .dynamic
05 .note.ABI-tag
Dynamic section at offset 0x424 contains 20 entries:
Tag Type Name/Value
0x00000001 (NEEDED) Shared library: [libc.so.6]
0x0000000c (INIT) 0x8048230
0x0000000d (FINI) 0x80483d8
0x00000004 (HASH) 0x8048128
0x00000005 (STRTAB) 0x80481a0
0x00000006 (SYMTAB) 0x8048150
0x0000000a (STRSZ) 76 (bytes)
0x0000000b (SYMENT) 16 (bytes)
0x00000015 (DEBUG) 0x0
0x00000003 (PLTGOT) 0x80494f0
0x00000002 (PLTRELSZ) 16 (bytes)
0x00000014 (PLTREL) REL
0x00000017 (JMPREL) 0x8048220
0x00000011 (REL) 0x8048218
0x00000012 (RELSZ) 8 (bytes)
0x00000013 (RELENT) 8 (bytes)
0x6ffffffe (VERNEED) 0x80481f8
0x6fffffff (VERNEEDNUM) 1
0x6ffffff0 (VERSYM) 0x80481ec
0x00000000 (NULL) 0x0
Relocation section '.rel.dyn' at offset 0x218 contains 1 entries:
Offset Info Type Sym.Value Sym. Name
080494ec 00000106 R_386_GLOB_DAT 00000000 __gmon_start__
Relocation section '.rel.plt' at offset 0x220 contains 2 entries:
Offset Info Type Sym.Value Sym. Name
080494fc 00000207 R_386_JUMP_SLOT 00000000 __libc_start_main
08049500 00000407 R_386_JUMP_SLOT 00000000 printf
There are no unwind sections in this file.
Symbol table '.dynsym' contains 5 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 00000000 0 NOTYPE LOCAL DEFAULT UND
1: 00000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
2: 00000000 251 FUNC GLOBAL DEFAULT UND __libc_start_main@GLIBC_2.0 (2)
3: 080483f8 4 OBJECT GLOBAL DEFAULT 14 _IO_stdin_used
4: 00000000 57 FUNC GLOBAL DEFAULT UND printf@GLIBC_2.0 (2)
Symbol table '.symtab' contains 106 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 00000000 0 NOTYPE LOCAL DEFAULT UND
1: 080480f4 0 SECTION LOCAL DEFAULT 1
2: 08048108 0 SECTION LOCAL DEFAULT 2
3: 08048128 0 SECTION LOCAL DEFAULT 3
4: 08048150 0 SECTION LOCAL DEFAULT 4
5: 080481a0 0 SECTION LOCAL DEFAULT 5
6: 080481ec 0 SECTION LOCAL DEFAULT 6
7: 080481f8 0 SECTION LOCAL DEFAULT 7
8: 08048218 0 SECTION LOCAL DEFAULT 8
9: 08048220 0 SECTION LOCAL DEFAULT 9
10: 08048230 0 SECTION LOCAL DEFAULT 10
11: 08048248 0 SECTION LOCAL DEFAULT 11
12: 08048278 0 SECTION LOCAL DEFAULT 12
13: 080483d8 0 SECTION LOCAL DEFAULT 13
14: 080483f4 0 SECTION LOCAL DEFAULT 14
15: 0804840c 0 SECTION LOCAL DEFAULT 15
16: 08049410 0 SECTION LOCAL DEFAULT 16
17: 08049418 0 SECTION LOCAL DEFAULT 17
18: 08049420 0 SECTION LOCAL DEFAULT 18
19: 08049424 0 SECTION LOCAL DEFAULT 19
20: 080494ec 0 SECTION LOCAL DEFAULT 20
21: 080494f0 0 SECTION LOCAL DEFAULT 21
22: 08049504 0 SECTION LOCAL DEFAULT 22
23: 08049510 0 SECTION LOCAL DEFAULT 23
24: 00000000 0 SECTION LOCAL DEFAULT 24
25: 00000000 0 SECTION LOCAL DEFAULT 25
26: 00000000 0 SECTION LOCAL DEFAULT 26
27: 00000000 0 SECTION LOCAL DEFAULT 27
28: 00000000 0 SECTION LOCAL DEFAULT 28
29: 00000000 0 SECTION LOCAL DEFAULT 29
30: 00000000 0 SECTION LOCAL DEFAULT 30
31: 00000000 0 SECTION LOCAL DEFAULT 31
32: 00000000 0 FILE LOCAL DEFAULT ABS
33: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
34: 00000000 0 FILE LOCAL DEFAULT ABS
35: 00000000 0 FILE LOCAL DEFAULT ABS
36: 00000000 0 FILE LOCAL DEFAULT ABS abi-note.S
37: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
38: 00000000 0 FILE LOCAL DEFAULT ABS abi-note.S
39: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
40: 00000000 0 FILE LOCAL DEFAULT ABS abi-note.S
41: 00000000 0 FILE LOCAL DEFAULT ABS
42: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
43: 00000000 0 FILE LOCAL DEFAULT ABS
44: 00000000 0 FILE LOCAL DEFAULT ABS
45: 00000000 0 FILE LOCAL DEFAULT ABS abi-note.S
46: 00000000 0 FILE LOCAL DEFAULT ABS init.c
47: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
48: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
49: 00000000 0 FILE LOCAL DEFAULT ABS initfini.c
50: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
51: 00000000 0 FILE LOCAL DEFAULT ABS
52: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
53: 00000000 0 FILE LOCAL DEFAULT ABS
54: 00000000 0 FILE LOCAL DEFAULT ABS
55: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
56: 0804829c 0 FUNC LOCAL DEFAULT 12 call_gmon_start
57: 00000000 0 FILE LOCAL DEFAULT ABS crtstuff.c
58: 08049410 0 OBJECT LOCAL DEFAULT 16 __CTOR_LIST__
59: 08049418 0 OBJECT LOCAL DEFAULT 17 __DTOR_LIST__
60: 0804840c 0 OBJECT LOCAL DEFAULT 15 __EH_FRAME_BEGIN__
61: 08049420 0 OBJECT LOCAL DEFAULT 18 __JCR_LIST__
62: 0804950c 0 OBJECT LOCAL DEFAULT 22 p.0
63: 08049510 1 OBJECT LOCAL DEFAULT 23 completed.1
64: 080482c0 0 FUNC LOCAL DEFAULT 12 __do_global_dtors_aux
65: 080482fc 0 FUNC LOCAL DEFAULT 12 frame_dummy
66: 00000000 0 FILE LOCAL DEFAULT ABS crtstuff.c
67: 08049414 0 OBJECT LOCAL DEFAULT 16 __CTOR_END__
68: 0804941c 0 OBJECT LOCAL DEFAULT 17 __DTOR_END__
69: 0804840c 0 OBJECT LOCAL DEFAULT 15 __FRAME_END__
70: 08049420 0 OBJECT LOCAL DEFAULT 18 __JCR_END__
71: 080483b4 0 FUNC LOCAL DEFAULT 12 __do_global_ctors_aux
72: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
73: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
74: 00000000 0 FILE LOCAL DEFAULT ABS initfini.c
75: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
76: 00000000 0 FILE LOCAL DEFAULT ABS
77: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
78: 00000000 0 FILE LOCAL DEFAULT ABS
79: 00000000 0 FILE LOCAL DEFAULT ABS
80: 00000000 0 FILE LOCAL DEFAULT ABS /usr/src/build/231499-i38
81: 00000000 0 FILE LOCAL DEFAULT ABS 1.c
82: 08049410 0 NOTYPE LOCAL HIDDEN 16 __fini_array_end
83: 080494f0 0 OBJECT LOCAL HIDDEN 21 _GLOBAL_OFFSET_TABLE_
84: 08049410 0 NOTYPE LOCAL HIDDEN 16 __fini_array_start
85: 08049410 0 NOTYPE LOCAL HIDDEN 16 __init_array_end
86: 08049410 0 NOTYPE LOCAL HIDDEN 16 __init_array_start
87: 08049424 0 OBJECT LOCAL HIDDEN 19 _DYNAMIC
88: 08049504 0 NOTYPE WEAK DEFAULT 22 data_start
89: 08048380 52 FUNC GLOBAL DEFAULT 12 __libc_csu_fini
90: 08048278 0 FUNC GLOBAL DEFAULT 12 _start
91: 00000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
92: 00000000 0 NOTYPE WEAK DEFAULT UND _Jv_RegisterClasses
93: 080483f4 4 OBJECT GLOBAL DEFAULT 14 _fp_hw
94: 080483d8 0 FUNC GLOBAL DEFAULT 13 _fini
95: 00000000 251 FUNC GLOBAL DEFAULT UND __libc_start_main@@GLIBC_
96: 080483f8 4 OBJECT GLOBAL DEFAULT 14 _IO_stdin_used
97: 08049504 0 NOTYPE GLOBAL DEFAULT 22 __data_start
98: 08049508 0 OBJECT GLOBAL HIDDEN 22 __dso_handle
99: 08048350 48 FUNC GLOBAL DEFAULT 12 __libc_csu_init
100: 00000000 57 FUNC GLOBAL DEFAULT UND printf@@GLIBC_2.0
101: 08049510 0 NOTYPE GLOBAL DEFAULT ABS __bss_start
102: 08049514 0 NOTYPE GLOBAL DEFAULT ABS _end
103: 08049510 0 NOTYPE GLOBAL DEFAULT ABS _edata
104: 08048328 39 FUNC GLOBAL DEFAULT 12 main
105: 08048230 0 FUNC GLOBAL DEFAULT 10 _init
Histogram for bucket list length (total of 3 buckets):
Length Number % of total Coverage
0 0 ( 0.0%)
1 2 ( 66.7%) 50.0%
2 1 ( 33.3%) 100.0%
Version symbols section '.gnu.version' contains 5 entries:
Addr: 00000000080481ec Offset: 0x0001ec Link: 4 (.dynsym)
000: 0 (*local*) 0 (*local*) 2 (GLIBC_2.0) 1 (*global*)
004: 2 (GLIBC_2.0)
Version needs section '.gnu.version_r' contains 1 entries:
Addr: 0x00000000080481f8 Offset: 0x0001f8 Link to section: 5 (.dynstr)
000000: Version: 1 File: libc.so.6 Cnt: 1
0x0010: Name: GLIBC_2.0 Flags: none Version: 2
Notes at offset 0x00000108 with length 0x00000020:
Owner Data size Description
GNU 0x00000010 NT_GNU_ABI_TAG (ABI version tag)
我们发现有以上三个重定位项
Relocation section '.rel.dyn' at offset 0x218 contains 1 entries:
Offset Info Type Sym.Value Sym. Name
080494ec 00000106 R_386_GLOB_DAT 00000000 __gmon_start__
Relocation section '.rel.plt' at offset 0x220 contains 2 entries:
Offset Info Type Sym.Value Sym. Name
080494fc 00000207 R_386_JUMP_SLOT 00000000 __libc_start_main
08049500 00000407 R_386_JUMP_SLOT 00000000 printf
2. dl_main 局部变量定义
/* Prologue of dl_main as quoted by the article: the local declarations,
followed by the first two actions of the function.  The listing stops
after those two statements; the rest of the body is not shown here. */
static void
dl_main (const ElfW(Phdr) *phdr,
ElfW(Word) phnum,
ElfW(Addr) *user_entry)
{
const ElfW(Phdr) *ph;
enum mode mode;
struct link_map **preloads;
unsigned int npreloads;
size_t file_size;
char *file;
bool has_interp = false;
unsigned int i;
bool prelinked = false;
bool rtld_is_main = false;
//#ifndef HP_TIMING_NONAVAIL
hp_timing_t start;
hp_timing_t stop;
hp_timing_t diff;
//#endif
//#ifdef USE_TLS
// void *tcbp;
//#endif
/* Process the environment variable which control the behaviour. */
process_envvars (&mode);// handle the LD_* environment variables; the selected mode comes back through &mode
/* Set up a flag which tells we are just starting. */
INTUSE(_dl_starting_up) = 1;
3.先处理环境变量 process_envvars
/* Opening part of process_envvars: sets up the defaults and starts the
loop over all environment entries whose name begins with LD_.  The mode
that was selected is handed back to the caller through MODEP (stored at
the end of the function, outside this excerpt). */
static void
process_envvars (enum mode *modep)
{
char **runp = _environ;// cursor into the environment array
char *envline;
char *debug_output = NULL;
enum mode mode = normal;
/* This is the default place for profiling data file. */
GL(dl_profile_output)
= &"/var/tmp\0/var/profile"[INTUSE(__libc_enable_secure) ? 9 : 0];// offset 0 selects "/var/tmp"; offset 9 (secure mode) selects "/var/profile"
while ((envline = _dl_next_ld_env_entry (&runp)) != NULL)
4.process_envvars->_dl_next_ld_env_entry(sysdeps/generic/dl-environ.c)
/* Scan the environment starting at *POSITION and return the first entry
   whose name begins with `LD_'.

   The returned pointer addresses the text right after the `LD_' prefix
   (e.g. "PRELOAD=..." for "LD_PRELOAD=..."), and *POSITION is advanced to
   the slot following the match so that the next call resumes from there.
   NULL is returned, with *POSITION left untouched, once no further entry
   matches.

   LD_ variables the article lists as valid (described in `man ld.so'):
   LD_LIBRARY_PATH, LD_PRELOAD, LD_TRACE_LOADED_OBJECTS, LD_BIND_NOW,
   LD_WARN, LD_DEBUG, LD_DEBUG_OUTPUT, LD_VERBOSE.  */
char *
//internal_function
_dl_next_ld_env_entry (char ***position)
{
  char **slot;

  for (slot = *position; *slot != NULL; ++slot)
    {
      char *entry = *slot;

      /* Most entries are not loader variables, so a mismatch on the
         first character is the expected outcome.  */
      if (__builtin_expect (entry[0] != 'L', 1)
          || entry[1] != 'D' || entry[2] != '_')
        continue;

      /* Remember where to resume on the next call.  */
      *position = slot + 1;
      return entry + 3;
    }

  return NULL;
}
5返回process_envvars
{
/* Body of the loop over LD_* entries.  envline points just past the
"LD_" prefix, so all indices below are relative to the rest of the
name.  The name length is measured first and then selects which names
are compared. */
size_t len = 0;
while (envline[len] != '\0' && envline[len] != '=')// look for the '='
++len;
if (envline[len] != '=')// not in key=value form
/* This is a "LD_" variable at the end of the string without
a '=' character. Ignore it since otherwise we will access
invalid memory below. */
continue;
switch (len)
{
case 4:
/* Warning level, verbose or not. */
if (memcmp (envline, "WARN", 4) == 0)
GL(dl_verbose) = envline[5] != '\0';// enabled when the value after '=' is non-empty, e.g. LD_WARN=1
break;
case 5:
/* Debugging of the dynamic linker? */
if (memcmp (envline, "DEBUG", 5) == 0)
process_dl_debug (&envline[6]);// parse the list of debug options
break;
case 7:
/* Print information about versions. */
if (memcmp (envline, "VERBOSE", 7) == 0)
{
version_info = envline[8] != '\0';// true when the value after '=' is non-empty
break;
}
/* List of objects to be preloaded. */
if (memcmp (envline, "PRELOAD", 7) == 0)
{
preloadlist = &envline[8];
break;
}
/* Which shared object shall be profiled. */
if (memcmp (envline, "PROFILE", 7) == 0 && envline[8] != '\0')
GL(dl_profile) = &envline[8];
break;
case 8:
/* Do we bind early? */
if (memcmp (envline, "BIND_NOW", 8) == 0)
{
GL(dl_lazy) = envline[9] == '\0';// LD_BIND_NOW=1: bind immediately; LD_BIND_NOW= (empty value): keep lazy binding
break;
}
if (memcmp (envline, "BIND_NOT", 8) == 0)// NOTE(review): effect of dl_bind_not is not visible here; presumably resolved addresses are not written back to the GOT/PLT -- confirm against ld.so(8)
GL(dl_bind_not) = envline[9] != '\0';
break;
case 9:
/* Test whether we want to see the content of the auxiliary
array passed up from the kernel. */
if (memcmp (envline, "SHOW_AUXV", 9) == 0)// dump the auxiliary vector
_dl_show_auxv ();
break;
case 10:
/* Mask for the important hardware capabilities. */
if (memcmp (envline, "HWCAP_MASK", 10) == 0)
GL(dl_hwcap_mask) = __strtoul_internal (&envline[11], NULL, 0, 0);
break;
case 11:
/* Path where the binary is found. */
if (!INTUSE(__libc_enable_secure)
&& memcmp (envline, "ORIGIN_PATH", 11) == 0)
GL(dl_origin_path) = &envline[12];
break;
case 12:
/* The library search path. */
if (memcmp (envline, "LIBRARY_PATH", 12) == 0)
{
library_path = &envline[13];
break;
}
/* Where to write the debugging output; process_envvars later uses
this value as the prefix of the debug file name it opens. */
if (memcmp (envline, "DEBUG_OUTPUT", 12) == 0)
{
debug_output = &envline[13];
break;
}
if (memcmp (envline, "DYNAMIC_WEAK", 12) == 0)
GL(dl_dynamic_weak) = 1;
break;
case 14:
/* Where to place the profiling data file. */
if (!INTUSE(__libc_enable_secure)
&& memcmp (envline, "PROFILE_OUTPUT", 14) == 0
&& envline[15] != '\0')
GL(dl_profile_output) = &envline[15];
break;
case 16:
/* The mode of the dynamic linker can be set. */
if (memcmp (envline, "TRACE_PRELINKING", 16) == 0)
{
mode = trace;
GL(dl_verbose) = 1;
GL(dl_debug_mask) |= DL_DEBUG_PRELINK;
GL(dl_trace_prelink) = &envline[17];
}
break;
case 20:
/* The mode of the dynamic linker can be set. */
if (memcmp (envline, "TRACE_LOADED_OBJECTS", 20) == 0)
mode = trace;
break;
/* We might have some extra environment variable to handle. This
is tricky due to the pre-processing of the length of the name
in the switch statement here. The code here assumes that added
environment variables have a different length. */
#ifdef EXTRA_LD_ENVVARS
EXTRA_LD_ENVVARS
#endif
}
}
上面的处理流程很清晰,各个参数什么用,后面会涉及到
查看生成的汇编代码发现gcc对switch做了优化,使用跳转表(数组)索引各个case。memcmp虽然没有显式定义,但因为使用了-O参数而被优化成内联代码
subl $4, %eax//%eax为len,减去4
cmpl $16, %eax//和16比较
ja .L718//如果大于16,则进入下一轮循环
movl .L770@GOTOFF(%ebx,%eax,4), %eax//取各个case的地址,%ebx为GOT基址,%eax为索引,4为元素大小,.L770@GOTOFF为.L770相对于GOT偏移
addl %ebx, %eax//GOT加上case的地址相对于GOT偏移
jmp *%eax//跳到该case处
.section .rodata//case数组存入.rodata
.align 4
.align 4
.L770:
.long .L728@GOTOFF//len=4
.long .L730@GOTOFF//len=5
.long .L718@GOTOFF//len=6,没有,置为下一轮循环地址
.long .L732@GOTOFF//len=7
.long .L736@GOTOFF//len=8
.long .L739@GOTOFF//len=9
.long .L741@GOTOFF//len=10
.long .L743@GOTOFF//len=11
.long .L745@GOTOFF//len=12
.long .L755@GOTOFF//len=13
.long .L749@GOTOFF//len=14
.long .L766@GOTOFF//len=15
.long .L751@GOTOFF//len=16
.long .L718@GOTOFF//len=17没有,置为下一轮循环地址
.long .L718@GOTOFF//len=18没有,置为下一轮循环地址
.long .L718@GOTOFF//len=19没有,置为下一轮循环地址
.long .L753@GOTOFF//len=20
.text
.L728:
.loc 1 1775 0
movl -40(%ebp), %esi
leal .LC62@GOTOFF(%ebx), %edi
movl $4, %eax
cld //memcmp被内联
movl %eax, %ecx
repz
cmpsb
seta %dl
setb %al
cmpb %al, %dl
jne .L718
.loc 1 1776 0
movl -40(%ebp), %eax
cmpb $0, 5(%eax)
setne %al
movzbl %al, %eax
movl %eax, 80+_rtld_local@GOTOFF(%ebx)
.loc 1 1777 0
jmp .L718
6.process_envvars->process_dl_debug 分析debug选项
/* Nonzero if any of the debugging options is enabled. */
static int any_debug;
/* Process the string given as the parameter which explains which debugging
   options are enabled.

   DL_DEBUG is the value of LD_DEBUG: a list of option names separated by
   blanks, commas or colons.  Every recognised name ORs its mask into
   GL(dl_debug_mask) and sets any_debug; an unrecognised name only
   produces a warning.  If the `help' option ends up selected, the list of
   valid options is printed and the process exits with status 0.  */
static void
process_dl_debug (const char *dl_debug)
{
  /* When adding new entries make sure that the maximal length of a name
     is correctly handled in the LD_DEBUG_HELP code below. */
  /* Table of the values accepted in LD_DEBUG=value. */
  static const struct
  {
    unsigned char len;
    const char name[10];
    const char helptext[41];
    unsigned short int mask;
  }
  debopts[] =
  {
#define LEN_AND_STR(str) sizeof (str) - 1, str
    { LEN_AND_STR ("libs"), "display library search paths",
      DL_DEBUG_LIBS | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("reloc"), "display relocation processing",
      DL_DEBUG_RELOC | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("files"), "display progress for input file",
      DL_DEBUG_FILES | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("symbols"), "display symbol table processing",
      DL_DEBUG_SYMBOLS | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("bindings"), "display information about symbol binding",
      DL_DEBUG_BINDINGS | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("versions"), "display version dependencies",
      DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("all"), "all previous options combined",
      DL_DEBUG_LIBS | DL_DEBUG_RELOC | DL_DEBUG_FILES | DL_DEBUG_SYMBOLS
      | DL_DEBUG_BINDINGS | DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS },
    { LEN_AND_STR ("statistics"), "display relocation statistics",
      DL_DEBUG_STATISTICS },
    { LEN_AND_STR ("help"), "display this help message and exit",
      DL_DEBUG_HELP },
  };
#define ndebopts (sizeof (debopts) / sizeof (debopts[0]))
  /* Padding printed between an option name and its help text.  It is
     indexed with `len - 3', and the names are 3 ("all") to 10
     ("statistics") characters long, so the offsets run from 0 to 7 and
     the literal must hold NINE blanks.  With the one-blank literal shown
     in the listing, every name of five or more characters pointed past
     the end of the literal.  */
  static const char help_pad[] = "         ";

  /* Skip separating white spaces and commas. */
  while (*dl_debug != '\0')
    {
      /* A separator (blank, comma or colon) is simply stepped over. */
      if (*dl_debug != ' ' && *dl_debug != ',' && *dl_debug != ':')
	{
	  size_t cnt;
	  size_t len = 1;

	  /* Find the end of the current word. */
	  while (dl_debug[len] != '\0' && dl_debug[len] != ' '
		 && dl_debug[len] != ',' && dl_debug[len] != ':')
	    ++len;

	  /* Look the word up in the option table. */
	  for (cnt = 0; cnt < ndebopts; ++cnt)
	    if (debopts[cnt].len == len
		&& memcmp (dl_debug, debopts[cnt].name, len) == 0)
	      {
		GL(dl_debug_mask) |= debopts[cnt].mask;
		any_debug = 1;
		break;
	      }

	  if (cnt == ndebopts)
	    {
	      /* Display a warning and skip everything until next
		 separator. */
	      char *copy = strndupa (dl_debug, len);
	      _dl_error_printf ("\
warning: debug option `%s' unknown; try LD_DEBUG=help\n", copy);
	    }

	  dl_debug += len;
	  continue;
	}

      ++dl_debug;
    }

  if (GL(dl_debug_mask) & DL_DEBUG_HELP)
    {
      size_t cnt;

      _dl_printf ("\
Valid options for the LD_DEBUG environment variable are:\n\n");

      for (cnt = 0; cnt < ndebopts; ++cnt)
	_dl_printf (" %.*s%s%s\n", debopts[cnt].len, debopts[cnt].name,
		    help_pad + debopts[cnt].len - 3,
		    debopts[cnt].helptext);

      _dl_printf ("\n\
To direct the debugging output into a file instead of standard output\n\
a filename can be specified using the LD_DEBUG_OUTPUT environment variable.\n");
      _exit (0);
    }
}
举例
[zws@mail ~/glibc-2.3/build/elf]$LD_DEBUG=help ls
Valid options for the LD_DEBUG environment variable are:
libs display library search paths
reloc display relocation processing
files display progress for input file
symbols display symbol table processing
bindings display information about symbol binding
versions display version dependencies
all all previous options combined
statistics display relocation statistics
help display this help message and exit
To direct the debugging output into a file instead of standard output
a filename can be specified using the LD_DEBUG_OUTPUT environment variable.
其他的选项大家自己试验一下看看
7.process_envvars->process_dl_debug->strndupa (string/string.h)
/* Return an alloca'd copy of at most N bytes of string.
   The buffer is obtained with __builtin_alloca and holds strnlen (S, N)
   bytes followed by a terminating NUL.  The statement expression
   evaluates to that buffer, because memcpy returns its destination.
   S and N are each evaluated exactly once.  */
# define strndupa(s, n) \
(__extension__ \
({ \
__const char *__old = (s); \
size_t __len = strnlen (__old, (n)); \
char *__new = (char *) __builtin_alloca (__len + 1); \
__new[__len] = '\0'; /* the terminator is stored before the copy */ \
(char *) memcpy (__new, __old, __len); \
}))
对应的汇编代码是
.LBB78:
subl $8, %esp
pushl -16(%ebp) //参数len
pushl 8(%ebp) //参数dl_debug
.LCFI78:
call __strnlen@PLT//调用__strnlen,strnlen是__strnlen的weak_alias
addl $16, %esp//平栈,8+4+4
leal 16(%eax), %edx//16+__len->%edx
andl $-16, %edx//%edx向下取整到16的倍数(向低地址方向对齐到16字节边界)
subl %edx, %esp//__builtin_alloca 在栈上分配空间,实际分配的大小为(__len+16)向下取整到16的倍数,即>=__len+1且<=__len+16
movl %esp, %edx//__new->%edx
movb $0, (%esp,%eax)//__new[__len] = '\0';
subl $4, %esp
pushl %eax//参数__len
pushl 8(%ebp)//__old
pushl %edx//__new
call memcpy@PLT
addl $12, %esp//平栈
.LBE78:
.loc 1 1710 0
pushl %eax//copy
leal .LC56@GOTOFF(%ebx), %eax//warning: debug option `%s' unknown; try LD_DEBUG=help\n
pushl %eax
pushl $2//STDERR_FILENO
call _dl_dprintf
.LBE77:
addl $16, %esp//平栈,12+前面的subl $4,%esp的4字节
.L700:
.loc 1 1714 0
movl -16(%ebp), %eax
addl %eax, 8(%ebp)//dl_debug += len;
.loc 1 1715 0
jmp .L685
还要指出多次调用__builtin_alloca会不断的在栈上分配空间,即%esp向低地址方向增长。但是随着函数的返回,这些空间自然全部被释放。
8.process_envvars->_dl_show_auxv(sysdeps/generic/dl-sysdep.c)
显示AUXV信息
/* Print one line for every entry of the auxiliary vector _dl_auxv whose
   type has a slot in the table below; the walk stops at the AT_NULL
   entry.  For AT_HWCAP, _dl_procinfo is called first and the generic
   line is printed only when it returns a negative value.  */
void
//internal_function
_dl_show_auxv (void)
{
  /* Label and print form per AT_* type, indexed by `type - 2'.

     The following code assumes that the AT_* values are encoded
     starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values
     close by (otherwise the array will be too large). In case we have
     to support a platform where these requirements are not fulfilled
     some alternative implementation has to be used. */
  static const struct
  {
    const char label[20];
    enum { dec, hex, str } form;
  }
  auxvars[] =
  {
    [AT_EXECFD - 2] = { "AT_EXECFD: ", dec },
    [AT_PHDR - 2] = { "AT_PHDR: 0x", hex },
    [AT_PHENT - 2] = { "AT_PHENT: ", dec },
    [AT_PHNUM - 2] = { "AT_PHNUM: ", dec },
    [AT_PAGESZ - 2] = { "AT_PAGESZ: ", dec },
    [AT_BASE - 2] = { "AT_BASE: 0x", hex },
    [AT_FLAGS - 2] = { "AT_FLAGS: 0x", hex },
    [AT_ENTRY - 2] = { "AT_ENTRY: 0x", hex },
    [AT_NOTELF - 2] = { "AT_NOTELF: ", hex },
    [AT_UID - 2] = { "AT_UID: ", dec },
    [AT_EUID - 2] = { "AT_EUID: ", dec },
    [AT_GID - 2] = { "AT_GID: ", dec },
    [AT_EGID - 2] = { "AT_EGID: ", dec },
    [AT_PLATFORM - 2] = { "AT_PLATFORM: ", str },
    [AT_HWCAP - 2] = { "AT_HWCAP: ", hex },
    [AT_CLKTCK - 2] = { "AT_CLKTCK: ", dec },
    [AT_FPUCW - 2] = { "AT_FPUCW: ", hex },
    [AT_DCACHEBSIZE - 2] = { "AT_DCACHEBSIZE: 0x", hex },
    [AT_ICACHEBSIZE - 2] = { "AT_ICACHEBSIZE: 0x", hex },
    [AT_UCACHEBSIZE - 2] = { "AT_UCACHEBSIZE: 0x", hex }
  };
  char buf[64];
  ElfW(auxv_t) *av;

  /* Terminate string. */
  buf[63] = '\0';

  for (av = _dl_auxv; av->a_type != AT_NULL; ++av)
    {
      /* Subtracting 2 drops AT_NULL (0) and AT_IGNORE (1); for those the
         unsigned result is too large for the table and is rejected by
         the bounds check below. */
      unsigned int idx = (unsigned int) (av->a_type - 2);
      const char *val;

      assert (AT_NULL == 0);
      assert (AT_IGNORE == 1);

      /* Types without a table slot are not shown. */
      if (idx >= sizeof (auxvars) / sizeof (auxvars[0]))
        continue;

      /* AT_HWCAP: nothing more to print unless _dl_procinfo reports a
         negative result. */
      if (av->a_type == AT_HWCAP && _dl_procinfo (av->a_un.a_val) >= 0)
        continue;

      switch (__builtin_expect (auxvars[idx].form, dec))
        {
        case dec:
          val = _itoa ((unsigned long int) av->a_un.a_val,
                       buf + sizeof buf - 1, 10, 0);
          break;
        case hex:
          val = _itoa ((unsigned long int) av->a_un.a_val,
                       buf + sizeof buf - 1, 16, 0);
          break;
        default:
          /* String form: the entry's pointer member is printed as is. */
          val = av->a_un.a_ptr;
          break;
        }

      _dl_printf ("%s%s\n", auxvars[idx].label, val);
    }
}
举例
[zws@mail elf]$ LD_SHOW_AUXV=1 ls
AT_SYSINFO: 0xffffe000
AT_HWCAP: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
AT_PAGESZ: 4096
AT_CLKTCK: 100
AT_PHDR: 0x8048034
AT_PHENT: 32
AT_PHNUM: 7
AT_BASE: 0x40000000
AT_FLAGS: 0x0
AT_ENTRY: 0x8049690
AT_UID: 503
AT_EUID: 503
AT_GID: 504
AT_EGID: 504
AT_PLATFORM: i686
9.process_envvars->EXTRA_LD_ENVVARS (sysdeps/unix/sysv/linux/i386/dl-librecon.h)
额外的环境变量
/* Extra cases spliced into the `switch (len)' of process_envvars; as
   there, envline points just past the "LD_" prefix.

   case 13 handles LD_ASSUME_KERNEL=x.y.z: up to three numeric
   components are packed one per byte into osversion
   (x << 16 | y << 8 | z), and GL(dl_osversion) is set only when the
   result is non-zero.  A malformed component resets osversion to 0, so
   nothing is stored.  The `break' of case 13 sits inside the if-block:
   a 13-character name other than ASSUME_KERNEL falls through into
   case 15.

   case 15 handles LD_LIBRARY_VERSION.  */
#define EXTRA_LD_ENVVARS \
case 13: \
if (memcmp (envline, "ASSUME_KERNEL", 13) == 0)/* requested kernel version */ \
{ \
unsigned long int i, j, osversion = 0; \
char *p = &envline[14], *q; \
\
for (i = 0; i < 3; i++, p = q + 1)/* i indexes the components of x.y.z, three at most */ \
{ \
j = __strtoul_internal (p, &q, 0, 0);/* parse one numeric component; q is left after it */ \
if (j >= 255/* component too large */ || p == q /* no digits were consumed */|| (i < 2 && *q && *q != '.')/* one of the first two components is followed by something other than '.' */) \
{ \
osversion = 0; \
break; \
} \
osversion |= j << (16 - 8 * i);/* i==0: j<<16; i==1: j<<8; i==2: j<<0 -- one byte per component */ \
if (!*q) \
break; \
} \
if (osversion) \
GL(dl_osversion) = osversion; \
break; \
} \
\
case 15: \
if (memcmp (envline, "LIBRARY_VERSION", 15) == 0) \
{ \
GL(dl_correct_cache_id) = envline[16] == '5' ? 2 : 3;/* a value starting with '5' selects cache id 2, anything else 3 */ \
break; \
}
10.返回process_envvars
/* Closing part of process_envvars: report the mode, then either scrub
the environment (secure mode) or open the debug output file. */
/* The caller wants this information. */
*modep = mode;
/* Extra security for SUID binaries. Remove all dangerous environment
variables. */
if (__builtin_expect (INTUSE(__libc_enable_secure), 0))
{
static const char unsecure_envvars[] =
//#ifdef EXTRA_UNSECURE_ENVVARS
/* EXTRA_UNSECURE_ENVVARS is defined in
sysdeps/unix/sysv/linux/i386/dl-librecon.h as quoted below.
Extra unsecure variables. The names are all stuffed in a single
string which means they have to be terminated with a '\0' explicitly.
#define EXTRA_UNSECURE_ENVVARS \
"LD_AOUT_LIBRARY_PATH\0" \
"LD_AOUT_PRELOAD\0"
*/
EXTRA_UNSECURE_ENVVARS
//#endif
/* UNSECURE_ENVVARS is defined in sysdeps/generic/unsecvars.h as quoted
below.
Environment variable to be removed for SUID programs. The names are
all stuffed in a single string which means they have to be terminated
with a '\0' explicitly.
#define UNSECURE_ENVVARS \
"LD_PRELOAD\0" \
"LD_LIBRARY_PATH\0" \
"LD_ORIGIN_PATH\0" \
"LD_DEBUG_OUTPUT\0" \
"LD_PROFILE\0" \
"GCONV_PATH\0" \
"HOSTALIASES\0" \
"LOCALDOMAIN\0" \
"LOCPATH\0" \
"MALLOC_TRACE\0" \
"NLSPATH\0" \
"RESOLV_HOST_CONF\0" \
"RES_OPTIONS\0" \
"TMPDIR\0" \
"TZDIR\0"
*/
UNSECURE_ENVVARS;
const char *nextp;
/* Walk the '\0'-separated names; the list ends at an empty name. */
nextp = unsecure_envvars;
do
{
unsetenv (nextp);// remove this variable from the environment
/* We could use rawmemchr but this need not be fast. */
nextp = (char *) (strchr) (nextp, '\0') + 1;
}
while (*nextp != '\0');
if (__access ("/etc/suid-debug", F_OK) != 0)// the check for /etc/suid-debug failed
unsetenv ("MALLOC_CHECK_");// so MALLOC_CHECK_ is removed as well
}
/* Not the secure case: if we have to run the dynamic linker in
debugging mode and the LD_DEBUG_OUTPUT environment variable is given,
we write the debug messages to this file. */
else if (any_debug && debug_output != NULL)
{
//#ifdef O_NOFOLLOW
const int flags = O_WRONLY | O_APPEND | O_CREAT | O_NOFOLLOW;
//#else
// const int flags = O_WRONLY | O_APPEND | O_CREAT;
//#endif
size_t name_len = strlen (debug_output);
char buf[name_len + 12];// variable-length array; presumably sized for the name, '.', up to ten PID digits and the NUL -- TODO confirm
char *startp;
buf[name_len + 11] = '\0';
startp = _itoa (__getpid (), &buf[name_len + 11], 10, 0);// PID in decimal; _itoa apparently writes the digits backwards from the given end and returns their start
*--startp = '.';// put a '.' in front of the digits
startp = memcpy (startp - name_len, debug_output, name_len);// put the name in front, giving "<debug_output>.<pid>"
GL(dl_debug_fd) = __open (startp, flags, DEFFILEMODE);
if (GL(dl_debug_fd) == -1)
/* We use standard output if opening the file failed. */
GL(dl_debug_fd) = STDOUT_FILENO;
}
}