阅读本文需要先了解 ELF文件格式 的相关知识,以下引用的kernel源代码,都是基于linux kernel源代码版本:3.4。
linux内核模块分两种形态,一是静态编译进内核的模块,二是用insmod命令动态加载的模块,也就是后缀名为KO的文件。这里主要讨论linux内核动态加载模块的过程,也就是KO文件被动态加载进内核,并运行的过程。
后缀为KO的文件其实是一种ELF格式文件,很类似于ELF目标文件(.o文件),但是又与ELF目标文件有一点小区别。使用readelf工具可以看到,KO文件里有一个叫.gnu.linkonce.this_module的段,而普通目标文件是没有这个段的。这个段的内容其实是一个struct module结构体(段的地址就等于module结构体的首地址),记录了KO模块的一些信息,这个结构体在linux kernel源代码里也有定义(include/linux/module.h),因为内核在加载模块时要用到这个结构体。
当linux顺利启动,进入shell的时候,就可以输入insmod命令,加载我们自己的内核模块啦。insmod命令封装了一个叫 sys_init_module 的系统调用,sys_init_module源码如下:
/*
 * init_module system call: load a module image supplied by user space and
 * run its init routine.
 *
 * @umod:  user-space pointer passed to load_module() as the module image
 * @len:   length passed to load_module() together with @umod
 * @uargs: user-space argument string passed to load_module()
 *
 * Returns 0 on success, -EPERM when the caller lacks CAP_SYS_MODULE or
 * modules_disabled is set, the error encoded in load_module()'s result,
 * or the negative value returned by the module's init routine.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;
	/* Must have permission */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;
	/* Do all the hard work */
	mod = load_module(umod, len, uargs);
	if (IS_ERR(mod))
		return PTR_ERR(mod);
	/* Run the module_notify_list chain with MODULE_STATE_COMING. */
	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_COMING, mod);
	/* Set RO and NX regions for core */
	set_section_ro_nx(mod->module_core,
			mod->core_text_size,
			mod->core_ro_size,
			mod->core_size);
	/* Set RO and NX regions for init */
	set_section_ro_nx(mod->module_init,
			mod->init_text_size,
			mod->init_ro_size,
			mod->init_size);
	do_mod_ctors(mod);
	/* Start the module */
	/* Debug trace of the init entry point about to be called. */
	printk(KERN_ERR "mod->init = %p\n", mod->init);
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init routine failed: abort. Try to protect us from
		   buggy refcounters. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
				MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}
	/* A positive return is only warned about; loading continues. */
	if (ret > 0) {
		printk(KERN_WARNING
		"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
		"%s: loading module anyway...\n",
		__func__, mod->name, ret,
		__func__);
		dump_stack();
	}
	/* Now it's a first class citizen! Wake up anyone waiting for it. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_LIVE, mod);
	/* We need to finish all async code before the module init sequence is done */
	async_synchronize_full();
	mutex_lock(&module_mutex);
	/* Drop initial reference. */
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/* Switch the symbol/string table pointers over to the core_* copies. */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* Release the init region and clear every field that described it. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);
	return 0;
}
这是一个用宏定义的函数,展开后函数名就是sys_init_module,参数有3个,umod是ko文件在用户空间的首地址,len是ko文件的大小,uargs是用户空间的参数指针。进入这个函数后先check一下permission,然后调用load_module(),加载ko模块的工作主要就是在load_module()这个函数中完成的。
/*
 * Load a module from the user-space image at @umod.
 *
 * Copies the image into a temporary kernel buffer, lays the sections out
 * and moves them to their final location, resolves symbols, applies
 * relocations, copies the argument string, links the module into the
 * global list and parses its parameters.
 *
 * @umod:  user-space address of the module image
 * @len:   size of the image in bytes
 * @uargs: user-space module argument string
 *
 * Returns the module on success or an ERR_PTR() value on failure. On
 * failure everything set up so far is undone through the goto chain at
 * the end of the function, in reverse order of acquisition.
 */
static struct module *load_module(void __user *umod,
				  unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
		 umod, len, uargs);
	/*
	 * Debug trace. The second copy of this line that printed umod with
	 * %x was dropped: %x is the wrong conversion for a pointer, and the
	 * corrected line would merely repeat this one.
	 */
	printk(KERN_ERR "load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs);

	/* Copy in the blobs from userspace, check they are vaguely sane. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* Figure out module layout, and allocate all the memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}
	/*
	 * Debug trace. It must stay after the IS_ERR() check: before it,
	 * mod may be an ERR_PTR() value and mod->init would fault.
	 */
	printk(KERN_ERR "..mod->init: %p\n", mod->init);

	/* Now module is in final location, initialize linked lists, etc. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* Now we've got everything in the final locations, we can
	 * find optional sections. */
	find_module_sections(mod, &info);

	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* Set up MODINFO_ATTR fields */
	setup_modinfo(mod, &info);

	/* Fix up syms, so that st_value is a pointer to location. */
	printk(KERN_ERR "---------------------------------------\n");
	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* Debug traces of mod->init before and after relocation. */
	printk(KERN_ERR "11..mod->init: %p\n", mod->init);
	err = apply_relocations(mod, &info);
	printk(KERN_ERR "22..mod->init: %p\n", mod->init);
	if (err < 0)
		goto free_modinfo;

	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	flush_module_icache(mod);

	/* Now copy in args */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	/* Mark state as coming so strong_try_module_get() ignores us. */
	mod->state = MODULE_STATE_COMING;

	/* Now sew it into the lists so we can get lockdep and oops
	 * info during argument parsing. No one should access us, since
	 * strong_try_module_get() will fail.
	 * lockdep/oops can run asynchronous, so use the RCU list insertion
	 * function to insert in a way safe to concurrent readers.
	 * The mutex protects against concurrent writers.
	 */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto unlock;
	}

	/* This has to be done once we're sure module name is unique. */
	dynamic_debug_setup(info.debug, info.num_debug);

	/* Find duplicate symbols */
	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	/* Module is ready to execute: parsing args may do that. */
	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
			 -32768, 32767, NULL);
	if (err < 0)
		goto unlink;

	/* Link in to syfs. */
	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Get rid of temporary copy. */
	free_copy(&info);

	/* Done! */
	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	/* Unlink carefully: kallsyms could be walking list. */
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);
 ddebug:
	dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}
进入load_module()后定义了两个重要的变量:
struct load_info info = { NULL, };
struct module *mod;
/*
 * Bookkeeping shared by the load_module() helpers while a module image is
 * being processed.
 */
struct load_info {
	/* Start of the temporary kernel copy of the image (its ELF header). */
	Elf_Ehdr *hdr;
	/* Length of the image in bytes. */
	unsigned long len;
	/* Section header table inside the temporary copy (hdr + e_shoff). */
	Elf_Shdr *sechdrs;
	/* Section-name string table, and the string table linked from the symtab. */
	char *secstrings, *strtab;
	/* Offsets computed by layout_symtab() for the core symbol and string tables. */
	unsigned long symoffs, stroffs;
	/* Handed to dynamic_debug_setup()/dynamic_debug_remove() by load_module(). */
	struct _ddebug *debug;
	unsigned int num_debug;
	/* Section-table indices of the sections the loader looks up. */
	struct {
		unsigned int sym, str, mod, vers, info, pcpu;
	} index;
};
struct module这个结构体的内容和.gnu.linkonce.this_module段的内容是一一对应的,定义有点复杂,用到的时候再看。
接着load_module()调用copy_and_check(),copy_and_check函数主要是vmalloc一块临时的内核空间,将用户空间的ko文件整个复制进去,然后再检查一下文件是否为ELF类型、文件长度有没有超过段表尾端等等。最后将ELF文件的一些信息赋值给info。
/*
 * Copy the module image from user space into a vmalloc()ed buffer and run
 * basic ELF sanity checks on it.
 *
 * @info:  receives the buffer (info->hdr) and its length (info->len)
 * @umod:  user-space address of the image
 * @len:   size of the image in bytes
 * @uargs: unused here
 *
 * Returns 0 on success; -ENOEXEC if the image is too short, is not a
 * relocatable ELF object for this architecture, or its section header
 * table does not fit inside @len; -ENOMEM if the buffer cannot be
 * allocated; -EFAULT if the copy from user space fails. On failure the
 * buffer is freed and @info is left untouched.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Suck in entire file: we'll want most of it. */
	hdr = vmalloc(len);
	if (hdr == NULL)
		return -ENOMEM;

	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/* Sanity checks against insmoding binaries or wrong arch,
	   weird elf version */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The section header table must lie inside the image. Written as a
	 * subtraction from len so that a huge e_shoff cannot wrap the sum
	 * around and slip past the check.
	 */
	if (hdr->e_shoff >= len
	    || hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	info->hdr = hdr;
	info->len = len;
	/* Debug trace: address of the temporary copy. */
	printk(KERN_ERR "temp space hdr: %p\n", hdr);
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}
/*
 * Work out the final layout of the module and move it there.
 *
 * Finishes filling in @info, reserves per-cpu space when the per-cpu
 * section is non-empty, assigns every section an offset, then calls
 * move_module() to allocate the final memory and copy the sections.
 *
 * Returns the struct module inside the final image, or an ERR_PTR() value.
 */
static struct module *layout_and_allocate(struct load_info *info)
{
	/* Module within temporary copy. */
	struct module *mod;
	Elf_Shdr *pcpusec;
	int err;

	mod = setup_load_info(info);
	if (IS_ERR(mod))
		return mod;

	printk(KERN_ERR "mod name = %s\n", mod->name);
	/* mod->init is a function pointer: print it with %p, not %s. */
	printk(KERN_ERR "mod init = %p\n", mod->init);

	err = check_modinfo(mod, info);
	if (err)
		return ERR_PTR(err);

	/* Allow arches to frob section contents and sizes. */
	err = module_frob_arch_sections(info->hdr, info->sechdrs,
					info->secstrings, mod);
	if (err < 0)
		goto out;

	pcpusec = &info->sechdrs[info->index.pcpu];
	printk(KERN_ERR "pcpusec size: %lu\n", (unsigned long)pcpusec->sh_size);
	if (pcpusec->sh_size) {
		/* We have a special allocation for this section. */
		err = percpu_modalloc(mod,
				      pcpusec->sh_size, pcpusec->sh_addralign);
		if (err)
			goto out;
		/* Clearing SHF_ALLOC keeps move_module() from copying it. */
		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
	}

	/* Determine total sizes, and put offsets in sh_entsize. For now
	   this is done generically; there doesn't appear to be any
	   special cases for the architectures. */
	layout_sections(mod, info);
	layout_symtab(mod, info);

	/* Allocate and move to the final place */
	err = move_module(mod, info);
	if (err)
		goto free_percpu;

	/* Module has been copied to its final place now: return it. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	kmemleak_load_module(mod, info);
	return mod;

free_percpu:
	percpu_modfree(mod);
out:
	return ERR_PTR(err);
}
layout_and_allocate()函数先调用setup_load_info()对info进一步初始化,接着调用layout_sections()分配各个段在最终虚拟地址上的偏移,.init段会被单独分配偏移,因为.init段的虚拟地址是单独分配的,后面将详述。然后调用layout_symtab()分配符号表和字符串表在虚拟地址上的偏移。最后调用move_module()将段移动到最终的虚拟地址上去。
setup_load_info()函数继续用ELF信息去初始化info结构体,并调用rewrite_section_headers()。
/*
 * Fill in the convenience pointers and section indices of @info from the
 * ELF headers of the temporary image.
 *
 * Returns a pointer to the struct module inside the temporary copy (the
 * contents of .gnu.linkonce.this_module), ERR_PTR(-ENOEXEC) if that
 * section or the symbol table is missing or the module struct version
 * check fails, or the error from rewrite_section_headers().
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Set up the convenience variables */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find internal symbols and strings. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod) {
		printk(KERN_WARNING "No module found in object\n");
		return ERR_PTR(-ENOEXEC);
	}
	/* This is temporary: point mod into copy of data. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	/* Debug trace; both arguments are pointers, so use %p. */
	printk(KERN_ERR "temp mod: %p, &mod->init: %p\n", mod, &(mod->init));

	if (info->index.sym == 0) {
		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
		       mod->name);
		return ERR_PTR(-ENOEXEC);
	}

	info->index.pcpu = find_pcpusec(info);

	/* Check module struct version now, before we try to use module. */
	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}
/*
 * Point every section header's sh_addr at the section's data inside the
 * temporary image, and clear SHF_ALLOC on the sections that must not be
 * kept (.modinfo, __versions, and .exit* when CONFIG_MODULE_UNLOAD is
 * off).
 *
 * Returns 0, or -ENOEXEC if a section other than SHT_NOBITS extends past
 * the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
	unsigned int i;

	/* This should always be true, but let's be sure. */
	info->sechdrs[0].sh_addr = 0;

	for (i = 1; i < info->hdr->e_shnum; i++) {
		Elf_Shdr *shdr = &info->sechdrs[i];

		if (shdr->sh_type != SHT_NOBITS
		    && info->len < shdr->sh_offset + shdr->sh_size) {
			printk(KERN_ERR "Module len %lu truncated\n",
			       info->len);
			return -ENOEXEC;
		}

		/* Mark all sections sh_addr with their address in the
		   temporary image. */
		shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
		/* Debug trace; sh_addr is cast so the format matches on any ELF class. */
		printk(KERN_ERR "section:%s sh_addr: %lx\n",
		       info->secstrings + shdr->sh_name,
		       (unsigned long)shdr->sh_addr);

#ifndef CONFIG_MODULE_UNLOAD
		/* Don't load .exit sections */
		if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
			shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}

	/* Track but don't keep modinfo and version sections. */
	info->index.vers = find_sec(info, "__versions");
	info->index.info = find_sec(info, ".modinfo");
	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
	return 0;
}
rewrite_section_headers()返回后,将符号表和字符串表的信息记录在info中。
info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
这里将.gnu.linkonce.this_module段在段表中的下标记录在info->index.mod中。
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
现在,mod指针就指向临时空间中的.gnu.linkonce.this_module段的地址了,而.gnu.linkonce.this_module段的内容是编译器生成的并初始化的,因此struct module这个结构体的初始值相当于编译时就设置好了。不信可以打印mod->name看看,就是KO文件的文件名。
好了,现在程序执行完setup_load_info()返回到layout_and_allocate(),接着layout_and_allocate()调用layout_sections()。
/*
 * Assign every SHF_ALLOC section an offset inside either the core region
 * or the init region of the module.
 *
 * The offset is stored in the section's sh_entsize; init offsets also
 * carry INIT_OFFSET_MASK. mod->core_size / mod->init_size grow as
 * sections are placed, and the *_text_size / *_ro_size fields record the
 * running size after the executable and read-only groups.
 *
 * Sections are placed in four passes, one per entry of masks[]: a section
 * is taken in pass m when it has all flags of masks[m][0], none of
 * masks[m][1], and has not been placed yet.
 */
static void layout_sections(struct module *mod, struct load_info *info)
{
	static unsigned long const masks[][2] = {
		/* NOTE: all executable code must be the first section
		 * in this array; otherwise modify the text_size
		 * finder in the two loops below */
		{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
		{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
		{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
		{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
	};
	unsigned int m, i;

	/* ~0UL in sh_entsize marks "no offset assigned yet". */
	for (i = 0; i < info->hdr->e_shnum; i++)
		info->sechdrs[i].sh_entsize = ~0UL;

	pr_debug("Core section allocation order:\n");
	/* Debug traces below use conversions matching the argument types. */
	printk(KERN_ERR "masks array size: %zu\n", ARRAY_SIZE(masks));
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			/* Core pass: everything that is NOT an .init section. */
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			printk(KERN_ERR "core_size: %u\n", mod->core_size);
			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
			printk(KERN_ERR "sh_entsize: %lu, core_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->core_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 0 core_size = %u\n", mod->core_size);
			mod->core_text_size = mod->core_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 1 core_size = %u\n", mod->core_size);
			mod->core_ro_size = mod->core_size;
			break;
		case 3: /* whole core */
			mod->core_size = debug_align(mod->core_size);
			break;
		}
	}

	printk(KERN_ERR "parse init..\n");
	pr_debug("Init section allocation order:\n");
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			/* Init pass: only sections whose name starts with ".init". */
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || !strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			printk(KERN_ERR "init_size: %u\n", mod->init_size);
			s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
					 | INIT_OFFSET_MASK);
			printk(KERN_ERR "sh_entsize: %lu, init_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->init_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->init_size = debug_align(mod->init_size);
			mod->init_text_size = mod->init_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->init_size = debug_align(mod->init_size);
			mod->init_ro_size = mod->init_size;
			break;
		case 3: /* whole init */
			mod->init_size = debug_align(mod->init_size);
			break;
		}
	}
}
layout_sections()利用了struct module 里的两个成员变量:core_size 和 init_size,后面会看到,kernel为ko文件分配最终虚拟地址的时候,实际上分配了两块地址,一块叫core,另一块叫init, 这两个变量分别记录了这两块地址的size。一个内核模块为什么要分配两块地址呢?这是考虑到内核模块的__init函数只运行一次,所以将它单独放在一块内存中可以方便运行结束后,回收这块内存。__init函数就是用 __init 宏定义的函数, #define __init __section(.init.text) ,编译器会将它放入ko文件的.init.text段中。
第一个for循环将所有段的sh_entsize设置为一个特殊值——~0UL(在32位系统上即0xffffffff)。这是个标记,凡是sh_entsize等于这个值的段,就是还未被分配虚拟空间偏移的段。
前面说了,为ko文件分配的最终虚拟地址有两块,core空间和init空间,core_size和init_size记录了这两个空间的size,初始值为0。
第二个for循环为所有具有SHF_ALLOC标志,并且非.init的段分配其在core虚拟空间的偏移,这些段后面将会被复制到core虚拟空间,这是不会被自动释放,常驻内核的空间。
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
每个段在core空间的偏移记录在sh_entsize中,偏移是通过get_offset得到的,这个函数很简单,arch_mod_section_prepend()函数为每个段额外的分配几个byte,可以先认为返回0,第一次调用传入的*size为0,ALIGN()宏是对齐用的宏,一般仍然返回*size,所以可以简化为ret = *size; 接着将core_size加上这个段的size,返回。后面再为下一个段分配偏移的时候,core_size已经非0了,分配的偏移就是*size的大小,每次分配core_size都增加相应的段的size。
/*
 * Reserve room for one section at the end of a region.
 *
 * @mod:     module being laid out (passed to arch_mod_section_prepend())
 * @size:    running size of the region; updated to end just past the
 *           newly placed section
 * @sechdr:  header of the section being placed
 * @section: index of that section
 *
 * Returns the offset of the section inside the region, aligned to the
 * section's sh_addralign (an alignment of 0 is treated as 1).
 */
static long get_offset(struct module *mod, unsigned int *size,
		       Elf_Shdr *sechdr, unsigned int section)
{
	long ret;

	*size += arch_mod_section_prepend(mod, section);
	ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
	*size = ret + sechdr->sh_size;
	/* Debug trace; ELF fields are cast so the format matches on any ELF class. */
	printk(KERN_ERR "sh_addralign: %lu, ret: %ld, sh_size: %lu\n",
	       (unsigned long)sechdr->sh_addralign, ret,
	       (unsigned long)sechdr->sh_size);
	return ret;
}
当第二个for循环完毕,第三个for循环就为.init段分配其在init虚拟空间的偏移,分配方法和前面一样,然后返回layout_and_allocate()函数。layout_and_allocate()函数接着调用layout_symtab()为符号表和字符串表分配虚拟空间。symsect和strsect分别是表示符号表和字符串表的段描述符。符号表和字符串表会在core空间与init空间同时分配。
static void layout_symtab(struct module *mod, struct load_info *info)
{
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst, strtab_size;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
printk(KERN_ERR "symsect->sh_entsize: %d\n", symsect->sh_entsize);
pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
printk(KERN_ERR "symb num : %d\n", nsrc);
/* strtab always starts with a nul, so offset 0 is the empty string. */
strtab_size = 1;
/* Compute total space required for the core symbols' strtab. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
}
/* Append room for core symbols at end of core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->core_size += strtab_size;
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}
先为符号表在init空间分配偏移:
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
接着为“core符号”及其对应的字符串在core空间分配偏移,其实就是只将部分符号表在core空间分配偏移,遍历符号表,对每个符号表项调用is_core_symbol()函数判断是否为“core符号”,如果是,为core符号对应的字符串分配空间,字符串空间记录在strtab_size中。
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
printk(KERN_ERR "symb num : %d\n", nsrc);
/* strtab always starts with a nul, so offset 0 is the empty string. */
strtab_size = 1;
/* Compute total space required for the core symbols' strtab. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
}
下面这三句就为core空间的符号表与字符串表分配好了偏移。(注意:分配的偏移没有记录在sh_entsize中,只是记录在info结构体的symoffs和stroffs中,也就是说这里只是在core空间为符号表与字符串表预留好位置,后面move_module()搬移段的时候不会把符号表、字符串表复制到core空间来)
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->core_size += strtab_size;
最后为字符串表分配init空间的偏移。
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
/*
 * Allocate the core and init regions and copy every SHF_ALLOC section
 * from the temporary image to the offset chosen for it earlier.
 *
 * On success mod->module_core and mod->module_init point at the new
 * regions and every copied section's sh_addr is rewritten to its final
 * address.
 *
 * Returns 0, or -ENOMEM if the core region, or a non-empty init region,
 * cannot be allocated.
 */
static int move_module(struct module *mod, struct load_info *info)
{
	int i;
	void *ptr;

	/* Do the allocs. */
	ptr = module_alloc_update_bounds(mod->core_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. Just mark it as not being a
	 * leak.
	 */
	kmemleak_not_leak(ptr);
	if (!ptr)
		return -ENOMEM;

	memset(ptr, 0, mod->core_size);
	mod->module_core = ptr;

	ptr = module_alloc_update_bounds(mod->init_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. This block doesn't need to be
	 * scanned as it contains data and code that will be freed
	 * after the module is initialized.
	 */
	kmemleak_ignore(ptr);
	if (!ptr && mod->init_size) {
		module_free(mod, mod->module_core);
		return -ENOMEM;
	}
	/*
	 * ptr may legitimately be NULL here when init_size is 0; do not
	 * hand a NULL pointer to memset() in that case.
	 */
	if (ptr)
		memset(ptr, 0, mod->init_size);
	mod->module_init = ptr;

	/* Transfer each section which specifies SHF_ALLOC */
	pr_debug("final section addresses:\n");
	for (i = 0; i < info->hdr->e_shnum; i++) {
		void *dest;
		Elf_Shdr *shdr = &info->sechdrs[i];

		if (!(shdr->sh_flags & SHF_ALLOC))
			continue;

		/* INIT_OFFSET_MASK in sh_entsize selects the init region. */
		if (shdr->sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + shdr->sh_entsize;

		/* Debug trace; dest is a pointer, so use %p. */
		printk(KERN_ERR "name: %s, dest : %p\n",
		       info->secstrings + shdr->sh_name, dest);
		/* SHT_NOBITS sections are not copied; the region was zeroed above. */
		if (shdr->sh_type != SHT_NOBITS)
			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
		/* Update sh_addr to point to copy in image. */
		shdr->sh_addr = (unsigned long)dest;
		pr_debug("\t0x%lx %s\n",
			 (long)shdr->sh_addr, info->secstrings + shdr->sh_name);
	}

	return 0;
}
ptr = module_alloc_update_bounds(mod->core_size);
mod->module_core = ptr;
再为init空间申请一块大小为init_size的内存,将其首地址赋值给struct module结构体的module_init成员:
ptr = module_alloc_update_bounds(mod->init_size);
mod->module_init = ptr;
下面的for循环对每个有SHF_ALLOC标记的段分配绝对虚拟地址(前面分配的只是各个段相对于未来要分配的虚拟地址的偏移,也就是相对于module_core和module_init的偏移)。分配绝对虚拟地址很简单,将申请的虚拟空间的地址(分别保存在module_core和module_init中)直接加上之前分配好的偏移量就行了。如下:(符号表和字符串表的绝对虚拟地址都被分配到了init空间内,所以后面搬移的时候是把这两个表搬移到了init空间而非core空间)
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
dest = mod->module_core + shdr->sh_entsize;
开始段的搬移,将段从临时内核空间,搬移到运行时的虚拟地址上去:
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
最后把绝对虚拟地址赋值给相应段表项的sh_addr成员。返回。
到目前为止,ko文件的对应的段,已经被加载进了内核空间,每个被加载的段都有了自己的运行时地址。但现在还不能运行,因为代码中对符号的引用还没有修正,也就是还没有进行符号重定位。前面说过ko文件类似于.o文件,.o文件对全局符号的引用都是待重定位的,是需要链接器对符号进行链接的,ko文件也一样。只不过ko文件代码中对符号的引用是由内核来进行重定位的。
move_module()返回后,返回到layout_and_allocate()函数中。layout_and_allocate()函数最后将mod指针变量重新指向搬移后的.gnu.linkonce.this_module段的虚拟地址值。
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
现在layout_and_allocate()函数执行完了,返回到load_module函数继续往下执行,
find_module_sections(mod, &info);
这主要是对info进一步初始化。
接着调用simplify_symbols()函数,这个函数将符号表里的符号的绝对地址写入到st_value域中(符号表、字符串表现在都在init空间了)。
for循环遍历init空间的符号表,分析每个符号表项的st_shndx域,st_shndx通常表示符号所在的段,但它有三个特殊值:SHN_ABS,SHN_COMMON,SHN_UNDEF。所以函数中分了4种case来进行处理。SHN_COMMON和SHN_ABS的case先不考虑,因为我们的代码中的符号主要分两种,模块内的符号和内核export出的符号(如printk),这两种符号的st_shndx大部分对应default和SHN_UNDEF这两种case。
对于模块内的符号,程序进入default进行处理,处理很简单,st_value = st_value + 符号所在段的绝对虚拟地址(st_value中原本保存着符号在其所在段的offset)。这样一来,st_value中现在保存的就是符号的绝对虚拟地址了。
对于内核导出的符号,由于它在模块中没有定义,所以它的st_shndx为SHN_UNDEF。对于SHN_UNDEF 这种case的处理过程如下:
1:调用resolve_symbol_wait()函数解析内核符号,这个函数返回一个struct kernel_symbol结构体。
2:将这个结构体的value成员直接赋值给st_value。
结构体定义在include/linux/export.h中:
/* One exported symbol: the entry type of the export tables searched by find_symbol(). */
struct kernel_symbol
{
	/* Value that simplify_symbols() copies into st_value for an undefined symbol. */
	unsigned long value;
	/* Symbol name. */
	const char *name;
};
static int simplify_symbols(struct module *mod, const struct load_info *info)
{
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
Elf_Sym *sym = (void *)symsec->sh_addr;
unsigned long secbase;
unsigned int i;
int ret = 0;
const struct kernel_symbol *ksym;
for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
const char *name = info->strtab + sym[i].st_name;
printk(KERN_ERR "symb name: %s\n", name);
switch (sym[i].st_shndx) {
case SHN_COMMON:
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
printk("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
break;
case SHN_ABS:
/* Don't need to do anything */
pr_debug("Absolute symbol: 0x%08lx\n",
(long)sym[i].st_value);
break;
case SHN_UNDEF:
printk(KERN_ERR "Undefine symb!!\n");
ksym = resolve_symbol_wait(mod, info, name);
printk(KERN_ERR "resolve ok!\n");
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
sym[i].st_value = ksym->value;
break;
}
/* Ok if weak. */
if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
break;
printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
mod->name, name, PTR_ERR(ksym));
ret = PTR_ERR(ksym) ?: -ENOENT;
break;
default:
/* Divert to percpu allocation if a percpu var. */
if (sym[i].st_shndx == info->index.pcpu)
secbase = (unsigned long)mod_percpu(mod);
else
secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
printk(KERN_ERR "section base: %x\n", secbase);
sym[i].st_value += secbase;
printk(KERN_ERR "st_value: %x\n", sym[i].st_value);
break;
}
}
return ret;
}
resolve_symbol_wait()函数用来解析内核导出的符号。不是所有的内核符号都默认导出的,默认内核中的符号在运行时是对外“不可见的”,而内核本身对那些符号地址的引用,都是静态编译链接内核时,链接器写进去的。所以外部模块无法得到内核符号的地址。如果外部模块想要引用内核符号,除非内核将符号地址导出来!内核中的符号可以通过EXPORT_SYMBOL()宏来导出,这个宏就是将符号信息保存在一个struct kernel_symbol结构体中,再将这个结构体编译进内核的一个特殊段,以后如果外部想引用这个符号,只需要在这个段中寻找对应的符号的struct kernel_symbol结构体就行了。更详细的细节google上有很多。
现在来看这个resolve_symbol_wait()函数:
/*
 * Resolve @name with resolve_symbol(), waiting on module_wq for as long as
 * the result is ERR_PTR(-EBUSY).
 *
 * The wait condition is satisfied once resolve_symbol() returns something
 * that is not an error pointer, or an error other than -EBUSY. If
 * wait_event_interruptible_timeout() returns <= 0 (30 * HZ timeout
 * argument), a warning naming the owning module is logged.
 *
 * Returns the value most recently produced by resolve_symbol().
 */
static const struct kernel_symbol *
resolve_symbol_wait(struct module *mod,
		const struct load_info *info,
		const char *name)
{
	const struct kernel_symbol *ksym;
	/* Filled in by resolve_symbol() with the name of the symbol's owner. */
	char owner[MODULE_NAME_LEN];
	/* ksym is assigned inside the wait condition itself. */
	if (wait_event_interruptible_timeout(module_wq,
			!IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
			|| PTR_ERR(ksym) != -EBUSY,
			30 * HZ) <= 0) {
		printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
			mod->name, owner);
	}
	return ksym;
}
resolve_symbol_wait()函数会先调用resolve_symbol()解析内核导出符号。
因此进入resolve_symbol()函数:
/*
 * Look up exported symbol @name on behalf of @mod, under module_mutex.
 *
 * When the symbol is found, its version is checked and @mod is recorded
 * as a user of the exporting module; whatever the outcome of those two
 * steps, the exporter's name is copied into @ownername.
 *
 * Returns the symbol, NULL if it was not found, ERR_PTR(-EINVAL) on a
 * version mismatch, or ERR_PTR() of the error from ref_module().
 */
static const struct kernel_symbol *resolve_symbol(struct module *mod,
						  const struct load_info *info,
						  const char *name,
						  char ownername[])
{
	struct module *exporter;
	const struct kernel_symbol *found;
	const unsigned long *crc;
	int rc;

	printk(KERN_ERR "resolve symbol: %s...\n", name);
	mutex_lock(&module_mutex);

	found = find_symbol(name, &exporter, &crc,
			    !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)),
			    true);
	if (found) {
		if (!check_version(info->sechdrs, info->index.vers, name,
				   mod, crc, exporter)) {
			found = ERR_PTR(-EINVAL);
		} else {
			rc = ref_module(mod, exporter);
			if (rc)
				found = ERR_PTR(rc);
		}
		/* We must make copy under the lock if we failed to get ref. */
		strncpy(ownername, module_name(exporter), MODULE_NAME_LEN);
	}

	mutex_unlock(&module_mutex);
	return found;
}
resolve_symbol()函数继续调用find_symbol()函数,返回一个struct kernel_symbol结构指针。
/*
 * Search every export table for @name.
 *
 * @owner and @crc are optional outputs; each is written only when it is
 * non-NULL and the symbol was found. @gplok and @warn are forwarded to the
 * per-table check through struct find_symbol_arg.
 *
 * Returns the matching entry, or NULL when no table contains @name.
 */
const struct kernel_symbol *find_symbol(const char *name,
					struct module **owner,
					const unsigned long **crc,
					bool gplok,
					bool warn)
{
	struct find_symbol_arg query;

	query.name = name;
	query.gplok = gplok;
	query.warn = warn;

	if (!each_symbol_section(find_symbol_in_section, &query)) {
		pr_debug("Failed to find symbol %s\n", name);
		return NULL;
	}

	if (owner)
		*owner = query.owner;
	if (crc)
		*crc = query.crc;
	return query.sym;
}
/*
 * Call @fn on each export table: first the tables built into the kernel
 * image (owner NULL), then the tables of every module on the modules list.
 *
 * Stops at, and returns true for, the first table on which @fn returns
 * true; returns false when @fn rejected all of them.
 */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
				    struct module *owner,
				    void *data),
			 void *data)
{
	struct module *owner;
	/* Tables delimited by the __start_/__stop_ symbols of the kernel image. */
	static const struct symsearch builtin_tabs[] = {
		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
		  NOT_GPL_ONLY, false },
		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
		  __start___kcrctab_gpl,
		  GPL_ONLY, false },
		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
		  __start___kcrctab_gpl_future,
		  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
		{ __start___ksymtab_unused, __stop___ksymtab_unused,
		  __start___kcrctab_unused,
		  NOT_GPL_ONLY, true },
		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
		  __start___kcrctab_unused_gpl,
		  GPL_ONLY, true },
#endif
	};

	if (each_symbol_in_section(builtin_tabs, ARRAY_SIZE(builtin_tabs),
				   NULL, fn, data))
		return true;

	list_for_each_entry_rcu(owner, &modules, list) {
		/* Same table layout, built from this module's own export arrays. */
		struct symsearch module_tabs[] = {
			{ owner->syms, owner->syms + owner->num_syms,
			  owner->crcs,
			  NOT_GPL_ONLY, false },
			{ owner->gpl_syms,
			  owner->gpl_syms + owner->num_gpl_syms,
			  owner->gpl_crcs,
			  GPL_ONLY, false },
			{ owner->gpl_future_syms,
			  owner->gpl_future_syms + owner->num_gpl_future_syms,
			  owner->gpl_future_crcs,
			  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
			{ owner->unused_syms,
			  owner->unused_syms + owner->num_unused_syms,
			  owner->unused_crcs,
			  NOT_GPL_ONLY, true },
			{ owner->unused_gpl_syms,
			  owner->unused_gpl_syms + owner->num_unused_gpl_syms,
			  owner->unused_gpl_crcs,
			  GPL_ONLY, true },
#endif
		};

		if (each_symbol_in_section(module_tabs,
					   ARRAY_SIZE(module_tabs),
					   owner, fn, data))
			return true;
	}
	return false;
}
/* Describes one export table for the symbol search helpers. */
struct symsearch {
	/* First entry and one-past-the-last entry of the table. */
	const struct kernel_symbol *start, *stop;
	/* CRC array handed to symversion() together with a symbol index. */
	const unsigned long *crcs;
	/* Licence class of the table; check_symbol() compares it against the caller's gplok. */
	enum {
		NOT_GPL_ONLY,
		GPL_ONLY,
		WILL_BE_GPL_ONLY,
	} licence;
	/* True for the tables that exist only under CONFIG_UNUSED_SYMBOLS. */
	bool unused;
};
可以看出内核定义了3~5个内核符号表(是3个还是5个取决于是否定义了CONFIG_UNUSED_SYMBOLS)。这些描述符号表的结构体的成员都已经赋了值,像__start___ksymtab,__stop___ksymtab等等,这些值定义在arch/arm/kernel/vmlinux.lds中,也就是定义在链接脚本中。前面说过,内核符号通过EXPORT_SYMBOL()宏导出到一个特殊段,在链接内核的时候,链接脚本就将这些段合并为几个内核符号表段,并定义了几个标志开始和结束地址的符号,如__start___ksymtab,__stop___ksymtab就标识了__ksymtab符号表段的开始地址和结束地址。
static const struct symsearch arr[] = {
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
NOT_GPL_ONLY, false },
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
__start___kcrctab_gpl,
GPL_ONLY, false },
{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
__start___kcrctab_gpl_future,
WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
{ __start___ksymtab_unused, __stop___ksymtab_unused,
__start___kcrctab_unused,
NOT_GPL_ONLY, true },
{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
__start___kcrctab_unused_gpl,
GPL_ONLY, true },
#endif
};
接着回到each_symbol_section()函数,函数调用each_symbol_in_section(),参数是这个static数组指针和数组的size,owner为NULL,*fn就是 find_symbol_in_section,data其实是&fsa这个指针。each_symbol_in_section()这个函数做的事很简单,就是遍历arr数组中的每个内核符号表,调用find_symbol_in_section函数在每个内核符号表里搜索&fsa指定的符号。
/*
 * Apply @fn to each of the @arrsize tables in @arr, in order, passing
 * @owner and @data through unchanged.
 *
 * Returns true as soon as @fn returns true for a table, false if it never
 * does (including when @arrsize is 0).
 */
static bool each_symbol_in_section(const struct symsearch *arr,
				   unsigned int arrsize,
				   struct module *owner,
				   bool (*fn)(const struct symsearch *syms,
					      struct module *owner,
					      void *data),
				   void *data)
{
	const struct symsearch *tab = arr;
	const struct symsearch *const end = arr + arrsize;

	while (tab != end) {
		if (fn(tab, owner, data))
			return true;
		tab++;
	}
	return false;
}
find_symbol_in_section函数如下,先将data指针转化为struct find_symbol_arg结构体指针,再调用bsearch在syms描述的内核符号表里搜索fsa指定的内核符号,我们假设搜索到了这个符号,搜索的结果就存放在struct kernel_symbol这个结构体中。
/*
 * Look for the symbol named in @data (a struct find_symbol_arg) inside one
 * export table.
 *
 * The table is binary-searched with cmp_name; a hit is then passed to
 * check_symbol(), which decides whether it may be used and, if so, fills
 * in the result fields of @data.
 *
 * Returns true if the symbol was found and accepted, false otherwise.
 */
static bool find_symbol_in_section(const struct symsearch *syms,
				   struct module *owner,
				   void *data)
{
	struct find_symbol_arg *fsa = data;
	struct kernel_symbol *sym;

	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
		      sizeof(struct kernel_symbol), cmp_name);

	/* Debug trace; value is an unsigned long, so print it with %lx. */
	if (sym != NULL)
		printk(KERN_ERR "name: %s, sym value: %lx\n",
		       fsa->name, sym->value);

	if (sym != NULL && check_symbol(syms, owner, sym - syms->start, data))
		return true;

	return false;
}
bsearch函数就是用“二分法”查表,不说了。。
/*
 * bsearch - binary search an array of elements
 * @key:  pointer to the item being searched for
 * @base: pointer to the first element of the array
 * @num:  number of elements
 * @size: size of each element in bytes
 * @cmp:  comparison function; called as cmp(key, element) and expected to
 *        return <0, 0 or >0 when the key sorts before, equal to or after
 *        the element
 *
 * The array must already be ordered consistently with @cmp.
 *
 * Returns a pointer to a matching element, or NULL if there is none.
 */
void *bsearch(const void *key, const void *base, size_t num, size_t size,
	      int (*cmp)(const void *key, const void *elt))
{
	const char *table = base;
	size_t lo = 0;
	size_t hi = num;

	/* Invariant: a match, if any, lies in the half-open range [lo, hi). */
	while (lo < hi) {
		size_t probe = lo + (hi - lo) / 2;
		const char *elt = table + probe * size;
		int order = cmp(key, elt);

		if (order == 0)
			return (void *)elt;

		if (order < 0)
			hi = probe;
		else
			lo = probe + 1;
	}

	return NULL;
}
bsearch返回后,调用check_symbol()函数,这个函数在返回前将搜索到的内核导出符号的struct kernel_symbol结构指针赋值给fsa->sym。
/*
 * Decide whether entry @symnum of table @syms may be used by the request
 * described in @data (a struct find_symbol_arg), and record it if so.
 *
 * A request without gplok is refused for a GPL_ONLY table and, when warn
 * is set, warned about for a WILL_BE_GPL_ONLY table. With
 * CONFIG_UNUSED_SYMBOLS, use of a table flagged unused is also warned
 * about when warn is set.
 *
 * Returns false if the symbol is refused; otherwise stores the owner, CRC
 * and symbol pointer in the request and returns true.
 */
static bool check_symbol(const struct symsearch *syms,
			 struct module *owner,
			 unsigned int symnum, void *data)
{
	struct find_symbol_arg *req = data;

	if (!req->gplok) {
		switch (syms->licence) {
		case GPL_ONLY:
			return false;
		case WILL_BE_GPL_ONLY:
			if (req->warn) {
				printk(KERN_WARNING "Symbol %s is being used "
				       "by a non-GPL module, which will not "
				       "be allowed in the future\n", req->name);
				printk(KERN_WARNING "Please see the file "
				       "Documentation/feature-removal-schedule.txt "
				       "in the kernel source tree for more details.\n");
			}
			break;
		default:
			break;
		}
	}

#ifdef CONFIG_UNUSED_SYMBOLS
	if (syms->unused && req->warn) {
		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
		       "however this module is using it.\n", req->name);
		printk(KERN_WARNING
		       "This symbol will go away in the future.\n");
		printk(KERN_WARNING
		       "Please evalute if this is the right api to use and if "
		       "it really is, submit a report the linux kernel "
		       "mailinglist together with submitting your code for "
		       "inclusion.\n");
	}
#endif

	/* Accepted: hand the result back through the request structure. */
	req->owner = owner;
	req->crc = symversion(syms->crcs, symnum);
	req->sym = &syms->start[symnum];
	return true;
}
/*
 * Record that module @a uses module @b.
 *
 * Nothing is done when @b is NULL or already_uses(@a, @b) is true.
 * Otherwise a reference on @b is taken with strong_try_module_get() and
 * the usage is recorded with add_module_usage(); if recording fails the
 * reference is dropped again.
 *
 * Returns 0 on success or the error from whichever step failed.
 */
int ref_module(struct module *a, struct module *b)
{
	int rc;

	if (!b || already_uses(a, b))
		return 0;

	/* If module isn't available, we fail. */
	rc = strong_try_module_get(b);
	if (rc)
		return rc;

	rc = add_module_usage(a, b);
	if (rc)
		module_put(b);

	return rc;
}
返回到resolve_symbol(),resolve_symbol()最后将描述内核导出符号的struct kernel_symbol结构体的指针sym返回。
现在我们已经返回到resolve_symbol_wait()函数了,接下来调用wait_event_interruptible_timeout(),因为我们刚才已经得到了内核导出符号,那么现在就不用睡眠等待,否则会睡眠。最后resolve_symbol_wait()返回resolve_symbol()刚刚返回的sym。
向上返回到simplify_symbols(),前面说过,将这个struct kernel_symbol结构体的value成员直接赋值给st_value。然后返回。
这样绕了一大圈,simplify_symbols()终于返回了。。。simplify_symbols()返回后,在init空间的符号表的每个符号表项中,st_value域就指向符号的绝对虚拟地址值了。-_-
接下来load_module()调用apply_relocations()进行真正的重定位工作。
/*
 * Walk the section header table of the module image described by @info and
 * apply every relocation section to @mod.
 *
 * Section 0 is skipped.  A section is ignored when its sh_info does not
 * name a valid section index, or when the section it names is not
 * SHF_ALLOC.  SHT_REL sections are handed to apply_relocate(), SHT_RELA
 * sections to apply_relocate_add(); all other section types are left
 * alone.  The printk() calls are debug tracing of the sections processed.
 *
 * Returns 0 on success.  Processing stops at the first handler that
 * returns a negative value, and that value is returned.
 */
static int apply_relocations(struct module *mod, const struct load_info *info)
{
	unsigned int i;
	int err = 0;

	/* Now do relocations. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		unsigned int infosec = info->sechdrs[i].sh_info;

		/* Not a valid relocation section? */
		if (infosec >= info->hdr->e_shnum)
			continue;

		/* Don't bother with non-allocated sections */
		if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
			continue;

		if (info->sechdrs[i].sh_type == SHT_REL) {
			printk(KERN_ERR "relocate section : %s, type: %s\n", info->secstrings + (info->sechdrs[i].sh_name), "SHT_REL");
			err = apply_relocate(info->sechdrs, info->strtab,
					     info->index.sym, i, mod);
			/* mod->init is a function pointer: print it with %p, not %x. */
			printk(KERN_ERR "mod init: %p\n", mod->init);
		} else if (info->sechdrs[i].sh_type == SHT_RELA) {
			printk(KERN_ERR "relocate section : %s, type: %s\n", info->secstrings + (info->sechdrs[i].sh_name), "SHT_RELA");
			err = apply_relocate_add(info->sechdrs, info->strtab,
						 info->index.sym, i, mod);
		}
		if (err < 0)
			break;
	}
	return err;
}
/*
 * Apply one SHT_REL relocation section of a 32-bit ARM module image.
 *
 * @sechdrs:  section header table; the sh_addr of the sections used here is
 *            dereferenced directly as a pointer
 * @strtab:   string table, used only to name symbols in diagnostics
 * @symindex: index in @sechdrs of the symbol table section
 * @relindex: index in @sechdrs of the relocation section to process; its
 *            sh_info selects the section that gets patched
 * @module:   module being loaded (only its name is used, for messages)
 *
 * Each Elf32_Rel entry is validated (symbol index inside the symbol table,
 * a full 32-bit word at r_offset inside the target section) and then the
 * word or halfword pair at that location is patched in place according to
 * the relocation type, using the symbol's st_value.
 *
 * Returns 0 on success, -ENOEXEC on a bad symbol index, an out-of-bounds
 * offset, an out-of-range branch target or an unknown relocation type.
 */
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
	       unsigned int relindex, struct module *module)
{
	Elf32_Shdr *symsec = sechdrs + symindex;
	Elf32_Shdr *relsec = sechdrs + relindex;
	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
	Elf32_Rel *rel = (void *)relsec->sh_addr;
	unsigned int i;

	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
		unsigned long loc;
		Elf32_Sym *sym;
		const char *symname;
		s32 offset;
#ifdef CONFIG_THUMB2_KERNEL
		u32 upper, lower, sign, j1, j2;
#endif

		/*
		 * Valid symbol indices are 0 .. count - 1, so an index equal
		 * to the count must be rejected as well.
		 */
		offset = ELF32_R_SYM(rel->r_info);
		if (offset < 0 || offset >= (symsec->sh_size / sizeof(Elf32_Sym))) {
			pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
				module->name, relindex, i);
			return -ENOEXEC;
		}

		sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
		symname = strtab + sym->st_name;

		/*
		 * A whole u32 must fit at r_offset.  Test the section size
		 * first: for a section shorter than a word the subtraction
		 * below would wrap around and let every offset through.
		 */
		if (dstsec->sh_size < sizeof(u32) ||
		    rel->r_offset < 0 ||
		    rel->r_offset > dstsec->sh_size - sizeof(u32)) {
			pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
			       module->name, relindex, i, symname,
			       rel->r_offset, dstsec->sh_size);
			return -ENOEXEC;
		}

		/* Address of the word to patch. */
		loc = dstsec->sh_addr + rel->r_offset;

		switch (ELF32_R_TYPE(rel->r_info)) {
		case R_ARM_NONE:
			/* ignore */
			break;

		case R_ARM_ABS32:
			/* Add the symbol value to the addend stored in place. */
			*(u32 *)loc += sym->st_value;
			break;

		case R_ARM_PC24:
		case R_ARM_CALL:
		case R_ARM_JUMP24:
			/*
			 * The low 24 bits hold a word offset: scale it to
			 * bytes, sign-extend it, then make it relative to the
			 * patched location.
			 */
			offset = (*(u32 *)loc & 0x00ffffff) << 2;
			if (offset & 0x02000000)
				offset -= 0x04000000;

			offset += sym->st_value - loc;
			if (offset & 3 ||
			    offset <= (s32)0xfe000000 ||
			    offset >= (s32)0x02000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			offset >>= 2;

			*(u32 *)loc &= 0xff000000;
			*(u32 *)loc |= offset & 0x00ffffff;
			break;

		case R_ARM_V4BX:
			/* Preserve Rm and the condition code. Alter
			 * other bits to re-code instruction as
			 * MOV PC,Rm.
			 */
			*(u32 *)loc &= 0xf000000f;
			*(u32 *)loc |= 0x01a0f000;
			break;

		case R_ARM_PREL31:
			/* Location-relative value, stored in the low 31 bits. */
			offset = *(u32 *)loc + sym->st_value - loc;
			*(u32 *)loc = offset & 0x7fffffff;
			break;

		case R_ARM_MOVW_ABS_NC:
		case R_ARM_MOVT_ABS:
			/*
			 * Reassemble the 16-bit immediate from its 4-bit and
			 * 12-bit fields, sign-extend it and add the symbol
			 * value.  MOVT receives the upper halfword of the
			 * result.
			 */
			offset = *(u32 *)loc;
			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
			offset = (offset ^ 0x8000) - 0x8000;

			offset += sym->st_value;
			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
				offset >>= 16;

			*(u32 *)loc &= 0xfff0f000;
			*(u32 *)loc |= ((offset & 0xf000) << 4) |
					(offset & 0x0fff);
			break;

#ifdef CONFIG_THUMB2_KERNEL
		case R_ARM_THM_CALL:
		case R_ARM_THM_JUMP24:
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * 25 bit signed address range (Thumb-2 BL and B.W
			 * instructions):
			 *   S:I1:I2:imm10:imm11:0
			 * where:
			 *   S     = upper[10]   = offset[24]
			 *   I1    = ~(J1 ^ S)   = offset[23]
			 *   I2    = ~(J2 ^ S)   = offset[22]
			 *   imm10 = upper[9:0]  = offset[21:12]
			 *   imm11 = lower[10:0] = offset[11:1]
			 *   J1    = lower[13]
			 *   J2    = lower[11]
			 */
			sign = (upper >> 10) & 1;
			j1 = (lower >> 13) & 1;
			j2 = (lower >> 11) & 1;
			offset = (sign << 24) | ((~(j1 ^ sign) & 1) << 23) |
				((~(j2 ^ sign) & 1) << 22) |
				((upper & 0x03ff) << 12) |
				((lower & 0x07ff) << 1);
			if (offset & 0x01000000)
				offset -= 0x02000000;
			offset += sym->st_value - loc;

			/*
			 * For function symbols, only Thumb addresses are
			 * allowed (no interworking).
			 *
			 * For non-function symbols, the destination
			 * has no specific ARM/Thumb disposition, so
			 * the branch is resolved under the assumption
			 * that interworking is not required.
			 */
			if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
				!(offset & 1)) ||
			    offset <= (s32)0xff000000 ||
			    offset >= (s32)0x01000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			/* Re-encode S, J1, J2 and the immediates into both halfwords. */
			sign = (offset >> 24) & 1;
			j1 = sign ^ (~(offset >> 23) & 1);
			j2 = sign ^ (~(offset >> 22) & 1);
			*(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
					    ((offset >> 12) & 0x03ff));
			*(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
						  (j1 << 13) | (j2 << 11) |
						  ((offset >> 1) & 0x07ff));
			break;

		case R_ARM_THM_MOVW_ABS_NC:
		case R_ARM_THM_MOVT_ABS:
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * MOVT/MOVW instructions encoding in Thumb-2:
			 *
			 * i	= upper[10]
			 * imm4	= upper[3:0]
			 * imm3	= lower[14:12]
			 * imm8	= lower[7:0]
			 *
			 * imm16 = imm4:i:imm3:imm8
			 */
			offset = ((upper & 0x000f) << 12) |
				((upper & 0x0400) << 1) |
				((lower & 0x7000) >> 4) | (lower & 0x00ff);
			offset = (offset ^ 0x8000) - 0x8000;
			offset += sym->st_value;

			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
				offset >>= 16;

			*(u16 *)loc = (u16)((upper & 0xfbf0) |
					    ((offset & 0xf000) >> 12) |
					    ((offset & 0x0800) >> 1));
			*(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
						  ((offset & 0x0700) << 4) |
						  (offset & 0x00ff));
			break;
#endif

		default:
			printk(KERN_ERR "%s: unknown relocation: %u\n",
			       module->name, ELF32_R_TYPE(rel->r_info));
			return -ENOEXEC;
		}
	}
	return 0;
}
这里有一点要注意下,前面不是提到.gnu.linkonce.this_module段吗?这个段也有一个自己的重定位表,叫.rel.gnu.linkonce.this_module,这个重定位表里只有两个重定位表项。还记得前面提到的struct module结构体吗?它的完整定义在下文给出。module结构内有两个成员,init和exit,这两个成员存放着模块的__init函数和__exit函数的指针,.rel.gnu.linkonce.this_module重定位表中的两个重定位项就分别对应着.gnu.linkonce.this_module段中的这两个指针!
也就是说,这两个指针的值也会被apply_relocate()函数重定位,重定位这两个指针有什么用呢?因为后面将会用这两个指针,调用模块的__init函数和__exit函数。
/*
 * Descriptor of one loadable kernel module.
 *
 * NOTE(review): per the surrounding article, every .ko file carries one
 * instance of this structure in its .gnu.linkonce.this_module section, and
 * the init and exit pointers below are filled in by relocation at load time.
 * Field order and the #ifdef blocks determine the layout, so they must not
 * be rearranged.
 */
struct module
{
/* Load state of the module (enum module_state) */
enum module_state state;
/* Member of list of modules */
struct list_head list;
/* Unique handle for this module */
char name[MODULE_NAME_LEN];
/* Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject *holders_dir;
/* Exported symbols */
const struct kernel_symbol *syms;
const unsigned long *crcs;
unsigned int num_syms;
/* Kernel parameters. */
struct kernel_param *kp;
unsigned int num_kp;
/* GPL-only exported symbols. */
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long *gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS
/* unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long *unused_crcs;
unsigned int num_unused_syms;
/* GPL-only, unused exported symbols. */
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long *unused_gpl_crcs;
#endif
/* symbols that will be GPL-only in the near future. */
const struct kernel_symbol *gpl_future_syms;
const unsigned long *gpl_future_crcs;
unsigned int num_gpl_future_syms;
/* Exception table */
unsigned int num_exentries;
struct exception_table_entry *extable;
/* Startup function; sys_init_module() runs it through do_one_initcall() when it is non-NULL. */
int (*init)(void);
/* If this is non-NULL, vfree after init() returns */
void *module_init;
/* Here is the actual code + data, vfree'd on unload. */
void *module_core;
/* Here are the sizes of the init and core sections */
unsigned int init_size, core_size;
/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;
/* Arch-specific module values */
struct mod_arch_specific arch;
unsigned int taints; /* same bits as kernel:tainted */
#ifdef CONFIG_GENERIC_BUG
/* Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry *bug_table;
#endif
#ifdef CONFIG_KALLSYMS
/*
 * We keep the symbol and string tables for kallsyms.
 * The core_* fields below are temporary, loader-only (they
 * could really be discarded after module init).
 */
Elf_Sym *symtab, *core_symtab;
unsigned int num_symtab, core_num_syms;
char *strtab, *core_strtab;
/* Section attributes */
struct module_sect_attrs *sect_attrs;
/* Notes attributes */
struct module_notes_attrs *notes_attrs;
#endif
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args;
#ifdef CONFIG_SMP
/* Per-cpu data. */
void __percpu *percpu;
unsigned int percpu_size;
#endif
#ifdef CONFIG_TRACEPOINTS
/* Tracepoint pointer table and its entry count */
unsigned int num_tracepoints;
struct tracepoint * const *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
/* Jump label entries and their count */
struct jump_entry *jump_entries;
unsigned int num_jump_entries;
#endif
#ifdef CONFIG_TRACING
/* trace_bprintk format strings and their count */
unsigned int num_trace_bprintk_fmt;
const char **trace_bprintk_fmt_start;
#endif
#ifdef CONFIG_EVENT_TRACING
/* Trace event table and its entry count */
struct ftrace_event_call **trace_events;
unsigned int num_trace_events;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* ftrace call-site table and its entry count */
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */
struct list_head source_list;
/* What modules do I depend on? */
struct list_head target_list;
/* Who is waiting for us to be unloaded */
struct task_struct *waiter;
/* Destruction function; only present when CONFIG_MODULE_UNLOAD is set. */
void (*exit)(void);
struct module_ref __percpu *refptr;
#endif
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
};
继续一路返回到load_module()函数,load_module()后面的代码不看,一路返回到sys_init_module(),在sys_init_module()中,后面会调用do_one_initcall(),参数就是被重定位过的init指针。
/* Excerpt from sys_init_module(): run the module's init function, if it has one, and keep its return value in ret. */
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
这个do_one_initcall()函数会调用这个init指针所指向的函数,至此,我们模块的__init函数就被调用了。
(__init函数是指用__init前缀定义的函数,__exit函数是指用__exit前缀定义的函数,前面提过。)