linux内核模块动态加载过程

阅读本文需要先了解 ELF文件格式 的相关知识,以下引用的kernel源代码,都是基于linux kernel源代码版本:3.4。

linux内核模块分两种形态,一是静态编译进内核的模块,二是用insmod命令动态加载的模块,也就是后缀名为KO的文件。这里主要讨论linux内核动态加载模块的过程,也就是KO文件被动态加载进内核,并运行的过程。


后缀为KO的文件其实是一种ELF格式文件,很类似于ELF目标文件(.o文件),但是又与ELF目标文件有一点小区别。使用readelf工具可以看到,KO文件里有一个叫.gnu.linkonce.this_module的段,而普通目标文件是没有这个段的。这个段的内容其实是一个struct module结构体(段的地址就等于module结构体的首地址),记录了KO模块的一些信息,这个结构体在linux kernel源代码里也有定义(include/linux/module.h),因为内核在加载模块时要用到这个结构体。


当linux顺利启动,进入shell的时候,就可以输入insmod命令,加载我们自己的内核模块啦。insmod命令封装了一个叫 sys_init_module 的系统调用,sys_init_module源码如下:

/*
 * init_module system call: load a module image from user memory and run
 * its init routine.
 *
 * @umod:  user-space address of the module image
 * @len:   length of the image in bytes
 * @uargs: user-space module argument string
 *
 * Returns 0 on success, -EPERM when the caller lacks CAP_SYS_MODULE or
 * module loading is disabled, the error reported by load_module(), or the
 * negative value returned by the module's own init routine.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Must have permission */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	/* Do all the hard work */
	mod = load_module(umod, len, uargs);
	if (IS_ERR(mod))
		return PTR_ERR(mod);

	/* Tell registered notifiers that the module is coming. */
	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_COMING, mod);

	/* Set RO and NX regions for core */
	set_section_ro_nx(mod->module_core,
				mod->core_text_size,
				mod->core_ro_size,
				mod->core_size);

	/* Set RO and NX regions for init */
	set_section_ro_nx(mod->module_init,
				mod->init_text_size,
				mod->init_ro_size,
				mod->init_size);

	do_mod_ctors(mod);
	/* Start the module */
	/* Debug trace: address of the module's init routine. */
    printk(KERN_ERR "mod->init = %p\n", mod->init);
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/* Init routine failed: abort.  Try to protect us from
                   buggy refcounters. */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}
	/* A positive return is not treated as failure, only reported. */
	if (ret > 0) {
		printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
		       __func__, mod->name, ret,
		       __func__);
		dump_stack();
	}

	/* Now it's a first class citizen!  Wake up anyone waiting for it. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_LIVE, mod);

	/* We need to finish all async code before the module init sequence is done */
	async_synchronize_full();

	mutex_lock(&module_mutex);
	/* Drop initial reference. */
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/*
	 * Switch the symbol/string table pointers to their core_* copies
	 * before the init region is freed below.
	 */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* Release the init region and clear every field that described it. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}

这是一个用宏定义的函数,展开后函数名就是sys_init_module,参数有3个,umod是ko文件在用户空间的首地址,len是ko文件的大小,uargs是用户空间的参数指针。进入这个函数后先check一下permission,然后调用load_module(),加载ko模块的工作主要就是在load_module()这个函数中完成的。

/*
 * Allocate and load a module: copy the image in from user space, lay it
 * out in its final memory, resolve symbols, apply relocations, link the
 * module into the global list and parse its arguments.
 *
 * @umod:  user-space address of the module image
 * @len:   length of the image in bytes
 * @uargs: user-space module argument string
 *
 * Returns the loaded module, or an ERR_PTR() value on failure; callers
 * must test the result with IS_ERR().  On failure everything acquired so
 * far is released by the unwind labels at the bottom, in reverse order of
 * acquisition.
 */
static struct module *load_module(void __user *umod,
				  unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);
	/* Debug trace.  Pointers are printed with %p, never %x. */
	printk(KERN_ERR "load_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);

	/* Copy in the blobs from userspace, check they are vaguely sane. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* Figure out module layout, and allocate all the memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}

	/*
	 * Debug trace.  This must stay after the IS_ERR() check: on failure
	 * mod is an encoded error value, not a dereferenceable pointer.
	 */
	printk(KERN_ERR "..mod->init: %p\n", mod->init);

	/* Now module is in final location, initialize linked lists, etc. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* Now we've got everything in the final locations, we can
	 * find optional sections. */
	find_module_sections(mod, &info);

	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* Set up MODINFO_ATTR fields */
	setup_modinfo(mod, &info);

	/* Fix up syms, so that st_value is a pointer to location. */
	printk(KERN_ERR "---------------------------------------\n");

	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* Debug trace: mod->init before and after relocation. */
	printk(KERN_ERR "11..mod->init: %p\n", mod->init);
	err = apply_relocations(mod, &info);
	printk(KERN_ERR "22..mod->init: %p\n", mod->init);
	if (err < 0)
		goto free_modinfo;

	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	flush_module_icache(mod);
	/* Now copy in args */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	/* Mark state as coming so strong_try_module_get() ignores us. */
	mod->state = MODULE_STATE_COMING;

	/* Now sew it into the lists so we can get lockdep and oops
	 * info during argument parsing.  No one should access us, since
	 * strong_try_module_get() will fail.
	 * lockdep/oops can run asynchronous, so use the RCU list insertion
	 * function to insert in a way safe to concurrent readers.
	 * The mutex protects against concurrent writers.
	 */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto unlock;
	}

	/* This has to be done once we're sure module name is unique. */
	dynamic_debug_setup(info.debug, info.num_debug);

	/* Find duplicate symbols */
	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	/* Module is ready to execute: parsing args may do that. */
	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
			 -32768, 32767, NULL);
	if (err < 0)
		goto unlink;

	/* Link in to sysfs. */
	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Get rid of temporary copy. */
	free_copy(&info);

	/* Done! */
	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	/* Unlink carefully: kallsyms could be walking list. */
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);

 ddebug:
	dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}

进入load_module()后定义了两个重要的变量:

	struct load_info info = { NULL, };
	struct module *mod;

其中info是一个struct load_info结构体,这个结构体主要保存了ELF文件的一些基本信息:

/*
 * Working state for one module image while it is being loaded.  Filled in
 * step by step by copy_and_check(), setup_load_info(),
 * rewrite_section_headers() and layout_symtab().
 */
struct load_info {
	/* Start of the temporary in-kernel copy of the image (its ELF header). */
	Elf_Ehdr *hdr;
	/* Length of that image. */
	unsigned long len;
	/* Section header table, located at hdr + e_shoff. */
	Elf_Shdr *sechdrs;
	/* Section-name string table (section e_shstrndx). */
	char *secstrings;
	/* String table linked from the symbol table section. */
	char *strtab;
	/* Offsets reserved in the core region for core symbols ... */
	unsigned long symoffs;
	/* ... and for the strings naming them. */
	unsigned long stroffs;
	/* Passed to dynamic_debug_setup() together with num_debug. */
	struct _ddebug *debug;
	unsigned int num_debug;
	/* Indices into sechdrs[] of the sections the loader cares about. */
	struct {
		unsigned int sym;	/* SHT_SYMTAB section */
		unsigned int str;	/* its sh_link string table */
		unsigned int mod;	/* .gnu.linkonce.this_module */
		unsigned int vers;	/* __versions */
		unsigned int info;	/* .modinfo */
		unsigned int pcpu;	/* result of find_pcpusec() */
	} index;
};

hdr是ELF文件头的指针,len是文件长度,sechdrs是段表指针,secstrings 和 strtab 分别是段表字符串表和字符串表的首地址。index结构体里保存的是一些段在段表里的索引号,看到有个mod段了吧,这个mod其实就是上面提到过的.gnu.linkonce.this_module段在段表中的下标。

struct module这个结构体的内容和.gnu.linkonce.this_module段的内容是一一对应的,定义有点复杂,用到的时候再看。

接着load_module()调用copy_and_check(),copy_and_check函数主要是vmalloc一块临时的内核空间,将用户空间的ko文件整个复制进去,然后再检查一下文件是否为ELF类型、文件长度有没有超过段表尾端等等。最后将ELF文件的一些信息赋值给info。

/*
 * Copy the whole module image from user space into a temporary vmalloc()
 * buffer and run basic ELF sanity checks on it.
 *
 * @info:  receives the buffer (info->hdr) and its length (info->len)
 * @umod:  user-space address of the image
 * @len:   length of the image in bytes
 * @uargs: not used here
 *
 * Returns 0 on success.  On failure returns -ENOEXEC (not a loadable
 * relocatable ELF object for this architecture, or truncated), -ENOMEM
 * (buffer allocation failed) or -EFAULT (copy from user space failed);
 * the temporary buffer is freed in every failure case.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Suck in entire file: we'll want most of it. */
	hdr = vmalloc(len);
	if (hdr == NULL)
		return -ENOMEM;

	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/* Sanity checks against insmoding binaries or wrong arch,
	   weird elf version */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The section header table must lie inside the image.  Compare
	 * against the space remaining after e_shoff rather than adding
	 * e_shoff to the table size: a huge e_shoff from a crafted file
	 * could make that sum wrap around and slip past the check.
	 */
	if (hdr->e_shoff >= len ||
	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	info->hdr = hdr;
	info->len = len;
	/* Debug trace: address of the temporary copy (a pointer, so %p). */
	printk(KERN_ERR "temp space hdr: %p\n", hdr);
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}

接着load_module()调用layout_and_allocate()函数,这个函数主要任务是决定ko文件中哪些段需要为其分配地址空间,并为ko文件中的每个需要加载的段计算并分配虚拟地址,也就是运行时地址。前面说过,ko文件是类似于.o目标文件的,所以它的每个段的虚拟地址就像目标文件一样,都为0,因此需要链接后才能运行。分配每个段的运行时地址就是链接的第一步。


/*
 * Decide where every loadable section goes, allocate the final memory and
 * move the sections there.
 *
 * @info: load state previously filled in by copy_and_check()
 *
 * Returns the struct module located in its final memory (the relocated
 * .gnu.linkonce.this_module section), or an ERR_PTR() value on failure.
 */
static struct module *layout_and_allocate(struct load_info *info)
{
	/* Module within temporary copy. */
	struct module *mod;
	Elf_Shdr *pcpusec;
	int err;

	mod = setup_load_info(info);
	if (IS_ERR(mod))
		return mod;

	/*
	 * Debug trace.  mod->init is a function pointer, not a string, so
	 * it must be printed with %p; %s would make printk read from that
	 * address as if it were text.
	 */
	printk(KERN_ERR "mod name = %s\n", mod->name);
	printk(KERN_ERR "mod init = %p\n", mod->init);

	err = check_modinfo(mod, info);
	if (err)
		return ERR_PTR(err);

	/* Allow arches to frob section contents and sizes.  */
	err = module_frob_arch_sections(info->hdr, info->sechdrs,
					info->secstrings, mod);
	if (err < 0)
		goto out;

	pcpusec = &info->sechdrs[info->index.pcpu];
	/* sh_size is an ELF word whose width depends on the ELF class. */
	printk(KERN_ERR "pcpusec size: %lu\n",
	       (unsigned long)pcpusec->sh_size);
	if (pcpusec->sh_size) {
		/* We have a special allocation for this section. */
		err = percpu_modalloc(mod,
				      pcpusec->sh_size, pcpusec->sh_addralign);
		if (err)
			goto out;
		/* Keep move_module() from copying it into the core region. */
		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
	}

	/* Determine total sizes, and put offsets in sh_entsize.  For now
	   this is done generically; there doesn't appear to be any
	   special cases for the architectures. */
	layout_sections(mod, info);
	layout_symtab(mod, info);

	/* Allocate and move to the final place */
	err = move_module(mod, info);
	if (err)
		goto free_percpu;

	/* Module has been copied to its final place now: return it. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	kmemleak_load_module(mod, info);
	return mod;

free_percpu:
	percpu_modfree(mod);
out:
	return ERR_PTR(err);
}

layout_and_allocate()函数先调用setup_load_info()对info进一步初始化,接着调用layout_sections()分配各个段在最终虚拟地址上的偏移,.init段会被单独分配偏移,因为.init段的虚拟地址是单独分配的,后面将详述。然后调用layout_symtab()分配符号表和字符串表在虚拟地址上的偏移。最后调用move_module()将段移动到最终的虚拟地址上去。


setup_load_info()函数继续用ELF信息去初始化info结构体,并调用rewrite_section_headers()。

/*
 * Fill in the convenience pointers and section indices of @info from the
 * ELF headers of the temporary image.
 *
 * Returns a pointer to the struct module inside the temporary copy (the
 * contents of the .gnu.linkonce.this_module section), or an ERR_PTR()
 * value when section headers are invalid, the module section or the
 * symbol table is missing, or the module struct version check fails.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Set up the convenience variables */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find internal symbols and strings. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod) {
		printk(KERN_WARNING "No module found in object\n");
		return ERR_PTR(-ENOEXEC);
	}
	/* This is temporary: point mod into copy of data. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	/* Debug trace: both values are pointers, so print them with %p. */
	printk(KERN_ERR "temp mod: %p, &mod->init: %p\n", mod, &(mod->init));
	if (info->index.sym == 0) {
		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
		       mod->name);
		return ERR_PTR(-ENOEXEC);
	}

	info->index.pcpu = find_pcpusec(info);

	/* Check module struct version now, before we try to use module. */
	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}

rewrite_section_headers()这个函数将每个段的虚拟地址(sh_addr)暂时设置为其在临时空间中的地址。并将.modinfo段(info)和__versions段(vers)的SHF_ALLOC标志清零,表示不为这两个段分配空间。

/*
 * Point every section's sh_addr at its data inside the temporary image
 * and clear SHF_ALLOC on the sections that must not be loaded.
 *
 * Also records the indices of the __versions and .modinfo sections in
 * info->index.
 *
 * Returns 0, or -ENOEXEC if a section with file contents extends past the
 * end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
	unsigned int i;

	/* This should always be true, but let's be sure. */
	info->sechdrs[0].sh_addr = 0;

	for (i = 1; i < info->hdr->e_shnum; i++) {
		Elf_Shdr *shdr = &info->sechdrs[i];
		if (shdr->sh_type != SHT_NOBITS
		    && info->len < shdr->sh_offset + shdr->sh_size) {
			printk(KERN_ERR "Module len %lu truncated\n",
			       info->len);
			return -ENOEXEC;
		}

		/* Mark all sections sh_addr with their address in the
		   temporary image. */
		shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
		/*
		 * Debug trace.  sh_addr is address-sized, so widen it to
		 * unsigned long and print with %lx instead of %x.
		 */
		printk(KERN_ERR "section:%s sh_addr: %lx\n",
		       info->secstrings + shdr->sh_name,
		       (unsigned long)shdr->sh_addr);
#ifndef CONFIG_MODULE_UNLOAD
		/* Don't load .exit sections */
		if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
			shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}

	/* Track but don't keep modinfo and version sections. */
	info->index.vers = find_sec(info, "__versions");
	info->index.info = find_sec(info, ".modinfo");
	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
	return 0;
}


rewrite_section_headers()返回后,将符号表和字符串表的信息记录在info中。

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
这里将.gnu.linkonce.this_module段在段表中的下标记录在info->index.mod中。

        mod = (void *)info->sechdrs[info->index.mod].sh_addr;
现在,mod指针就指向临时空间中的.gnu.linkonce.this_module段的地址了,而.gnu.linkonce.this_module段的内容是编译器生成的并初始化的,因此struct  module这个结构体的初始值相当于编译时就设置好了。不信可以打印mod->name看看,就是KO文件的文件名。

好了,现在程序执行完setup_load_info()返回到layout_and_allocate(),接着layout_and_allocate()调用layout_sections()。

/*
 * Give every SHF_ALLOC section an offset in either the core region or the
 * init region, and accumulate the region sizes in @mod.
 *
 * The offset is stored in the section's sh_entsize field; init offsets
 * additionally carry INIT_OFFSET_MASK.  Sections are placed in the order
 * of the masks[] table (executable, read-only, writable, small data), and
 * the text/ro boundaries are recorded in core_text_size/core_ro_size and
 * init_text_size/init_ro_size as each class is completed.
 */
static void layout_sections(struct module *mod, struct load_info *info)
{
	static unsigned long const masks[][2] = {
		/* NOTE: all executable code must be the first section
		 * in this array; otherwise modify the text_size
		 * finder in the two loops below */
		{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
		{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
		{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
		{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
	};
	unsigned int m, i;

	/* ~0UL marks a section that has not been given an offset yet. */
	for (i = 0; i < info->hdr->e_shnum; i++)
		info->sechdrs[i].sh_entsize = ~0UL;

	pr_debug("Core section allocation order:\n");
	/* ARRAY_SIZE() yields a size_t, hence %zu. */
	printk(KERN_ERR "masks array size: %zu\n", ARRAY_SIZE(masks));
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			/*
			 * Skip sections lacking a required flag, carrying an
			 * excluded flag, already placed, or belonging to
			 * the init region.
			 */
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			/* The region sizes are unsigned int: print with %u. */
			printk(KERN_ERR "core_size: %u\n", mod->core_size);
			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
			printk(KERN_ERR "sh_entsize: %lu, core_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->core_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 0 core_size = %u\n",
			       mod->core_size);
			mod->core_text_size = mod->core_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 1 core_size = %u\n",
			       mod->core_size);
			mod->core_ro_size = mod->core_size;
			break;
		case 3: /* whole core */
			mod->core_size = debug_align(mod->core_size);
			break;
		}
	}
	printk(KERN_ERR "parse init..\n");
	pr_debug("Init section allocation order:\n");
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			/* Same filter as above, but keep only .init sections. */
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || !strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			printk(KERN_ERR "init_size: %u\n", mod->init_size);
			s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
					 | INIT_OFFSET_MASK);
			/*
			 * The value carries INIT_OFFSET_MASK, so it is shown
			 * in hex rather than as a signed decimal.
			 */
			printk(KERN_ERR "sh_entsize: 0x%lx, init_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->init_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->init_size = debug_align(mod->init_size);
			mod->init_text_size = mod->init_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->init_size = debug_align(mod->init_size);
			mod->init_ro_size = mod->init_size;
			break;
		case 3: /* whole init */
			mod->init_size = debug_align(mod->init_size);
			break;
		}
	}
}

layout_sections()利用了struct module 里的两个成员变量:core_size 和 init_size,后面会看到,kernel为ko文件分配最终虚拟地址的时候,实际上分配了两块地址,一块叫core,另一块叫init,这两个变量分别记录了这两块地址的size。一个内核模块为什么要分配两块地址呢?这是考虑到内核模块的__init函数只运行一次,所以将它单独放在一块内存中,可以方便在运行结束后回收这块内存。__init函数就是用 __init 宏定义的函数, #define __init        __section(.init.text) ,编译器会将它放入ko文件的.init.text段中。

第一个for循环将所有段的sh_entsize设置为一个特殊值——~0UL(所有位全为1,在32位系统上即0xffffffff)。这是个标记,凡是sh_entsize等于这个值的段,就是还未被分配虚拟空间偏移的段。

前面说了,为ko文件分配的最终虚拟地址有两块,core空间和init空间,core_size和init_size记录了这两个空间的size,初始值为0。

第二个for循环为所有具有SHF_ALLOC标志,并且非.init的段分配其在core虚拟空间的偏移,这些段后面将会被复制到core虚拟空间,这是不会被自动释放,常驻内核的空间。

	s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
每个段在core空间的偏移记录在sh_entsize中,偏移是通过get_offset得到的,这个函数很简单,arch_mod_section_prepend()函数为每个段额外的分配几个byte,可以先认为返回0,第一次调用传入的*size为0,ALIGN()宏是对齐用的宏,一般仍然返回*size,所以可以简化为ret = *size; 接着将core_size加上这个段的size,返回。后面再为下一个段分配偏移的时候,core_size已经非0了,分配的偏移就是*size的大小,每次分配core_size都增加相应的段的size。

/*
 * Reserve room for one section at the end of a region.
 *
 * @mod:     module being laid out
 * @size:    running size of the region; advanced past the new section
 * @sechdr:  header of the section being placed
 * @section: index of that section, passed to arch_mod_section_prepend()
 *
 * Returns the offset assigned to the section: the current region size
 * plus any architecture prepend, rounded up to the section's alignment
 * (an sh_addralign of 0 is treated as 1).
 */
static long get_offset(struct module *mod, unsigned int *size,
		       Elf_Shdr *sechdr, unsigned int section)
{
	long ret;

	*size += arch_mod_section_prepend(mod, section);
	ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
	*size = ret + sechdr->sh_size;
	/*
	 * Debug trace.  sh_addralign and sh_size are ELF words whose width
	 * depends on the ELF class, so widen them explicitly for printk.
	 */
	printk(KERN_ERR "sh_addralign: %lu, ret: %ld, sh_size: %lu\n",
	       (unsigned long)sechdr->sh_addralign, ret,
	       (unsigned long)sechdr->sh_size);
	return ret;
}

当第二个for循环完毕,第三个for循环就为.init段分配其在init虚拟空间的偏移,分配方法和前面一样,然后返回layout_and_allocate()函数。layout_and_allocate()函数接着调用layout_symtab()为符号表和字符串表分配虚拟空间。symsect和strsect分别是表示符号表和字符串表的段描述符。符号表和字符串表会在core空间与init空间同时分配。


static void layout_symtab(struct module *mod, struct load_info *info)
{
	Elf_Shdr *symsect = info->sechdrs + info->index.sym;
	Elf_Shdr *strsect = info->sechdrs + info->index.str;
	const Elf_Sym *src;
	unsigned int i, nsrc, ndst, strtab_size;

	/* Put symbol section at end of init part of module. */
	symsect->sh_flags |= SHF_ALLOC;
	symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
					 info->index.sym) | INIT_OFFSET_MASK;
        printk(KERN_ERR "symsect->sh_entsize: %d\n", symsect->sh_entsize);
	pr_debug("\t%s\n", info->secstrings + symsect->sh_name);

	src = (void *)info->hdr + symsect->sh_offset;
	nsrc = symsect->sh_size / sizeof(*src);
        printk(KERN_ERR "symb num : %d\n", nsrc);

	/* strtab always starts with a nul, so offset 0 is the empty string. */
	strtab_size = 1;

	/* Compute total space required for the core symbols' strtab. */
	for (ndst = i = 0; i < nsrc; i++) {
		if (i == 0 ||
		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
			ndst++;
		}
	}

	/* Append room for core symbols at end of core part. */
	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
	info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
	mod->core_size += strtab_size;

	/* Put string table section at end of init part of module. */
	strsect->sh_flags |= SHF_ALLOC;
	strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
					 info->index.str) | INIT_OFFSET_MASK;
	pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}

先为符号表在init空间分配偏移:

	symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
					 info->index.sym) | INIT_OFFSET_MASK;

接着为“core符号”及其对应的字符串在core空间分配偏移,其实就是只将部分符号表在core空间分配偏移,遍历符号表,对每个符号表项调用is_core_symbol()函数判断是否为“core符号”,如果是,为core符号对应的字符串分配空间,字符串空间记录在strtab_size中。

	src = (void *)info->hdr + symsect->sh_offset;
	nsrc = symsect->sh_size / sizeof(*src);
        printk(KERN_ERR "symb num : %d\n", nsrc);

	/* strtab always starts with a nul, so offset 0 is the empty string. */
	strtab_size = 1;

	/* Compute total space required for the core symbols' strtab. */
	for (ndst = i = 0; i < nsrc; i++) {
		if (i == 0 ||
		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
			ndst++;
		}
	}

下面这三句就为core空间的符号表与字符串表分配好了偏移。(注意:分配的偏移没有记录在sh_entsize中,只是记录在info结构体的symoffs和stroffs中,也就是说这里只是在core空间为符号表与字符串表预留好位置,后面move_module()搬移段的时候并不会把符号表、字符串表复制到core空间来)

	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
	info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
	mod->core_size += strtab_size;

最后为字符串表分配init空间的偏移。

	strsect->sh_flags |= SHF_ALLOC;
	strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
					 info->index.str) | INIT_OFFSET_MASK;

返回layout_and_allocate()函数。调用move_module函数进行虚拟空间的实际申请,和段的加载操作。

/*
 * Allocate the core and init regions and copy every SHF_ALLOC section to
 * the offset chosen for it during layout.
 *
 * On return each copied section's sh_addr holds its final address, and
 * mod->module_core / mod->module_init point at the two regions.  Note
 * that @mod still refers to the struct module in the temporary image.
 *
 * Returns 0 on success or -ENOMEM if either region cannot be allocated
 * (the core region is released again if the init allocation fails).
 */
static int move_module(struct module *mod, struct load_info *info)
{
	int i;
	void *ptr;

	/* Do the allocs. */
	ptr = module_alloc_update_bounds(mod->core_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. Just mark it as not being a
	 * leak.
	 */
	kmemleak_not_leak(ptr);
	if (!ptr)
		return -ENOMEM;

	memset(ptr, 0, mod->core_size);
	mod->module_core = ptr;

	ptr = module_alloc_update_bounds(mod->init_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. This block doesn't need to be
	 * scanned as it contains data and code that will be freed
	 * after the module is initialized.
	 */
	kmemleak_ignore(ptr);
	if (!ptr && mod->init_size) {
		module_free(mod, mod->module_core);
		return -ENOMEM;
	}
	memset(ptr, 0, mod->init_size);
	mod->module_init = ptr;

	/* Transfer each section which specifies SHF_ALLOC */
	pr_debug("final section addresses:\n");
	for (i = 0; i < info->hdr->e_shnum; i++) {
		void *dest;
		Elf_Shdr *shdr = &info->sechdrs[i];

		if (!(shdr->sh_flags & SHF_ALLOC))
			continue;

		/* sh_entsize holds the offset; the mask bit selects the region. */
		if (shdr->sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + shdr->sh_entsize;
		/* Debug trace: dest is a pointer, so print it with %p. */
		printk(KERN_ERR "name: %s, dest : %p\n",
		       info->secstrings + shdr->sh_name, dest);
		/* SHT_NOBITS sections have no file data; the region is zeroed. */
		if (shdr->sh_type != SHT_NOBITS)
			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
		/* Update sh_addr to point to copy in image. */
		shdr->sh_addr = (unsigned long)dest;
		pr_debug("\t0x%lx %s\n",
			 (long)shdr->sh_addr, info->secstrings + shdr->sh_name);
	}

	return 0;
}

先为core空间申请一块大小为core_size的内存,将其首地址赋值给struct module结构体的module_core成员:

    ptr = module_alloc_update_bounds(mod->core_size);

	mod->module_core = ptr;
再为init空间申请一块大小为init_size的内存,将其首地址赋值给struct module结构体的module_init成员:

	ptr = module_alloc_update_bounds(mod->init_size);
	mod->module_init = ptr;
下面的for循环对每个有SHF_ALLOC标记的段分配绝对虚拟地址(前面分配的只是各个段相对于未来要分配的虚拟地址的偏移,也就是相对于module_core和module_init的偏移)。分配绝对虚拟地址很简单,将申请的虚拟空间的地址(分别保存在module_core和module_init中)直接加上之前分配好的偏移量就行了。如下:(符号表和字符串表的绝对虚拟地址都被分配到了init空间内,所以后面搬移的时候是把这两个表搬移到了init空间而非core空间)

		if (shdr->sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + shdr->sh_entsize;
开始段的搬移,将段从临时内核空间,搬移到运行时的虚拟地址上去:

		if (shdr->sh_type != SHT_NOBITS)
			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
最后把绝对虚拟地址赋值给相应段表项的sh_addr成员。返回。

到目前为止,ko文件中需要加载的段,已经被加载进了内核空间,每个被加载的段都有了自己的运行时地址。但现在还不能运行,因为代码中对符号的引用还没有修正,也就是还没有进行符号重定位。前面说过ko文件类似于.o文件,.o文件对全局符号的引用都是待重定位的,是需要链接器对符号进行链接的,ko文件也一样。只不过ko文件代码中对符号的引用是由内核来进行重定位的。

move_module()返回后,返回到layout_and_allocate()函数中。layout_and_allocate()函数最后将mod指针变量重新指向搬移后的.gnu.linkonce.this_module段的虚拟地址值。

	mod = (void *)info->sechdrs[info->index.mod].sh_addr;

现在layout_and_allocate()函数执行完了,返回到load_module函数继续往下执行,

	find_module_sections(mod, &info);
这主要是对info进一步初始化。

接着调用simplify_symbols()函数,这个函数将符号表里的符号的绝对地址写入到st_value域中(符号表、字符串表现在都在init空间了)。

for循环遍历init空间的符号表,分析每个符号表项的st_shndx域,st_shndx通常表示符号所在的段,但它有三个特殊值:SHN_ABS,SHN_COMMON,SHN_UNDEF。所以函数中分了4种case来进行处理。SHN_COMMON和SHN_ABS的case先不考虑,因为我们的代码中的符号主要分两种,模块内的符号和内核export出的符号(如printk),这两种符号的st_shndx大部分对应default和SHN_UNDEF这两种case。

对于模块内的符号,程序进入default进行处理,处理很简单,st_value = st_value + 符号所在段的绝对虚拟地址(st_value中原本保存着符号在其所在段的offset)。这样一来,st_value中现在保存的就是符号的绝对虚拟地址了。

对于内核导出的符号,由于它在模块中没有定义,所以它的st_shndx为SHN_UNDEF。对于SHN_UNDEF 这种case的处理过程如下:

1:调用resolve_symbol_wait()函数解析内核符号,这个函数返回一个struct kernel_symbol结构体。

2:将这个结构体的value成员直接赋值给st_value。

结构体定义在include/linux/export.h中:

/* One exported symbol: the pair looked up when a module import is resolved. */
struct kernel_symbol {
	/* Address of the symbol; copied into st_value by simplify_symbols(). */
	unsigned long value;
	/* Symbol name. */
	const char *name;
};

static int simplify_symbols(struct module *mod, const struct load_info *info)
{
	Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
	Elf_Sym *sym = (void *)symsec->sh_addr;
	unsigned long secbase;
	unsigned int i;
	int ret = 0;
	const struct kernel_symbol *ksym;

	for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
		const char *name = info->strtab + sym[i].st_name;
        printk(KERN_ERR "symb name: %s\n", name);
		switch (sym[i].st_shndx) {
		case SHN_COMMON:
			/* We compiled with -fno-common.  These are not
			   supposed to happen.  */
			pr_debug("Common symbol: %s\n", name);
			printk("%s: please compile with -fno-common\n",
			       mod->name);
			ret = -ENOEXEC;
			break;

		case SHN_ABS:
			/* Don't need to do anything */
			pr_debug("Absolute symbol: 0x%08lx\n",
			       (long)sym[i].st_value);
			break;

		case SHN_UNDEF:
            printk(KERN_ERR "Undefine symb!!\n");
			ksym = resolve_symbol_wait(mod, info, name);
            printk(KERN_ERR "resolve ok!\n");
			/* Ok if resolved.  */
			if (ksym && !IS_ERR(ksym)) {
				sym[i].st_value = ksym->value;
				break;
			}

			/* Ok if weak.  */
			if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
				break;

			printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
			       mod->name, name, PTR_ERR(ksym));
			ret = PTR_ERR(ksym) ?: -ENOENT;
			break;

		default:
			/* Divert to percpu allocation if a percpu var. */
			if (sym[i].st_shndx == info->index.pcpu)
				secbase = (unsigned long)mod_percpu(mod);
			else
				secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
            printk(KERN_ERR "section base: %x\n", secbase);
			sym[i].st_value += secbase;
            printk(KERN_ERR "st_value: %x\n", sym[i].st_value);
			break;
		}
	}

	return ret;
}

resolve_symbol_wait()函数用来解析内核导出的符号。不是所有的内核符号都默认导出的,默认内核中的符号在运行时是对外“不可见的”,而内核本身对那些符号地址的引用,都是静态编译链接内核时,链接器写进去的。所以外部模块无法得到内核符号的地址。如果外部模块想要引用内核符号,除非内核将符号地址导出来!内核中的符号可以通过EXPORT_SYMBOL()宏来导出,这个宏就是将符号信息保存在一个struct kernel_symbol结构体中,再将这个结构体编译进内核的一个特殊段,以后如果外部想引用这个符号,只需要在这个段中寻找对应的符号的struct kernel_symbol结构体就行了。更详细的细节google上有很多。

现在来看这个resolve_symbol_wait()函数:

/*
 * Resolve an exported symbol, retrying for as long as resolve_symbol()
 * reports -EBUSY.
 *
 * Sleeps interruptibly on module_wq, with a limit of 30 * HZ, until
 * resolve_symbol() returns anything other than ERR_PTR(-EBUSY).  When the
 * wait returns a value <= 0 (presumably timeout or signal - confirm
 * against wait_event_interruptible_timeout()), a warning naming the
 * owning module is logged.
 *
 * Returns whatever the last resolve_symbol() call returned: the symbol,
 * NULL, or an ERR_PTR() value.
 */
static const struct kernel_symbol *
resolve_symbol_wait(struct module *mod,
		    const struct load_info *info,
		    const char *name)
{
	const struct kernel_symbol *ksym;
	/* Filled in by resolve_symbol(); used only for the warning below. */
	char owner[MODULE_NAME_LEN];

	/* The wait condition itself performs the lookup and stores ksym. */
	if (wait_event_interruptible_timeout(module_wq,
			!IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
			|| PTR_ERR(ksym) != -EBUSY,
					     30 * HZ) <= 0) {
		printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
		       mod->name, owner);
	}
	return ksym;
}

resolve_symbol_wait()函数会先调用resolve_symbol()解析内核导出符号。

因此进入resolve_symbol()函数:

/*
 * Look up one exported symbol on behalf of @mod.
 *
 * With module_mutex held: find the symbol, verify its version against the
 * module's version section, and take a reference on the owning module.
 * Whenever the symbol was found, the owner's name is copied into
 * @ownername so the caller can still report it after the lock is dropped.
 *
 * Returns the symbol, NULL if no such symbol is exported, ERR_PTR(-EINVAL)
 * on a version mismatch, or an ERR_PTR() carrying the ref_module() error.
 */
static const struct kernel_symbol *resolve_symbol(struct module *mod,
						  const struct load_info *info,
						  const char *name,
						  char ownername[])
{
	const struct kernel_symbol *sym;
	const unsigned long *crc;
	struct module *owner;

	printk(KERN_ERR "resolve symbol: %s...\n", name);

	mutex_lock(&module_mutex);

	sym = find_symbol(name, &owner, &crc,
			  !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
	if (sym) {
		if (!check_version(info->sechdrs, info->index.vers, name, mod,
				   crc, owner)) {
			sym = ERR_PTR(-EINVAL);
		} else {
			int err = ref_module(mod, owner);

			if (err)
				sym = ERR_PTR(err);
		}

		/* We must make copy under the lock if we failed to get ref. */
		strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
	}

	mutex_unlock(&module_mutex);
	return sym;
}

resolve_symbol()函数继续调用find_symbol()函数,返回一个struct kernel_symbol结构指针。

/*
 * Search every exported-symbol table for @name.
 *
 * @name:  symbol to look for
 * @owner: if non-NULL, receives the owner recorded by the search
 * @crc:   if non-NULL, receives the CRC pointer recorded by the search
 * @gplok: forwarded to the per-symbol check via the search argument
 * @warn:  forwarded to the per-symbol check via the search argument
 *
 * Returns the matching kernel_symbol, or NULL when no table yields an
 * acceptable match (in which case *owner and *crc are left untouched).
 */
const struct kernel_symbol *find_symbol(const char *name,
					struct module **owner,
					const unsigned long **crc,
					bool gplok,
					bool warn)
{
	struct find_symbol_arg query;

	query.name = name;
	query.gplok = gplok;
	query.warn = warn;

	if (!each_symbol_section(find_symbol_in_section, &query)) {
		pr_debug("Failed to find symbol %s\n", name);
		return NULL;
	}

	if (owner)
		*owner = query.owner;
	if (crc)
		*crc = query.crc;

	return query.sym;
}

进入find_symbol()函数,函数内部先构造一个搜索符号的参数 fsa,并根据要解析的内核符号初始化这个参数。接着以&fsa和find_symbol_in_section这个函数指针为参数调用each_symbol_section()函数。

/*
 * Run @fn over every exported-symbol table until it reports a hit.
 *
 * The tables delimited by the __start___ksymtab / __stop___ksymtab family
 * of symbols are visited first (with a NULL owner), followed by the
 * tables of each module on the modules list (with that module as owner).
 *
 * Returns true as soon as @fn returns true for some table, false if no
 * table produced a hit.
 */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
				    struct module *owner,
				    void *data),
			 void *data)
{
	static const struct symsearch builtin_tables[] = {
		{ __start___ksymtab, __stop___ksymtab,
		  __start___kcrctab,
		  NOT_GPL_ONLY, false },
		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
		  __start___kcrctab_gpl,
		  GPL_ONLY, false },
		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
		  __start___kcrctab_gpl_future,
		  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
		{ __start___ksymtab_unused, __stop___ksymtab_unused,
		  __start___kcrctab_unused,
		  NOT_GPL_ONLY, true },
		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
		  __start___kcrctab_unused_gpl,
		  GPL_ONLY, true },
#endif
	};
	struct module *mod;

	/* First the tables that are not owned by any module. */
	if (each_symbol_in_section(builtin_tables, ARRAY_SIZE(builtin_tables),
				   NULL, fn, data))
		return true;

	/* Then the tables of every module on the list. */
	list_for_each_entry_rcu(mod, &modules, list) {
		struct symsearch mod_tables[] = {
			{ mod->syms, mod->syms + mod->num_syms,
			  mod->crcs,
			  NOT_GPL_ONLY, false },
			{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
			  mod->gpl_crcs,
			  GPL_ONLY, false },
			{ mod->gpl_future_syms,
			  mod->gpl_future_syms + mod->num_gpl_future_syms,
			  mod->gpl_future_crcs,
			  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
			{ mod->unused_syms,
			  mod->unused_syms + mod->num_unused_syms,
			  mod->unused_crcs,
			  NOT_GPL_ONLY, true },
			{ mod->unused_gpl_syms,
			  mod->unused_gpl_syms + mod->num_unused_gpl_syms,
			  mod->unused_gpl_crcs,
			  GPL_ONLY, true },
#endif
		};

		if (each_symbol_in_section(mod_tables, ARRAY_SIZE(mod_tables),
					   mod, fn, data))
			return true;
	}

	return false;
}

这个函数里有个static数组,数组元素是struct symsearch结构体,这个结构体是用来描述一个内核符号表的,start 和 stop 分别描述表的头尾,crcs暂时不管,licence描述该符号表的许可证类型(例如是否只允许GPL模块使用),unused表示该表中的符号是否被标记为“未使用”。

/* Describes one table of exported symbols to be searched. */
struct symsearch {
	/* First entry of the table. */
	const struct kernel_symbol *start;
	/* One past the last entry of the table. */
	const struct kernel_symbol *stop;
	/* CRC array associated with the table. */
	const unsigned long *crcs;
	/* Licence class of the table, consulted by check_symbol(). */
	enum {
		NOT_GPL_ONLY,
		GPL_ONLY,
		WILL_BE_GPL_ONLY,
	} licence;
	/* True for the "unused" tables; check_symbol() warns when one is hit. */
	bool unused;
};

可以看出内核定义了3~5个内核符号表(是否包含后两个取决于CONFIG_UNUSED_SYMBOLS)。这些描述符号表的结构体的成员都已经赋了值,像__start___ksymtab,__stop___ksymtab等等,这些值定义在arch/arm/kernel/vmlinux.lds中,也就是定义在链接脚本中。前面说过,内核符号通过EXPORT_SYMBOL()宏导出到一个特殊段,在链接内核的时候,链接脚本就将这些段合并为几个内核符号表段,并定义了几个标志开始和结束地址的符号,如__start___ksymtab,__stop___ksymtab就标识了__ksymtab符号表段的开始地址和结束地址。

	static const struct symsearch arr[] = {
		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
		  NOT_GPL_ONLY, false },
		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
		  __start___kcrctab_gpl,
		  GPL_ONLY, false },
		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
		  __start___kcrctab_gpl_future,
		  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
		{ __start___ksymtab_unused, __stop___ksymtab_unused,
		  __start___kcrctab_unused,
		  NOT_GPL_ONLY, true },
		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
		  __start___kcrctab_unused_gpl,
		  GPL_ONLY, true },
#endif
	};

接着回到each_symbol_section()函数,函数调用each_symbol_in_section(),参数是这个static数组指针和数组的size,owner为NULL,*fn就是 find_symbol_in_section,data其实是&fsa这个指针。each_symbol_in_section()这个函数做的事很简单,就是遍历arr数组中的每个内核符号表,调用find_symbol_in_section函数在每个内核符号表里搜索&fsa指定的符号。

/*
 * Apply @fn to each of the @arrsize tables in @arr, in order.
 *
 * @owner and @data are handed to @fn unchanged.  Stops at, and returns
 * true for, the first table on which @fn returns true; returns false if
 * @fn rejects every table.
 */
static bool each_symbol_in_section(const struct symsearch *arr,
				   unsigned int arrsize,
				   struct module *owner,
				   bool (*fn)(const struct symsearch *syms,
					      struct module *owner,
					      void *data),
				   void *data)
{
	unsigned int idx = 0;

	while (idx < arrsize) {
		const struct symsearch *table = &arr[idx];

		if (fn(table, owner, data))
			return true;
		idx++;
	}

	return false;
}

find_symbol_in_section函数如下,先将data指针转化为struct find_symbol_arg结构体指针,再调用bsearch在syms描述的内核符号表里搜索fsa指定的内核符号,我们假设搜索到了这个符号,搜索的结果就存放在struct kernel_symbol这个结构体中。

/*
 * Look up the symbol named by the find_symbol_arg in @data inside the
 * single symbol table described by @syms.
 *
 * @syms:  table to search; its entries span syms->start .. syms->stop.
 * @owner: passed through to check_symbol().
 * @data:  really a struct find_symbol_arg *; fsa->name is the search key.
 *
 * Returns true only if the name is found by bsearch() and check_symbol()
 * accepts it; otherwise false.
 */
static bool find_symbol_in_section(const struct symsearch *syms,
				   struct module *owner,
				   void *data)
{
	struct find_symbol_arg *fsa = data;
	struct kernel_symbol *sym;

	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
			sizeof(struct kernel_symbol), cmp_name);
	if (sym == NULL)
		return false;

	/*
	 * Debug trace of every successful lookup.  The value is printed
	 * with %lx because kernel_symbol.value is an unsigned long; %x
	 * would mismatch the argument type.
	 */
	printk(KERN_ERR "name: %s, sym value: %lx\n", fsa->name, sym->value);

	/* The symbol's index within the table is what check_symbol() wants. */
	return check_symbol(syms, owner, sym - syms->start, data);
}

bsearch函数就是用“二分法”查表(所以要求被查的表已经按比较函数cmp的顺序排好序),这里不再细说。

/*
 * bsearch - binary search over an array of fixed-size elements
 * @key:  item being searched for
 * @base: first element of the array
 * @num:  number of elements in the array
 * @size: size in bytes of one element
 * @cmp:  comparison callback; called as cmp(key, element) and expected to
 *        return <0, 0 or >0 when key sorts before, equal to or after the
 *        element
 *
 * Returns a pointer to a matching element, or NULL if none matches
 * (including when @num is 0).
 */
void *bsearch(const void *key, const void *base, size_t num, size_t size,
	      int (*cmp)(const void *key, const void *elt))
{
	const char *table = base;
	size_t lo = 0;
	size_t hi = num;

	/* Invariant: any match lies in the half-open window [lo, hi). */
	while (lo < hi) {
		size_t probe = lo + (hi - lo) / 2;
		const char *elt = table + probe * size;
		int order = cmp(key, elt);

		if (order == 0)
			return (void *)elt;

		if (order < 0)
			hi = probe;
		else
			lo = probe + 1;
	}

	return NULL;
}
bsearch返回后,调用check_symbol()函数,这个函数在返回前将搜索到的内核导出符号的struct kernel_symbol结构指针赋值给fsa->sym。

/*
 * Decide whether the symbol at index @symnum of table @syms may be used by
 * the requester described in @data (a struct find_symbol_arg *), and if so
 * record the lookup result in it.
 *
 * Returns false, leaving the result fields untouched, when the requester
 * is not gplok and the table is GPL_ONLY.  In every other case it fills in
 * fsa->owner, fsa->crc and fsa->sym and returns true, possibly after
 * printing warnings when fsa->warn is set.
 */
static bool check_symbol(const struct symsearch *syms,
				 struct module *owner,
				 unsigned int symnum, void *data)
{
	struct find_symbol_arg *fsa = data;

	/* Requesters that are not gplok never get GPL-only symbols. */
	if (!fsa->gplok && syms->licence == GPL_ONLY)
		return false;

	/* Symbols that will become GPL-only are still granted, with a warning. */
	if (!fsa->gplok && syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
		printk(KERN_WARNING "Symbol %s is being used "
		       "by a non-GPL module, which will not "
		       "be allowed in the future\n", fsa->name);
		printk(KERN_WARNING "Please see the file "
		       "Documentation/feature-removal-schedule.txt "
		       "in the kernel source tree for more details.\n");
	}

#ifdef CONFIG_UNUSED_SYMBOLS
	if (fsa->warn && syms->unused) {
		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
		       "however this module is using it.\n", fsa->name);
		printk(KERN_WARNING
		       "This symbol will go away in the future.\n");
		printk(KERN_WARNING
		       "Please evalute if this is the right api to use and if "
		       "it really is, submit a report the linux kernel "
		       "mailinglist together with submitting your code for "
		       "inclusion.\n");
	}
#endif

	/* Hand the match back to the caller through the argument block. */
	fsa->sym = &syms->start[symnum];
	fsa->crc = symversion(syms->crcs, symnum);
	fsa->owner = owner;

	return true;
}

check_symbol()函数返回后,find_symbol_in_section函数也返回true,并向上层层返回到find_symbol(),find_symbol()函数将fsa->sym返回至resolve_symbol()函数,接着resolve_symbol()函数调用ref_module(),第一个参数a是现在正在加载的模块,第二个参数b是a模块用到的符号所属的模块;因为这个符号是在内核自身的符号表中找到的(owner为NULL),所以此处b为NULL。

/*
 * Record that module @a uses module @b.
 *
 * Nothing is done (and 0 is returned) when @b is NULL or when
 * already_uses(a, b) reports that the dependency is already known.
 * Otherwise a reference on @b is taken with strong_try_module_get() and
 * the usage is registered with add_module_usage(); if registration fails
 * the reference is dropped again with module_put().
 *
 * Returns 0 on success or the error returned by the failing helper.
 */
int ref_module(struct module *a, struct module *b)
{
	int err;

	if (b == NULL)
		return 0;
	if (already_uses(a, b))
		return 0;

	/* If module isn't available, we fail. */
	err = strong_try_module_get(b);
	if (err)
		return err;

	err = add_module_usage(a, b);
	if (err)
		module_put(b);	/* undo the reference taken above */

	return err;
}

因为这里b = NULL,所以ref_module()直接返回0。

返回到resolve_symbol(),resolve_symbol()最后将描述内核导出符号的struct kernel_symbol结构体的指针sym返回。

现在我们已经返回到resolve_symbol_wait()函数了,接下来调用wait_event_interruptible_timeout(),因为我们刚才已经得到了内核导出符号,那么现在就不用睡眠等待,否则会睡眠。最后resolve_symbol_wait()返回resolve_symbol()刚刚返回的sym。

向上返回到simplify_symbols(),前面说过,将这个struct kernel_symbol结构体的value成员直接赋值给st_value。然后返回。

这样绕了一大圈,simplify_symbols()终于返回了。simplify_symbols()返回后,在init空间的符号表的每个符号表项中,st_value域保存的就是符号的绝对虚拟地址值了。

接下来load_module()调用apply_relocations()进行真正的重定位工作。

/*
 * Apply every relocation section of the module image described by @info.
 *
 * Walks section headers 1 .. e_shnum-1.  A section is processed only if
 * its sh_info names a valid section index and that target section has
 * SHF_ALLOC set.  SHT_REL sections go to apply_relocate(), SHT_RELA
 * sections to apply_relocate_add(); other section types are ignored.
 *
 * Returns 0 on success, or the first negative error returned by one of
 * the arch relocation helpers (processing stops at that point).
 */
static int apply_relocations(struct module *mod, const struct load_info *info)
{
	unsigned int i;
	int err = 0;

	/* Now do relocations. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		unsigned int infosec = info->sechdrs[i].sh_info;

		/* Not a valid relocation section? */
		if (infosec >= info->hdr->e_shnum)
			continue;

		/* Don't bother with non-allocated sections */
		if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
			continue;

		if (info->sechdrs[i].sh_type == SHT_REL) {
			printk(KERN_ERR "relocate section : %s,  type: %s\n",
			       info->secstrings + info->sechdrs[i].sh_name,
			       "SHT_REL");
			err = apply_relocate(info->sechdrs, info->strtab,
					     info->index.sym, i, mod);
			/*
			 * Debug trace: mod->init is a function pointer, so
			 * it must be printed with %p rather than %x.
			 */
			printk(KERN_ERR "mod init: %p\n", mod->init);
		} else if (info->sechdrs[i].sh_type == SHT_RELA) {
			printk(KERN_ERR "relocate section : %s,  type: %s\n",
			       info->secstrings + info->sechdrs[i].sh_name,
			       "SHT_RELA");
			err = apply_relocate_add(info->sechdrs, info->strtab,
						 info->index.sym, i, mod);
		}
		if (err < 0)
			break;
	}
	return err;
}

for循环遍历临时内核空间的各个段,筛选出其中有效的重定位表段,对重定位表所作用的段进行重定位。重定位段的类型主要有SHT_REL和SHT_RELA,以SHT_REL为例,当重定位表段的类型是SHT_REL时,调用apply_relocate()进行重定位。apply_relocate()位于arch/arm/kernel/module.c中。


/*
 * Apply the REL-type relocation section @relindex to the section it
 * targets (the section named by its sh_info).
 *
 * @sechdrs:  section header table of the module image
 * @strtab:   string table used to resolve symbol names for messages
 * @symindex: index of the symbol table section
 * @relindex: index of the relocation section to process
 * @module:   module being loaded (only its name is used here, in messages)
 *
 * For every relocation entry the referenced symbol and the location to
 * patch are looked up, then the word(s) at that location are rewritten
 * according to the relocation type, using sym->st_value as the symbol
 * address.
 *
 * Returns 0 on success, -ENOEXEC on a bad symbol index, an out-of-bounds
 * offset, a branch target that is out of range, or an unknown relocation
 * type.
 */
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
	       unsigned int relindex, struct module *module)
{
	Elf32_Shdr *symsec = sechdrs + symindex;
	Elf32_Shdr *relsec = sechdrs + relindex;
	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
	Elf32_Rel *rel = (void *)relsec->sh_addr;
	unsigned int i;

	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
		unsigned long loc;
		Elf32_Sym *sym;
		const char *symname;
		s32 offset;
#ifdef CONFIG_THUMB2_KERNEL
		u32 upper, lower, sign, j1, j2;
#endif

		/*
		 * 'offset' is first used as the symbol table index of this
		 * relocation, and later reused as the computed displacement.
		 *
		 * NOTE(review): the check below uses '>', so an index equal
		 * to the number of symbol entries is accepted -- confirm
		 * whether '>=' was intended.
		 */
		offset = ELF32_R_SYM(rel->r_info);
		if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
			pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
				module->name, relindex, i);
			return -ENOEXEC;
		}

		sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
		symname = strtab + sym->st_name;
		/*
		 * The patched location must leave room for a 32-bit word
		 * inside the destination section.
		 *
		 * NOTE(review): if r_offset is an unsigned ELF type the
		 * '< 0' test can never be true, and 'sh_size - sizeof(u32)'
		 * would wrap for sections smaller than 4 bytes -- confirm.
		 */
		if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
			pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
			       module->name, relindex, i, symname,
			       rel->r_offset, dstsec->sh_size);
			return -ENOEXEC;
		}

		/* Address of the word to patch. */
		loc = dstsec->sh_addr + rel->r_offset;

		switch (ELF32_R_TYPE(rel->r_info)) {
		case R_ARM_NONE:
			/* ignore */
			break;

		case R_ARM_ABS32:
			/* Add the symbol address to the value already stored at loc. */
			*(u32 *)loc += sym->st_value;
			break;

		case R_ARM_PC24:
		case R_ARM_CALL:
		case R_ARM_JUMP24:
			/*
			 * Take the low 24 bits of the instruction as a word
			 * offset, convert to bytes and sign-extend from
			 * bit 25.
			 */
			offset = (*(u32 *)loc & 0x00ffffff) << 2;
			if (offset & 0x02000000)
				offset -= 0x04000000;

			/* Make it relative to the patched location. */
			offset += sym->st_value - loc;
			/* Must be word aligned and fit in the 26-bit signed range. */
			if (offset & 3 ||
			    offset <= (s32)0xfe000000 ||
			    offset >= (s32)0x02000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			offset >>= 2;

			/* Keep the top byte of the instruction, replace the 24-bit field. */
			*(u32 *)loc &= 0xff000000;
			*(u32 *)loc |= offset & 0x00ffffff;
			break;

	       case R_ARM_V4BX:
		       /* Preserve Rm and the condition code. Alter
			* other bits to re-code instruction as
			* MOV PC,Rm.
			*/
		       *(u32 *)loc &= 0xf000000f;
		       *(u32 *)loc |= 0x01a0f000;
		       break;

		case R_ARM_PREL31:
			/* Location-relative value; only the low 31 bits are stored. */
			offset = *(u32 *)loc + sym->st_value - loc;
			*(u32 *)loc = offset & 0x7fffffff;
			break;

		case R_ARM_MOVW_ABS_NC:
		case R_ARM_MOVT_ABS:
			/*
			 * Rebuild the 16-bit immediate from instruction bits
			 * [19:16] and [11:0], sign-extend it from 16 bits and
			 * add the symbol address.
			 */
			offset = *(u32 *)loc;
			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
			offset = (offset ^ 0x8000) - 0x8000;

			offset += sym->st_value;
			/* MOVT receives the upper half of the result. */
			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
				offset >>= 16;

			/* Scatter the low 16 bits back into the two immediate fields. */
			*(u32 *)loc &= 0xfff0f000;
			*(u32 *)loc |= ((offset & 0xf000) << 4) |
					(offset & 0x0fff);
			break;

#ifdef CONFIG_THUMB2_KERNEL
		case R_ARM_THM_CALL:
		case R_ARM_THM_JUMP24:
			/* The instruction is read as two consecutive halfwords. */
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * 25 bit signed address range (Thumb-2 BL and B.W
			 * instructions):
			 *   S:I1:I2:imm10:imm11:0
			 * where:
			 *   S     = upper[10]   = offset[24]
			 *   I1    = ~(J1 ^ S)   = offset[23]
			 *   I2    = ~(J2 ^ S)   = offset[22]
			 *   imm10 = upper[9:0]  = offset[21:12]
			 *   imm11 = lower[10:0] = offset[11:1]
			 *   J1    = lower[13]
			 *   J2    = lower[11]
			 */
			sign = (upper >> 10) & 1;
			j1 = (lower >> 13) & 1;
			j2 = (lower >> 11) & 1;
			offset = (sign << 24) | ((~(j1 ^ sign) & 1) << 23) |
				((~(j2 ^ sign) & 1) << 22) |
				((upper & 0x03ff) << 12) |
				((lower & 0x07ff) << 1);
			/* Sign-extend from bit 24, then make it location-relative. */
			if (offset & 0x01000000)
				offset -= 0x02000000;
			offset += sym->st_value - loc;

			/*
			 * For function symbols, only Thumb addresses are
			 * allowed (no interworking).
			 *
			 * For non-function symbols, the destination
			 * has no specific ARM/Thumb disposition, so
			 * the branch is resolved under the assumption
			 * that interworking is not required.
			 */
			if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
				!(offset & 1)) ||
			    offset <= (s32)0xff000000 ||
			    offset >= (s32)0x01000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			/* Re-encode S, J1, J2 and the immediates into both halfwords. */
			sign = (offset >> 24) & 1;
			j1 = sign ^ (~(offset >> 23) & 1);
			j2 = sign ^ (~(offset >> 22) & 1);
			*(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
					    ((offset >> 12) & 0x03ff));
			*(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
						  (j1 << 13) | (j2 << 11) |
						  ((offset >> 1) & 0x07ff));
			break;

		case R_ARM_THM_MOVW_ABS_NC:
		case R_ARM_THM_MOVT_ABS:
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * MOVT/MOVW instructions encoding in Thumb-2:
			 *
			 * i	= upper[10]
			 * imm4	= upper[3:0]
			 * imm3	= lower[14:12]
			 * imm8	= lower[7:0]
			 *
			 * imm16 = imm4:i:imm3:imm8
			 */
			offset = ((upper & 0x000f) << 12) |
				((upper & 0x0400) << 1) |
				((lower & 0x7000) >> 4) | (lower & 0x00ff);
			/* Sign-extend the 16-bit immediate and add the symbol address. */
			offset = (offset ^ 0x8000) - 0x8000;
			offset += sym->st_value;

			/* MOVT receives the upper half of the result. */
			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
				offset >>= 16;

			*(u16 *)loc = (u16)((upper & 0xfbf0) |
					    ((offset & 0xf000) >> 12) |
					    ((offset & 0x0800) >> 1));
			*(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
						  ((offset & 0x0700) << 4) |
						  (offset & 0x00ff));
			break;
#endif

		default:
			/* Any relocation type not handled above aborts the load. */
			printk(KERN_ERR "%s: unknown relocation: %u\n",
			       module->name, ELF32_R_TYPE(rel->r_info));
			return -ENOEXEC;
		}
	}
	return 0;
}

这个函数所做的大概工作就是遍历重定位表项,对每个重定位项,找到重定位入口地址,再根据符号表得到符号的绝对虚拟地址,再根据重定位入口的类型,进行对应的地址修正。总之,重定位完成后,代码中对符号的引用,都将会被修正为符号在内核的正确地址。

这里有一点要注意下,前面不是提到.gnu.linkonce.this_module段吗?这个段也有一个自己的重定位表,叫.rel.gnu.linkonce.this_module,这个重定位表里只有两个重定位表项,还记得前面提到的struct module结构体吗?下文会给出struct module结构体的定义。module结构内有两个成员,init和exit(其中exit成员只有在打开CONFIG_MODULE_UNLOAD时才存在)。这两个成员存放着模块的__init函数和__exit函数的指针,.rel.gnu.linkonce.this_module重定位表中的两个重定位项就分别对应着.gnu.linkonce.this_module段中的这两个指针!

也就是说,这两个指针的值也会被apply_relocate()函数重定位,重定位这两个指针有什么用呢?因为后面将会用这两个指针,调用模块的__init函数和__exit函数。

/*
 * In-kernel descriptor of a loadable module.
 *
 * As described in the surrounding article, a .ko file carries one instance
 * of this structure in its .gnu.linkonce.this_module section, and the
 * init/exit function pointers below are fixed up by relocation before
 * they are called.  Several groups of members exist only when the
 * corresponding CONFIG_* option is enabled.
 */
struct module
{
	/* Current state of the module (e.g. MODULE_STATE_COMING/GOING) */
	enum module_state state;

	/* Member of list of modules */
	struct list_head list;

	/* Unique handle for this module */
	char name[MODULE_NAME_LEN];

	/* Sysfs stuff. */
	struct module_kobject mkobj;
	struct module_attribute *modinfo_attrs;
	const char *version;
	const char *srcversion;
	struct kobject *holders_dir;

	/* Exported symbols */
	const struct kernel_symbol *syms;
	const unsigned long *crcs;
	unsigned int num_syms;

	/* Kernel parameters. */
	struct kernel_param *kp;
	unsigned int num_kp;

	/* GPL-only exported symbols. */
	unsigned int num_gpl_syms;
	const struct kernel_symbol *gpl_syms;
	const unsigned long *gpl_crcs;

#ifdef CONFIG_UNUSED_SYMBOLS
	/* unused exported symbols. */
	const struct kernel_symbol *unused_syms;
	const unsigned long *unused_crcs;
	unsigned int num_unused_syms;

	/* GPL-only, unused exported symbols. */
	unsigned int num_unused_gpl_syms;
	const struct kernel_symbol *unused_gpl_syms;
	const unsigned long *unused_gpl_crcs;
#endif

	/* symbols that will be GPL-only in the near future. */
	const struct kernel_symbol *gpl_future_syms;
	const unsigned long *gpl_future_crcs;
	unsigned int num_gpl_future_syms;

	/* Exception table */
	unsigned int num_exentries;
	struct exception_table_entry *extable;

	/* Startup function. */
	int (*init)(void);

	/* If this is non-NULL, vfree after init() returns */
	void *module_init;

	/* Here is the actual code + data, vfree'd on unload. */
	void *module_core;

	/* Here are the sizes of the init and core sections */
	unsigned int init_size, core_size;

	/* The size of the executable code in each section.  */
	unsigned int init_text_size, core_text_size;

	/* Size of RO sections of the module (text+rodata) */
	unsigned int init_ro_size, core_ro_size;

	/* Arch-specific module values */
	struct mod_arch_specific arch;

	unsigned int taints;	/* same bits as kernel:tainted */

#ifdef CONFIG_GENERIC_BUG
	/* Support for BUG */
	unsigned num_bugs;
	struct list_head bug_list;
	struct bug_entry *bug_table;
#endif

#ifdef CONFIG_KALLSYMS
	/*
	 * We keep the symbol and string tables for kallsyms.
	 * The core_* fields below are temporary, loader-only (they
	 * could really be discarded after module init).
	 */
	Elf_Sym *symtab, *core_symtab;
	unsigned int num_symtab, core_num_syms;
	char *strtab, *core_strtab;

	/* Section attributes */
	struct module_sect_attrs *sect_attrs;

	/* Notes attributes */
	struct module_notes_attrs *notes_attrs;
#endif

	/* The command line arguments (may be mangled).  People like
	   keeping pointers to this stuff */
	char *args;

#ifdef CONFIG_SMP
	/* Per-cpu data. */
	void __percpu *percpu;
	unsigned int percpu_size;
#endif

#ifdef CONFIG_TRACEPOINTS
	/* Tracepoint pointer array and its entry count */
	unsigned int num_tracepoints;
	struct tracepoint * const *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
	/* Jump label entries and their count */
	struct jump_entry *jump_entries;
	unsigned int num_jump_entries;
#endif
#ifdef CONFIG_TRACING
	/* trace_bprintk format strings and their count */
	unsigned int num_trace_bprintk_fmt;
	const char **trace_bprintk_fmt_start;
#endif
#ifdef CONFIG_EVENT_TRACING
	/* Trace event calls and their count */
	struct ftrace_event_call **trace_events;
	unsigned int num_trace_events;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
	/* ftrace call sites and their count */
	unsigned int num_ftrace_callsites;
	unsigned long *ftrace_callsites;
#endif

#ifdef CONFIG_MODULE_UNLOAD
	/* What modules depend on me? */
	struct list_head source_list;
	/* What modules do I depend on? */
	struct list_head target_list;

	/* Who is waiting for us to be unloaded */
	struct task_struct *waiter;

	/* Destruction function. */
	void (*exit)(void);

	/* Per-cpu reference counter storage */
	struct module_ref __percpu *refptr;
#endif

#ifdef CONFIG_CONSTRUCTORS
	/* Constructor functions. */
	ctor_fn_t *ctors;
	unsigned int num_ctors;
#endif
};

继续一路返回到load_module()函数,load_module()后面的代码不看,一路返回到sys_init_module(),在sys_init_module()中,后面会调用do_one_initcall(),参数就是被重定位过的init指针。

	/* Start the module: run its (already relocated) init function, if any. */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);

这个do_one_initcall()函数会调用这个init指针所指向的函数,至此,我们模块的__init函数就被调用了。

(__init函数是指用__init前缀定义的函数,__exit函数是指用__exit前缀定义的函数,前面提过。)






你可能感兴趣的:(linux技术)