ELF: Executable and Linkable Format (Extensible Linking Format),是 UNIX-like 系统下的二进制文件格式,内核模块同样也是 ELF 格式。
重要数据成员
typedef struct elf32_hdr{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; // 文件的类型,对于驱动模块,这个值是1,表明驱动模块是一个可定位的ELF文件 Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; /* Entry point */ Elf32_Off e_phoff; Elf32_Off e_shoff; // 表明 Section header table 部分在文件中的偏移量 Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; Elf32_Half e_phnum; Elf32_Half e_shentsize; // 表明 Section header table 部分中没一个entry的大小 Elf32_Half e_shnum; // Section heaer table 中有多少个 entry Elf32_Half e_shstrndx; // 与Section header entry 中的 sh_name 一起用来指明对应的 section 的 name } Elf32_Ehdr;
2. Section header table 部分
typedef struct elf32_shdr { Elf32_Word sh_name; Elf32_Word sh_type; Elf32_Word sh_flags; Elf32_Addr sh_addr; // 表示该entry所对应的section在内存中的实际地址,在静态的文件视图中,这个值是0,当模块被加载到内核时,加载器会用该section的内存地址改写sh_addr Elf32_Off sh_offset;// 对应的section在文件视图中的偏移量 Elf32_Word sh_size; // 对应section在文件中的大小 Elf32_Word sh_link; Elf32_Word sh_info; Elf32_Word sh_addralign; Elf32_Word sh_entsize; // 主要用于由固定数量 entry 组成的表构成的 section, 如符号表, 此种情况下用来表示表中的 entry 的大小 } Elf32_Shdr;
用户空间使用 insmod 来安装一个 kernel module,首先 insmod会利用文件系统的接口来将数据读取到用户空间的一段内存,然后通过系统调用 sys_init_module 让内核去处理加载的整个过程.
sys_init_module 原型如下:
long sys_init_module(void __user *umod, unsigned long len, const char __user *uargs);
umod // 指向用户空间中 demodev.ko 文件映像数据的内存地址(用户空间地址)
len // 该文件的数据大小
uargs // 传给模块的参数在用户空间的地址
在 sys_init_module 中加载 module 主要使用 load_module 函数来完成
/*
 * Prototype of load_module(). It takes the same three arguments as
 * sys_init_module (umod, len, uargs) and returns a struct module pointer.
 */
static struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
1.3.2 struct module 简介
struct module 是内核用来管理系统加载的模块时使用的数据结构,一个struct module 对象代表着现实中的一个内核模块在Linux系统中的抽象,定义
struct module {: enum module_state state; // state: LVING module被成功加载到kernel COMMING:module正在被加载 GOING:module正在被卸载 /* Member of list of modules */ struct list_head list; // 内核用来管理所有的module /* Unique handle for this module */ char name[MODULE_NAME_LEN]; /* Sysfs stuff. */ struct module_kobject mkobj; struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; struct kobject *holders_dir; /* Exported symbols */ const struct kernel_symbol *syms; // module导出符号所在起始地址 const unsigned long *crcs; // module 导出符号校验码所在起始地址 unsigned int num_syms; /* Kernel parameters. */ struct kernel_param *kp; // 参数起始地址 unsigned int num_kp; /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const unsigned long *gpl_crcs; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ const struct kernel_symbol *unused_syms; const unsigned long *unused_crcs; unsigned int num_unused_syms; /* GPL-only, unused exported symbols. */ unsigned int num_unused_gpl_syms; const struct kernel_symbol *unused_gpl_syms; const unsigned long *unused_gpl_crcs; #endif /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const unsigned long *gpl_future_crcs; unsigned int num_gpl_future_syms; /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; /* Startup function. */ int (*init)(void); // 指向内核模块初始化函数的指针,在内核源码中由 module_init 宏制定 /* If this is non-NULL, vfree after init() returns */ void *module_init; /* Here is the actual code + data, vfree'd on unload. */ void *module_core; /* Here are the sizes of the init and core sections */ unsigned int init_size, core_size; /* The size of the executable code in each section. 
*/ unsigned int init_text_size, core_text_size; /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; /* Arch-specific module values */ struct mod_arch_specific arch; unsigned int taints; /* same bits as kernel:tainted */ #ifdef CONFIG_GENERIC_BUG /* Support for BUG */ unsigned num_bugs; struct list_head bug_list; struct bug_entry *bug_table; #endif #ifdef CONFIG_KALLSYMS /* * We keep the symbol and string tables for kallsyms. * The core_* fields below are temporary, loader-only (they * could really be discarded after module init). */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; /* Notes attributes */ struct module_notes_attrs *notes_attrs; #endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING unsigned int num_trace_bprintk_fmt; const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; unsigned long *ftrace_callsites; #endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; /* What modules do I depend on? */ struct list_head target_list; /* Who is waiting for us to be unloaded */ struct task_struct *waiter; /* Destruction function. */ void (*exit)(void); // 指向 kernel 中 module_exit 宏修饰的函数 struct module_ref __percpu *refptr; #endif #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; unsigned int num_ctors; #endif };
ELF: Executable and Linkable Format (Extensible Linking Format)。ELF file 包含:
字符串表是 ELF 文件中的一个 section 用来保存文件中各个 section 的名称和符号名,ELF 中有两个这样的字符串表 section。一个用来保存各 section 名称的字符串,另一个用来保存符号表中的每个符号名称的字符串。使用命令 readelf
readelf -h hello.ko
ELF Header: Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
Class: ELF64
Data: 2's complement, little endian
Version: 1 (current)
OS/ABI: UNIX - System V
ABI Version: 0
Type: REL (Relocatable file)
Machine: Advanced Micro Devices X86-64
Version: 0x1
Entry point address: 0x0
Start of program headers: 0 (bytes into file)
Start of section headers: 1488 (bytes into file)
Flags: 0x0
Size of this header: 64 (bytes)
Size of program headers: 0 (bytes)
Number of program headers: 0
Size of section headers: 64 (bytes)
Number of section headers: 19
Section header string table index: 16
readelf -p .shstrtab hello.ko
读取保存各 section 名称的字符串
String dump of section '.shstrtab': [ 1] .symtab
[ 9] .strtab
[ 11] .shstrtab
[ 1b] .note.gnu.build-id
[ 2e] .rela.init.text
[ 3e] .rela.exit.text
[ 4e] .rodata.str1.1
[ 5d] .modinfo
[ 66] __versions
[ 71] .data
[ 77] .rela.gnu.linkonce.this_module
[ 96] .bss
[ 9b] .comment
[ a4] .note.GNU-stack
得到该 section 地址为:
char *secstrings = (char *)hdr + entry[hdr->e_shstrndx].sh_offset;
导出符号使用
readelf -p .strtab hello.ko
[ 1] hello.c
[ 9] hello_init
[ 14] hello_exit
[ 1f] __UNIQUE_ID_license0
[ 34] hello.mod.c
[ 40] __UNIQUE_ID_srcversion1
[ 58] __module_depends
[ 69] ____versions
[ 76] __UNIQUE_ID_vermagic0
[ 8c] __this_module
[ 9a] cleanup_module
[ a9] init_module
[ b5] printk
[ bc] current_task
这就是 hello.ko 符号名称,获得 符号名称字符串地址为
遍历 section header table 找到 sh_type = SHT_SYMTAB 的 entry
.symtab SYMTAB 0000000000000000 00000bb0 0000000000000288 0000000000000018 18 22 8
entry[i].sh_link 是符号字符串表section在section header table中的索引值, 也就是说
char *strtab = (char *)hdr + entry[entry[i].sh_link].sh_offset;
Q: 为什么要进行改写?
A: 这是因为 module 已经从用户空间复制到 kernel 中,地址发生了变化,因此需要将 sh_addr update 为新的地址。
Q: 根据什么来 update?
A: 根据 hdr(内核中为模块映像申请的新地址)与该 section 在文件中的偏移量 sh_offset 来 update:
entry[i].sh_addr = (size_t)hdr + entry[i].sh_offset;
find_sec
// 根据 名字来找到相应的 sec // return index in the seciton header table static unsigned int find_sec(const struct load_info *info, const char *name) { unsigned int i;
}for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; /* Alloc bit cleared means "ignore it." */ if ((shdr->sh_flags & SHF_ALLOC) && strcmp(info->secstrings + shdr->sh_name, name) == 0) return i; } return 0;
struct module mod 类型初始化
该变量来自模块文件中的 .gnu.linkonce.this_module section。在 *.mod.c 中,定义了 module 的一些成员变量:
/*
 * The module's struct module instance, placed in the
 * ".gnu.linkonce.this_module" section. Only .name, .init, .arch and
 * (under CONFIG_MODULE_UNLOAD) .exit are initialised explicitly here.
 */
struct module __this_module
__attribute__((section(".gnu.linkonce.this_module"))) = {
.name = KBUILD_MODNAME,
// Evidently the module author's own xxx_init function
.init = init_module,
#ifdef CONFIG_MODULE_UNLOAD
.exit = cleanup_module,
#endif
.arch = MODULE_ARCH_INIT,
};
Q: 为什么要进行第二次改写?
A: 这是因为 module 加载结束后系统会释放掉 HDR 视图所在的区域. 不仅如此, 在 module 初始化完成后, INIT section 所在的区域也会free
Q: 哪些 section 需要移动?
A: 由 layout_sections 决定哪些 section 需要搬移:带有 SHF_ALLOC 标志的 section 被分成四个类别;其中名字不以 .init 开头的 section 划归为 CORE section,名字以 .init 开头的 section 划归为 INIT section。
修改 HDR 中 section header table 中对应 entry 的sh_entsize
entry[i].sh_entsize = mod->core_size;
记录总的大小
mod->core_size += entry[i].sh_size;
Q: 搬到哪里去?
A: 对ELF划分CORE INIT 后,调用vmalloc为 CORE INIT 分配对应的内存空间,地址分别记录
ptr = vmalloc();
mod->module_core = ptr;
mod->module_init = ptr;
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
处理"未解决引用" 问题本质是在模块加载期间找到当前"未解决引用"符号在内存中实际地址
EXPORT_SYMBOL 实现
/*
 * For every exported symbol, place a struct in the __ksymtab section.
 *
 * __EXPORT_SYMBOL(sym, sec) expands to:
 *   - an extern re-declaration of sym;
 *   - a __CRC_SYMBOL(sym, sec) invocation (macro defined elsewhere);
 *   - a string __kstrtab_<sym>, holding MODULE_SYMBOL_PREFIX followed by the
 *     symbol's name, placed in section "__ksymtab_strings";
 *   - a struct kernel_symbol __ksymtab_<sym>, initialised with the address of
 *     sym and that string, placed in section "___ksymtab" sec "+" <sym>.
 */
#define __EXPORT_SYMBOL(sym, sec) \
extern typeof(sym) sym; \
__CRC_SYMBOL(sym, sec) \
static const char __kstrtab_##sym[] \
__attribute__((section("__ksymtab_strings"), aligned(1))) \
= MODULE_SYMBOL_PREFIX #sym; \
static const struct kernel_symbol __ksymtab_##sym \
__used \
__attribute__((section("___ksymtab" sec "+" #sym), unused)) \
= { (unsigned long)&sym, __kstrtab_##sym }
/* EXPORT_SYMBOL(sym) is __EXPORT_SYMBOL with an empty section suffix. */
#define EXPORT_SYMBOL(sym) \
__EXPORT_SYMBOL(sym, "")
/*
 * Example: what EXPORT_SYMBOL(my_exp_func) boils down to (section attributes
 * and the CRC entry left out). The address is cast to unsigned long, exactly
 * as the macro does, because kernel_symbol.value is an unsigned long.
 */
static const char __kstrtab_my_exp_func[] = "my_exp_func";
static const struct kernel_symbol __ksymtab_my_exp_func = {
	(unsigned long)&my_exp_func, __kstrtab_my_exp_func
};
/* Layout of struct kernel_symbol: one exported symbol as an address/name pair. */
struct kernel_symbol {
	unsigned long value;	/* address of the exported symbol */
	const char *name;	/* name of the exported symbol */
};
使用 EXPORT_SYMBOL(my_exp_func) 来导出符号 "my_exp_func",通过 struct kernel_symbol 告诉外界两点信息:符号名称和地址。其实导出符号没有特殊的地方,只是被放入了特定的 section 中。
导出符号其实就是将导出符号放入一个特定的section中,那么编译工具链负责生成这些导出符号section, 并且这些section都是带有SHF_ALLOC因此在模块加载后会搬运到 CORE section 区域中.
kernel 导出符号使用了 struct module 成员
/*
 * Abridged view of struct module showing only the members used for exported
 * symbols and kernel parameters; all other members are elided.
 */
struct module {
	/* ... */

	/* Exported symbols */
	const struct kernel_symbol *syms;
	const unsigned long *crcs;
	unsigned int num_syms;

	/* ... */

	/* Kernel parameters. */
	struct kernel_param *kp;
	unsigned int num_kp;
};
在搬移到最终的 CORE section 和 INIT section 之后. kernel 通过find_module_sections
找到相应的section
// __ksymtab PROGBITS 0000000000000040 00000100
// 0000000000000010 0000000000000000 A 0 0 16
mod->kp = section_objs(info, "__param");
mod->syms = section_objs(info, "__ksymtab");
find_symbol 函数
// name: name of the symbol to look up
// owner: the module in which the symbol may reside
const struct kernel_symbol *find_symbol(const char *name, struct module **owner,
const unsigned long **crc,
bool gplok,
bool warn)
struct symsearch
/*
 * Describes one symbol-table section to be searched: there is one
 * struct symsearch for every symbol-table section that is looked at.
 *
 * start/stop bound the table's kernel_symbol entries and crcs points at the
 * matching checksum table. The identifiers are spelled with their
 * underscores so that they agree with struct kernel_symbol and with the
 * NOT_GPL_ONLY constant used by each_symbol_section().
 */
struct symsearch {
	const struct kernel_symbol *start, *stop;
	const unsigned long *crcs;
	enum {
		NOT_GPL_ONLY,
		GPL_ONLY,
		WILL_BE_GPL_ONLY,
	} licence;
	bool unused;
};
struct findsymbolarg
/*
 * Argument block for a symbol lookup. The tag is spelled find_symbol_arg,
 * matching the `struct find_symbol_arg fsa` declared in find_symbol().
 */
struct find_symbol_arg {
	/* Input */
	const char *name;
	bool gplok;
	bool warn;

	/* Output */
	struct module *owner;
	const unsigned long *crc;
	const struct kernel_symbol *sym;
};
用来做查找符号的标识参数.
find_symbol 会首先在 kernel 导出符号查找, 然后在 module 中查找.
const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, bool gplok, bool warn) { struct find_symbol_arg fsa;
}bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data){ struct module *mod; static const struct symsearch arr[] = { // kernel 导出的符号 { __start___ksymtab, __stop___ksymtab, __start___kcrctab, NOT_GPL_ONLY, false },fsa.name = name; fsa.gplok = gplok; fsa.warn = warn; if (each_symbol_section(find_symbol_in_section, &fsa)) { if (owner) *owner = fsa.owner; if (crc) *crc = fsa.crc; return fsa.sym; }
}if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) return true; // 在 kernel 导出符合没有找到 // 在 modules(全局变量保存所有的module) 中查找 list_for_each_entry_rcu(mod, &modules, list) { struct symsearch arr[] = { { mod->syms, mod->syms + mod->num_syms, mod->crcs, NOT_GPL_ONLY, false }, if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data)) return true; } return false; } }
对于 "未解决的引用" (unresolved symbol) 处理
Q: 什么是 unresolved symbol
A: 是模块编译工具链在对模块进行链接生成最终的.ko文件时,对模块中调用的一些函数,链接工具无法在该模块的所有目标文件中找到这个函数的具体指令码,如printk
对它的处理一直推迟到 insmod 加载模块时,由内核找到该符号所在的地址。
simplify_symbols (fix up syms, so that st_value is a pointer to location)
/* Change all symbols so that st_value encodes the pointer directly. */ static int simplify_symbols(struct module *mod, const struct load_info *info) { Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; Elf_Sym *sym = (void *)symsec->sh_addr; unsigned long secbase; unsigned int i; int ret = 0; const struct kernel_symbol *ksym;
}for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) { const char *name = info->strtab + sym[i].st_name; switch (sym[i].st_shndx) { case SHN_COMMON: /* We compiled with -fno-common. These are not supposed to happen. */ pr_debug("Common symbol: %s\n", name); printk("%s: please compile with -fno-common\n", mod->name); ret = -ENOEXEC; break; case SHN_ABS: /* Don't need to do anything */ pr_debug("Absolute symbol: 0x%08lx\n", (long)sym[i].st_value); break; case SHN_UNDEF: ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { sym[i].st_value = ksym->value; break; } /* Ok if weak. */ if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) break; printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", mod->name, name, PTR_ERR(ksym)); ret = PTR_ERR(ksym) ?: -ENOENT; break; default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == info->index.pcpu) secbase = (unsigned long)mod_percpu(mod); else secbase = info->sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; break; } } return ret;