内容简介:主要讲解了内核与模块的交互方式,也就是:模块如何装载和卸载,以及内核如何检测不同模块间的相互依赖。
略
主要讲述了modutils工具包:
1. 添加和移除
modprobe :从内核中添加或移除模块,考虑模块间可能的依赖关系;
insmod:添加单个模块;
lsmod:显示内核模块状态----也就是翻译/proc/modules文件内容;
depmod:产生modules.dep文件和映射文件(主要是计算内核模块间的依赖关系),modules.dep默认目录:/lib/modules/4.4.0-21-generic,二进制哈希版本:modules.dep.bin。modules.dep格式:
kernel/arch/x86/crypto/twofish-i586.ko:kernel/crypto/twofish_common.ko
modinfo:查看模块信息;
nm:列出对象文件的所有符号:
U 符号是未定义的;
A 符号地址是绝对的;
T/t 符号在text段(代码段);
D/d 符号在初始化数据段;
B/b 符号在未初始化数据段(BSS);
I 符号是其他模块的间接引用;
R/r 符号在readonly数据段;
? 符号类型未知或者由目标文件本身指定;
/proc/kallsyms:查看内核所有导出符号表;
2. 自动加载模块
在某个模块的符号未定义时,内核使用request_module函数(函数使用modprobe工具自动探测)自动加载其他模块。
1. 用户态API:init_module()、delete_module();
2. 内核态API:request_module()
3. 模块表示:
struct module {
enum module_state state; -----------------------模块状态:装载中/运行中/正在移除/
/* Member of list of modules */
struct list_head list; -------用于链接所有的加载模块,list头在文件kernel/module.c中:modules
/* Unique handle for this module */
char name[MODULE_NAME_LEN]; ----模块名
/* Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject *holders_dir;
/* Exported symbols */
const struct kernel_symbol *syms; ------导出符号数组指针
const unsigned long *crcs; ------导出符号的校验和,用于版本控制
unsigned int num_syms; ------导出符号数组项个数
/* Kernel parameters. */
#ifdef CONFIG_SYSFS
struct mutex param_lock;
#endif
struct kernel_param *kp;
unsigned int num_kp;
/* GPL-only exported symbols. */ ----------只提供给GPL模块的符号
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long *gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS ------------已经导出,但in-tree模块未使用的符号
/* unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long *unused_crcs;
unsigned int num_unused_syms;
/* GPL-only, unused exported symbols. */ ------------已经导出,但in-tree模块未使用的符号(GPL)
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long *unused_gpl_crcs;
#endif
#ifdef CONFIG_MODULE_SIG
/* Signature was verified. */
bool sig_ok;
#endif
bool async_probe_requested;
/* symbols that will be GPL-only in the near future. */ ------------将来只提供给GPL的符号
const struct kernel_symbol *gpl_future_syms;
const unsigned long *gpl_future_crcs;
unsigned int num_gpl_future_syms;
/* Exception table */ -------模块定义的异常
unsigned int num_exentries; -------数组项个数
struct exception_table_entry *extable; -------数组
/* Startup function. */
int (*init)(void); ------模块启动回调
/*
* If this is non-NULL, vfree() after init() returns.
*
* Cacheline align here, such that:
* module_init, module_core, init_size, core_size,
* init_text_size, core_text_size and mtn_core::{mod,node[0]}
* are on the same cacheline.
*/
void *module_init ____cacheline_aligned; ------------模块二进制数据:初始化部分
/* Here is the actual code + data, vfree'd on unload. */ ------------模块二进制数据:核心部分
void *module_core;
/* Here are the sizes of the init and core sections */
unsigned int init_size, core_size; -----初始化/核心部分长度
/* The size of the executable code in each section. */ -----------初始化/核心部分代码段长度
unsigned int init_text_size, core_text_size;
#ifdef CONFIG_MODULES_TREE_LOOKUP
/*
* We want mtn_core::{mod,node[0]} to be in the same cacheline as the
* above entries such that a regular lookup will only touch one
* cacheline.
*/
struct mod_tree_node mtn_core;
struct mod_tree_node mtn_init;
#endif
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;
/* Arch-specific module values */
struct mod_arch_specific arch; -------特定于架构的模块数据
unsigned int taints; /* same bits as kernel:tainted */ ------模块是否污染内核
#ifdef CONFIG_GENERIC_BUG
/* Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry *bug_table;
#endif
#ifdef CONFIG_KALLSYMS --------模块所有符号
/* Protected by RCU and/or module_mutex: use rcu_dereference() */
struct mod_kallsyms *kallsyms;
struct mod_kallsyms core_kallsyms;
/* Section attributes */
struct module_sect_attrs *sect_attrs; ------模块各段的属性
/* Notes attributes */
struct module_notes_attrs *notes_attrs; ------模块note属性
#endif
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args; -----模块装载时的命令行参数
#ifdef CONFIG_SMP
/* Per-cpu data. */
void __percpu *percpu; ----指向属于模块的各CPU数据。
unsigned int percpu_size;
#endif
#ifdef CONFIG_TRACEPOINTS
unsigned int num_tracepoints;
struct tracepoint * const *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
struct jump_entry *jump_entries;
unsigned int num_jump_entries;
#endif
#ifdef CONFIG_TRACING
unsigned int num_trace_bprintk_fmt;
const char **trace_bprintk_fmt_start;
#endif
#ifdef CONFIG_EVENT_TRACING
struct trace_event_call **trace_events;
unsigned int num_trace_events;
struct trace_enum_map **trace_enums;
unsigned int num_trace_enums;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
#ifdef CONFIG_LIVEPATCH
bool klp_alive;
#endif
#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */ -----依赖此模块的模块链
struct list_head source_list;
/* What modules do I depend on? */ -----本模块依赖的模块链
struct list_head target_list;
/* Destruction function. */
void (*exit)(void); ---------------模块退出回调
atomic_t refcnt; -----------------模块引用计数
#endif
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
} ____cacheline_aligned;
4. 模块的二进制结构
就是一个elf文件,各个符号按段存储。宏定义EXPORT_SYMBOL/EXPORT_SYMBOL_GPL(定义在文件linux/export.h)用于导出模块符号,导出后的符号可以用cat /proc/kallsyms查看地址。
5. 插入模块
讲解了系统调用init_module所走的内核流程,重点是内核函数:load_module();
6. 移除模块
讲解了系统调用delete_module所走的内核流程,比较简单,慢慢看能看懂。
1. 自动加载模块
主要讲解了内核函数:request_module的流程(子函数:__request_module):
1) 拼装模块名:
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
2) 通过原子变量kmod_concurrent判断同时调用request_module的次数是否过多,若太多,则返回。
3) 调用用户态modprobe工具加载模块。函数里面会声明一个新的工作队列,等等。嵌套比较复杂。
2. 热插拔
1) systemd-udevd守护进程可以监控来自内核的事件,如笔者开启udev监控,然后插入一个USB U盘,收到如下事件:
zg-Linux systemd # udevadm monitor ----开启监控
KERNEL[1966.614028]add /devices/pci0000:00/0000:00:14.0/usb3/3-2 (usb)
KERNEL[1966.615078]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0 (usb)
UDEV [1968.128283] add /devices/pci0000:00/0000:00:14.0/usb3/3-2(usb)
KERNEL[1968.191791] add /module/usb_storage (module)------------------插入模块
KERNEL[1968.192038]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6(scsi)
KERNEL[1968.192562]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/scsi_host/host6(scsi_host)
KERNEL[1968.192654]add /bus/usb/drivers/usb-storage(drivers)
UDEV [1968.192803] add /module/usb_storage (module)
UDEV [1968.193501] add /bus/usb/drivers/usb-storage (drivers)
KERNEL[1968.198462]add /module/uas (module)
KERNEL[1968.198496]add /bus/usb/drivers/uas (drivers)
UDEV [1968.199080] add /module/uas (module)
UDEV [1968.199220] add /bus/usb/drivers/uas (drivers)
UDEV [1968.228816] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0 (usb)
UDEV [1968.230274] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6 (scsi)
UDEV [1968.231660] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/scsi_host/host6(scsi_host)
KERNEL[1969.282493]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0(scsi)
KERNEL[1969.282969]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0(scsi)
KERNEL[1969.283453]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_disk/6:0:0:0(scsi_disk)
KERNEL[1969.283553]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_device/6:0:0:0(scsi_device)
KERNEL[1969.283712]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_generic/sg2(scsi_generic)
KERNEL[1969.283850]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/bsg/6:0:0:0(bsg)
UDEV [1969.285064] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0(scsi)
UDEV [1969.286408] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0(scsi)
UDEV [1969.288368] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_disk/6:0:0:0(scsi_disk)
UDEV [1969.288749] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_device/6:0:0:0(scsi_device)
UDEV [1969.290992] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/scsi_generic/sg2(scsi_generic)
UDEV [1969.291063] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/bsg/6:0:0:0(bsg)
KERNEL[1970.428838]add /devices/virtual/bdi/8:16 (bdi)
UDEV [1970.430739] add /devices/virtual/bdi/8:16 (bdi)
KERNEL[1970.456474]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/block/sdb(block)
KERNEL[1970.456613]add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/block/sdb/sdb4 (block)----添加一个块设备sdb4
UDEV [1970.519782] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/block/sdb(block)
UDEV [1970.562684] add /devices/pci0000:00/0000:00:14.0/usb3/3-2/3-2:1.0/host6/target6:0:0/6:0:0:0/block/sdb/sdb4(block)
KERNEL[1970.684704]add /module/nls_iso8859_1 (module)
UDEV [1970.685554] add /module/nls_iso8859_1 (module)
Linux内核模块的版本控制主要思想:使用函数或过程的参数,生成一个CRC校验和。
1. 生成校验和:内核采用genksyms工具(笔者没找到;其源码位于内核源码树的scripts/genksyms目录)生成所有导出符号的校验和;
2. 内核模块在编译时,自动生成module.mod.c文件,主要是定义了两个变量分别位于不同的段:"__versions"段用于存放本模块所引用的符号及其校验和;".modinfo"存放所依赖的所有模块列表;
3. 内核用于模块版本控制的函数:check_version,书上的代码和笔者4.4.0内核差不多,表明这部分内核代码比较稳定。有兴趣的可以自己去看看。
感觉整个章节都比较重要,特别是现在很多商业公司在开发自己的产品时越来越注重提供原生态的linux内核,以便提供第三方工具和方便获取/移植开源社区的最新成果。我所知道的有:华为、新华三。其早期的网络操作系统很多代码都写在内核态,把内核改造的“乱七八糟”,也只有本公司人员能玩。现在,在它们新一代网络操作系统开发中,以“尽量不要修改内核代码”为原则,实在要修改,也以module的方式加载到内核,保持内核的完整性。因而,掌握内核module的使用/开发是非常重要的。