目录
一、Kernel的入口
二、Kernel的启动过程
1. head.S过程分析
(1)判断内核版本是否支持该CPU
(2)保存RAM的起始物理地址到r8寄存器
(3)校验u-boot传递的启动参数
(4)创建一级页表
(5)保存最后的跳转地址,设置处理器和MMU相关寄存器
(6)使能MMU
(7)head.S的终点
在前一篇博客中,总结了u-boot的过程,最后定义了u-boot的终点,这就是Kernel的起点,如下所示
theKernel = (void (*)(int, int, uint))images->ep;
theKernel (0, machid, bd->bi_boot_params);
而且分析到了images->ep的值就是0x80008000,但是0x80008000到底对应于Kernel的哪一处,目前还不得而知。
先看内核的生成过程:
(1)make ARCH=arm CROSS_COMPILE=arm-hisiv100nptl-linux- hi3520d_full_defconfig
(2)make ARCH=arm CROSS_COMPILE=arm-hisiv100nptl-linux- -j20
(3)make ARCH=arm CROSS_COMPILE=arm-hisiv100nptl-linux- uImage -j 20
运行完(2)后,输出:
Kernel: arch/arm/boot/Image is ready
AS arch/arm/boot/compressed/piggy.gzip.o
LD arch/arm/boot/compressed/vmlinux
OBJCOPY arch/arm/boot/zImage
Kernel: arch/arm/boot/zImage is ready
首先要注意的是vmlinux即为俗称的内核,后面的zImage和uImage也是由它生成的,而vmlinux有两个:
(1)根目录下的vmlinux
(2)arch/arm/boot/compressed下的vmlinux
根据Makefile并跟进打印,得到过程如下:
# vmlinux
# ^
# |
# +-< $(vmlinux-init)
# | +--< init/version.o + more
# |
# +--< $(vmlinux-main)
# | +--< driver/built-in.o mm/built-in.o + more
# |
# +-< kallsyms.o (see description in CONFIG_KALLSYMS section)
vmlinux-init := $(head-y) $(init-y)
vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y)
quiet_cmd_vmlinux-modpost = LD $@
cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \
$(vmlinux-init) --start-group $(vmlinux-main) --end-group \
$(filter-out $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
# vmlinux image - including updated kernel symbols
vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
ifdef CONFIG_HEADERS_CHECK
$(Q)$(MAKE) -f $(srctree)/Makefile headers_check
endif
ifdef CONFIG_SAMPLES
$(Q)$(MAKE) $(build)=samples
endif
ifdef CONFIG_BUILD_DOCSRC
$(Q)$(MAKE) $(build)=Documentation
endif
$(call vmlinux-modpost)
$(call if_changed_rule,vmlinux__)
$(Q)rm -f .old_version
(1)链接arch/arm/kernel/head.o + arch/arm/kernel/init_task.o + init/ + kernel/ + mm/ + ... + net/ = 根目录vmlinux
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@echo ' Kernel: $@ is ready'
(2)将该vmlinux OBJCOPY(去掉调试信息等)后 = arch/arm/boot下的Image
$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
$(call if_changed,ld)
@$(check_for_bad_syms)
$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
$(call if_changed,$(suffix_y))
$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE
(3)将arch/arm/boot/Image GZIP压缩后 = arch/arm/boot/compressed/piggy.gzip
= arch/arm/boot/compressed/piggy.gzip.o
arch/arm/boot/compressed/head.o + arch/arm/boot/compressed/misc.o + arch/arm/boot/compressed/decompress.o + arch/arm/boot/compressed/piggy.gzip.o = arch/arm/boot/compressed/vmlinux
$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
@echo ' Kernel: $@ is ready'
(4)将arch/arm/boot/compressed/vmlinux OBJCOPY(去掉调试信息等)后 = arch/arm/boot/zImage
所以真正解压后的内核,运行的第一个文件由arch/arm/kernel的vmlinux.lds来决定
内核编译依靠linux源码根目录/arch/arm/kernel下的vmlinux.lds,而vmlinux.lds由linux源码根目录/arch/arm/kernel下的vmlinux.lds.S经过预处理生成
OUTPUT_ARCH(arm)
ENTRY(stext)
#ifndef __ARMEB__
jiffies = jiffies_64;
#else
jiffies = jiffies_64 + 4;
#endif
SECTIONS
{
#ifdef CONFIG_XIP_KERNEL
. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
#else
. = PAGE_OFFSET + TEXT_OFFSET;
#endif
.init : { /* Init code and data */
_stext = .;
_sinittext = .;
HEAD_TEXT
INIT_TEXT
ARM_EXIT_KEEP(EXIT_TEXT)
_einittext = .;
ARM_CPU_DISCARD(PROC_INFO)
__arch_info_begin = .;
*(.arch.info.init)
__arch_info_end = .;
__tagtable_begin = .;
*(.taglist.init)
__tagtable_end = .;
#ifdef CONFIG_SMP_ON_UP
__smpalt_begin = .;
*(.alt.smp.init)
__smpalt_end = .;
#endif
__pv_table_begin = .;
*(.pv_table)
__pv_table_end = .;
INIT_SETUP(16)
INIT_CALLS
CON_INITCALL
SECURITY_INITCALL
INIT_RAM_FS
#ifndef CONFIG_XIP_KERNEL
__init_begin = _stext;
INIT_DATA
ARM_EXIT_KEEP(EXIT_DATA)
#endif
}
从代码可以看出,内核启动的虚拟地址被设置成了PAGE_OFFSET + TEXT_OFFSET,这个地址在linux源码根目录/arch/arm/boot下的Makefile有所提及
# Note: the following conditions must always be true:
# ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
所以这里便把内核启动的物理地址和虚拟地址对应上了,且PAGE_OFFSET + TEXT_OFFSET的值也可以从最终生成的vmlinux.lds看出
OUTPUT_ARCH(arm)
ENTRY(stext)
jiffies = jiffies_64;
SECTIONS
{
. = 0xC0000000 + 0x00008000;
.init : { /* Init code and data */
_stext = .;
_sinittext = .;
*(.head.text)
*(.init.text) *(.cpuinit.text) *(.meminit.text)
_einittext = .;
__proc_info_begin = .; *(.proc.info.init) __proc_info_end = .;
__arch_info_begin = .;
*(.arch.info.init)
__arch_info_end = .;
__tagtable_begin = .;
*(.taglist.init)
__tagtable_end = .;
__pv_table_begin = .;
*(.pv_table)
__pv_table_end = .;
. = ALIGN(16); __setup_start = .; *(.init.setup) __setup_end = .;
__initcall_start = .; *(.initcallearly.init) __early_initcall_end = .; *(.initcall0.init) *(.initcall0s.init) *(.initcall1.init) *(.initcall1s.init) *(.initcall2.init) *(.initcall2s.init) *(.initcall3.init) *(.initcall3s.init) *(.initcall4.init) *(.initcall4s.init) *(.initcall5.init) *(.initcall5s.init) *(.initcallrootfs.init) *(.initcall6.init) *(.initcall6s.init) *(.initcall7.init) *(.initcall7s.init) __initcall_end = .;
__con_initcall_start = .; *(.con_initcall.init) __con_initcall_end = .;
__security_initcall_start = .; *(.security_initcall.init) __security_initcall_end = .;
. = ALIGN(4); __initramfs_start = .; *(.init.ramfs) . = ALIGN(8); *(.init.ramfs.info)
__init_begin = _stext;
*(.init.data) *(.cpuinit.data) *(.meminit.data) *(.init.rodata) *(.cpuinit.rodata) *(.meminit.rodata) . = ALIGN(32); __dtb_start = .; *(.dtb.init.rodata) __dtb_end = .;
}
PAGE_OFFSET=0xC0000000,TEXT_OFFSET=0x00008000,它运行的位置为ENTRY(stext),这个在linux源码根目录/arch/arm/kernel下的head.S和head-nommu.S都有定义的
__HEAD
ENTRY(stext)
setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
@ and irqs disabled
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type @ r5=procinfo r9=cpuid
movs r10, r5 @ invalid processor (r5=0)?
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p @ yes, error 'p'
#define __HEAD .section ".head.text","ax"
那么究竟使用的是哪一个呢?
其实linux源码根目录/arch/arm/kernel的Makefile中有
head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
extra-y := $(head-y) init_task.o vmlinux.lds
在linux源码根目录/arch/arm的Makefile中
ifeq ($(CONFIG_MMU),)
MMUEXT := -nommu
endif
实际上当配置了CONFIG_MMU(即处理器支持并启用MMU)时,$(MMUEXT)为空,head-y就是head.o;而hi3520d是支持mmu的,从.config(由linux源码根目录/arch/arm/configs下的hi3520d_full_defconfig生成)中可以看到
#
# System Type
#
CONFIG_MMU=y
因此得出head.S便是内核的第一个运行文件,且运行位置为ENTRY(stext)处开始。
①读出协处理器cp15的c0寄存器来获取处理器id(保存在r9中)
mrc p15, 0, r9, c0, c0 @ get processor id
②判断是否支持
bl __lookup_processor_type @ r5=procinfo r9=cpuid
__lookup_processor_type的真正实现在linux源码根目录/arch/arm/kernel的head-common.S中
/*
 * __lookup_processor_type - search the proc_info_list table for an entry
 * whose (value, mask) pair matches the CPU ID in r9.
 *
 * The table bounds stored in __lookup_processor_type_data are link-time
 * addresses; they are rebased by the difference between the address this
 * code is actually running at and its link-time address before being used.
 *
 * In:       r9 = CPU ID to match
 * Out:      r5 = rebased address of the matching entry, or 0 if none matched
 * Clobbers: r3, r4, r6, flags
 */
__CPUINIT
__lookup_processor_type:
adr r3, __lookup_processor_type_data @ r3 = run-time (PC-relative) address of the data
ldmia r3, {r4 - r6} @ r4 = link-time address of the data, r5 = __proc_info_begin, r6 = __proc_info_end
sub r3, r3, r4 @ get offset between virt&phys
add r5, r5, r3 @ convert virt addresses to
add r6, r6, r3 @ physical address space
1: ldmia r5, {r3, r4} @ value, mask
and r4, r4, r9 @ mask wanted bits
teq r3, r4 @ does the masked CPU ID equal this entry's value?
beq 2f @ yes: return with r5 pointing at the entry
add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
cmp r5, r6 @ reached the end of the table?
blo 1b @ no: try the next entry
mov r5, #0 @ unknown processor
2: mov pc, lr @ return to caller
ENDPROC(__lookup_processor_type)
/*
* Look in for information about the __proc_info structure.
*/
.align 2
.type __lookup_processor_type_data, %object
__lookup_processor_type_data:
.long . @ will be saved to r4
.long __proc_info_begin @ will be saved to r5
.long __proc_info_end @ will be saved to r6
.size __lookup_processor_type_data, . - __lookup_processor_type_data
首先需要知道的是,在启用MMU之前,内核使用的都应该是物理地址,但是内核代码在链接时(依赖链接器脚本)的地址都是虚拟地址,因此需要有一个虚拟地址转换物理地址的过程,__lookup_processor_type的第一步就在做这个:
首先,通过adr指令,将__lookup_processor_type_data的物理地址读出来到r3;
然后,ldmia指令所做的是,将r3(__lookup_processor_type_data)虚拟地址读取到r4,将(r3+4)即__proc_info_begin读取到r5,将(r3+8)即__proc_info_end读取到r6;
然后,算出虚拟地址和物理地址的差;
最后,将r5中保存的__proc_info_begin和r6中保存的__proc_info_end的虚拟地址转换为物理地址。
那么__proc_info_begin和__proc_info_end究竟存的是什么,可从链接器脚本vmlinux.lds中看到线索
__proc_info_begin = .; *(.proc.info.init) __proc_info_end = .;
*(.proc.info.init)就是答案,那么这个又对应的内容是什么,则需要在代码中找,但搜出的代码中有许多proc-xxx.S,至于究竟用哪个,首先看linux源码根目录下的.config
#
# Processor Type
#
CONFIG_CPU_V7=y
然后再在linux源码根目录/arch/arm/mm的Makefile中
obj-$(CONFIG_CPU_V7) += proc-v7.o
所以对应的代码应该是proc-v7.S
.section ".proc.info.init", #alloc, #execinstr
/*
* ARM Ltd. Cortex A9 processor.
*/
.type __v7_ca9mp_proc_info, #object
__v7_ca9mp_proc_info:
.long 0x410fc090 @ Required ID value
.long 0xff0ffff0 @ Mask for ID
ALT_SMP(.long \
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | \
PMD_FLAGS_SMP)
ALT_UP(.long \
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | \
PMD_FLAGS_UP)
.long PMD_TYPE_SECT | \
PMD_SECT_XN | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
W(b) __v7_ca9mp_setup
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_v7_name
.long v7_processor_functions
.long v7wbi_tlb_fns
.long v6_user_fns
.long v7_cache_fns
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
/*
* Match any ARMv7 processor core.
*/
.type __v7_proc_info, #object
__v7_proc_info:
.long 0x000f0000 @ Required ID value /*unsigned int cpu_val*/
.long 0x000f0000 @ Mask for ID /*unsigned int cpu_mask*/
ALT_SMP(.long \ /*unsigned long __cpu_mm_mmu_flags; used by head.S */
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | \
PMD_FLAGS_SMP)
ALT_UP(.long \
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | \
PMD_FLAGS_UP)
.long PMD_TYPE_SECT | \ /*unsigned long __cpu_io_mmu_flags; used by head.S */
PMD_SECT_XN | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ
W(b) __v7_setup /*unsigned long __cpu_flush; used by head.S */
.long cpu_arch_name /*const char *arch_name;*/
.long cpu_elf_name /*const char *elf_name;*/
/*unsigned int elf_hwcap;*/
.long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
.long cpu_v7_name /*const char *cpu_name;*/
.long v7_processor_functions /*struct processor *proc*/
.long v7wbi_tlb_fns /*struct cpu_tlb_fns *tlb*/
.long v6_user_fns /*struct cpu_user_fns *user*/
.long v7_cache_fns /*struct cpu_cache_fns *cache*/
.size __v7_proc_info, . - __v7_proc_info
这里面其实定义了两个结构__v7_ca9mp_proc_info和__v7_proc_info,每个结构具体的组成其实能够在linux源码根目录/arch/arm/include/asm的procinfo.h中找到
struct proc_info_list {
unsigned int cpu_val;
unsigned int cpu_mask;
unsigned long __cpu_mm_mmu_flags; /* used by head.S */
unsigned long __cpu_io_mmu_flags; /* used by head.S */
unsigned long __cpu_flush; /* used by head.S */
const char *arch_name;
const char *elf_name;
unsigned int elf_hwcap;
const char *cpu_name;
struct processor *proc;
struct cpu_tlb_fns *tlb;
struct cpu_user_fns *user;
struct cpu_cache_fns *cache;
};
在proc-v7.S中也加上了注释,可以一一对应地看。其实最终使用的是__v7_ca9mp_proc_info,因为通过3520D_SDK根目录/release/00.hardware/chip/documents_cn中的Hi3520DV200 H.264编解码处理器简介.pdf可以看出其内核为ARM Cortex A9.
再回到__lookup_processor_type中,得到了__proc_info_begin的物理地址后,用ldmia指令取出了第一个proc_info_list结构的cpu_val(保存在r3)和cpu_mask(保存在r4),将cpu_mask和之前保存在r9中的处理器id做“与”操作,然后和cpu_val比较是否相等,相等则返回(此时r5保存的是所匹配到的proc_info_list结构的物理地址),不相等则接着往下找下一个proc_info_list结构,完成同样的判断,直到__proc_info_end还没找到,就将r5赋值为0返回,中间找到相等的就赋值给r5(匹配到的proc_info_list结构的物理地址)。返回后,将r5再赋值给r10。
#ifndef CONFIG_XIP_KERNEL
adr r3, 2f
ldmia r3, {r4, r8}
sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET)
add r8, r8, r4 @ PHYS_OFFSET
#else
ldr r8, =PLAT_PHYS_OFFSET
#endif
#ifndef CONFIG_XIP_KERNEL
2: .long .
.long PAGE_OFFSET
#endif
内核中未定义CONFIG_XIP_KERNEL(查看.config:# CONFIG_XIP_KERNEL is not set),因此走的逻辑为:
r3读取标号2的物理地址;
标号2的虚拟地址给r4,PAGE_OFFSET的虚拟地址给r8;
物理地址和虚拟地址的差赋值给r4;
将r8转换为物理地址,即RAM的首地址,r4仍保存物理地址和虚拟地址的差。
①寄存器说明
在(1)部分后,目前已知的寄存器保存的值如下:
由于theKernel (0, machid, bd->bi_boot_params)作用,r0=0,r1=machid,r2=bd->bi_boot_params
r9=processor id,r10=匹配到的proc_info_list结构的物理地址,r8=RAM的起始物理地址,r4=物理地址-虚拟地址
②回顾u-boot参数传递过程
/* The list ends with an ATAG_NONE node. */
#define ATAG_NONE 0x00000000
struct tag_header {
u32 size;
u32 tag;
};
/* The list must start with an ATAG_CORE node */
#define ATAG_CORE 0x54410001
struct tag {
struct tag_header hdr;
union {
struct tag_core core;
struct tag_mem32 mem;
struct tag_videotext videotext;
struct tag_ramdisk ramdisk;
struct tag_initrd initrd;
struct tag_serialnr serialnr;
struct tag_revision revision;
struct tag_videolfb videolfb;
struct tag_cmdline cmdline;
/*
* Acorn specific
*/
struct tag_acorn acorn;
/*
* DC21285 specific
*/
struct tag_memclk memclk;
} u;
};
#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size))
#define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2)
/*first tag is ATAG_CORE*/
static void setup_start_tag (bd_t *bd)
{
params = (struct tag *) bd->bi_boot_params;
params->hdr.tag = ATAG_CORE;
params->hdr.size = tag_size (tag_core);
params->u.core.flags = 0;
params->u.core.pagesize = 0;
params->u.core.rootdev = 0;
params = tag_next (params);
}
/*end tag is ATAG_NONE*/
static void setup_end_tag (bd_t *bd)
{
params->hdr.tag = ATAG_NONE;
params->hdr.size = 0;
}
/*all tag assigned as bellow, use tag_next to point next tag's address*/
void setup_serial_tag (struct tag **tmp)
{
struct tag *params = *tmp;
struct tag_serialnr serialnr;
void get_board_serial(struct tag_serialnr *serialnr);
get_board_serial(&serialnr);
params->hdr.tag = ATAG_SERIAL;
params->hdr.size = tag_size (tag_serialnr);
params->u.serialnr.low = serialnr.low;
params->u.serialnr.high= serialnr.high;
params = tag_next (params);
*tmp = params;
}
不清楚这块的可以先看一下前一篇博客。所有的参数都是以一个struct tag的结构给出,第一个tag的名称为ATAG_CORE,最后一个tag的名称为ATAG_NONE,其中的每个tag的赋值都类似于setup_serial_tag函数,对tag内容进行赋值,然后调用tag_next指向下一个tag地址。
其实内核中也保存了同样一份struct tag声明,在linux源码根目录/arch/arm/include/asm的setup.h中。
③校验参数的过程
#define ATAG_CORE 0x54410001
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
#define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2)
#ifdef CONFIG_CPU_BIG_ENDIAN
#define OF_DT_MAGIC 0xd00dfeed
#else
#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */
#endif
/*
 * __vet_atags - sanity-check the boot parameter pointer held in r2.
 *
 * The pointer is accepted when it is 4-byte aligned and either
 *   - (CONFIG_OF_FLATTREE only) its first word equals OF_DT_MAGIC, or
 *   - its first word equals ATAG_CORE_SIZE or ATAG_CORE_SIZE_EMPTY and its
 *     second word equals ATAG_CORE.
 *
 * In:       r2 = pointer to check
 * Out:      r2 = unchanged if accepted, 0 otherwise
 * Clobbers: r5, r6, flags
 */
__vet_atags:
tst r2, #0x3 @ aligned?
bne 1f @ no: reject
ldr r5, [r2, #0] @ r5 = first word (tag_header.size for an ATAG list)
#ifdef CONFIG_OF_FLATTREE
ldr r6, =OF_DT_MAGIC @ is it a DTB?
cmp r5, r6
beq 2f @ yes: accept without further checks
#endif
cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE?
cmpne r5, #ATAG_CORE_SIZE_EMPTY @ if not the full size, also allow the empty-core size
bne 1f @ neither size matched: reject
ldr r5, [r2, #4] @ r5 = second word (tag_header.tag)
ldr r6, =ATAG_CORE
cmp r5, r6 @ must be the ATAG_CORE marker
bne 1f
2: mov pc, lr @ atag/dtb pointer is ok
1: mov r2, #0 @ rejected: hand back a null pointer
mov pc, lr
首先,r2中保存的就是bd->bi_boot_params即tag在内存中保存的首地址,校验r2是否为四字节对齐;
然后,将r2中的内容取出保存在r5,即r5为第一个tag(ATAG_CORE)的struct tag_header中的size;
然后,比较r5是否与u-boot传入的tag_size (tag_core)的大小相等;
最后,比较第一个tag是否为ATAG_CORE。
当条件全部满足后返回,r2仍为bd->bi_boot_params=DDR地址+0x100=0x80000100,否则r2=0
①“初始化”工作
.macro pgtbl, rd, phys
add \rd, \phys, #TEXT_OFFSET - 0x4000
.endm
__create_page_tables:
pgtbl r4, r8 @ page table address
/*
* Clear the 16K level 1 swapper page table
*/
mov r0, r4
mov r3, #0
add r6, r0, #0x4000
1: str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
teq r0, r6
bne 1b
ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
由之前寄存器中存放的值,可以知道,r8=RAM的起始物理地址(0x80000000),r4=物理地址-虚拟地址,且链接器脚本指明了TEXT_OFFSET=0x00008000,那么把pgtbl的宏展开即为r4 = r8 + TEXT_OFFSET - 0x4000 = 0x80004000(r4中原先保存的地址差在此被覆盖,此后r4表示页表的物理基地址),这一步主要是将内核代码起始地址(0x80008000)之前的16K空间(0x80004000~0x80007fff)用于存放一级页表,并将之清0。而ldr指令的作用,结合定义于linux源码根目录/arch/arm/kernel下的asm-offsets.c来看
DEFINE(PROCINFO_MM_MMUFLAGS, offsetof(struct proc_info_list, __cpu_mm_mmu_flags));
可以知道,r10中保存的是与processor id相符的proc_info_list结构的物理地址,这里取出的是proc_info_list结构中的__cpu_mm_mmu_flags成员,最后将这个值从proc-v7.S中重新找出来发现,无论是__v7_ca9mp_proc_info还是__v7_proc_info,这个字段都是一样的,为
ALT_SMP(.long \
PMD_TYPE_SECT | \
PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | \
PMD_FLAGS_SMP)
展开后即为(2<<0) | (1<<10) | (1<<11) | (1<<16) | (1<<12) | (1<<3) | (1<<2) = 0x00011c0e,将这个值保存到r7寄存器中,这个值具体含义可以先看后面对这个值的操作情况。
②MMU使能代码对应的页表转换
adr r0, __enable_mmu_loc
ldmia r0, {r3, r5, r6}
sub r0, r0, r3 @ virt->phys offset
add r5, r5, r0 @ phys __enable_mmu
add r6, r6, r0 @ phys __enable_mmu_end
mov r5, r5, lsr #20
mov r6, r6, lsr #20
1: orr r3, r7, r5, lsl #20 @ flags + kernel base
str r3, [r4, r5, lsl #2] @ identity mapping
teq r5, r6
addne r5, r5, #1 @ next section
bne 1b
__enable_mmu_loc:
.long .
.long __enable_mmu
.long __enable_mmu_end
这一步主要是将__enable_mmu这个标号中的代码建立虚拟地址的页表转换,这样做的目的如下:
因为开启MMU之前,pc的地址是物理地址(为什么?因为自从u-boot中执行了ldr pc, _start_armboot后,pc指针就一直是RAM中的实际物理地址),是0x80008000之后的某个地址,而开启之后,如果没有特殊的操作,地址也会是0x80008000之后的某个地址,但是此时的地址会被认为是虚拟地址,如果不对这部分虚拟地址建立页表,程序会立马出现问题。
操作过程:
这里前五行和之前虚拟地址-物理地址的转换方式如出一辙,不再赘述,主要是将__enable_mmu(一个标号的地址,用于初始化MMU)和__enable_mmu_end转换为物理地址分别保存于r5和r6寄存器中,然后将r5右移20位(lsr #20)即取出r5的高12位,也就是该地址所在1M段的序号,这个操作主要因为段式页表(因为0x00011c0e的最低两位为10)的建立是每一个表项转换1M的空间,页表具体格式如下(参见ARM体系结构与编程.pdf):
在页表格式的基础之上,再来理解之前r7 = 0x00011c0e的含义,以及orr r3, r7, r5, lsl #20的作用就不难了,因为是1:1的映射关系,所以段基地址即为物理地址。而str r3, [r4, r5, lsl #2]左移两位的原因,也和段地址的变换过程有关:
第一个地址,页表的基地址为0x80004000;
第二个地址,一级页表内的偏移序号,即为r5>>20后的结果,段内地址的偏移量(0x00000000~0x000fffff);
第三个地址,页表项存放的位置,是页表的基地址r4=0x80004000加上(r5>>20后的结果再左移两位);
第四个地址,页表项中存放的值,是段对应的物理基地址(r5>>20)<<20,或上__cpu_mm_mmu_flags = 0x00011c0e;
第五个地址,虚拟地址的对应值。
MMU这部分页表建立完成之后,对应关系如下:
③Kernel代码对应的页表转换
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#define KERNEL_START KERNEL_RAM_VADDR
#define KERNEL_END _end
mov r3, pc
mov r3, r3, lsr #20
orr r3, r7, r3, lsl #20
add r0, r4, #(KERNEL_START & 0xff000000) >> 18
str r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
ldr r6, =(KERNEL_END - 1)
add r0, r0, #4
add r6, r4, r6, lsr #18
1: cmp r0, r6
add r3, r3, #1 << 20
strls r3, [r0], #4
bls 1b
在了解了之前的__enable_mmu转换过程之后,这部分就比较好理解了,只是此处处理的是pc指针罢了。但比较奇怪的是KERNEL_START为什么只取了前8位并只右移了18位,而之前是右移20位,且最后赋值到对应地址时也没有了左移两位,其实想想后发现算法并没有变化,只是换了一种写法:右移18位后保证少移了两位,且最后两位是0,但由于只取了前8位,会丢失4位,所以赋值时会把这四位的值直接加上。还有一点需要注意的是,内核虚拟地址的起点很简单,是链接脚本中的PAGE_OFFSET+TEXT_OFFSET=0xC0000000+0x00008000,但是终点呢?也需要看链接器脚本中的
_end = .;
而且,页表转换颗粒度永远都是1M,所以_end不是1M对齐时,也会将1M的位置补齐保证中间的虚拟地址都能正常转换为物理地址(和__enable_mmu的转换类似,代码不足1M也会补齐1M的转换页表)。最终Kernel转换后,对应关系如下:
④bootargs对应的页表转换
mov r0, r2, lsr #20
movs r0, r0, lsl #20
moveq r0, r8
sub r3, r0, r8
add r3, r3, #PAGE_OFFSET
add r3, r4, r3, lsr #18
orr r6, r7, r0
str r6, [r3]
方法类似,不再赘述,读者可自行推解这个过程。
ldr r13, =__mmap_switched @ address to jump to after
@ mmu has been enabled
adr lr, BSYM(1f) @ return (PIC) address
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
1: b __enable_mmu
这里主要注意__mmap_switched其实此时是链接地址的值,即0xC0008000之后的一个地址,最后跳转后由于已经映射了内核部分的代码,这部分可以正常运行,若这之前没有映射,之后mov pc, r13将跑飞。还有一点:
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
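这两句(未配置为Thumb-2内核时,实际生效的是与之等价的ARM( add pc, r10, #PROCINFO_INITFUNC )那一句),比较容易被忽视,一旦忽视了,后面的__enable_mmu以及__turn_mmu_on就会对r0寄存器存在疑问,为什么其中保存的是协处理器控制寄存器的值。首先,r10指向匹配到的proc_info_list结构,PROCINFO_INITFUNC是其中__cpu_flush成员的偏移;需要注意的是,该成员中存放的并不是函数地址,而是一条跳转指令(W(b) __v7_ca9mp_setup),所以这里是把pc直接指向这个成员去执行这条跳转指令,从而进入__v7_ca9mp_setup(proc-v7.S中可以找到)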
__v7_ca9mp_setup:
#ifdef CONFIG_SMP
ALT_SMP(mrc p15, 0, r0, c1, c0, 1)
ALT_UP(mov r0, #(1 << 6)) @ fake it for UP
tst r0, #(1 << 6) @ SMP/nAMP mode enabled?
orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and
mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting
#endif
__v7_setup:
adr r12, __v7_setup_stack @ the local stack
stmia r12, {r0-r5, r7, r9, r11, lr}
bl v7_flush_dcache_all
ldmia r12, {r0-r5, r7, r9, r11, lr}
mrc p15, 0, r0, c0, c0, 0 @ read main ID register
and r10, r0, #0xff000000 @ ARM?
teq r10, #0x41000000
bne 3f
and r5, r0, #0x00f00000 @ variant
and r6, r0, #0x0000000f @ revision
orr r6, r6, r5, lsr #20-4 @ combine variant and revision
ubfx r0, r0, #4, #12 @ primary part number
/* Cortex-A9 Errata */
2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number
teq r0, r10
bne 3f
#ifdef CONFIG_ARM_ERRATA_742230
cmp r6, #0x22 @ only present up to r2p2
mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrle r10, r10, #1 << 4 @ set bit #4
mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_742231
teq r6, #0x20 @ present in r2p0
teqne r6, #0x21 @ present in r2p1
teqne r6, #0x22 @ present in r2p2
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 12 @ set bit #12
orreq r10, r10, #1 << 22 @ set bit #22
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_743622
teq r6, #0x20 @ present in r2p0
teqne r6, #0x21 @ present in r2p1
teqne r6, #0x22 @ present in r2p2
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 6 @ set bit #6
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_751472
cmp r6, #0x30 @ present prior to r3p0
mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrlt r10, r10, #1 << 11 @ set bit #11
mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
3: mov r10, #0
#ifdef HARVARD_CACHE
mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
#endif
dsb
#ifdef CONFIG_MMU
mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs
mcr p15, 0, r10, c2, c0, 2 @ TTB control register
ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP)
ALT_UP(orr r4, r4, #TTB_FLAGS_UP)
ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP)
ALT_UP(orr r8, r8, #TTB_FLAGS_UP)
mcr p15, 0, r8, c2, c0, 1 @ load TTB1
ldr r5, =PRRR @ PRRR
ldr r6, =NMRR @ NMRR
mcr p15, 0, r5, c10, c2, 0 @ write PRRR
mcr p15, 0, r6, c10, c2, 1 @ write NMRR
#endif
adr r5, v7_crval
ldmia r5, {r5, r6}
#ifdef CONFIG_CPU_ENDIAN_BE8
orr r6, r6, #1 << 25 @ big-endian page tables
#endif
#ifdef CONFIG_SWP_EMULATE
orr r5, r5, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
mov pc, lr @ return to head.S:__ret
ENDPROC(__v7_setup)
/* AT
* TFR EV X F I D LR S
* .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM
* rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
* 1 0 110 0011 1100 .111 1101 < we want
*/
.type v7_crval, #object
v7_crval:
crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
__v7_setup_stack:
.space 4 * 11 @ 11 registers
在设置了处理器相关的寄存器后,adr r5, v7_crval和ldmia r5, {r5, r6},把需要屏蔽的bit位和需要设置的bit位保存在了r5=0x0120c302和r6=0x10c03c7d中,紧接着读出了cp15协处理器中的c1寄存器到r0,将r5中的位清掉,r6中的位设置上(注意MMU_ENABLE位bit0也在此处被设置上了),之后的使能就围绕了r0进行了其他位操作,最终将赋值好的r0写回cp15的c1寄存器即可。
/*
 * __enable_mmu - final preparation before the MMU is switched on.
 *
 * Adjusts individual bits of the control-register value held in r0 according
 * to the kernel configuration, loads the domain access register and the page
 * table pointer, then branches to __turn_mmu_on. r0 is only modified here;
 * it is not written to the control register by this routine.
 *
 * In:       r0 = control register value being prepared
 *           r4 = value to load as the page table pointer
 * Clobbers: r5
 */
__enable_mmu:
#ifdef CONFIG_ALIGNMENT_TRAP
orr r0, r0, #CR_A @ alignment trap configured: set the 'A' bit
#else
bic r0, r0, #CR_A @ otherwise clear the 'A' bit
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CR_C @ clear CR_C (presumably the D-cache enable bit, going by the config name)
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #CR_Z @ clear CR_Z (presumably the branch-prediction enable bit)
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I @ clear CR_I (presumably the I-cache enable bit)
#endif
@ r5 = domain access value: USER, KERNEL and TABLE as manager, IO as client
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT))
mcr p15, 0, r5, c3, c0, 0 @ load domain access register
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
b __turn_mmu_on @ r0 is passed on unchanged from here
主要是设置协处理器p15的c2和c3寄存器,在ARM体系结构与编程.pdf中也有所说明:
之前设置的所有页表地址还有地址中保存的值,最终将页表的首地址r4寄存器赋值给c2寄存器后,MMU就能够根据这个关系来转换虚拟地址和物理地址了。至于开启MMU的操作就很简单了
/*
 * __turn_mmu_on - write r0 to the CP15 control register and continue at the
 * address held in r13.
 *
 * In:       r0  = control register value to write
 *           r13 = address to jump to afterwards
 * Clobbers: r3
 */
__turn_mmu_on:
mov r0, r0 @ no-op (presumably padding around the control register write)
mcr p15, 0, r0, c1, c0, 0 @ write control reg
mrc p15, 0, r3, c0, c0, 0 @ read id reg
mov r3, r3 @ no-op on the value just read; r3 is overwritten on the next line
mov r3, r13 @ r3 = continuation address
mov pc, r3 @ jump to it
只需要操作cp15的c1寄存器即可(ARM体系结构与编程.pdf中也有)
在使能后的跳转便可以用到r13寄存器了(ldr r13, =__mmap_switched)
/*
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags/dtb pointer
* r9 = processor ID
*/
/*
 * __mmap_switched - set up the C environment and enter start_kernel.
 *
 * Steps, driven by the address table in __mmap_switched_data:
 *   1. copy the data segment from __data_loc to _sdata..__bss_start, only
 *      when __data_loc and _sdata differ;
 *   2. zero memory from __bss_start up to _end;
 *   3. load sp and store r9, r1, r2 and the control register values to the
 *      variables named in the table;
 *   4. branch to start_kernel.
 */
__INIT
__mmap_switched:
adr r3, __mmap_switched_data @ r3 = address of the table
ldmia r3!, {r4, r5, r6, r7} @ r4 = __data_loc, r5 = _sdata, r6 = __bss_start, r7 = _end; r3 now points at the next table entry
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6 @ while not yet at __bss_start ...
ldrne fp, [r4], #4 @ ... load one word from the source, post-increment
strne fp, [r5], #4 @ ... store it to the destination, post-increment
bne 1b
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7 @ r6 below _end (unsigned)?
strcc fp, [r6],#4 @ yes: store a zero word and advance r6
bcc 1b
@ Load the second half of the table: r4 = processor_id, r5 = __machine_arch_type,
@ r6 = __atags_pointer, r7 = cr_alignment, sp = init_thread_union + THREAD_START_SP
ARM( ldmia r3, {r4, r5, r6, r7, sp})
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
bic r4, r0, #CR_A @ Clear 'A' bit
stmia r7, {r0, r4} @ Save control register values
b start_kernel @ enter C code
__mmap_switched_data:
.long __data_loc @ r4
.long _sdata @ r5
.long __bss_start @ r6
.long _end @ r7
.long processor_id @ r4
.long __machine_arch_type @ r5
.long __atags_pointer @ r6
.long cr_alignment @ r7
.long init_thread_union + THREAD_START_SP @ sp
.size __mmap_switched_data, . - __mmap_switched_data
__mmap_switched完成的工作如下:
(1)如果有必要则进行data段的拷贝搬移
结合链接脚本可知,拷贝过程为:当__data_loc与_sdata不相等时,将从__data_loc开始的数据逐字拷贝到_sdata到__bss_start之间的区域;两者相等时则不需要拷贝。
(2)清空bss段及fp指针
将__bss_start到_end之间的数据清零:cmp r6, r7比较r6和r7,若r6<r7(无符号小于,cc条件成立),strcc就把fp(值为0)写入r6指向的地址并使r6加4,bcc再跳回标号1继续循环,直到r6到达_end为止。
(3)将r9中保存的处理器ID、r1中保存的machid、r2中保存的bi_boot_params、r0中保存的cp15控制寄存器拷贝到processor_id、__machine_arch_type、__atags_pointer、cr_alignment等全局变量中,并设置sp指针为init_thread_union + THREAD_START_SP,init_thread_union代表的是内核第一个进程,pid = 0,该进程是内核人为造出来的,而不是fork出来的,在linux源码根目录/arch/arm/kernel/的init_task.c中定义了该进程,其中定义了THREAD_SIZE(8KB)的静态栈空间,__mmap_switched将sp设置在了内核第一进程的栈顶部,栈向下生长,因此接下来start_kernel就运行在该内核栈。直到rest_init中调用kernel_thread创建进程kernel_init(kernel_init的pid = 1)。
(4)跳转到start_kernel
__mmap_switched最后跳转到start_kernel开始进入C函数运行环境,这时整个kernel image已经运行在虚拟地址之上,运行地址和链接地址保持了一致,内核运行进入了新时代!
/*
* linux/arch/arm/kernel/init_task.c
*/
#include