整个过程是SPL→U-Boot→Linux。前面我们分析了**SPL调用U-Boot的过程**,接下来再分析一下U-Boot调用Linux的过程。
U-Boot的执行过程同样是从start.S的reset开始。
在_main函数中我们会再次调用board_init_f函数,但是这时调用的函数与SPL阶段的board_init_f函数是不一样的。这里调用的board_init_f函数是在arch/arm/lib/board.c中定义的。
我们挑拣一些重点代码来说明。
gd->mon_len = (ulong)&__bss_end - (ulong)_start;
/*初始化gd->mon_len为U-Boot镜像(代码、数据和BSS段)的大小*/
for (init_fnc_ptr = init_sequence; *init_fnc_ptr; ++init_fnc_ptr) {
if ((*init_fnc_ptr)() != 0) {
hang ();
}
}
/*for循环遍历调用init_sequence中的所有函数,任何一个函数返回非0值都会调用hang()挂起;init_sequence的定义如下*/
init_fnc_t *init_sequence[] = {
arch_cpu_init, /* basic arch cpu dependent setup */
mark_bootstage,
#ifdef CONFIG_OF_CONTROL
fdtdec_check_fdt,
#endif
#if defined(CONFIG_BOARD_EARLY_INIT_F)
board_early_init_f,
#endif
timer_init, /* initialize timer */
#ifdef CONFIG_BOARD_POSTCLK_INIT
board_postclk_init,
#endif
#ifdef CONFIG_FSL_ESDHC
get_clocks,
#endif
env_init, /* initialize environment */
init_baudrate, /* initialze baudrate settings */
serial_init, /* serial communications setup */
console_init_f, /* stage 1 init of console */
display_banner, /* say that we are here */
print_cpuinfo, /* display cpu info (and speed) */
#if defined(CONFIG_DISPLAY_BOARDINFO)
checkboard, /* display board info */
#endif
#if defined(CONFIG_HARD_I2C) || defined(CONFIG_SYS_I2C)
init_func_i2c,
#endif
dram_init, /* configure available RAM banks */
NULL,
};
在这一系列函数中,我们最关心的是init_baudrate、serial_init、console_init_f和dram_init这几个初始化函数。前面三个函数依次初始化波特率、串口和打印终端。
dram_init函数对gd->ram_size进行初始化,以便board_init_f函数后续的代码对dram的空间进行分配。
#if defined(CONFIG_SYS_MEM_TOP_HIDE)
/*
* Subtract specified amount of memory to hide so that it won't
* get "touched" at all by U-Boot. By fixing up gd->ram_size
* the Linux kernel should now get passed the now "corrected"
* memory size and won't touch it either. This should work
* for arch/ppc and arch/powerpc. Only Linux board ports in
* arch/powerpc with bootwrapper support, that recalculate the
* memory size from the SDRAM controller setup will have to
* get fixed.
*/
gd->ram_size -= CONFIG_SYS_MEM_TOP_HIDE;
#endif
注释表明这个操作是针对powerpc架构的,而我们分析的是ARM平台,这个宏没有被定义,这段代码不起作用,因此我们就无须分析了。
addr = CONFIG_SYS_SDRAM_BASE + get_effective_memsize();
为addr赋值,具体由SDRAM的基址和有效的memsize相加而成,也就是addr为SDRAM的顶端地址。
#ifdef CONFIG_LOGBUFFER
#ifndef CONFIG_ALT_LB_ADDR
/* reserve kernel log buffer */
addr -= (LOGBUFF_RESERVE);
debug("Reserving %dk for kernel logbuffer at %08lx\n", LOGBUFF_LEN,
addr);
#endif
#endif
#ifdef CONFIG_PRAM
/*
* reserve protected RAM
*/
reg = getenv_ulong("pram", 10, CONFIG_PRAM);
addr -= (reg << 10); /* size is in kB */
debug("Reserving %ldk for protected RAM at %08lx\n", reg, addr);
#endif /* CONFIG_PRAM */
#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
/* reserve TLB table */
gd->arch.tlb_size = PGTABLE_SIZE;
addr -= gd->arch.tlb_size;
/* round down to next 64 kB limit */
addr &= ~(0x10000 - 1);
gd->arch.tlb_addr = addr;
debug("TLB table from %08lx to %08lx\n", addr, addr + gd->arch.tlb_size);
#endif
这一段代码为log buffer、pram和TLB table分配地址,因为宏定义都是关闭的,所以我们也不考虑。
/* round down to next 4 kB limit */
addr &= ~(4096 - 1);
debug("Top of RAM usable for U-Boot at: %08lx\n", addr);
如果上面进行了分配,addr可能不再对齐,因此这里将addr的值向下按4KB对齐。
/*
* reserve memory for U-Boot code, data & bss
* round down to next 4 kB limit
*/
addr -= gd->mon_len;
addr &= ~(4096 - 1);
debug("Reserving %ldk for U-Boot at: %08lx\n", gd->mon_len >> 10, addr);
这里为U-Boot的代码、数据和BSS段预留空间,然后再次将addr向下按4KB对齐。
#ifndef CONFIG_SPL_BUILD
/*
* reserve memory for malloc() arena
*/
addr_sp = addr - TOTAL_MALLOC_LEN;
debug("Reserving %dk for malloc() at: %08lx\n",
TOTAL_MALLOC_LEN >> 10, addr_sp);
/*
* (permanently) allocate a Board Info struct
* and a permanent copy of the "global" data
*/
addr_sp -= sizeof (bd_t);
bd = (bd_t *) addr_sp;
gd->bd = bd;
debug("Reserving %zu Bytes for Board Info at: %08lx\n",
sizeof (bd_t), addr_sp);
#ifdef CONFIG_MACH_TYPE
gd->bd->bi_arch_number = CONFIG_MACH_TYPE; /* board id for Linux */
#endif
addr_sp -= sizeof (gd_t);
id = (gd_t *) addr_sp;
debug("Reserving %zu Bytes for Global Data at: %08lx\n",
sizeof (gd_t), addr_sp);
#if defined(CONFIG_OF_SEPARATE) && defined(CONFIG_OF_CONTROL)
/*
* If the device tree is sitting immediate above our image then we
* must relocate it. If it is embedded in the data section, then it
* will be relocated with other data.
*/
if (gd->fdt_blob) {
fdt_size = ALIGN(fdt_totalsize(gd->fdt_blob) + 0x1000, 32);
addr_sp -= fdt_size;
new_fdt = (void *)addr_sp;
debug("Reserving %zu Bytes for FDT at: %08lx\n",
fdt_size, addr_sp);
}
#endif
#ifndef CONFIG_ARM64
/* setup stackpointer for exeptions */
gd->irq_sp = addr_sp;
#ifdef CONFIG_USE_IRQ
addr_sp -= (CONFIG_STACKSIZE_IRQ+CONFIG_STACKSIZE_FIQ);
debug("Reserving %zu Bytes for IRQ stack at: %08lx\n",
CONFIG_STACKSIZE_IRQ+CONFIG_STACKSIZE_FIQ, addr_sp);
#endif
/* leave 3 words for abort-stack */
addr_sp -= 12;
/* 8-byte alignment for ABI compliance */
addr_sp &= ~0x07;
#else /* CONFIG_ARM64 */
/* 16-byte alignment for ABI compliance */
addr_sp &= ~0x0f;
#endif /* CONFIG_ARM64 */
#else
addr_sp += 128; /* leave 32 words for abort-stack */
gd->irq_sp = addr_sp;
#endif
debug("New Stack Pointer is: %08lx\n", addr_sp);
首先预留malloc len,这里预定义的长度为TOTAL_MALLOC_LEN。TOTAL_MALLOC_LEN的定义在include/common.h中。
注释说明,为bd、gd做一个永久的copy:留出全局信息bd_t结构体的空间,首地址保存在gd->bd中;留出gd_t结构体的空间,首地址保存在id中。然后将此时的addr_sp保存在gd->irq_sp中作为异常栈指针。在U-Boot中我们没有用到中断。最后为abort stack留出12字节。
到这里,addr_sp值就确定了,总结一下addr_sp之上的空间分配,由高到低有:addr→malloc len→bd len→gd len→12 byte→addr_sp(栈往下增长,addr_sp之下的空间作为栈空间)。
最后一部分代码如下:
gd->bd->bi_baudrate = gd->baudrate;
/* Ram ist board specific, so move it to board code ... */
dram_init_banksize();
display_dram_config(); /* and display it */
gd->relocaddr = addr;
gd->start_addr_sp = addr_sp;
gd->reloc_off = addr - (ulong)&_start;
debug("relocation Offset is: %08lx\n", gd->reloc_off);
if (new_fdt) {
memcpy(new_fdt, gd->fdt_blob, fdt_size);
gd->fdt_blob = new_fdt;
}
首先将bd->bi_baudrate赋值为gd->baudrate,gd->baudrate在前面的init_baudrate中已经完成了初始化。
dram_init_banksize()是需要实现的板级函数。根据板上ddrc获取ddr的bank信息,并填充在gd->bd->bi_dram[CONFIG_NR_DRAM_BANKS]中。
gd->relocaddr为目标addr,gd->start_addr_sp为目标addr_sp,gd->reloc_off为目标addr与现在实际代码起始地址的偏移。reloc_off非常重要,后面在_main中会用它来计算重定位后的返回地址,以配合relocate_code函数完成代码的复制和跳转。
最后将gd结构体的数据复制到新的地址id上。
board_init_f函数将SDRAM空间重新进行了划分,可以看出栈空间和堆空间是分开的。
至此,board_init_f结束,回到_main函数。 因为在U-Boot中没有CONFIG_SPL_BUILD的定义,所以我们会在arch/arm/lib/crt0.S里的_main函数中调用relocate_code代码。
#if ! defined(CONFIG_SPL_BUILD)
/*
* Set up intermediate environment (new sp and gd) and call
* relocate_code(addr_moni). Trick here is that we'll return
* 'here' but relocated.
*/
ldr sp, [r9, #GD_START_ADDR_SP] /* sp = gd->start_addr_sp */
bic sp, sp, #7 /* 8-byte alignment for ABI compliance */
ldr r9, [r9, #GD_BD] /* r9 = gd->bd */
sub r9, r9, #GD_SIZE /* new GD is below bd */
adr lr, here
ldr r0, [r9, #GD_RELOC_OFF] /* r0 = gd->reloc_off */
add lr, lr, r0
ldr r0, [r9, #GD_RELOCADDR] /* r0 = gd->relocaddr */
b relocate_code
here:
/* Set up final (full) environment */
bl c_runtime_cpu_setup /* we still call old routine here */
ldr r0, =__bss_start /* this is auto-relocated! */
ldr r1, =__bss_end /* this is auto-relocated! */
mov r2, #0x00000000 /* prepare zero to clear BSS */
clbss_l:cmp r0, r1 /* while not at end of BSS */
strlo r2, [r0] /* clear 32-bit BSS word */
addlo r0, r0, #4 /* move to next */
blo clbss_l
bl coloured_LED_init
bl red_led_on
/* call board_init_r(gd_t *id, ulong dest_addr) */
mov r0, r9 /* gd_t */
ldr r1, [r9, #GD_RELOCADDR] /* dest_addr */
/* call board_init_r */
ldr pc, =board_init_r /* this is auto-relocated! */
/* we should not return here. */
#endif
我们先来看第一部分代码:
/*
* Set up intermediate environment (new sp and gd) and call
* relocate_code(addr_moni). Trick here is that we'll return
* 'here' but relocated.
*/
ldr sp, [r9, #GD_START_ADDR_SP] /* sp = gd->start_addr_sp */
bic sp, sp, #7 /* 8-byte alignment for ABI compliance */
ldr r9, [r9, #GD_BD] /* r9 = gd->bd */
sub r9, r9, #GD_SIZE /* new GD is below bd */
adr lr, here
ldr r0, [r9, #GD_RELOC_OFF] /* r0 = gd->reloc_off */
add lr, lr, r0
ldr r0, [r9, #GD_RELOCADDR] /* r0 = gd->relocaddr */
b relocate_code
这段注释写得很清晰,建立中间环境(新的sp和gd),然后调用relocate_code(addr_moni)。注意,从relocate_code返回时已经是重定位过的here地址了。
(这个过程在前面的那幅图中也展示过)
前4条汇编实现了新gd结构体的更新:首先更新sp,并且将sp 8字节对齐,以便于后面函数开辟的新的调用栈能对齐,然后获取gd->bd地址到r9中,需要注意,在board_init_f中gd->bd已经更新为新分配的bd了,下一条汇编将r9减掉gd的SIZE,这样就获取到了board_init_f中新分配的gd了。
后面的汇编则是为relocate_code做准备:首先加载here的地址,然后加上重定位偏移量并保存到lr中,得到的就是代码重定位后新的here地址;relocate_code返回时会跳转到lr,也就是新位置的here。
最后在r0中保存代码的新地址,并跳转到relocate_code。relocate_code函数在arch/arm/lib/relocate.S中实现:
ENTRY(relocate_code)
ldr r1, =__image_copy_start /* r1 <- SRC &__image_copy_start */
subs r4, r0, r1 /* r4 <- relocation offset */
beq relocate_done /* skip relocation */
ldr r2, =__image_copy_end /* r2 <- SRC &__image_copy_end */
copy_loop:
ldmia r1!, {r10-r11} /* copy from source address [r1] */
stmia r0!, {r10-r11} /* copy to target address [r0] */
cmp r1, r2 /* until source end address [r2] */
blo copy_loop
/*
* fix .rel.dyn relocations
*/
ldr r2, =__rel_dyn_start /* r2 <- SRC &__rel_dyn_start */
ldr r3, =__rel_dyn_end /* r3 <- SRC &__rel_dyn_end */
fixloop:
ldmia r2!, {r0-r1} /* (r0,r1) <- (SRC location,fixup) */
and r1, r1, #0xff
cmp r1, #23 /* relative fixup */
bne fixnext
/* relative fix: increase location by offset */
add r0, r0, r4
ldr r1, [r0]
add r1, r1, r4
str r1, [r0]
fixnext:
cmp r2, r3
blo fixloop
relocate_done:
#ifdef __XSCALE__
/*
* On xscale, icache must be invalidated and write buffers drained,
* even with cache disabled - 4.2.7 of xscale core developer's manual
*/
mcr p15, 0, r0, c7, c7, 0 /* invalidate icache */
mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */
#endif
/* ARMv4- don't know bx lr but the assembler fails to see that */
#ifdef __ARM_ARCH_4__
mov pc, lr
#else
bx lr
#endif
ENDPROC(relocate_code)
relocate_code函数分成两个部分。第一个部分为:
ldr r1, =__image_copy_start /* r1 <- SRC &__image_copy_start */
subs r4, r0, r1 /* r4 <- relocation offset */
beq relocate_done /* skip relocation */
ldr r2, =__image_copy_end /* r2 <- SRC &__image_copy_end */
copy_loop:
ldmia r1!, {r10-r11} /* copy from source address [r1] */
stmia r0!, {r10-r11} /* copy to target address [r0] */
cmp r1, r2 /* until source end address [r2] */
blo copy_loop
第一个部分为复制,即将__image_copy_start和__image_copy_end之间的数据复制到新的地址处。__image_copy_start和__image_copy_end在arch/arm/cpu/u-boot.lds中定义。 来看看u-boot.lds的代码:
#include <config.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm")
OUTPUT_ARCH(arm)
ENTRY(_start)
SECTIONS
{
. = 0x00000000;
. = ALIGN(4);
.text :
{
*(.__image_copy_start)
*(.vectors)
CPUDIR/start.o (.text*)
*(.text*)
}
#ifdef CONFIG_ARMV7_NONSEC
#ifndef CONFIG_ARMV7_SECURE_BASE
#define CONFIG_ARMV7_SECURE_BASE
#endif
.__secure_start : {
. = ALIGN(0x1000);
*(.__secure_start)
}
.secure_text CONFIG_ARMV7_SECURE_BASE :
AT(ADDR(.__secure_start) + SIZEOF(.__secure_start))
{
*(._secure.text)
}
. = LOADADDR(.__secure_start) +
SIZEOF(.__secure_start) +
SIZEOF(.secure_text);
__secure_end_lma = .;
.__secure_end : AT(__secure_end_lma) {
*(.__secure_end)
LONG(0x1d1071c); /* Must output something to reset LMA */
}
#endif
. = ALIGN(4);
.rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
. = ALIGN(4);
.data : {
*(.data*)
}
. = ALIGN(4);
. = .;
. = ALIGN(4);
.u_boot_list : {
KEEP(*(SORT(.u_boot_list*)));
}
. = ALIGN(4);
.image_copy_end :
{
*(.__image_copy_end)
}
.rel_dyn_start :
{
*(.__rel_dyn_start)
}
.rel.dyn : {
*(.rel*)
}
.rel_dyn_end :
{
*(.__rel_dyn_end)
}
.end :
{
*(.__end)
}
__image_copy_start和__image_copy_end之间包含了.text段、.rodata段、.data段和.u_boot_list段。在这里可以看到.rel.dyn段跟在__image_copy_end之后,也就是说不会复制.rel.dyn段。
代码的第二部分才是真正完成重定位的代码:
/*
* fix .rel.dyn relocations
*/
ldr r2, =__rel_dyn_start /* r2 <- SRC &__rel_dyn_start */
ldr r3, =__rel_dyn_end /* r3 <- SRC &__rel_dyn_end */
fixloop:
ldmia r2!, {r0-r1} /* (r0,r1) <- (SRC location,fixup) */
and r1, r1, #0xff
cmp r1, #23 /* relative fixup */
bne fixnext
/* relative fix: increase location by offset */
add r0, r0, r4
ldr r1, [r0]
add r1, r1, r4
str r1, [r0]
fixnext:
cmp r2, r3
blo fixloop
relocate_done:
首先在r2中保存__rel_dyn_start,在r3中保存__rel_dyn_end。然后在这个地址范围内进行遍历:每次从r2指向的地址处取连续两个字(共8个字节)的值存放在r0和r1中,取r1中的低8位值,并与23(即0x17)比较,如果相等,表明是需要重定位的数据。因为在.rel.dyn段中每一个rel条目(8个字节)的第二个4字节的低8位如果是0x17,则表明其类型是R_ARM_RELATIVE。如果是需要重定位的类型,则先将r0中的地址加上偏移量r4得到复制后的新位置,再把该位置上保存的值也加上r4并写回,从而完成重定位。
完成重定位之后,我们重新回到_main函数中,剩下的操作就很简单了,首先调用c_runtime_cpu_setup函数,然后清除重定位后的BSS段,最后调用arch/arm/lib/board.c文件中的board_init_r函数。该函数会进行很多初始化操作,在这里就不一一分析了,在函数最后调用main_loop函数。
board_init_r函数代码片段
697 /* main_loop() can return to retry autoboot, if so just run it again. */
698 for (;;) {
699 main_loop();
700 }
在main_loop函数中将会调用process_boot_delay函数,这个函数会有一个倒计时:
Hit any key to stop autoboot: 3
如果在定义时间内没有按键,那么就自动去引导系统,其思路和SPL类似,在此留给读者去分析;如果有按键按下,就进入U-Boot的命令行,在里面可以键入“help”查看所有可用的命令。
(难怪 uboot源码中还需要解析命令行参数)
到这里就分析完了U-Boot的整个启动流程。对于这个部分,我后面打算做个流程图来梳理一下这个过程,敬请期待。下一步该回到TEEOS了。
参考资料:
《深入理解BootLoader》