linux kernel启动代码分析

本文分析基于mips架构,linux-3.3.8

一.kernel入口

首先通过链接脚本找到kernel的入口

1.内核是压缩过的

/arch/mips/boot/compressed/Makefile  ,将$(obj)/head.o $(obj)/decompress.o $(obj)/dbg.o  $(obj)/piggy.o 链接为vmlinuz,链接脚本为ld.script
# Build vmlinuz from ld.script and the objects in $(vmlinuzobjs-y):
# the recipe runs the "zld" command and then the "strip" command.
vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr
    $(call cmd,zld)
    $(call cmd,strip)

vmlinux.bin.z采用objcopy工具,作为数据打包为piggy.o
# Package the compressed image as data inside piggy.o: objcopy adds
# vmlinux.bin.z as a section named .image with the flags
# contents, alloc, load, readonly, data.
targets += piggy.o
OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
                        --set-section-flags=.image=contents,alloc,load,readonly,data
$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
    $(call if_changed,objcopy)

将vmlinux.bin采用压缩工具压缩为vmlinux.bin.z
# Each line defines tool_<value of the config option>; the option that is
# set to "y" therefore defines tool_y, which names the compressor to use.
tool_$(CONFIG_KERNEL_GZIP)    = gzip
tool_$(CONFIG_KERNEL_BZIP2)   = bzip2
tool_$(CONFIG_KERNEL_LZMA)    = lzma
tool_$(CONFIG_KERNEL_LZO)     = lzo

# Compress vmlinux.bin into vmlinux.bin.z with the tool selected in tool_y.
targets += vmlinux.bin.z
$(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
    $(call if_changed,$(tool_y))

可见在内核被压缩时,入口位于 arch/mips/boot/compressed/head.S

2.内核是未压缩的

顶层Makefile指定了vmlinux的链接脚本:

vmlinux-lds  := arch/$(SRCARCH)/kernel/vmlinux.lds, 由vmlinux.lds.S编译得到

vmlinux.lds:
OUTPUT_ARCH(mips)
ENTRY(kernel_entry)
PHDRS {
text PT_LOAD FLAGS(7); /* RWX */
note PT_NOTE FLAGS(4); /* R__ */
}
  jiffies = jiffies_64;
SECTIONS
{
. = 0x80001000;
/* read-only */
_text = .; /* Text and read-only data */
.text : {
  . = ALIGN(8); *(.text.hot) *(.text) *(.ref.text) *(.devinit.text) *(.devexit.text) *(.cpuinit.text) *(.cpuexit.text) *(.text.unlikely)
  . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .;
  . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .;
  . = ALIGN(8); __kprobes_text_start = .; *(.kprobes.text) __kprobes_text_end = .;
  *(.text.*)
  *(.fixup)
  *(.gnu.warning)
} :text = 0
_etext = .; /* End of text section */

可见入口为kernel_entry

这里只讲解未压缩的情况,因为压缩内核在解压之后,跳转到内核并开始执行的过程与未压缩的情况是一样的。

 (arch/mips/kernel/head.S)
/*
 * kernel_entry: first kernel code executed. It runs the CPU-specific setup
 * macros, jumps to the linked address, zeroes .bss, stores the firmware
 * arguments a0-a3, points $28 and sp at init_thread_union, and finally
 * jumps to start_kernel.
 */
NESTED(kernel_entry, 16, sp)            # kernel entry point

    kernel_entry_setup            # cpu specific setup

    setup_c0_status_pri

    /* We might not get launched at the address the kernel is linked to,
       so we jump there.  */
    PTR_LA    t0, 0f
    jr    t0
0:
    PTR_LA        t0, __bss_start        # clear .bss      // zero the kernel's .bss segment
    LONG_S        zero, (t0)
    PTR_LA        t1, __bss_stop - LONGSIZE
1:
    PTR_ADDIU    t0, LONGSIZE
    LONG_S        zero, (t0)
    bne        t0, t1, 1b

    LONG_S        a0, fw_arg0        # firmware arguments
    LONG_S        a1, fw_arg1
    LONG_S        a2, fw_arg2
    LONG_S        a3, fw_arg3

    MTC0        zero, CP0_CONTEXT    # clear context register
    PTR_LA        $28, init_thread_union        // init_thread_union is a "union thread_union"; register $28 now points at it
    /* Set the SP after an empty pt_regs.  */
    PTR_LI        sp, _THREAD_SIZE - 32 - PT_SIZE
    PTR_ADDU    sp, $28         // sp now points into the stack area of init_thread_union
    back_to_back_c0_hazard   // synchronize CP0 to avoid a CP0 hazard (explained later in this article)
    set_saved_sp    sp, t0, t1    // save sp into the kernelsp variable, i.e. record the kernel-mode stack pointer
    PTR_SUBU    sp, 4 * SZREG        # init stack pointer

    j        start_kernel      // enter the C code: start_kernel in init/main.c
    END(kernel_entry)


kernel_entry_setup的定义位于:

arch/mips/include/asm/mach-brcmstb/kernel-entry-init.h
    /*
     * kernel_entry_setup: stores a0, a2 and a3 into the CFE variables and
     * then calls bmips_enable_xks01.
     */
    .macro kernel_entry_setup
    # save arguments for CFE callback
    sw    a0, cfe_handle     // save a0 to cfe_handle so the kernel can call back into CFE to obtain boot information and other hardware configuration parameters, which are kept in CFE environment variables
    sw    a2, cfe_entry       // save a2 to cfe_entry
    sw    a3, cfe_seal         // save a3 to cfe_seal
    jal    bmips_enable_xks01
    .endm
        /* smp_slave_setup: intentionally empty in this header. */
        .macro  smp_slave_setup
        .endm


arch/mips/kernel/bmips_vec.S
/***********************************************************************
* XKS01 support
* Certain CPUs support extending kseg0 to 1024MB.
***********************************************************************/
/*
 * bmips_enable_xks01: when CONFIG_XKS01 is defined, rewrites a bit field of
 * CP0 register $22 select 3; otherwise it just returns to the caller.
 */
LEAF(bmips_enable_xks01)

#if defined(CONFIG_XKS01)
    /* read CP0 register $22, select 3 */
    mfc0    t0, $22, 3
    li    t1, 0x1ff0
    li    t2, (1 << 12) | (1 << 9)
    /* or followed by xor with the same mask clears every bit in 0x1ff0 */
    or    t0, t1
    xor    t0, t1
    /* then set bits 12 and 9 inside the cleared field */
    or    t0, t2
    mtc0    t0, $22, 3
    BARRIER
#endif /* defined(CONFIG_XKS01) */

    jr    ra

END(bmips_enable_xks01)


setup_c0_status_pri的定义位于:

 (arch/mips/kernel/head.S)
    /*
     * setup_c0_status_pri: invokes setup_c0_status with (ST0_KX, 0) on
     * 64-bit kernels and with (0, 0) otherwise.
     */
    .macro    setup_c0_status_pri
#ifdef CONFIG_64BIT
    setup_c0_status ST0_KX 0
#else
    setup_c0_status 0 0               // the article describes this as clearing the CP0 Status register; NOTE(review): setup_c0_status is not shown here - confirm which bits it really sets/clears
#endif
    .endm


arch/mips/include/asm/hazards.h:   _ehb汇编指令,用于同步c0,防止c0冒险
/* back_to_back_c0_hazard: defined through ASMMACRO with a body of one _ehb instruction. */
ASMMACRO(back_to_back_c0_hazard,
     _ehb
    )

current表示当前进程,我们来看看它的定义:

/* current expands to the task pointer stored in the thread_info returned by current_thread_info(). */
#define get_current() (current_thread_info()->task)
#define current get_current()

/* Register variable bound to $28, so current_thread_info() is just a read of that register. */
register struct thread_info *__current_thread_info __asm__("$28");
#define current_thread_info()  __current_thread_info

__current_thread_info是一个寄存器变量,$28表示gp寄存器

所以这里的 PTR_LA        $28, init_thread_union 实际上就是把当前进程(current)设置为0号进程。再看栈指针的设置:

 PTR_LI        sp, _THREAD_SIZE - 32 - PT_SIZE

 PTR_ADDU    sp, $28

sp= $28 +  _THREAD_SIZE - 32 - PT_SIZE=$28 +8192-32-sizeof(struct pt_regs)

/*
 * Overlays a thread_info on the same memory as the stack array, whose
 * total size is THREAD_SIZE bytes.
 */
union thread_union {
#ifndef CONFIG_X86
struct thread_info thread_info; /* member is absent when CONFIG_X86 is defined */
#endif
unsigned long stack[THREAD_SIZE/sizeof(long)];

};

/*
 * Low-level per-thread data; it is one of the two members of
 * union thread_union.
 */
struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
unsigned long flags; /* low level flags */
unsigned long tp_value; /* thread pointer */
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */


mm_segment_t addr_limit; /* thread address space:
   0-0xBFFFFFFF for user-thread
   0-0xFFFFFFFF for kernel-thread
*/
struct restart_block restart_block;
struct pt_regs *regs;

};

$28---------→+----------------+          low address

                       |     *task            |

                       |        ...              |

                       |    *regs            |   ----------------------------------+

 grow    ↑        |   xxxxxxxxx      |                                                      |

              |        |  xxxxxxxxx       |                                                      |

sp--------->   | xxxxxxxxx        |                                                       |

                       |  32 byte         |                                                        |

                       | sizeof(pt_regs)|    ← ------------------------------ +

这样sp就切换到了0号进程的内核栈上,设置好栈之后就可以执行c定义的函数了

二.进入start_kernel

/*
 * start_kernel - abridged excerpt of the C entry point jumped to from
 * kernel_entry.
 *
 * Interrupts are disabled with local_irq_disable() before the initialisation
 * calls and re-enabled with local_irq_enable() after time_init(); the
 * function ends by calling rest_init(). The "..." line marks code that the
 * excerpt leaves out.
 *
 * NOTE(review): command_line is used below but its declaration is not part
 * of this excerpt - presumably it was elided together with other lines.
 */
asmlinkage void __init start_kernel(void)
{
boot_init_stack_canary();
local_irq_disable();

tick_init();

setup_arch(&command_line);

build_all_zonelists(NULL);
page_alloc_init();
trap_init();
mm_init();
sched_init();
init_IRQ();
prio_tree_init();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
local_irq_enable();
        ...
/* Do the rest non-__init'ed, we're now alive */
rest_init();

}

/*
 * rest_init - last step of start_kernel(), executed by the boot thread.
 *
 * Spawns kernel_init (the future init process) and then kthreadd, looks up
 * the kthreadd task by the pid returned from kernel_thread() and stores it
 * in kthreadd_task, completes kthreadd_done, lets the boot thread call
 * schedule() once, and finally enters cpu_idle().
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);	/* spawn init, i.e. process 1 */

	/*
	 * Spawn kthreadd and keep its pid. The excerpt had dropped this
	 * assignment, which left pid uninitialised when it is passed to
	 * find_task_by_pid_ns() below.
	 */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);

	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	preempt_enable_no_resched();
	schedule();

	/* Call into cpu_idle with preempt disabled */
	preempt_disable();
	cpu_idle();	/* the boot thread (process 0) stays in the idle loop */
}

/*
 * kernel_init - abridged excerpt of the thread function passed to
 * kernel_thread() in rest_init(); it becomes the init process.
 *
 * The "..." lines mark code that the excerpt leaves out. The visible part
 * runs SMP and basic setup, opens /dev/console and duplicates that
 * descriptor twice, then hands over to init_post().
 */
static int __init kernel_init(void * unused)
{
        ...
smp_init();
sched_init_smp();

do_basic_setup();

/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __force_user *) "/dev/console", O_RDWR, 0) < 0)   // open the console as standard descriptor 0
printk(KERN_WARNING "Warning: unable to open an initial console.\n");

(void) sys_dup(0);  // duplicate it as standard descriptor 1
(void) sys_dup(0);  // duplicate it as standard descriptor 2
        ...
init_post();
return 0;

}

/*
 * init_post - abridged excerpt; the "..." line marks omitted code.
 *
 * Marks the current task's signal struct SIGNAL_UNKILLABLE, then tries the
 * candidate init programs one after another and panics if execution ever
 * gets past all of them.
 */
static noinline int init_post(void)

{

        ...

current->signal->flags |= SIGNAL_UNKILLABLE;  // the init process cannot be killed
run_init_process("/sbin/init");    // order in which the init program is searched for
run_init_process("/etc/init");
run_init_process("/bin/init");
run_init_process("/bin/sh");


panic("No init found.  Try passing init= option to kernel. "  // never reached if an init program was executed successfully
      "See Linux Documentation/init.txt for guidance.");
}

我们现在来看看0号进程即swapper进程都在干些啥:

/*
 * cpu_idle - idle loop executed by process 0 (swapper); never returns.
 *
 * Each outer iteration enters the nohz/RCU idle state, waits (through the
 * cpu_wait hook when one is installed) until a reschedule is needed or the
 * CPU is no longer online, leaves the idle state and calls schedule().
 */
void __noreturn cpu_idle(void)
{
	/* CPU is going idle. */
	int cpu = smp_processor_id();

	/* endless idle loop with no priority at all */
	for (;;) {
		tick_nohz_idle_enter();
		rcu_idle_enter();

		while (!need_resched() && cpu_online(cpu)) {
			/* No wait hook installed: just poll the condition again. */
			if (!cpu_wait)
				continue;

			/* Don't trace irqs off for idle */
			stop_critical_timings();
			(*cpu_wait)();
			start_critical_timings();
		}

		rcu_idle_exit();
		tick_nohz_idle_exit();

		/* schedule() runs with preemption enabled, then it is disabled again. */
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

它是一个while(1)循环:在没有其他进程可运行时,swapper进程开始运行;一旦其他进程准备就绪,就切换到其他进程去运行。那么其他进程从阻塞态到就绪态的转换是在哪里完成的呢?当然是在swapper进程执行过程中发生的中断里——中断处理函数会将等待资源的进程设置为就绪态。

通过以上代码分析可以看出,mips架构的内核初始化是非常简单的,汇编代码要做的工作非常少。内核的入口地址为0x80001000,整个内核代码的地址空间都在KSEG0的覆盖范围内,进行直接地址转换而不需要设置MMU,整个分页系统的初始化都可以用C语言来完成。

你可能感兴趣的:(kernel基础,mips)