Linux进程在运行的时候有不同的状态,可以有用户态、内核态、中断异常状态,用户态由于系统调用等原因可以进入内核态,或者产生外部中断则执行中断流程。同时由于函数的调用需要进行压栈和出栈的操作,cpu硬件以arm为例存在栈指针,arm不同的运行模式下分别有对应的栈操作SP寄存器。
简单的说,linux进程用户态有用户栈,内核态有内核栈,运行于ARM的不同模式上,用户态运行于usr模式,内核态运行于svc模式。Linux进程的描述采用struct task_struct,进程结构体中的内核栈用{……void *stack……}来表示,该指针指向结构体struct thread_info。该结构体大小为8k,按2^13字节对齐。thread_info中有struct cpu_context_save保存各个寄存器的值,thread_info描述的是内核栈。
Linux内核进程只运行在内核态中,因此只有内核栈;用户进程既有用户栈,也有内核栈。用户进程运行的时候运行于arm的usr模式,当有系统调用的时候,arm的运行模式切换到svc,用户模式的寄存器保存到svc模式的栈中,即内核态的上下文thread_info中,也即内核栈中;当系统调用完成,寄存器又恢复到usr状态,该状态下的寄存器也重新从thread_info中恢复。
当内核启动的时候,第一个内核进程INIT_TASK(tsk)运行,最后该进程成为IDLE进程;第二个内核进程为init进程,该进程为linux中所有进程的最顶端的进程,即所有进程树都由他派生出来。在linux系统初始化完成,挂载了根文件系统后,会启动用户进程。
以下具体讲述。
/*
 * Literal table read by __mmap_switched.
 *
 * The words are loaded in two batches of registers; the trailing comment on
 * each entry names the register that receives it.  The last word is the
 * initial stack pointer: init_thread_union + THREAD_START_SP.
 */
	.align	2
	.type	__mmap_switched_data, %object
__mmap_switched_data:
	.long	__data_loc			@ r4
	.long	_sdata				@ r5
	.long	__bss_start			@ r6
	.long	_end				@ r7
	.long	processor_id			@ r4
	.long	__machine_arch_type		@ r5
	.long	__atags_pointer			@ r6
	.long	cr_alignment			@ r7
	.long	init_thread_union + THREAD_START_SP @ sp
	.size	__mmap_switched_data, . - __mmap_switched_data
在系统的启动阶段,在执行start_kernel之前,引导程序会用__mmap_switched_data数据初始化寄存器中的各个值,其中包括SP指针;它指向init_thread_union +THREAD_START_SP,初始化完成后即IDLE进程的环境搭好了,就正式进入start_kernel.
/*
 * INIT_TASK() - excerpt of the static initializer for the first task.
 *
 * Only the member relevant to this discussion is shown: .stack is pointed
 * at init_thread_info.  All other members are elided.
 */
#define INIT_TASK(tsk)							\
{									\
	/* ... other members elided ... */				\
	.stack		= &init_thread_info,				\
	/* ... other members elided ... */				\
}
#define init_thread_info(init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
union thread_union init_thread_union__init_task_data = /、
{INIT_THREAD_INFO(init_task) }; //初始化init_task中的thread_info(或者stack)
/*
 * __mmap_switched - last assembly stage before C code.
 *
 * Walks __mmap_switched_data: optionally copies the data segment, zeroes
 * BSS, loads the initial stack pointer, stores the processor ID, machine
 * type, atags pointer and control-register values, then branches to
 * start_kernel.
 *
 * Register inputs, as used by the stores below:
 *   r0 = control register value, r1 = machine type,
 *   r2 = atags pointer,          r9 = processor ID
 */
	__INIT
__mmap_switched:
	adr	r3, __mmap_switched_data

	ldmia	r3!, {r4, r5, r6, r7}
	cmp	r4, r5				@ Copy data segment if needed
1:	cmpne	r5, r6
	ldrne	fp, [r4], #4
	strne	fp, [r5], #4
	bne	1b

	mov	fp, #0				@ Clear BSS (and zero fp)
1:	cmp	r6, r7
	strcc	fp, [r6], #4
	bcc	1b

 ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
 THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
 THUMB(	ldr	sp, [r3, #16]		)
	str	r9, [r4]			@ Save processor ID
	str	r1, [r5]			@ Save machine type
	str	r2, [r6]			@ Save atags pointer
	bic	r4, r0, #CR_A			@ Clear 'A' bit
	stmia	r7, {r0, r4}			@ Save control register values
	b	start_kernel			@ enter C code from here
ENDPROC(__mmap_switched)
Linux的init进程完成大部分驱动和模块的加载,它在启动时是由idle进程创建出来的一个内核进程。如下:
kernel_thread(kernel_init,NULL, CLONE_FS | CLONE_SIGHAND);其他内核线程的创建大体相似如下。
3.1 kernel_thread的源代码:
pid_t kernel_thread(int(*fn)(void *), void *arg, unsigned long flags)
{
structpt_regs regs;
memset(®s,0, sizeof(regs));
regs.ARM_r4= (unsigned long)arg; //参数
regs.ARM_r5= (unsigned long)fn; //内核进程函数指针
regs.ARM_r6= (unsigned long)kernel_thread_exit;
regs.ARM_r7= SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE;//设置ARM运行模式
regs.ARM_pc = (unsigned long)kernel_thread_helper;
regs.ARM_cpsr= regs.ARM_r7 | PSR_I_BIT;
returndo_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL);
}
/*
 * kernel_thread_helper - entry trampoline for threads made by kernel_thread().
 *
 * Expects r4 = argument, r5 = thread function, r6 = return address and
 * r7 = mode bits, i.e. the values kernel_thread() stores in the register
 * frame.  Writes r7 to cpsr_c, moves the argument into r0, sets lr from r6
 * and jumps to the thread function.
 */
asm(	".pushsection .text\n"
"	.align\n"
"	.type	kernel_thread_helper, #function\n"
"kernel_thread_helper:\n"
#ifdef CONFIG_TRACE_IRQFLAGS
"	bl	trace_hardirqs_on\n"
#endif
"	msr	cpsr_c, r7\n"	/* set the ARM run mode */
"	mov	r0, r4\n"	/* argument */
"	mov	lr, r6\n"
"	mov	pc, r5\n"	/* kernel thread function pointer */
"	.size	kernel_thread_helper, . - kernel_thread_helper\n"
"	.popsection");
接下来讨论do_fork(regs)->copy_process(regs)->copy_thread(regs) //regs是传入的参数
int
copy_thread(unsigned long clone_flags,unsigned long stack_start,
unsigned long stk_sz, struct task_struct*p, struct pt_regs *regs)
{
structthread_info *thread = task_thread_info(p);
structpt_regs *childregs = task_pt_regs(p);//新创建进程的栈顶
*childregs= *regs;//寄存器的值复制入栈
childregs->ARM_r0= 0;
childregs->ARM_sp= stack_start; //这个值?????
memset(&thread->cpu_context,0, sizeof(struct cpu_context_save));
thread->cpu_context.sp= (unsigned long)childregs; //新创建进程的栈顶
thread->cpu_context.pc= (unsigned long)ret_from_fork; //这里的PC值和3.1kernel__thread()中的 regs.ARM_pc = (unsignedlong)kernel_thread_helper;关系没完全搞明白。
clear_ptrace_hw_breakpoint(p);
if(clone_flags & CLONE_SETTLS)
thread->tp_value= regs->ARM_r3;
thread_notify(THREAD_NOTIFY_COPY,thread);
return0;
}
在linux系统初始化的后期,会挂载根文件系统,然后读取文件系统的初始化脚本,启动用户进程。由于是在内核态启动,用户进程启动后,要让进程从内核态进入到用户态。
/*
 * run_init_process() - exec @init_filename via kernel_execve().
 *
 * Stores the path in argv_init[0] and passes argv_init/envp_init as the
 * argument and environment vectors.  The result of kernel_execve() is not
 * examined here.
 */
static void run_init_process(const char *init_filename)
{
	argv_init[0] = init_filename;
	kernel_execve(init_filename, argv_init, envp_init);
}
///////////////////////////////////////////////////////////////////////////////
intkernel_execve(const char *filename,
const char *const argv[],
const char *const envp[])
{
struct pt_regs regs;
int ret;
memset(®s, 0, sizeof(structpt_regs));
ret = do_execve(filename,
(const char __user *const__user *)argv,
(const char __user *const__user *)envp, ®s);
if (ret < 0)
goto out;
/*
*Save argc to the register structure for userspace.
*/
regs.ARM_r0 = ret;
/*
*We were successful. We won't bereturning to our caller, but
*instead to user space by manipulating the kernel stack.
*/
asm( "add r0, %0, %1\n\t"
"mov r1, %2\n\t"
"mov r2, %3\n\t"
"bl memmove\n\t" /*copy regs to top of stack */
"mov r8, #0\n\t" /*not a syscall */
"mov r9, %0\n\t" /*thread structure */
"mov sp, r0\n\t" /*reposition stack pointer */
"b ret_to_user"
:
: "r"(current_thread_info()),
"Ir" (THREAD_START_SP - sizeof(regs)),
"r" (®s),
"Ir" (sizeof(regs))
: "r0", "r1","r2", "r3", "r8", "r9", "ip","lr", "memory");
out:
return ret;
}
用户态程序在创建新的程序时一般采用fork()和execve()来完成。这两个函数分别调用系统调用sys_fork()、sys_execve()。这里面就牵涉到系统调用时寄存器的保存与恢复、函数参数从用户空间传到内核空间,以及函数结果的返回。
do_fork()创建了一个新的进程,进程结构体复制父进程的信息,当调用do_execve()时,新的执行体填充task_struct结构中的mm_struct结构,
/*
 * Excerpt of struct mm_struct; the "……" markers stand for members that the
 * article leaves out.  The members shown are the address-range fields that
 * the ELF loader excerpt in this article fills in.
 */
struct mm_struct
{……
unsigned long start_code, end_code,start_data, end_data;
/* start_stack: address of the user-mode stack of the process */
unsigned long start_brk, brk,start_stack;
unsigned long arg_start, arg_end,env_start, env_end;
……
}
其中unsigned long start_stack是用户进程用户栈的地址。
do_execve()->do_execve_common()->search_binary_handler()->load_binary()
/*
 * Excerpt of load_elf_binary(), the function installed as .load_binary;
 * the "……" markers stand for code that the article leaves out.
 */
.load_binary = load_elf_binary
{
……
/* record the code and data segment bounds in the current mm */
current->mm->end_code = end_code;
current->mm->start_code= start_code;
current->mm->start_data= start_data;
current->mm->end_data =end_data;
/* bprm->p is recorded as the start of the user stack */
current->mm->start_stack= bprm->p;
……
/* set up regs with elf_entry as pc and bprm->p as sp */
start_thread(regs,elf_entry, bprm->p);
……
}
/*
 * start_thread() - initialise a register frame for entry at (pc, sp).
 * @regs: register frame to fill; its uregs array is zeroed first
 * @pc:   entry address; bit 0 selects PSR_T_BIT when elf_hwcap has
 *        HWCAP_THUMB, and is cleared before being stored in ARM_pc
 * @sp:   stack pointer; also read as an array of unsigned long whose
 *        first three words become r0 (argc), r1 (argv) and r2 (envp)
 *
 * cpsr is set to USR_MODE when current->personality has ADDR_LIMIT_32BIT,
 * otherwise to USR26_MODE, then PSR_ENDSTATE is ORed in.
 *
 * Macro arguments are parenthesised so that expression arguments keep
 * their meaning next to the cast and the '&' operators.
 */
#define start_thread(regs,pc,sp)					\
({									\
	unsigned long *stack = (unsigned long *)(sp);			\
	memset((regs)->uregs, 0, sizeof((regs)->uregs));		\
	if (current->personality & ADDR_LIMIT_32BIT)			\
		(regs)->ARM_cpsr = USR_MODE;				\
	else								\
		(regs)->ARM_cpsr = USR26_MODE;				\
	if (elf_hwcap & HWCAP_THUMB && (pc) & 1)			\
		(regs)->ARM_cpsr |= PSR_T_BIT;				\
	(regs)->ARM_cpsr |= PSR_ENDSTATE;				\
	(regs)->ARM_pc = (pc) & ~1;	/* pc */			\
	(regs)->ARM_sp = (sp);		/* sp */			\
	(regs)->ARM_r2 = stack[2];	/* r2 (envp) */			\
	(regs)->ARM_r1 = stack[1];	/* r1 (argv) */			\
	(regs)->ARM_r0 = stack[0];	/* r0 (argc) */			\
	nommu_start_thread(regs);					\
})