Linux内核进程管理-基本数据结构分析

/*********************************************************************/
/*
 * task_struct简介
 * 内核把进程的列表存放在称为任务队列(task list)的双向循环链表中.链表中的每一项
 * 都是类型为task_struct,称为进程描述符(process descriptor)的结构,该结构定义在
 * <linux/sched.h>中.task_struct包含一个具体进程的所有信息.
 * task_struct比较大,在32位机器上,它大约有1.7KB.
 * 主要包含下列信息:
 * 进程的地址空间,挂起的信号,进程的状态,等
 * 
 * 参考文章:
 * http://blog.csdn.net/npy_lp/article/details/7292563
 * http://zyj177484.blog.163.com/blog/static/183794231201221610379193/
 * http://blog.csdn.net/jurrah/article/details/3965437
 * http://www.ibm.com/developerworks/cn/linux/l-linux-process-management/
 * http://www.embedu.org/Column/Column366.htm
 */
/*********************************************************************/

/*task_struct 进程描述符*/
/*
 * Process descriptor: one instance per task, holding the scheduling state,
 * address space, credentials, signal state, open files and accounting data
 * that the fields below declare.
 */
struct task_struct {
	/* Run state of the process. */
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	/*
	 * stack: the process kernel stack.
	 *
	 * A process obtains its kernel stack through alloc_thread_info() and
	 * releases it through free_thread_info().  (The header name was lost
	 * from the original note; presumably <asm/thread_info.h> - TODO confirm.)
	 * thread_info is a second per-process kernel structure, stored at the
	 * end of the process kernel stack; its 'task' field points back to the
	 * task_struct of the task.
	 * The kernel stack itself is represented by union thread_union
	 * (presumably defined in <linux/sched.h> - TODO confirm).
	 */
	void *stack;
	atomic_t usage;
	unsigned int flags;	/* per process flags, defined below */
	unsigned int ptrace;

	int lock_depth;		/* BKL lock depth */

#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
	int oncpu;
#endif
#endif

	/*
	 * Task priorities lie in [0, MAX_PRIO-1]: real-time tasks use
	 * [0, MAX_RT_PRIO-1] and non-real-time tasks use
	 * [MAX_RT_PRIO, MAX_PRIO-1].  A smaller value means a higher
	 * priority, i.e. the task is picked by the scheduler earlier.
	 *
	 * prio        - current dynamic priority; this value determines the
	 *               scheduling order.
	 * normal_prio - regular priority, computed from static_prio and the
	 *               scheduling policy.
	 * static_prio - static priority assigned when the process is created;
	 *               it influences the length of the time slice and the
	 *               dynamic priority of non-real-time tasks.
	 * rt_priority - real-time priority: 0 for ordinary tasks, [1, 99] for
	 *               real-time tasks; a larger value means higher priority.
	 *
	 * Real-time tasks: prio = normal_prio = MAX_RT_PRIO - 1 - rt_priority
	 * Ordinary tasks:  prio = normal_prio = static_prio
	 * NOTE(review): the original note listed these two cases the other way
	 * round; verify against normal_prio() in kernel/sched.c.
	 *
	 * prio is boosted temporarily while a real-time mutex is in use and
	 * falls back to normal_prio once it is released.
	 */
	int prio, static_prio, normal_prio;
	const struct sched_class *sched_class;
	/* Scheduling entity; used to account the run time of the process. */
	struct sched_entity se;

	struct sched_rt_entity rt;

#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* list of struct preempt_notifier: */
	struct hlist_head preempt_notifiers;
#endif

	/*
	 * fpu_counter contains the number of consecutive context switches
	 * that the FPU is used. If this is over a threshold, the lazy fpu
	 * saving becomes unlazy to save the trap. This is an unsigned char
	 * so that after 256 times the counter wraps and the behavior turns
	 * lazy again; this to deal with bursty apps that only use FPU for
	 * a short time
	 */
	unsigned char fpu_counter;
	s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
#ifdef CONFIG_BLK_DEV_IO_TRACE
	unsigned int btrace_seq;
#endif

	/*
	 * Scheduling policy: real-time FIFO/RR, or OTHER for ordinary tasks.
	 */
	unsigned int policy;
	cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
	int rcu_read_lock_nesting;
	int rcu_flipctr_idx;
#endif /* #ifdef CONFIG_PREEMPT_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
	struct sched_info sched_info;
#endif

	struct list_head tasks;
	/*
	 * ptrace_list/ptrace_children forms the list of my children
	 * that were stolen by a ptracer.
	 */
	struct list_head ptrace_children;
	struct list_head ptrace_list;

	/*
	 * mm holds the memory descriptor used by the process; it is NULL for
	 * kernel threads.
	 * active_mm holds the currently active memory descriptor: a kernel
	 * thread stores the mm of the previously running process here and
	 * borrows it as a temporary address space while it runs.
	 */
	struct mm_struct *mm, *active_mm;

/* task state */
	struct linux_binfmt *binfmt;

	/*
	 * exit_state: exit state of the process.
	 */
	int exit_state;

	int exit_code, exit_signal;
	int pdeath_signal;  /*  The signal sent when the parent dies  */
	/* ??? */
	unsigned int personality;
	unsigned did_exec:1;
	/*
	 * pid: process identifier.
	 */
	pid_t pid;
	/*
	 * tgid: thread group identifier (not a process-group id).
	 * POSIX requires every thread of a multithreaded application to report
	 * the same PID.  In Linux all threads of a thread group use the pid of
	 * the group leader as their thread group id, stored in tgid.
	 * Note: the getpid() system call returns tgid, not pid.
	 */
	pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR
	/* Canary value for the -fstack-protector gcc feature */
	unsigned long stack_canary;
#endif
	/* 
	 * pointers to (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with 
	 * p->parent->pid)
	 */
	/*
	 * real_parent is the actual parent, parent is the current parent; the
	 * two are only distinguished while the task is being debugged.
	 */
	struct task_struct *real_parent; /* real parent process (when being debugged) */
	struct task_struct *parent;	/* parent process */
	/*
	 * children/sibling forms the list of my children plus the
	 * tasks I'm ptracing.
	 */
	struct list_head children;	/* list of my children */
	struct list_head sibling;	/* linkage in my parent's children list */
	struct task_struct *group_leader;	/* threadgroup leader */

	/* PID/PID hash table linkage. */
	struct pid_link pids[PIDTYPE_MAX];
	/*
	 * List linking the threads of this thread group.
	 */
	struct list_head thread_group;

	struct completion *vfork_done;		/* for vfork() */
	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */

	unsigned int rt_priority;
	cputime_t utime, stime, utimescaled, stimescaled;
	cputime_t gtime;
	cputime_t prev_utime, prev_stime;
	unsigned long nvcsw, nivcsw; /* context switch counts */
	/*
	 * Time at which the process was created.
	 */
	struct timespec start_time; 		/* monotonic time */
	/*
	 * Creation time of the process measured from system boot.
	 */
	struct timespec real_start_time;	/* boot based time */
    /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
	/*
	 * Accumulated counts of minor page faults (min_flt) and major page
	 * faults (maj_flt) taken by the process.
	 */
	unsigned long min_flt, maj_flt;

  	cputime_t it_prof_expires, it_virt_expires;
	unsigned long long it_sched_expires;
	struct list_head cpu_timers[3];

    /* process credentials */
	/*
	 * uid/gid     - user and group id of the user running the process.
	 * euid/egid   - effective uid/gid.
	 * fsuid/fsgid - filesystem uid/gid, normally equal to euid/egid; these
	 *               are the ids used when checking the process's access
	 *               rights to the filesystem.
	 * suid/sgid   - saved (backup) uid/gid.
	 */
	uid_t uid,euid,suid,fsuid;
	gid_t gid,egid,sgid,fsgid;
	struct group_info *group_info;
	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
	unsigned keep_capabilities:1;
	struct user_struct *user;
#ifdef CONFIG_KEYS
	struct key *request_key_auth;	/* assumed request_key authority */
	struct key *thread_keyring;	/* keyring private to this thread */
	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
#endif
	char comm[TASK_COMM_LEN]; /* executable name excluding path
				     - access with [gs]et_task_comm (which lock
				       it with task_lock())
				     - initialized normally by flush_old_exec */
/* file system info */
	int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
	struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
/* hung task detection */
	unsigned long last_switch_timestamp;
	unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
	/*
	 * Saved state of the process; the concrete layout depends on the CPU
	 * architecture.  It keeps the task-state-segment related contents
	 * used by the kernel.
	 */
	struct thread_struct thread;
/* filesystem information */
	/*
	 * Filesystem information of the process.
	 */
	struct fs_struct *fs;
/* open file information */
	/*
	 * Table of open files.
	 */
	struct files_struct *files;
/* namespaces */
	struct nsproxy *nsproxy;
/* signal handlers */
	struct signal_struct *signal;
	struct sighand_struct *sighand;

	sigset_t blocked, real_blocked;
	sigset_t saved_sigmask;		/* To be restored with TIF_RESTORE_SIGMASK */
	struct sigpending pending;

	unsigned long sas_ss_sp;
	size_t sas_ss_size;
	int (*notifier)(void *priv);
	void *notifier_data;
	sigset_t *notifier_mask;
#ifdef CONFIG_SECURITY
	void *security;
#endif
	struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
	uid_t loginuid;
	unsigned int sessionid;
#endif
	seccomp_t seccomp;

/* Thread group tracking */
   	u32 parent_exec_id;
   	u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
	spinlock_t alloc_lock;

	/* Protection of the PI data structures: */
	spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task */
	struct plist_head pi_waiters;
	/* Deadlock detection and priority inheritance handling */
	struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	/* mutex deadlock detection */
	struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned int irq_events;
	int hardirqs_enabled;
	unsigned long hardirq_enable_ip;
	unsigned int hardirq_enable_event;
	unsigned long hardirq_disable_ip;
	unsigned int hardirq_disable_event;
	int softirqs_enabled;
	unsigned long softirq_disable_ip;
	unsigned int softirq_disable_event;
	unsigned long softirq_enable_ip;
	unsigned int softirq_enable_event;
	int hardirq_context;
	int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
	u64 curr_chain_key;
	int lockdep_depth;
	struct held_lock held_locks[MAX_LOCK_DEPTH];
	unsigned int lockdep_recursion;
#endif

/* journalling filesystem info */
	void *journal_info;

/* stacked block device info */
	struct bio *bio_list, **bio_tail;

/* VM state */
	struct reclaim_state *reclaim_state;

	struct backing_dev_info *backing_dev_info;

	struct io_context *io_context;

	unsigned long ptrace_message;
	siginfo_t *last_siginfo; /* For ptrace use.  */
#ifdef CONFIG_TASK_XACCT
/* i/o counters(bytes read/written, #syscalls */
	u64 rchar, wchar, syscr, syscw;
#endif
	struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
	u64 acct_rss_mem1;	/* accumulated rss usage */
	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
	cputime_t acct_stimexpd;/* stime since last update */
#endif
#ifdef CONFIG_NUMA
  	struct mempolicy *mempolicy;
	short il_next;
#endif
#ifdef CONFIG_CPUSETS
	nodemask_t mems_allowed;
	int cpuset_mems_generation;
	int cpuset_mem_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	/* Control Group info protected by css_set_lock */
	struct css_set *cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock */
	struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
	struct compat_robust_list_head __user *compat_robust_list;
#endif
	struct list_head pi_state_list;
	struct futex_pi_state *pi_state_cache;
#endif
	atomic_t fs_excl;	/* holding fs exclusive resources */
	struct rcu_head rcu;

	/*
	 * cache last used pipe for splice
	 */
	struct pipe_inode_info *splice_pipe;
#ifdef	CONFIG_TASK_DELAY_ACCT
	struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
	int make_it_fail;
#endif
	struct prop_local_single dirties;
#ifdef CONFIG_LATENCYTOP
	int latency_record_count;
	struct latency_record latency_record[LT_SAVECOUNT];
#endif
};


/*********************************************************************/
/*
 * thread_info简介
 * 这是一个相对于task_struct结构要小很多的一个结构,
 * 每个内核线程都有一个thread_info结构,当进程从用户态陷入内核后,可以由
 * thread_info中的task域指针来找到进程的task_struct.
 */
/*********************************************************************/
/*
 * Low-level per-task bookkeeping: flags, preemption count, a back pointer
 * to the owning task_struct and the saved CPU/coprocessor state declared
 * below.
 */
struct thread_info {
	unsigned long		flags;		/* low level flags */
	int			preempt_count;	/* 0 => preemptable, <0 => bug */
	mm_segment_t		addr_limit;	/* address limit */
	/*
	 * task_struct of the main task this thread_info belongs to.
	 */
	struct task_struct	*task;		/* main task structure */
	/*
	 * Execution domain.  default_exec_domain is the default one (the
	 * file name was lost from the original note; presumably
	 * kernel/exec_domain.c - TODO confirm).
	 */
	struct exec_domain	*exec_domain;	/* execution domain */
	__u32			cpu;		/* cpu */
	__u32			cpu_domain;	/* cpu domain */
	/*
	 * Saved CPU context; its members are a set of CPU registers.
	 */
	struct cpu_context_save	cpu_context;	/* cpu context */
	__u32			syscall;	/* syscall number */
	__u8			used_cp[16];	/* thread used copro */
	unsigned long		tp_value;
	struct crunch_state	crunchstate;
	union fp_state		fpstate __attribute__((aligned(8)));
	union vfp_state		vfpstate;
	struct restart_block	restart_block;
};

/*********************************************************************/
/*
 * thread_union简介
 * 内核栈的数据结构表示.内核栈是向下生长的,
 * 内核线程描述符thread_info位于内核栈所在内存区域的最低地址处(即栈向下生长的尽头),
 * 由于内核栈空间比thread_info结构体大很多,因此这样安排可以有效防止内存重叠.
 * 出于效率考虑,内核让8KB的内核栈占据两个连续的页框并让第一个页框的起始地址
 * 是2^13的倍数.
 */
/*********************************************************************/
/*
 * Kernel stack storage for one task: the thread_info and the stack words
 * overlay the same THREAD_SIZE-byte region, so thread_info sits at the
 * start (lowest address) of the stack area.
 */
union thread_union {
	/* Per-task low-level info, overlaid on the start of the region. */
	struct thread_info thread_info;
	/* The whole region viewed as THREAD_SIZE bytes' worth of longs. */
	unsigned long stack[THREAD_SIZE/sizeof(long)];
};

/*********************************************************************/
/*
 * current_thread_info简介
 * 下面这段代码是current_thread_info在ARM上的实现.
 * 由于内核栈起始地址是2^13的整数倍,因此在内核态,
 * 把当前SP的低13位清零(即SP & ~0x1fff,对应代码中的sp & ~(THREAD_SIZE - 1))
 * 得到的地址就是内核栈的基地址,
 * 即存放thread_info的地址.
 */
/*********************************************************************/
/*
 * Return the thread_info of the running task.
 *
 * The current stack pointer is rounded down to a THREAD_SIZE boundary by
 * clearing its low bits; the resulting address is returned as the
 * thread_info pointer.  This relies on the kernel stack being THREAD_SIZE
 * aligned with thread_info at its lowest address (see union thread_union).
 */
static inline struct thread_info *current_thread_info(void)
{
	/* Bind a local to the hardware stack pointer register. */
	register unsigned long sp __asm__("sp");
	/* Mask that clears the offset of sp within the stack region. */
	const unsigned long stack_base_mask = ~(THREAD_SIZE - 1);

	return (struct thread_info *)(sp & stack_base_mask);
}

你可能感兴趣的:(Linux,内核与驱动)