/*********************************************************************/
/*
 * Overview of task_struct
 * The kernel keeps the list of processes in a doubly linked list
 * called the task list. Each element of that list is a structure of
 * type task_struct, known as the process descriptor, defined in
 * <linux/sched.h>. task_struct holds all the information about one
 * specific process.
 * task_struct is fairly large: on a 32-bit machine it is roughly
 * 1.7KB. Among other things it records:
 * the process's address space, pending signals, the process state, etc.
 * (A traversal sketch follows the structure definition below.)
 *
 * References:
 * http://blog.csdn.net/npy_lp/article/details/7292563
 * http://zyj177484.blog.163.com/blog/static/183794231201221610379193/
 * http://blog.csdn.net/jurrah/article/details/3965437
 * http://www.ibm.com/developerworks/cn/linux/l-linux-process-management/
 * http://www.embedu.org/Column/Column366.htm
 */
/*********************************************************************/
/* task_struct: the process descriptor */
struct task_struct {
/* the process's run state */
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
/* stack: the process's kernel stack */
/*
 * A process's kernel stack is allocated with alloc_thread_info() and
 * freed with free_thread_info(); both are defined per architecture
 * (typically in <asm/thread_info.h>).
 * thread_info is another per-process kernel structure; it lives at the
 * end (the low-address side) of the process's kernel stack.
 * The task field inside thread_info points back to the task's actual
 * task_struct.
 * The Linux kernel stack is represented by the union thread_union,
 * defined in <linux/sched.h>.
 */
void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
int lock_depth; /* BKL lock depth */
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
#endif
/*
 * The kernel defines process priorities in the range [0, MAX_PRIO-1],
 * split into a real-time part [0, MAX_RT_PRIO-1] and a non-real-time
 * part [MAX_RT_PRIO, MAX_PRIO-1].
 * A smaller value means a higher priority, i.e. the task is scheduled
 * earlier by the kernel.
 * prio is the task's current dynamic priority; it determines the
 * scheduling order.
 * normal_prio is the task's normal priority, computed from static_prio
 * and the scheduling policy.
 * static_prio is the static priority assigned when the process is
 * created; it affects the length of the timeslice and the dynamic
 * priority calculation of non-real-time tasks.
 * rt_priority is the real-time priority: 0 means an ordinary task,
 * [1,99] a real-time task, and a larger value means a higher priority.
 * For ordinary tasks:  prio = normal_prio = static_prio
 * For real-time tasks: prio = normal_prio = MAX_RT_PRIO - 1 - rt_priority
 * prio can be boosted temporarily while the task holds an rt-mutex
 * (priority inheritance) and reverts to normal_prio afterwards.
 * (A sketch of these rules follows the structure definition below.)
 */
int prio, static_prio, normal_prio;
const struct sched_class *sched_class;
/* se: scheduler entity, used to account the process's run time */
struct sched_entity se;
struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
* saving becomes unlazy to save the trap. This is an unsigned char
* so that after 256 times the counter wraps and the behavior turns
* lazy again; this to deal with bursty apps that only use FPU for
* a short time
*/
unsigned char fpu_counter;
s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
/*
 * scheduling policy: SCHED_FIFO/SCHED_RR for real-time tasks,
 * SCHED_NORMAL (SCHED_OTHER) otherwise
 */
unsigned int policy;
cpumask_t cpus_allowed;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
int rcu_flipctr_idx;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
struct list_head tasks;
/*
* ptrace_list/ptrace_children forms the list of my children
* that were stolen by a ptracer.
*/
struct list_head ptrace_children;
struct list_head ptrace_list;
/*
 * mm holds the memory descriptor of the address space the process
 * uses; for kernel threads it is NULL.
 * active_mm holds the currently active memory descriptor: a kernel
 * thread stores the previous task's mm here and runs on that borrowed
 * address space. (See the lazy-mm sketch after this structure.)
 */
struct mm_struct *mm, *active_mm;
/* task state */
struct linux_binfmt *binfmt;
/*
 * exit_state: the process's exit state
 */
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
/*
 * pid: the process identifier
 */
pid_t pid;
/*
 * tgid: thread group identifier
 * POSIX requires that all threads of a multithreaded application share
 * the same PID. In Linux, every thread in a thread group stores the
 * PID of the group's leader thread in its tgid field and uses it as
 * the thread group id.
 * Note: the getpid() system call returns tgid, not pid.
 */
pid_t tgid;
#ifdef CONFIG_CC_STACKPROTECTOR
/* Canary value for the -fstack-protector gcc feature */
unsigned long stack_canary;
#endif
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->parent->pid)
*/
/*
 * real_parent is the actual parent process; parent is the parent as
 * currently recorded (the tracer while the task is ptraced). The two
 * differ only while the task is being debugged.
 */
struct task_struct *real_parent; /* real parent process (when being debugged) */
struct task_struct *parent; /* parent process */
/*
* children/sibling forms the list of my children plus the
* tasks I'm ptracing.
*/
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];
/*
 * list of threads in the task's thread group
 */
struct list_head thread_group;
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
unsigned int rt_priority;
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
cputime_t prev_utime, prev_stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
/*
 * time the process was created (monotonic clock)
 */
struct timespec start_time; /* monotonic time */
/*
 * the process's actual creation time, measured relative to system boot
 */
struct timespec real_start_time; /* boot based time */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
/*
 * cumulative counts of the process's minor (min_flt) and major
 * (maj_flt) page faults
 */
unsigned long min_flt, maj_flt;
cputime_t it_prof_expires, it_virt_expires;
unsigned long long it_sched_expires;
struct list_head cpu_timers[3];
/* process credentials */
/*
 * uid/gid: user and group IDs of the user running the process
 * euid/egid: effective uid/gid
 * fsuid/fsgid: filesystem uid/gid, normally equal to euid/egid;
 * used when checking the process's permission to access the filesystem
 * suid/sgid: saved uid/gid
 */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
struct group_info *group_info;
kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
unsigned keep_capabilities:1;
struct user_struct *user;
#ifdef CONFIG_KEYS
struct key *request_key_auth; /* assumed request_key authority */
struct key *thread_keyring; /* keyring private to this thread */
unsigned char jit_keyring; /* default keyring to attach requested keys to */
#endif
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
- initialized normally by flush_old_exec */
/* file system info */
int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
/* hung task detection */
unsigned long last_switch_timestamp;
unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
/*
 * describes the task's saved processor state; the concrete layout
 * depends on the CPU architecture. It stores the task-state-segment
 * related contents the kernel needs across context switches.
 */
struct thread_struct thread;
/* filesystem information */
/*
 * fs: the process's filesystem context (root and current working
 * directory)
 */
struct fs_struct *fs;
/* open file information */
/*
 * files: the process's table of open files
 */
struct files_struct *files;
/* namespaces */
struct nsproxy *nsproxy;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
sigset_t blocked, real_blocked;
sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */
struct sigpending pending;
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
#ifdef CONFIG_SECURITY
void *security;
#endif
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
unsigned int sessionid;
#endif
seccomp_t seccomp;
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock;
/* Protection of the PI data structures: */
spinlock_t pi_lock;
#ifdef CONFIG_RT_MUTEXES
/* PI waiters blocked on a rt_mutex held by this task */
struct plist_head pi_waiters;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
int hardirqs_enabled;
unsigned long hardirq_enable_ip;
unsigned int hardirq_enable_event;
unsigned long hardirq_disable_ip;
unsigned int hardirq_disable_event;
int softirqs_enabled;
unsigned long softirq_disable_ip;
unsigned int softirq_disable_event;
unsigned long softirq_enable_ip;
unsigned int softirq_enable_event;
int hardirq_context;
int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
struct held_lock held_locks[MAX_LOCK_DEPTH];
unsigned int lockdep_recursion;
#endif
/* journalling filesystem info */
void *journal_info;
/* stacked block device info */
struct bio *bio_list, **bio_tail;
/* VM state */
struct reclaim_state *reclaim_state;
struct backing_dev_info *backing_dev_info;
struct io_context *io_context;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
#ifdef CONFIG_TASK_XACCT
/* i/o counters (bytes read/written, #syscalls) */
u64 rchar, wchar, syscr, syscw;
#endif
struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
cputime_t acct_stimexpd;/* stime since last update */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed;
int cpuset_mems_generation;
int cpuset_mem_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
struct css_set *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
/*
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
struct prop_local_single dirties;
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
};
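/*
 * Illustrative sketch (not part of the kernel source above): walking
 * the task list mentioned in the header comment. for_each_process(),
 * rcu_read_lock()/rcu_read_unlock() and printk() are real kernel
 * interfaces; the function name example_dump_tasks is made up for
 * this illustration.
 */
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>

static void example_dump_tasks(void)
{
	struct task_struct *p;

	rcu_read_lock();		/* the task list must be walked under RCU (or tasklist_lock) */
	for_each_process(p) {		/* iterates over every thread-group leader */
		printk(KERN_INFO "pid=%d tgid=%d comm=%s prio=%d static_prio=%d\n",
		       p->pid, p->tgid, p->comm, p->prio, p->static_prio);
	}
	rcu_read_unlock();
}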
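/*
 * Simplified sketch of the priority rules documented at the
 * prio/static_prio/normal_prio fields. It mirrors the kernel's
 * normal_prio() logic but is not the kernel's exact code; the function
 * name is made up.
 */
static int example_normal_prio(struct task_struct *p)
{
	/* real-time policies: a larger rt_priority maps to a smaller (higher) prio */
	if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
		return MAX_RT_PRIO - 1 - p->rt_priority;
	/* ordinary tasks keep their static priority */
	return p->static_prio;
}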
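/*
 * Simplified sketch of the mm/active_mm borrowing described at those
 * fields, modeled on context_switch() in kernel/sched.c (needs
 * <asm/mmu_context.h> for enter_lazy_tlb()/switch_mm()); the function
 * name is made up and the surrounding context-switch work is omitted.
 */
static void example_switch_mm_part(struct task_struct *prev, struct task_struct *next)
{
	struct mm_struct *oldmm = prev->active_mm;

	if (!next->mm) {				/* kernel thread: no address space of its own */
		next->active_mm = oldmm;		/* borrow the previous task's address space */
		atomic_inc(&oldmm->mm_count);		/* keep the borrowed mm alive */
		enter_lazy_tlb(oldmm, next);
	} else {
		switch_mm(oldmm, next->mm, next);	/* ordinary task: switch page tables */
	}
}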
/*********************************************************************/
/*
* Overview of thread_info
* This structure is much smaller than task_struct.
* Every task has a thread_info; once a process has trapped from user
* space into the kernel, its task_struct can be reached through the
* task pointer inside thread_info.
*/
/*********************************************************************/
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
/*
 * the task_struct of the task that owns this thread_info
 */
struct task_struct *task; /* main task structure */
/*
 * execution domain; the default, default_exec_domain, is defined in
 * kernel/exec_domain.c
 */
struct exec_domain *exec_domain; /* execution domain */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
/*
 * the saved CPU context; its members are a set of CPU registers
 */
struct cpu_context_save cpu_context; /* cpu context */
__u32 syscall; /* syscall number */
__u8 used_cp[16]; /* thread used copro */
unsigned long tp_value;
struct crunch_state crunchstate;
union fp_state fpstate __attribute__((aligned(8)));
union vfp_state vfpstate;
struct restart_block restart_block;
};
/*********************************************************************/
/*
* Overview of thread_union
* The data structure that represents the kernel stack. The kernel
* stack grows downward, and the thread descriptor thread_info is
* placed at the bottom (lowest addresses) of the stack area; because
* the stack area is much larger than thread_info, this layout makes it
* unlikely that the growing stack runs into thread_info.
* For efficiency the kernel gives the 8KB kernel stack two consecutive
* page frames and aligns the first frame's start address on a multiple
* of 2^13. (Helper sketches follow the union below.)
*/
/*********************************************************************/
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
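/*
 * Illustrative helpers modeled on the kernel's task_thread_info() and
 * end_of_stack(): task->stack points at the thread_union, thread_info
 * occupies its lowest addresses, and the usable stack grows down from
 * the top of the THREAD_SIZE area toward it. The example_ names are
 * made up for this sketch.
 */
static inline struct thread_info *example_task_thread_info(struct task_struct *tsk)
{
	return (struct thread_info *)tsk->stack;
}

static inline unsigned long *example_end_of_stack(struct task_struct *tsk)
{
	/* the first word above thread_info; if it gets overwritten, the stack has overflowed */
	return (unsigned long *)(example_task_thread_info(tsk) + 1);
}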
/*********************************************************************/
/*
* Overview of current_thread_info
* The code below is the ARM implementation of current_thread_info().
* Because the kernel stack's start address is a multiple of 2^13
* (THREAD_SIZE is 8KB here), masking off the low 13 bits of the
* current SP while in kernel mode (sp & ~0x1fff, i.e.
* sp & ~(THREAD_SIZE - 1)) yields the base address of the kernel
* stack, which is where thread_info is stored.
* (A sketch of how current builds on this follows the function.)
*/
/*********************************************************************/
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
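/*
 * Sketch of how the current task is reached from here: thread_info's
 * task field points back at the task_struct, which is essentially what
 * the ARM <asm/current.h> get_current()/current pair does. The
 * example_ name is made up for this sketch.
 */
static inline struct task_struct *example_get_current(void)
{
	return current_thread_info()->task;
}
/* the kernel itself wraps this as:  #define current (get_current()) */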