Linux进程数据结构详解

Linux系统中无论是进程还是线程都统一叫任务,有一个统一的结构task_struct进行管理。

linnux中关于task_struct的定义如下

Struct task_struct{
	/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long			state;
	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void				*stack;
	atomic_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;
	int				on_rq;
	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;

	const struct sched_class	*sched_class;
	struct sched_entity		se;
	struct sched_rt_entity		rt;
	struct sched_dl_entity		dl;

	unsigned int			policy;
	int				nr_cpus_allowed;
	cpumask_t			cpus_allowed;
	struct sched_info		sched_info;
	struct list_head		tasks;
	struct mm_struct		*mm;
	struct mm_struct		*active_mm;
	/* Per-thread vma caching: */
	struct vmacache			vmacache;
	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;
	/* Scheduler bits, serialized by scheduler locks: */
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;
	unsigned			sched_remote_wakeup:1;
	/* Force alignment to the next boundary: */
	unsigned			:0;

	/* Unserialized, strictly 'current' */

	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;
	unsigned long			atomic_flags; /* Flags requiring atomic access. */
	struct restart_block		restart_block;
	pid_t				pid;
	pid_t				tgid;
	/* Real parent process: */
	struct task_struct __rcu	*real_parent;

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;
	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;
	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;

	/* PID/PID hash table linkage. */
	struct pid			*thread_pid;
	struct hlist_node		pid_links[PIDTYPE_MAX];
	struct list_head		thread_group;
	struct list_head		thread_node;
	struct completion		*vfork_done;
	/* CLONE_CHILD_SETTID: */
	int __user			*set_child_tid;
	/* CLONE_CHILD_CLEARTID: */
	int __user			*clear_child_tid;
	u64				utime;
	u64				stime;
	u64				gtime;
	struct prev_cputime		prev_cputime;

	/* Context switch counts: */
	unsigned long			nvcsw;
	unsigned long			nivcsw;
	/* Monotonic time in nsecs: */
	u64				start_time;
	/* Boot based time in nsecs: */
	u64				real_start_time;
	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
	unsigned long			min_flt;
	unsigned long			maj_flt;
	/* Process credentials: */
	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): */
	const struct cred __rcu		*real_cred;

	/* Effective (overridable) subjective task credentials (COW): */
	const struct cred __rcu		*cred;

	/*
	 * executable name, excluding path.
	 *
	 * - normally initialized setup_new_exec()
	 * - access it with [gs]et_task_comm()
	 * - lock it with task_lock()
	 */
	char				comm[TASK_COMM_LEN];

	struct nameidata		*nameidata;
	/* Filesystem information: */
	struct fs_struct		*fs;

	/* Open file information: */
	struct files_struct		*files;

	/* Namespaces: */
	struct nsproxy			*nsproxy;

	/* Signal handlers: */
	struct signal_struct		*signal;
	struct sighand_struct		*sighand;
	sigset_t			blocked;
	sigset_t			real_blocked;
	/* Restored if set_restore_sigmask() was used: */
	sigset_t			saved_sigmask;
	struct sigpending		pending;
	unsigned long			sas_ss_sp;
	size_t				sas_ss_size;
	unsigned int			sas_ss_flags;

	struct callback_head		*task_works;

	struct audit_context		*audit_context;
	struct seccomp			seccomp;

	/* Thread group tracking: */
	u32				parent_exec_id;
	u32				self_exec_id;

	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;

	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;

	struct wake_q_node		wake_q;
	/* Journalling filesystem info: */
	void				*journal_info;

	/* Stacked block device info: */
	struct bio_list			*bio_list;
	/* VM state: */
	struct reclaim_state		*reclaim_state;
	struct backing_dev_info		*backing_dev_info;

	struct io_context		*io_context;
	/* Ptrace state: */
	unsigned long			ptrace_message;
	siginfo_t			*last_siginfo;

	struct task_io_accounting	ioac;
	
	struct tlbflush_unmap_batch	tlb_ubc;

	struct rcu_head			rcu;

	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;

	struct page_frag		task_frag;

#ifdef CONFIG_FAULT_INJECTION
	int				make_it_fail;
	unsigned int			fail_nth;
#endif
	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;

#ifdef CONFIG_LATENCYTOP
	int				latency_record_count;
	struct latency_record		latency_record[LT_SAVECOUNT];
#endif
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;

#ifdef CONFIG_KASAN
	unsigned int			kasan_depth;
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* Index of current stored address in ret_stack: */
	int				curr_ret_stack;

	/* Stack of return addresses for return function tracing: */
	struct ftrace_ret_stack		*ret_stack;

	/* Timestamp for last schedule: */
	unsigned long long		ftrace_timestamp;

	/*
	 * Number of functions that haven't been traced
	 * because of depth overrun:
	 */
	atomic_t			trace_overrun;

	/* Pause tracing: */
	atomic_t			tracing_graph_pause;
#endif

#ifdef CONFIG_TRACING
	/* State flags for use by tracers: */
	unsigned long			trace;

	/* Bitmask and counter of trace recursion: */
	unsigned long			trace_recursion;
#endif /* CONFIG_TRACING */

#ifdef CONFIG_KCOV
	/* Coverage collection mode enabled for this task (0 if disabled): */
	unsigned int			kcov_mode;

	/* Size of the kcov_area: */
	unsigned int			kcov_size;

	/* Buffer for coverage collection: */
	void				*kcov_area;

	/* KCOV descriptor wired with this task or NULL: */
	struct kcov			*kcov;
#endif

#ifdef CONFIG_MEMCG
	struct mem_cgroup		*memcg_in_oom;
	gfp_t				memcg_oom_gfp_mask;
	int				memcg_oom_order;

	/* Number of pages to reclaim on returning to userland: */
	unsigned int			memcg_nr_pages_over_high;

	/* Used by memcontrol for targeted memcg charge: */
	struct mem_cgroup		*active_memcg;
#endif

#ifdef CONFIG_BLK_CGROUP
	struct request_queue		*throttle_queue;
#endif

#ifdef CONFIG_UPROBES
	struct uprobe_task		*utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
	unsigned int			sequential_io;
	unsigned int			sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
	unsigned long			task_state_change;
#endif
	int				pagefault_disabled;
#ifdef CONFIG_MMU
	struct task_struct		*oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACK
	struct vm_struct		*stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* A live task holds one reference: */
	atomic_t			stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
	int patch_state;
#endif
#ifdef CONFIG_SECURITY
	/* Used by LSM modules for access restriction: */
	void				*security;
#endif

	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
	randomized_struct_fields_end

	/* CPU-specific state of this task: */
	struct thread_struct		thread;

	/*
	 * WARNING: on x86, 'thread_struct' contains a variable-sized
	 * structure.  It *MUST* be at the end of 'task_struct'.
	 *
	 * Do not put anything below here!
	 */
}

下面将逐个介绍重要的成员变量

任务链表

struct list_head		tasks;//进程链表,将所有的进程串起来

进程ID

task_struct中涉及ID 的有下面几个

	pid_t				pid;//process id
	pid_t				tgid;//thread group id
	struct task_struct		*group_leader;

信号处理

关于信号处理的字段如下:

	/* Signal handlers: */
	struct signal_struct		*signal;
	struct sighand_struct		*sighand;//正通过信号处理函数进行处理的信号
	sigset_t			blocked;//被阻塞不处理的信号
	sigset_t			real_blocked;
	/* Restored if set_restore_sigmask() was used: */
	sigset_t			saved_sigmask;
	struct sigpending		pending;//尚等待处理的信号
	unsigned long			sas_ss_sp;
	size_t				sas_ss_size;
	unsigned int			sas_ss_flags;

进程状态

在task_struct中涉及任务状态的是下面几个变量

/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long			state;
	unsigned int			flags;
	int				exit_state;

其中其中state的可以取值定义在/include/linux/sched.h头文件中

/* Used in tsk->state: */
#define TASK_RUNNING			0x0000
#define TASK_INTERRUPTIBLE		0x0001
#define TASK_UNINTERRUPTIBLE		0x0002
#define __TASK_STOPPED			0x0004
#define __TASK_TRACED			0x0008
/* Used in tsk->exit_state: */
#define EXIT_DEAD			0x0010
#define EXIT_ZOMBIE			0x0020
#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->state again: */
#define TASK_PARKED			0x0040
#define TASK_DEAD			0x0080
#define TASK_WAKEKILL			0x0100
#define TASK_WAKING			0x0200
#define TASK_NOLOAD			0x0400
#define TASK_NEW			0x0800
#define TASK_STATE_MAX			0x1000
#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED			(TASK_WAKEKILL | __TASK_TRACED)

#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

其中
TASK_RUNNING: 并不是说进程正在运行,而是表示进程时刻准备运行的状态。
TASK_INTERRUPTIBLE:表示可以中断的睡眠状态,这是一种浅睡眠状态,尽管是在睡眠当等待的时间没有到来这时候又有信号来的时候进程还是会被唤醒,唤醒后不是继续原来的动作而是响应信号处理。
TASK_UNINTERRUPTIBLE: 是不可中断的睡眠状态,深度睡眠状态不乐可以被信号唤醒,只能死等要等待的事件到来,linux系统中应尽量避免这种状态的出现。
TASK_KILLABLE:则是可以终止的睡眠状态
TASK_STOPPED:是进程收到SIGSTOP、SIGTTIN、SIGTSTP或者SIGTTOU信号之后进入该状态
TASK_TRACED:表示进程被debugger等进程监视,进程执行被调试程序所停止的状态。

进程调度

进程调度相关的字段主要如下

int				on_rq;//是否在运行队列上
//优先级相关
int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;
//调度器类
	const struct sched_class	*sched_class;
	//调度实体
	struct sched_entity		se;
	struct sched_rt_entity		rt; 	
struct sched_dl_entity		dl;
//调度策略
unsigned int			policy;
//可以使用那些CPU
int				nr_cpus_allowed;
cpumask_t			cpus_allowed;
struct sched_info		sched_info;


运行统计信息

进程的运行需要一些统计量,这些信息包括了进程在用户态和内核态消耗的时间,上下文切换的次数等,具体信息如下:

	u64				utime;//用户态消耗的CPU时间
	u64				stime;//内核态消耗的CPU时间
/* Context switch counts: */
	unsigned long			nvcsw;//自愿上下文切换的次数
	unsigned long			nivcsw;//非自愿上下文切换的次数
	/* Monotonic time in nsecs: */
	u64				start_time;//进程启动时间,不包含睡眠时间
	/* Boot based time in nsecs: */
	u64				real_start_time;//进程启动时间包含了睡眠时间

进程亲缘关系

每一个进程都有一个父进程,同样也会有兄弟进程。进程亲缘关系变量如下:

/* Real parent process: */
	struct task_struct __rcu	*real_parent;

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;
	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;

parent指向其父进程,当它终止时,必须向父进程发送信息
children表示链表的头部,链表中所有的元素都是该进程的子进程
sibling用于把当前进程插入到兄弟链表中
通常情况下real_parent和parent是一样的。
进程亲缘关系可以用下图来表示

Linux进程数据结构详解_第1张图片

进程权限

在linux中,对于进程权限的定义如下:

/* Process credentials: */
	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): */
	const struct cred __rcu		*real_cred;
	/* Effective (overridable) subjective task credentials (COW): */
	const struct cred __rcu		*cred;

上面的注释信息中Objective表示谁能操作我,real_cred就是说明谁能操作我这个进程;
cred就是说明我这个进程能操作谁。该结构体的详细定义如下:

Struct crud{
	atomic_t	usage;
	kuid_t		uid;		/* real UID of the task */
	kgid_t		gid;		/* real GID of the task */
	kuid_t		suid;		/* saved UID of the task */
	kgid_t		sgid;		/* saved GID of the task */
	kuid_t		euid;		/* effective UID of the task */
	kgid_t		egid;		/* effective GID of the task */
	kuid_t		fsuid;		/* UID for VFS ops */
	kgid_t		fsgid;		/* GID for VFS ops */
	unsigned	securebits;	/* SUID-less security management */
	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
	kernel_cap_t	cap_permitted;	/* caps we're permitted */
	kernel_cap_t	cap_effective;	/* caps we can actually use */
	kernel_cap_t	cap_bset;	/* capability bounding set */
	kernel_cap_t	cap_ambient;	/* Ambient capability set */
	struct user_struct *user;	/* real user ID subscription */
	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
	struct rcu_head	rcu;		/* RCU deletion hook */
}

该结构体大部分是关于用户和用于所属的用户组的信息。

内存管理

每个进程都有自己独立的虚拟内存空间,这需要一个数据结构来表示,这个结构就是mm_struct。

		struct mm_struct		*mm;
		struct mm_struct		*active_mm;

文件与文件系统

每个进程都一个文件系统的数据结构,还有一个打开文件的数据结构,具体如下

	/* Filesystem information: */
	struct fs_struct		*fs;
	/* Open file information: */
	struct files_struct		*files;

总结

进程数据结构比较复杂 下面的结构图清晰的描述了进程数据结构各个主要字段的意义

Linux进程数据结构详解_第2张图片

你可能感兴趣的:(linux系统)