一、冯诺依曼体系结构:
是一种常见的计算机体系结构。
二、操作系统:
操作系统是一款管理计算机软硬件资源的软件,包括内核(进程管理、内存管理、文件管理、驱动管理)和其他程序(库函数、shell程序等)。
三、进程
四、PCB
/*
 * struct task_struct - the Linux process descriptor (PCB).
 *
 * One instance exists per task. It groups scheduling state, identity,
 * family relationships, memory, file-system, signal, accounting and
 * debugging/tracing information. Many members are only present when the
 * corresponding CONFIG_* option is enabled, so every conditional section
 * must be closed by its own #endif directive on a separate line.
 */
struct task_struct {
    /* Run state: -1 unrunnable, 0 runnable (ready), >0 stopped. */
    volatile long state;
    /* Kernel stack of the task. */
    void *stack;
    /* Reference count on this structure. */
    atomic_t usage;
    /* Per-process flags; they describe the process but are not the run state. */
    unsigned int flags;
    /* ptrace flags; 0 means the task is not being traced. */
    unsigned int ptrace;
    /* BKL lock depth. */
    int lock_depth;
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
    /* Helps implement lock-free context switching on SMP. */
    int oncpu;
#endif
#endif
    /*
     * prio: dynamic priority; static_prio: static priority;
     * normal_prio: derived from the static priority and the scheduling policy.
     */
    int prio, static_prio, normal_prio;
    /* Priority of a real-time task. */
    unsigned int rt_priority;
    /* Scheduling class: the scheduler functions that apply to this task. */
    const struct sched_class *sched_class;
    /* Scheduling entity. */
    struct sched_entity se;
    /* Real-time scheduling entity. */
    struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* List of struct preempt_notifier (preemption related). */
    struct hlist_head preempt_notifiers;
#endif
    /*
     * fpu_counter contains the number of consecutive context switches
     * that the FPU is used. If this is over a threshold, the lazy fpu
     * saving becomes unlazy to save the trap. This is an unsigned char
     * so that after 256 times the counter wraps and the behavior turns
     * lazy again; this to deal with bursty apps that only use FPU for
     * a short time
     */
    unsigned char fpu_counter;
#ifdef CONFIG_BLK_DEV_IO_TRACE
    /* Used by blktrace, a tracing tool for the block-device I/O layer. */
    unsigned int btrace_seq;
#endif
    /* Scheduling policy. */
    unsigned int policy;
    /* Bitmap of the CPUs this task may run on (multi-core systems). */
    cpumask_t cpus_allowed;
    /* RCU synchronization primitive state. */
#ifdef CONFIG_TREE_PREEMPT_RCU
    int rcu_read_lock_nesting;
    char rcu_read_unlock_special;
    struct rcu_node *rcu_blocked_node;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    /* Scheduling statistics, e.g. time spent on a CPU or waiting in a run queue. */
    struct sched_info sched_info;
#endif
    /* Links this task into the list of all tasks. */
    struct list_head tasks;
    struct plist_node pushable_tasks;
    /*
     * mm is the memory descriptor owned by the task; active_mm is the one
     * it uses while running. For an ordinary process both are the same.
     * A kernel thread owns no memory descriptor, so its mm is always NULL;
     * when it runs, active_mm is set to the active_mm of the previously
     * running task.
     */
    struct mm_struct *mm, *active_mm;
    /* task state */
    /* State of the task while it is exiting. */
    int exit_state;
    /*
     * exit_code is the termination code: either the argument of _exit() or
     * exit_group() (normal termination) or an error code supplied by the
     * kernel (abnormal termination). exit_signal is -1 for a member of a
     * thread group; a signal notifying the parent of the thread-group
     * leader is only raised when the last member of the group terminates.
     */
    int exit_code, exit_signal;
    /* Signal sent to this task when its parent dies. */
    int pdeath_signal;
    /* Handles the different ABIs of the various Unix flavours. */
    unsigned int personality;
    /*
     * POSIX-related: tells whether the task still runs its original code
     * or new code loaded through execve.
     */
    unsigned did_exec:1;
    /* Tells the LSMs that the task is inside do_execve(). */
    unsigned in_execve:1;
    /* Whether the task is counted as waiting for I/O. */
    unsigned in_iowait:1;
    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    /* Process ID. */
    pid_t pid;
    /* Thread group ID. */
    pid_t tgid;
#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively. (p->father can be replaced with
     * p->real_parent->pid)
     */
    /*
     * The process that created this task; if that process no longer
     * exists, this points to init (PID 1).
     */
    struct task_struct *real_parent;
    /*
     * The parent that must be signalled when this task terminates.
     * Usually the same as real_parent.
     */
    struct task_struct *parent;
    /*
     * children/sibling forms the list of my natural children
     */
    /* Head of the list of this task's children. */
    struct list_head children;
    /* Links this task into its parent's list of children. */
    struct list_head sibling;
    /* Leader of the thread group this task belongs to. */
    struct task_struct *group_leader;
    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;
    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;
    struct completion *vfork_done; /* for vfork() */
    int __user *set_child_tid; /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
    /*
     * utime is the time spent in user mode, stime the time spent in
     * kernel mode. utimescaled/stimescaled record the same times scaled
     * by the processor frequency.
     */
    cputime_t utime, stime, utimescaled, stimescaled;
    /* Guest time (time spent running a virtual machine), counted in ticks. */
    cputime_t gtime;
    /* Previously reported user/system times. */
    cputime_t prev_utime, prev_stime;
    /*
     * Voluntary / involuntary context-switch counts.
     * last_switch_count is the sum of nvcsw and nivcsw.
     */
    unsigned long nvcsw, nivcsw;
    /*
     * Both are the creation time of the task. real_start_time also
     * includes time spent sleeping and is commonly used for
     * /proc/pid/stat.
     */
    struct timespec start_time; /* monotonic time */
    struct timespec real_start_time; /* boot based time */
    /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;
    /*
     * Tracked CPU time of the process or process group; its three members
     * correspond to the three lists in cpu_timers[3].
     */
    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];
    /* Process credentials; see the comments in the definition of struct cred. */
    const struct cred __rcu *real_cred; /* objective and real subjective task credentials (COW) */
    const struct cred __rcu *cred; /* effective (overridable) subjective task credentials (COW) */
    struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
    /*
     * executable name excluding path
     * - access with [gs]et_task_comm (which lock it with task_lock())
     * - initialized normally by setup_new_exec
     */
    char comm[TASK_COMM_LEN];
    /* file system info */
    /* Symbolic-link counters used during path lookup (not hard-link counts). */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
    /* ipc stuff (inter-process communication) */
    struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
    /* hung task detection */
    unsigned long last_switch_count;
#endif
    /*
     * CPU-specific state of this task. task_struct itself is
     * architecture independent; thread_struct holds the
     * architecture-specific part.
     */
    struct thread_struct thread;
    /* filesystem information */
    struct fs_struct *fs;
    /* open file information */
    struct files_struct *files;
    /* namespaces */
    struct nsproxy *nsproxy;
    /* signal handlers */
    /* Signal descriptor of the process. */
    struct signal_struct *signal;
    /* Signal-handler descriptor of the process. */
    struct sighand_struct *sighand;
    /* blocked: mask of blocked signals; real_blocked: temporary mask. */
    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask;
    /* Private pending signals. */
    struct sigpending pending;
    /* Address of the alternate signal-handler stack. */
    unsigned long sas_ss_sp;
    /* Size of the alternate signal-handler stack. */
    size_t sas_ss_size;
    /*
     * Although signal handling takes place in the kernel, the installed
     * signal handlers run in user mode - otherwise, it would be very easy
     * to introduce malicious or faulty code into the kernel and undermine
     * the system security mechanisms. Generally, signal handlers use the
     * user mode stack of the process in question. However, POSIX mandates
     * the option of running signal handlers on a stack set up specifically
     * for this purpose (using the sigaltstack system call). The address
     * and size of this additional stack (which must be explicitly
     * allocated by the user application) are held in sas_ss_sp and
     * sas_ss_size, respectively.
     * (Professional Linux Kernel Architecture, page 384)
     */
    /*
     * Device drivers commonly use the function notifier points to in
     * order to block some signals of the process; notifier_mask is the
     * bit mask of those signals and notifier_data is data that function
     * may use.
     */
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    /* See Professional Linux Kernel Architecture, page 1100. */
    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    uid_t loginuid;
    unsigned int sessionid;
#endif
    seccomp_t seccomp;
    /* Thread group tracking */
    u32 parent_exec_id;
    u32 self_exec_id;
    /*
     * Protection of (de-)allocation: mm, files, fs, tty, keyrings,
     * mems_allowed, mempolicy
     */
    spinlock_t alloc_lock;
#ifdef CONFIG_GENERIC_HARDIRQS
    /* IRQ handler threads */
    struct irqaction *irqaction;
#endif
    /* Protection of the PI (Priority Inheritance) data structures: */
    raw_spinlock_t pi_lock;
    /* RT: real-time tasks. */
#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct plist_head pi_waiters;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    /* Lock depth. */
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif
    /* journalling filesystem info */
    void *journal_info;
    /* stacked block device info */
    struct bio_list *bio_list;
#ifdef CONFIG_BLOCK
    /* stack plugging */
    struct blk_plug *plug;
#endif
    /* VM state */
    struct reclaim_state *reclaim_state;
    struct backing_dev_info *backing_dev_info;
    struct io_context *io_context;
    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use. */
    /* Records the I/O statistics of a single task. */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1; /* accumulated rss usage */
    u64 acct_vm_mem1; /* accumulated virtual memory usage */
    cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed; /* Protected by alloc_lock */
    int mems_allowed_change_disable;
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy; /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
    /* holding fs exclusive resources; 0 means none are held */
    atomic_t fs_excl;
    struct rcu_head rcu;
    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    struct prop_local_single dirties;
#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;
    struct list_head *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
    /* memcg uses this to do batch job */
    struct memcg_batch_info {
        int do_batch; /* incremented when batch uncharge started */
        struct mem_cgroup *memcg; /* target memcg of uncharge */
        unsigned long nr_pages; /* uncharged usage */
        unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
    } memcg_batch;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
    atomic_t ptrace_bp_refcnt;
#endif
};
状态 | 描述 |
TASK_RUNNING(可运行状态) | 表示进程正在CPU上执行,或者已经就绪、在运行队列中等待被调度执行,并不一定正在运行 |
TASK_INTERRUPTIBLE(浅度睡眠状态) | 也叫睡眠状态(sleeping),进程因为等待某些条件处于阻塞(挂起的状态),一旦等待的条件成立,进程便会从该状态转化成就绪状态 |
TASK_UNINTERRUPTIBLE(深度睡眠状态) | 也叫磁盘休眠状态(Disk sleeping),意思与TASK_INTERRUPTIBLE类似,但是我们传递任意信号等不能唤醒他们,只有它所等待的资源可用的时候,他才会被唤醒,这个状态通常会等待IO的结束 |
TASK_STOPPED(暂停状态) | 进程被停止执行,可以通过发送SIGSTOP信号来停止进程,通过SIGCONT信号让进程继续运行 |
TASK_TRACED(跟踪状态) | 进程被debugger等进程所监视 |
EXIT_ZOMBIE(僵尸状态) | 进程的执行被终止,但是其父进程还没有使用wait()等系统调用来获知它的终止信息,此时进程成为僵尸进程 |
EXIT_DEAD(死亡状态) | 进程被杀死,即进程的最终状态,这个状态只是一个返回状态,不会在任务列表里看到 |
TASK_KILLABLE(可终止的睡眠状态) | 一种较新的睡眠状态,运行原理类似于TASK_UNINTERRUPTIBLE,只不过处于该状态的进程可以响应致命信号 |
成员 | 描述 |
static_prio | 用来保存静态优先级,可以通过nice系统调用来修改,取值范围为100~139 |
rt_priority | 用来保存实时优先级,取值范围为0~99 |
prio | 用来保存动态优先级 |
normal_prio | 它的值取决于静态优先级和调度策略 |
延伸:
(一)查看进程
六、fork
(二)、状态
(三)、进程优先级
五、程序地址空间
六、进程调度
一个进程,一个PCB,一个虚拟地址空间,一个页表,一份映射关系;