linux系统中每个进程由一个进程id标识,在内核中对应一个task_struct结构的进程描述符,系统中所有进程的task_struct通过链表链接在一起,在内核中,经常需要通过进程pid来获取进程描述符,例如kill命令;最简单的方法可以通过遍历task_struct链表并对比pid的值来获取,但这样效率太低,尤其当系统中运行很多个进程的时候。 linux内核通过pids散列表来解决这一问题,能快速的通过进程PID获取到进程描述符。 PID散列表包含4个表,因为进程描述符包含了表示不同类型PID的字段,每种类型的PID需要自己的散列表。
在task_struct结构体中:
struct task_struct {
pid_t pid;
pid_t tgid;
/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX]; //一个进程ID可能是多种身份,比如Session ID,进程组ID, 进程ID,所以指向多个pid节点
struct list_head thread_group;
}
enum pid_type { PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX };pid结构体和task_struct结构体之间的联系,通过pid_link结构体;
/* * What is struct pid? * * A struct pid is the kernel's internal notion of a process identifier. * It refers to individual tasks, process groups, and sessions. While * there are processes attached to it the struct pid lives in a hash * table, so it and then the processes that it refers to can be found * quickly from the numeric pid value. The attached processes may be * quickly accessed by following pointers from struct pid. * * Storing pid_t values in the kernel and referring to them later has a * problem. The process originally with that pid may have exited and the * pid allocator wrapped, and another process could have come along * and been assigned that pid. * * Referring to user space processes by holding a reference to struct * task_struct has a problem. When the user space process exits * the now useless task_struct is still kept. A task_struct plus a * stack consumes around 10K of low kernel memory. More precisely * this is THREAD_SIZE + sizeof(struct task_struct). By comparison * a struct pid is about 64 bytes. * * Holding a reference to struct pid solves both of these problems. * It is small so holding a reference does not consume a lot of * resources, and since a new struct pid is allocated when the numeric pid * value is reused (when pids wrap around) we don't mistakenly refer to new * processes. */ /* * struct upid is used to get the id of the struct pid, as it is * seen in particular namespace. Later the struct pid is found with * find_pid_ns() using the int nr and struct pid_namespace *ns. */ struct upid { /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; struct hlist_node pid_chain; }; struct pid { atomic_t count; unsigned int level; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct rcu_head rcu; struct upid numbers[1]; }; struct pid_link { struct hlist_node node; struct pid *pid; };
看完基本的关联之后,利用pid是如何找到进程描述符的呢?
直接贴代码!
/* * Must be called under rcu_read_lock(). */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { rcu_lockdep_assert(rcu_read_lock_held(), "find_task_by_pid_ns() needs rcu_read_lock()" " protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); }
#define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) static struct hlist_head *pid_hash; static unsigned int pidhash_shift = 4; struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { struct upid *pnr; hlist_for_each_entry_rcu(pnr, &pid_hash[pid_hashfn(nr, ns)], pid_chain) if (pnr->nr == nr && pnr->ns == ns) return container_of(pnr, struct pid, numbers[ns->level]); return NULL; }
find_pid_ns() 在pid_hash的哈希表中根据哈希函数得到pid所在的链表头部,然后遍历得到 id 和 ns 都匹对的pid结构体指针;
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
lockdep_tasklist_lock_is_held());
if (first) //pid中的task[type] 与task_struct.pid[type].node 指向的是同一个节点
result = hlist_entry(first, struct task_struct, pids[(type)].node); //node实体在task_struct结构中,所以可以利用first指针得到task_struct结构体指针
}
return result;
}
pid_task()函数中rcu_dereference_check()可以忽略,可以看做:first = hlist_first_rcu(&pid->tasks[type]);
#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
pid_hash的初始化函数:
/* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or * more. */ void __init pidhash_init(void) { unsigned int i, pidhash_size; pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18, HASH_EARLY | HASH_SMALL, &pidhash_shift, NULL, 0, 4096); pidhash_size = 1U << pidhash_shift; for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&pid_hash[i]); }
从pidhash_init函数中可以看出,pid_hash至少含有16项,最多4096项,它是系统中所有进程的哈希表,根据id分配,但此哈希表与PGID,TGID并无直接关系;
/* * allocate a large system hash table from bootmem * - it is assumed that the hash table must contain an exact power-of-2 * quantity of entries * - limit is the number of hash buckets, not the total allocation size */ void *__init alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, int scale, int flags, unsigned int *_hash_shift, unsigned int *_hash_mask, unsigned long low_limit, unsigned long high_limit)
注:所用内核版本为3.10
引用:http://blog.chinaunix.net/uid-20196318-id-134921.html