Kernel Thread Creation
Kernel thread creation is ultimately performed by kthreadd; the kernel's other APIs for creating kernel threads are wrappers around kthread_create_on_node(), which hands the creation request off to kthreadd. The commonly used kthread_create_on_cpu() and create_worker() both end up calling kthread_create_on_node().
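For reference, the familiar kthread_create()/kthread_run() helpers are exactly such wrappers; in kernels of this era they expand roughly as follows (paraphrased from include/linux/kthread.h, not copied verbatim; kthread_run() additionally wakes the new thread):

/* approximate definitions from include/linux/kthread.h (3.x era) */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)

#define kthread_run(threadfn, data, namefmt, ...)			   \
({									   \
	struct task_struct *__k						   \
		= kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
	if (!IS_ERR(__k))						   \
		wake_up_process(__k);					   \
	__k;								   \
})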
During kernel initialization, two tasks are created: init and kthreadd. The init task eventually switches from kernel mode to user mode and runs the various services under /etc/init.d to finish bringing the system up; kthreadd creates kernel threads and is the parent of all kernel threads.
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); /* the init task; kernel_thread() is a thin wrapper around do_fork() */
	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); /* create the kthreadd task */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);
}
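As the comment above notes, kernel_thread() is only a thin wrapper around do_fork(); in kernels of this era it looked roughly like this (a sketch, not copied verbatim from any particular release):

/* kernel/fork.c (approximate, 3.x era): kernel_thread() simply forwards to do_fork() */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, (unsigned long)fn,
		       (unsigned long)arg, NULL, NULL);
}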
kthreadd() is the thread function of the kthreadd kernel thread. In an endless loop it sleeps (TASK_INTERRUPTIBLE) while kthread_create_list is empty, then walks the list and calls create_kthread() for each queued request. Note that it sets its task state before checking the list: this ordering avoids losing a wakeup that arrives between the emptiness check and schedule().
int kthreadd(void *unused)
{
	struct task_struct *tsk = current;

	/* Setup a clean context for our children to inherit. */
	set_task_comm(tsk, "kthreadd");
	ignore_signals(tsk);
	set_cpus_allowed_ptr(tsk, cpu_all_mask);
	set_mems_allowed(node_states[N_MEMORY]);

	current->flags |= PF_NOFREEZE;

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (list_empty(&kthread_create_list))
			schedule();
		__set_current_state(TASK_RUNNING);

		spin_lock(&kthread_create_lock);
		while (!list_empty(&kthread_create_list)) {
			struct kthread_create_info *create;

			create = list_entry(kthread_create_list.next,
					    struct kthread_create_info, list);
			list_del_init(&create->list);
			spin_unlock(&kthread_create_lock);

			create_kthread(create);

			spin_lock(&kthread_create_lock);
		}
		spin_unlock(&kthread_create_lock);
	}

	return 0;
}
create_kthread() calls kernel_thread() to create the kernel thread:
static void create_kthread(struct kthread_create_info *create)
{
	int pid;

#ifdef CONFIG_NUMA
	current->pref_node_fork = create->node;
#endif
	/* We want our own signal handler (we take no signals by default). */
	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
	if (pid < 0) {
		/*
		 * On failure, hand the error back and wake the waiter
		 * ourselves; on success, the new thread's kthread() entry
		 * completes create->done instead.
		 */
		create->result = ERR_PTR(pid);
		complete(&create->done);
	}
}
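The entry function passed to kernel_thread() here is kthread(). It publishes the new task back through create->result, signals create->done to release the waiting creator, and then sleeps until somebody wakes it; only then does the caller's threadfn actually run. A simplified sketch (paraphrased from kernels of this era, with the struct kthread bookkeeping and parking support elided):

/* simplified sketch of kthread(), kernel/kthread.c; bookkeeping elided */
static int kthread(void *_create)
{
	struct kthread_create_info *create = _create;
	int (*threadfn)(void *data) = create->threadfn;
	void *data = create->data;
	int ret = -EINTR;

	/* tell the creator we exist, then sleep until woken */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	create->result = current;
	complete(&create->done);
	schedule();

	if (!kthread_should_stop())	/* woken normally: run the caller's function */
		ret = threadfn(data);

	do_exit(ret);			/* never returns */
}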
The above briefly covered how kthreadd creates kernel threads. So how are kernel threads such as kswapd and ksoftirqd actually created, and when do nodes get added to kthread_create_list?
kthread_create_on_node(threadfn, data, node, namefmt, ...)
Creates a kernel thread and returns its task descriptor (task_struct). threadfn is the thread function, data is the argument passed to threadfn, node is the NUMA node from which memory for the thread is allocated, and namefmt is a printf-style format string for the thread's name.
Characteristics of the resulting thread: it is created in a stopped state, sleeping in TASK_UNINTERRUPTIBLE inside the kthread() entry shown above, and must be woken explicitly with wake_up_process() before threadfn runs. To terminate it, either the thread calls do_exit() itself, or another thread calls kthread_stop().
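A minimal usage sketch illustrating these points (my_threadfn and its work loop are hypothetical, for illustration only; node -1 means no NUMA preference):

/* hypothetical example: create, wake, and later stop a kernel thread */
static int my_threadfn(void *data)
{
	/* loop until kthread_stop() asks us to quit */
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);	/* ... do work, then sleep ~1s ... */
	return 0;
}

struct task_struct *t;

t = kthread_create_on_node(my_threadfn, NULL, -1, "my_thread");
if (!IS_ERR(t))
	wake_up_process(t);	/* the new thread starts stopped; run it explicitly */
/* ... later ... */
kthread_stop(t);		/* sets should_stop, wakes the thread, waits for threadfn to return */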
How the creation works:
1. Allocate and initialize a kthread_create_info descriptor and queue it on kthread_create_list, the list of pending creation requests that kthreadd serves; then wake kthreadd and wait for completion (see the excerpt below).
2. Set the new thread's cpumask and scheduling policy.
struct kthread_create_info create;

create.threadfn = threadfn;
create.data = data;
create.node = node;
init_completion(&create.done);

spin_lock(&kthread_create_lock);
list_add_tail(&create.list, &kthread_create_list); /* queue the request for kthreadd */
spin_unlock(&kthread_create_lock);

wake_up_process(kthreadd_task); /* wake kthreadd to perform the actual creation */
wait_for_completion(&create.done);
...
sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); /* default policy for kernel threads: SCHED_NORMAL */
set_cpus_allowed_ptr(create.result, cpu_all_mask); /* allow the thread to run on any usable CPU */
kthread_create_on_cpu() is a thin layer on top of kthread_create_on_node(): it allocates the thread from the CPU's local NUMA node, marks it as per-CPU, and parks it:

struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
					  void *data, unsigned int cpu,
					  const char *namefmt)
{
	struct task_struct *p;

	p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
				   cpu);
	if (IS_ERR(p))
		return p;
	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
	to_kthread(p)->cpu = cpu;
	/* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */
	kthread_park(p);
	return p;
}
create_worker() creates a worker for a worker_pool; each worker's thread function is worker_thread():
static struct worker *create_worker(struct worker_pool *pool)
{
	struct worker *worker = NULL;
	int id = -1;
	char id_buf[16];

	lockdep_assert_held(&pool->manager_mutex);

	/*
	 * ID is needed to determine kthread name. Allocate ID first
	 * without installing the pointer.
	 */
	idr_preload(GFP_KERNEL);
	spin_lock_irq(&pool->lock);

	id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);

	spin_unlock_irq(&pool->lock);
	idr_preload_end();
	if (id < 0)
		goto fail;

	worker = alloc_worker();
	if (!worker)
		goto fail;

	worker->pool = pool;
	worker->id = id;

	if (pool->cpu >= 0)
		snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
			 pool->attrs->nice < 0 ? "H" : "");
	else
		snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);

	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
					      "kworker/%s", id_buf);
	if (IS_ERR(worker->task))
		goto fail;

	/*
	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
	 * online CPUs. It'll be re-applied when any of the CPUs come up.
	 */
	set_user_nice(worker->task, pool->attrs->nice);
	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);

	/* prevent userland from meddling with cpumask of workqueue workers */
	worker->task->flags |= PF_NO_SETAFFINITY;

	/*
	 * The caller is responsible for ensuring %POOL_DISASSOCIATED
	 * remains stable across this function. See the comments above the
	 * flag definition for details.
	 */
	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;

	/* successful, commit the pointer to idr */
	spin_lock_irq(&pool->lock);
	idr_replace(&pool->worker_idr, worker, worker->id);
	spin_unlock_irq(&pool->lock);

	return worker;

fail:
	if (id >= 0) {
		spin_lock_irq(&pool->lock);
		idr_remove(&pool->worker_idr, id);
		spin_unlock_irq(&pool->lock);
	}
	kfree(worker);
	return NULL;
}
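Note that create_worker() only creates the kworker thread; in keeping with the "created stopped" behavior described above, the caller must still wake it. In kernels of this era that happens in start_worker(), roughly as follows (a sketch; exact details vary between releases):

/* sketch of start_worker() from this era; called with pool->lock held */
static void start_worker(struct worker *worker)
{
	worker->flags |= WORKER_STARTED;
	worker->pool->nr_workers++;
	worker_enter_idle(worker);	/* account the new worker as idle */
	wake_up_process(worker->task);	/* finally lets worker_thread() run */
}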