Linux interrupt bottom halves: softirq


1. Analysis of the irq_exit function

/*
 * Exit an interrupt context. Process softirqs if needed and possible:                                                                                                                                      
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
    local_irq_disable();
#else
    lockdep_assert_irqs_disabled();
#endif
    account_irq_exit_time(current);
    preempt_count_sub(HARDIRQ_OFFSET);     // preempt_count is modified here

    /*
     * Check whether we are still in interrupt context and whether any
     * softirq is pending. Note: only when both conditions hold at the
     * same time can invoke_softirq() be called to enter softirq handling.
     */
    if (!in_interrupt() && local_softirq_pending())
        invoke_softirq();

    tick_irq_exit();
    rcu_irq_exit();
    trace_hardirq_exit(); /* must be last! */
}

2. Explanation of in_interrupt:

include/linux/preempt.h
/*
 * We put the hardirq and softirq counter into the preemption
 * counter. The bitmask has the following meaning:                                                                                                                                                          
 *
 * - bits 0-7 are the preemption count (max preemption depth: 256)
 * - bits 8-15 are the softirq count (max # of softirqs: 256)
 *
 * The hardirq count could in theory be the same as the number of
 * interrupts in the system, but we run all interrupt handlers with
 * interrupts disabled, so we cannot have nesting interrupts. Though
 * there are a few palaeontologic drivers which reenable interrupts in
 * the handler, so we need more than one bit here.
 *
 *         PREEMPT_MASK:    0x000000ff
 *         SOFTIRQ_MASK:    0x0000ff00
 *         HARDIRQ_MASK:    0x000f0000
 *             NMI_MASK:    0x00100000
 * PREEMPT_NEED_RESCHED:    0x80000000
 */
#define PREEMPT_BITS    8
#define SOFTIRQ_BITS    8
#define HARDIRQ_BITS    4
#define NMI_BITS    1
#define PREEMPT_SHIFT   0
#define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT   (HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)   ((1UL << (x))-1)

#define PREEMPT_MASK    (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK    (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)                                                                                                                                         
#define HARDIRQ_MASK    (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK    (__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)

#define PREEMPT_OFFSET  (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET  (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET  (1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET  (1UL << NMI_SHIFT)

include/asm-generic/preempt.h
static __always_inline int preempt_count(void)
{
    return READ_ONCE(current_thread_info()->preempt_count);
}

(1) Bits 0-7 are the preemption count, so the maximum supported preemption depth is 256.
(2) Bits 8-15 are the softirq count, so up to 256 nested softirqs can be counted. Softirqs are further limited by the pending state: a 32-bit variable in which each bit marks one softirq type, so in practice at most 32 softirq types are supported.
(3) Bits 16-19 are the hardirq nesting depth. The hardirq count could in theory be as large as the number of interrupts in the system, but all interrupt handlers run with interrupts disabled, so interrupts cannot normally nest. Still, a few palaeontologic drivers re-enable interrupts in their handlers, which is why more than one bit is needed here.
(4) Bit 20 is the NMI count.
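
To make the layout concrete, the following is a minimal user-space sketch (not kernel code; the sample value 0x00010201 is invented for illustration) that decodes a preempt_count value with the masks above:

#include <stdio.h>

#define PREEMPT_MASK  0x000000ffUL
#define SOFTIRQ_MASK  0x0000ff00UL
#define HARDIRQ_MASK  0x000f0000UL
#define NMI_MASK      0x00100000UL

int main(void)
{
    /* hypothetical value: hardirq depth 1, softirq count 2, preempt depth 1 */
    unsigned long pc = 0x00010201UL;

    printf("preempt depth: %lu\n",  pc & PREEMPT_MASK);
    printf("softirq count: %lu\n", (pc & SOFTIRQ_MASK) >> 8);
    printf("hardirq depth: %lu\n", (pc & HARDIRQ_MASK) >> 16);
    printf("in NMI:        %s\n",  (pc & NMI_MASK) ? "yes" : "no");
    return 0;
}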

include/linux/preempt.h
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
                 | NMI_MASK))

/*  
 * Are we doing bottom half or hardware interrupt processing?
 *  
 * in_irq()       - We're in (hard) IRQ context
 * in_softirq()   - We have BH disabled, or are processing softirqs
 * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
 * in_serving_softirq() - We're in softirq context
 * in_nmi()       - We're in NMI context
 * in_task()      - We're in task context
 *  
 * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really
 *       should not be used in new code.
 */
#define in_irq()        (hardirq_count())
#define in_softirq()        (softirq_count())
#define in_interrupt()      (irq_count())
#define in_serving_softirq()    (softirq_count() & SOFTIRQ_OFFSET)
#define in_nmi()        (preempt_count() & NMI_MASK)
#define in_task()       (!(preempt_count() & \
                   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))

Therefore in_interrupt() is non-zero whenever we are in NMI, hardirq, or softirq context (or have BH disabled). irq_exit() uses it to decide whether the conditions for running softirqs are met: a softirq must not be preempted by another softirq, and it must never preempt a hardirq or an NMI.
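
As a usage illustration (bearing in mind the kernel comment above that in_softirq()/in_interrupt() should not be used in new code), a classic driver pattern selects allocation flags by context. alloc_buffer here is a hypothetical helper, a sketch only:

#include <linux/preempt.h>
#include <linux/slab.h>

static void *alloc_buffer(size_t size)
{
    /* In hardirq/softirq/NMI context (or with BH disabled) we must not
     * sleep, so fall back to GFP_ATOMIC. */
    gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;

    return kmalloc(size, flags);
}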

3. invoke_softirq

static inline void invoke_softirq(void)
{
    if (ksoftirqd_running(local_softirq_pending()))
        return;

    /*
     * force_irqthreads tells whether the system has enabled forced
     * interrupt threading. If IRQ threading is not enabled, call
     * __do_softirq() or do_softirq_own_stack() to process the softirqs
     * directly; otherwise call wakeup_softirqd() and let the threaded
     * path finish the work.
     */
    if (!force_irqthreads) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
        /*
         * We can safely execute softirq on the current stack if
         * it is the irq stack, because it should be near empty
         * at this stage.
         */
        __do_softirq();
#else
        /*
         * Otherwise, irq_exit() is called on the task stack that can
         * be potentially deep already. So call softirq in its own stack
         * to prevent from any overrun.
         */
        do_softirq_own_stack();
#endif
    } else {
        wakeup_softirqd();
    }
}

The ksoftirqd_running() function checks whether the per-CPU ksoftirqd task is in the TASK_RUNNING state, i.e. whether ksoftirqd has already been scheduled to run. If ksoftirqd is already TASK_RUNNING, processing of the current softirqs is skipped here and left to ksoftirqd.

/*
 * If ksoftirqd is scheduled, we do not want to process pending softirqs
 * right now. Let ksoftirqd handle this at its own rate, to get fairness,
 * unless we're doing some of the synchronous softirqs.
 */
#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
static bool ksoftirqd_running(unsigned long pending)
{
    struct task_struct *tsk = __this_cpu_read(ksoftirqd);

    if (pending & SOFTIRQ_NOW_MASK)
        return false;
    return tsk && (tsk->state == TASK_RUNNING) &&
        !__kthread_should_park(tsk);
}
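
Note the SOFTIRQ_NOW_MASK special case: HI_SOFTIRQ and TASKLET_SOFTIRQ are treated as synchronous softirqs and are still processed immediately even when ksoftirqd is already running. The force_irqthreads flag checked above is defined as follows: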
#ifdef CONFIG_IRQ_FORCED_THREADING
__read_mostly bool force_irqthreads;
EXPORT_SYMBOL_GPL(force_irqthreads);

static int __init setup_forced_irqthreads(char *arg)                                                                                                                                                        
{       
    force_irqthreads = true;
    return 0;
}       
early_param("threadirqs", setup_forced_irqthreads);
#endif
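
In other words, force_irqthreads is switched on by booting with `threadirqs` on the kernel command line (when CONFIG_IRQ_FORCED_THREADING is set), in which case invoke_softirq() always defers to ksoftirqd via wakeup_softirqd(). The non-threaded path lands in __do_softirq():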
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
    unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
    unsigned long old_flags = current->flags;
    int max_restart = MAX_SOFTIRQ_RESTART;
    struct softirq_action *h;
    bool in_hardirq;
    __u32 pending;
    int softirq_bit;

    /*
     * Mask out PF_MEMALLOC, as the current task context is borrowed for
     * the softirq. A softirq handler such as network RX might set
     * PF_MEMALLOC again if the socket is related to swap.
     */
    current->flags &= ~PF_MEMALLOC;

    pending = local_softirq_pending();
    account_irq_enter_time(current);

    __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
    in_hardirq = lockdep_softirq_start();
restart:
    /* Reset the pending bitmask before enabling irqs: on each pass the
     * softirq flags are cleared before hard interrupts are allowed to
     * preempt us again. */
    set_softirq_pending(0);

    /*
     * Interrupts are enabled only from here on. Note: everything before
     * this point ran with interrupts disabled, so the softirq processing
     * can be preempted by a hard interrupt only from here onwards, not
     * right at entry.
     */
    local_irq_enable();

    /*
     * Note: the code below can be preempted by a hard interrupt, but when
     * that hard interrupt completes, any softirq it registered cannot run
     * immediately. Although hard interrupts are enabled here, the earlier
     * __local_bh_disable_ip() masked softirqs, so in this region we can
     * only be preempted by hard interrupts; the softirq callbacks they
     * register cannot run yet. The reason is that __local_bh_disable_ip()
     * set a flag that acts as a mutex, and that flag is exactly one of
     * the conditions tested by in_interrupt() in irq_exit() and
     * do_softirq() above: in_interrupt() checks not only for hardirq but
     * also for softirq (and BH-disabled) context. So a softirq raised by
     * a hard interrupt in this region cannot re-enter this function; it
     * can only set its pending bit and wait for the restart loop below
     * (at most MAX_SOFTIRQ_RESTART iterations) to pick it up.
     *
     * Get the softirq vector table.
     */
    h = softirq_vec;

    while ((softirq_bit = ffs(pending))) {
        unsigned int vec_nr;
        int prev_count;

        h += softirq_bit - 1;

        vec_nr = h - softirq_vec;
        prev_count = preempt_count();

        kstat_incr_softirqs_this_cpu(vec_nr);                                                                                                                                                               

        trace_softirq_entry(vec_nr);
        h->action(h);
        trace_softirq_exit(vec_nr);
        if (unlikely(prev_count != preempt_count())) {
            pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
                   vec_nr, softirq_to_name[vec_nr], h->action,
                   prev_count, preempt_count());                                                                                                                                                            
            preempt_count_set(prev_count);
        }
        h++;
        pending >>= softirq_bit;
    }

    rcu_bh_qs();

    /* Disable interrupts again. Note: the code below runs with hard
     * interrupts unable to preempt it. */
    local_irq_disable();

    /*
     * As noted above, during the interrupts-enabled window we could only
     * be preempted by hard interrupts, and softirqs raised then could
     * not be handled. Since we may have been preempted several times,
     * each time possibly raising a softirq, re-read the whole pending
     * mask here so the code below can process it and jump back to
     * restart.
     */
    pending = local_softirq_pending();
    if (pending) {
        /*
         * If a hard interrupt fired during the interrupts-enabled window
         * above and registered a softirq, that softirq set its pending
         * bit but could not run in this BH-disabled environment: as
         * explained above, irq_exit() and do_softirq() simply cannot
         * re-enter this processing. Here it gets another chance to run.
         * Even though we are still in a softirq-masked environment,
         * softirqs raised by hard interrupts during the open window are
         * given a chance to execute now. Once the softirq mechanism is
         * understood, this is nothing more than calling, under specific
         * conditions, the functions that ISRs registered in the softirq
         * vector table. If softirqs are pending and the loop has not yet
         * repeated MAX_SOFTIRQ_RESTART (10) times, jump back to the
         * restart label and repeat all the steps described above: clear
         * the pending bitmask, re-enable interrupts, and so on.
         *
         * Note: all of the conditions below must hold for the steps to
         * be repeated.
         */
        if (time_before(jiffies, end) && !need_resched() && --max_restart)
            goto restart;

        /*
         * If softirqs are still pending after the steps above have been
         * repeated 10 times, the system has probably hit a load peak in
         * a short interval. To balance this, a dedicated ksoftirqd
         * thread handles the remainder, avoiding too heavy a load in a
         * given period. ksoftirqd itself is a big loop; to avoid
         * overloading the system it can be preempted by other processes,
         * but note that it yields explicitly via cond_resched()/
         * schedule(). Once it detects pending softirqs via
         * local_softirq_pending(), it processes them by calling
         * __do_softirq(), which means the ksoftirqd thread woken up
         * below may well come back into this function, especially when
         * the system has a great many softirqs to service. This is also
         * why the softirq entry paths use in_interrupt() to check
         * whether a softirq is already being processed: to prevent
         * re-entry. See the analysis of the ksoftirqd loop below.
         */
        wakeup_softirqd();
    }

    lockdep_softirq_end(in_hardirq);
    account_irq_exit_time(current);

    /* Only now is softirq execution re-enabled. Note: this uses
     * __local_bh_enable(), not local_bh_enable(), so it will not trigger
     * another call to do_softirq(). */
    __local_bh_enable(SOFTIRQ_OFFSET);
    WARN_ON_ONCE(in_interrupt());
    current_restore_flags(old_flags, PF_MEMALLOC);
}
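
As a reminder of how entries get into softirq_vec in the first place, here is a minimal registration sketch. It assumes a hypothetical MY_SOFTIRQ slot added to the fixed softirq enum in <linux/interrupt.h>; softirqs cannot be registered dynamically at runtime:

#include <linux/init.h>
#include <linux/interrupt.h>

/* Called from __do_softirq() via h->action(h): hard interrupts are
 * enabled, but BH is disabled, so it is not re-entered on this CPU. */
static void my_softirq_action(struct softirq_action *h)
{
    /* deferred work goes here */
}

static int __init my_softirq_init(void)
{
    /* fills in softirq_vec[MY_SOFTIRQ].action */
    open_softirq(MY_SOFTIRQ, my_softirq_action);
    return 0;
}

/* Typically called at the tail of a hard-IRQ handler: sets the per-CPU
 * pending bit that local_softirq_pending() tests in irq_exit(). */
static void my_irq_handler_tail(void)
{
    raise_softirq(MY_SOFTIRQ);
}

The wakeup_softirqd() path referenced above is shown next.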
/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so lets the scheduler to balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
    /* Interrupts are disabled: no need to stop preemption */
    struct task_struct *tsk = __this_cpu_read(ksoftirqd);

    if (tsk && tsk->state != TASK_RUNNING)
        wake_up_process(tsk);
}
/**
 * wake_up_process - Wake up a specific process
 * @p: The process to be woken up.
 *
 * Attempt to wake up the nominated process and move it to the set of runnable
 * processes.
 *
 * Return: 1 if the process was woken up, 0 if it was already running.
 *
 * This function executes a full memory barrier before accessing the task state.
 */
int wake_up_process(struct task_struct *p)
{
    return try_to_wake_up(p, TASK_NORMAL, 0);
}
/**
 * try_to_wake_up - wake up a thread
 * @p: the thread to be awakened
 * @state: the mask of task states that can be woken
 * @wake_flags: wake modifier flags (WF_*)
 *
 * If (@state & @p->state) @p->state = TASK_RUNNING.
 *
 * If the task was not queued/runnable, also place it back on a runqueue.
 *
 * Atomic against schedule() which would dequeue a task, also see
 * set_current_state().
 *
 * This function executes a full memory barrier before accessing the task
 * state; see set_current_state().
 *
 * Return: %true if @p->state changes (an actual wakeup was done),
 *         %false otherwise.
 */
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
    unsigned long flags;
    int cpu, success = 0;

    preempt_disable();
    if (p == current) {
        /*
         * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
         * == smp_processor_id()'. Together this means we can special
         * case the whole 'p->on_rq && ttwu_remote()' case below
         * without taking any locks.
         *
         * In particular:
         *  - we rely on Program-Order guarantees for all the ordering,
         *  - we're serialized against set_special_state() by virtue of
         *    it disabling IRQs (this allows not taking ->pi_lock).
         */
        if (!(p->state & state))
            goto out;

        success = 1;
        cpu = task_cpu(p);
        trace_sched_waking(p);
        p->state = TASK_RUNNING;	/* set the ksoftirqd task's state to TASK_RUNNING, waiting for the CPU to run it */
        trace_sched_wakeup(p);
        goto out;
    }

    /*
     * If we are going to wake up a thread waiting for CONDITION we
     * need to ensure that CONDITION=1 done by the caller can not be
     * reordered with p->state check below. This pairs with mb() in
     * set_current_state() the waiting thread does.
     */
    raw_spin_lock_irqsave(&p->pi_lock, flags);
    smp_mb__after_spinlock();
    if (!(p->state & state))
        goto unlock;

    trace_sched_waking(p);

    /* We're going to change ->state: */
    success = 1;
    cpu = task_cpu(p);

    /*
     * Ensure we load p->on_rq _after_ p->state, otherwise it would
     * be possible to, falsely, observe p->on_rq == 0 and get stuck
     * in smp_cond_load_acquire() below.
     *
     * sched_ttwu_pending()            try_to_wake_up()
     *   STORE p->on_rq = 1              LOAD p->state
     *   UNLOCK rq->lock
     *
     * __schedule() (switch to task 'p')
     *   LOCK rq->lock              smp_rmb();
     *   smp_mb__after_spinlock();
     *   UNLOCK rq->lock
     *
     * [task p]
     *   STORE p->state = UNINTERRUPTIBLE      LOAD p->on_rq
     *
     * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
     * __schedule().  See the comment for smp_mb__after_spinlock().
     */
    smp_rmb();
    if (p->on_rq && ttwu_remote(p, wake_flags))
        goto unlock;

#ifdef CONFIG_SMP
    /*
     * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
     * possible to, falsely, observe p->on_cpu == 0.
     *
     * One must be running (->on_cpu == 1) in order to remove oneself
     * from the runqueue.
     *
     * __schedule() (switch to task 'p')    try_to_wake_up()
     *   STORE p->on_cpu = 1          LOAD p->on_rq
     *   UNLOCK rq->lock
     *
     * __schedule() (put 'p' to sleep)
     *   LOCK rq->lock              smp_rmb();
     *   smp_mb__after_spinlock();
     *   STORE p->on_rq = 0              LOAD p->on_cpu
     *
     * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
     * __schedule().  See the comment for smp_mb__after_spinlock().
     */
    smp_rmb();

    /*
     * If the owning (remote) CPU is still in the middle of schedule() with
     * this task as prev, wait until its done referencing the task.
     *
     * Pairs with the smp_store_release() in finish_task().
     *
     * This ensures that tasks getting woken will be fully ordered against
     * their previous state and preserve Program Order.
     */
    smp_cond_load_acquire(&p->on_cpu, !VAL);

    p->sched_contributes_to_load = !!task_contributes_to_load(p);
    p->state = TASK_WAKING;

    if (p->in_iowait) {
        delayacct_blkio_end(p);
        atomic_dec(&task_rq(p)->nr_iowait);
    }

    cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
    if (task_cpu(p) != cpu) {
        wake_flags |= WF_MIGRATED;
        psi_ttwu_dequeue(p);
        set_task_cpu(p, cpu);
    }

#else /* CONFIG_SMP */

    if (p->in_iowait) {
        delayacct_blkio_end(p);
        atomic_dec(&task_rq(p)->nr_iowait);
    }

#endif /* CONFIG_SMP */

    ttwu_queue(p, cpu, wake_flags);
unlock:
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
    if (success)
        ttwu_stat(p, cpu, wake_flags);
    preempt_enable();

    return success;
}
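
For reference, the body of the per-CPU ksoftirqd thread in kernels of this vintage looks roughly as follows (run_ksoftirqd() in kernel/softirq.c, lightly annotated). Note that it calls __do_softirq() directly and yields with cond_resched(), which is the explicit scheduling point mentioned earlier:

static void run_ksoftirqd(unsigned int cpu)
{
    local_irq_disable();
    if (local_softirq_pending()) {
        /*
         * We can safely run softirq on the inline stack, as we are not
         * deep in the task stack here.
         */
        __do_softirq();
        local_irq_enable();
        cond_resched();
        return;
    }
    local_irq_enable();
}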
