irq n: nobody cared (try booting with the "irqpoll" option)

__report_bad_irq() 是输出该crash log的函数。

/*
 * If 99,900 of the previous 100,000 interrupts have not been handled
 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
 * and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly
 *  functioning device sharing an IRQ with the failing one)
 */
/*
 * Emit the diagnostic for a misbehaving interrupt line.
 *
 * @irq:        interrupt number used in the messages
 * @desc:       descriptor whose action list is printed
 * @action_ret: combined handler return value that triggered the report
 *
 * Prints one headline (bogus return value vs. unclaimed interrupt), a
 * stack dump, and then one line per registered action showing its primary
 * handler and, when present, its threaded handler.
 */
static void
__report_bad_irq(unsigned int irq, struct irq_desc *desc,
         irqreturn_t action_ret)
{
    struct irqaction *act;
    unsigned long irqflags;

    /* Pick the headline that matches the reason for the report. */
    if (!bad_action_ret(action_ret)) {
        printk(KERN_ERR "irq %d: nobody cared (try booting with "
                "the \"irqpoll\" option)\n", irq);
    } else {
        printk(KERN_ERR "irq event %d: bogus return value %x\n",
                irq, action_ret);
    }

    dump_stack();
    printk(KERN_ERR "handlers:\n");

    /*
     * Walk the action list under desc->lock. note_interrupt() is
     * called without desc->lock held (only the in-progress flag is
     * set), so an action could be removed concurrently while we
     * iterate. Taking desc->lock here is fine; see synchronize_irq().
     */
    raw_spin_lock_irqsave(&desc->lock, irqflags);
    for (act = desc->action; act; act = act->next) {
        printk(KERN_ERR "[<%p>] %pf", act->handler, act->handler);
        if (act->thread_fn)
            printk(KERN_CONT " threaded [<%p>] %pf",
                    act->thread_fn, act->thread_fn);
        printk(KERN_CONT "\n");
    }
    raw_spin_unlock_irqrestore(&desc->lock, irqflags);
}
/*
 * Budgeted wrapper around __report_bad_irq(): forwards the report only
 * while the function-local budget (initially 100) is not exhausted, so
 * at most 100 reports are ever issued through this path.
 */
static void
report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
{
    static int reports_left = 100;

    /* Budget used up: stay silent. */
    if (reports_left <= 0)
        return;

    reports_left--;
    __report_bad_irq(irq, desc, action_ret);
}

在note_interrupt() 函数中,封装了出现crash的两种情况。

/*
 * Account for the outcome of one interrupt on @desc and shut the line
 * down if it looks stuck.
 *
 * @irq:        interrupt number (used for reporting and misroute handling)
 * @desc:       descriptor holding the per-IRQ counters and state
 * @action_ret: combined return value of the handlers that just ran
 *
 * Every call that gets past the early exits bumps desc->irq_count. Once
 * 100,000 interrupts have been counted, the number of unhandled ones in
 * that window is checked: if it exceeds 99,900 the IRQ is reported,
 * flagged IRQS_SPURIOUS_DISABLED, disabled, and the spurious-IRQ poll
 * timer is armed. Both counters are then reset for the next window.
 */
void note_interrupt(unsigned int irq, struct irq_desc *desc,
            irqreturn_t action_ret)
{
    /* No accounting while IRQS_POLL_INPROGRESS is set on this descriptor. */
    if (desc->istate & IRQS_POLL_INPROGRESS)
        return;

    /* we get here again via the threaded handler */
    if (action_ret == IRQ_WAKE_THREAD)
        return;

    /*
     * An invalid return value is reported through the budgeted
     * report_bad_irq() and is not counted at all.
     */
    if (bad_action_ret(action_ret)) {
        report_bad_irq(irq, desc, action_ret);
        return;
    }

    if (unlikely(action_ret == IRQ_NONE)) {
        /*
         * If we are seeing only the odd spurious IRQ caused by
         * bus asynchronicity then don't eventually trigger an error,
         * otherwise the counter becomes a doomsday timer for otherwise
         * working systems
         */
        /*
         * More than HZ/10 jiffies since the last unhandled interrupt:
         * restart the unhandled count at 1; otherwise keep accumulating.
         */
        if (time_after(jiffies, desc->last_unhandled + HZ/10))
            desc->irqs_unhandled = 1;
        else
            desc->irqs_unhandled++;
        desc->last_unhandled = jiffies;
    }

    /*
     * When misroute handling applies, the result of misrouted_irq() is
     * subtracted from the unhandled count for an IRQ_NONE interrupt.
     * NOTE(review): presumably misrouted_irq() returns nonzero when some
     * other handler claimed the interrupt -- confirm against its definition.
     */
    if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
        int ok = misrouted_irq(irq);
        if (action_ret == IRQ_NONE)
            desc->irqs_unhandled -= ok;
    }

    /* Only evaluate the statistics once per 100,000 counted interrupts. */
    desc->irq_count++;
    if (likely(desc->irq_count < 100000))
        return;

    desc->irq_count = 0;
    if (unlikely(desc->irqs_unhandled > 99900)) {
        /*
         * The interrupt is stuck
         */
        /* Called directly, so this report bypasses the report_bad_irq() budget. */
        __report_bad_irq(irq, desc, action_ret);
        /*
         * Now kill the IRQ
         */
        printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
        desc->istate |= IRQS_SPURIOUS_DISABLED;
        desc->depth++;
        irq_disable(desc);

        /* Arm the spurious-IRQ poll timer POLL_SPURIOUS_IRQ_INTERVAL jiffies from now. */
        mod_timer(&poll_spurious_irq_timer,
              jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
    }
    /* Start the next 100,000-interrupt window with a clean unhandled count. */
    desc->irqs_unhandled = 0;
}

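从note_interrupt()可以看出,有两种情况会调用__report_bad_irq()输出诊断信息:一是已注册的irqaction处理函数返回了非法值(既不是IRQ_NONE、IRQ_HANDLED,也不是IRQ_WAKE_THREAD),此时打印"bogus return value";二是每统计100000次中断,其中有超过99900次的返回值是IRQ_NONE(即没有任何处理函数认领该中断),此时打印"nobody cared",并且内核会关闭该IRQ。
最常见的触发场景是:中断被触发、处理函数被执行,但处理函数读取状态时却发现中断没有使能,于是返回IRQ_NONE。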

/*
 * Invoke every handler on the action chain starting at @action for the
 * interrupt described by @desc.
 *
 * Each handler's return value is OR-ed into the overall result, so the
 * result is IRQ_NONE only if every action returned IRQ_NONE. Flags of
 * actions that handled the interrupt (or successfully woke their thread)
 * are collected and passed to add_interrupt_randomness(). Unless
 * noirqdebug is set, the combined result is handed to note_interrupt().
 *
 * Returns the combined handler result.
 */
irqreturn_t
handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
{
    unsigned int irq = desc->irq_data.irq;
    unsigned int random_flags = 0;
    irqreturn_t combined = IRQ_NONE;

    do {
        irqreturn_t ret;

        trace_irq_handler_entry(irq, action);
        ret = action->handler(irq, action->dev_id);
        trace_irq_handler_exit(irq, action, ret);

        /* Warn once and re-disable if a handler left interrupts enabled. */
        if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
                  irq, action->handler))
            local_irq_disable();

        if (ret == IRQ_WAKE_THREAD) {
            if (unlikely(!action->thread_fn)) {
                /*
                 * Driver asked for its thread to be woken but
                 * never registered a thread function.
                 */
                warn_no_thread(irq, action);
            } else {
                irq_wake_thread(desc, action);
                /* Counts towards randomness just like IRQ_HANDLED. */
                random_flags |= action->flags;
            }
        } else if (ret == IRQ_HANDLED) {
            random_flags |= action->flags;
        }

        combined |= ret;
        action = action->next;
    } while (action);

    add_interrupt_randomness(irq, random_flags);

    if (!noirqdebug)
        note_interrupt(irq, desc, combined);

    return combined;
}

共享irq时,retval是所有irqaction返回值按位或(retval |= res)的结果,因此只有当所有的irqaction都没有处理该中断(全部返回IRQ_NONE)时,才会引发该crash。

你可能感兴趣的:(Linux,irq)