kernel watchdog

kernel watchdog跟硬件相关,主要检查当前是否有频繁的中断发生导致整个内核调度都出现问题

1>开关:在config档(kernel/arch/arm64/configs/ZD552KL-Phoenix-perf_defconfig)中

CONFIG_MSM_WATCHDOG_V2=y

编译条件:obj-$(CONFIG_MSM_WATCHDOG_V2) += watchdog_v2.o

2>对应device tree

    /* Device-tree node for the MSM apps watchdog block. */
    qcom,wdt@b017000 {
        compatible = "qcom,msm-watchdog";
        reg = <0xb017000 0x1000>;
        reg-names = "wdt-base";
        /* NOTE(review): presumably the bark and bite interrupts, in that order -- confirm against the binding. */
        interrupts = <0 3 0>, <0 4 0>;
        qcom,bark-time = <15000>;// bark time is 15 s: if the dog is not pet within 15 s, the bark interrupt fires
        qcom,pet-time = <10000>; // pet time is 10 s: the dog is pet once every 10 s
        qcom,ipi-ping;
        qcom,wakeup-enable;
    };

3>watchdog_v2.c(kernel/drivers/soc/qcom/watchdog_v2.c)

①probe

static int msm_watchdog_probe(struct platform_device *pdev)
{
    int ret;
1.1msm_watchdog_data结构体
    struct msm_watchdog_data *wdog_dd;
​
    if (!pdev->dev.of_node || !enable)
        return -ENODEV;
    wdog_dd = kzalloc(sizeof(struct msm_watchdog_data), GFP_KERNEL);
    if (!wdog_dd)
        return -EIO;
解析device tree中相应内容填到msm_watchdog_data结构体
    ret = msm_wdog_dt_to_pdata(pdev, wdog_dd);
    if (ret)
        goto err;
​
    wdog_data = wdog_dd;
    wdog_dd->dev = &pdev->dev;
    platform_set_drvdata(pdev, wdog_dd);
    cpumask_clear(&wdog_dd->alive_mask);
1.2watchdog_kthread,创建watchdog线程
    wdog_dd->watchdog_task = kthread_create(watchdog_kthread, wdog_dd,
            "msm_watchdog");
    if (IS_ERR(wdog_dd->watchdog_task)) {
        ret = PTR_ERR(wdog_dd->watchdog_task);
        goto err;
    }
2>
    init_watchdog_data(wdog_dd);
    return 0;
err:
    kzfree(wdog_dd);
    return ret;
}

2>init_watchdog_data

static void init_watchdog_data(struct msm_watchdog_data *wdog_dd)
{
    unsigned long delay_time;
    uint32_t val;
    int error;
    u64 timeout;
    int ret;
​
    /*
     * Disable the watchdog for cluster 1 so that cluster 0 watchdog will
     * be mapped to the entire sub-system.
     */
​
    
    if (wdog_dd->wdog_absent_base)
        __raw_writel(2, wdog_dd->wdog_absent_base + WDOG_ABSENT);
​
    if (wdog_dd->irq_ppi) {
        wdog_dd->wdog_cpu_dd = alloc_percpu(struct msm_watchdog_data *);
        if (!wdog_dd->wdog_cpu_dd) {
            dev_err(wdog_dd->dev, "fail to allocate cpu data\n");
            return;
        }
        *raw_cpu_ptr(wdog_dd->wdog_cpu_dd) = wdog_dd;
注册中断irq,handler=>wdog_ppi_bark
        ret = request_percpu_irq(wdog_dd->bark_irq, wdog_ppi_bark,
                    "apps_wdog_bark",
                    wdog_dd->wdog_cpu_dd);
        if (ret) {
            dev_err(wdog_dd->dev, "failed to request bark irq\n");
            free_percpu(wdog_dd->wdog_cpu_dd);
            return;
        }
    } else {
        ret = devm_request_irq(wdog_dd->dev, wdog_dd->bark_irq,
                wdog_bark_handler, IRQF_TRIGGER_RISING,
                        "apps_wdog_bark", wdog_dd);
        if (ret) {
            dev_err(wdog_dd->dev, "failed to request bark irq\n");
            return;
        }
    }
    delay_time = msecs_to_jiffies(wdog_dd->pet_time);
    wdog_dd->min_slack_ticks = UINT_MAX;
    wdog_dd->min_slack_ns = ULLONG_MAX;
    configure_bark_dump(wdog_dd);
    timeout = (wdog_dd->bark_time * WDT_HZ)/1000;
将bark-time填入watchdog register,
    __raw_writel(timeout, wdog_dd->base + WDT0_BARK_TIME);
    __raw_writel(timeout + 3*WDT_HZ, wdog_dd->base + WDT0_BITE_TIME);
​
    wdog_dd->panic_blk.notifier_call = panic_wdog_handler;
    atomic_notifier_chain_register(&panic_notifier_list,
                       &wdog_dd->panic_blk);
    mutex_init(&wdog_dd->disable_lock);
    init_completion(&wdog_dd->pet_complete);
唤醒watchdog_task,即msm_watchdog线程
    wake_up_process(wdog_dd->watchdog_task);
    init_timer(&wdog_dd->pet_timer);
    wdog_dd->pet_timer.data = (unsigned long)wdog_dd;
    wdog_dd->pet_timer.function = pet_task_wakeup;
    wdog_dd->pet_timer.expires = jiffies + delay_time;
    add_timer(&wdog_dd->pet_timer);

1.1.msm_watchdog_data

/*
 * Per-device state of the MSM apps watchdog driver. One instance is
 * allocated in msm_watchdog_probe() and shared by the pet thread, the pet
 * timer and the bark interrupt handler.
 */
struct msm_watchdog_data {
    unsigned int __iomem phys_base; /* NOTE(review): presumably the physical register base -- not used in the code shown */
    size_t size;                    /* NOTE(review): presumably the register window size -- confirm */
    void __iomem *base;             /* mapped base used for all WDT0_* register accesses */
    void __iomem *wdog_absent_base; /* optional; when set, 2 is written at WDOG_ABSENT during init */
    struct device *dev;             /* &pdev->dev, used for dev_err() and devm_request_irq() */
    unsigned int pet_time;          /* pet interval in milliseconds (fed to msecs_to_jiffies()) */
    unsigned int bark_time;         /* bark timeout in milliseconds (scaled by WDT_HZ / 1000 for the register) */
    unsigned int bark_irq;          /* IRQ number requested for the bark handler */
    unsigned int bite_irq;          /* not used in the code shown */
    bool do_ipi_ping;               /* when true, the pet thread calls ping_other_cpus() before petting */
    bool wakeup_irq_enable;         /* not used in the code shown */
    unsigned long long last_pet;    /* sched_clock() value recorded at the most recent pet */
    unsigned min_slack_ticks;       /* smallest (bark ticks - counter) seen at pet time; starts at UINT_MAX */
    unsigned long long min_slack_ns;/* smallest (last_pet + bark time - now) seen at pet time; starts at ULLONG_MAX */
    void *scm_regsave;              /* not used in the code shown */
    cpumask_t alive_mask;           /* cleared in probe; NOTE(review): presumably CPUs that answered the IPI ping */
    struct mutex disable_lock;      /* initialised in init_watchdog_data() */
    bool irq_ppi;                   /* true: bark IRQ is requested with request_percpu_irq(); false: devm_request_irq() */
    struct msm_watchdog_data __percpu **wdog_cpu_dd; /* per-CPU slot holding a pointer back to this struct (PPI dev_id) */
    struct notifier_block panic_blk;/* registered on panic_notifier_list with panic_wdog_handler */
    bool enabled;                   /* not used in the code shown */
    struct task_struct *watchdog_task; /* the "msm_watchdog" kthread running watchdog_kthread() */
    struct timer_list pet_timer;    /* fires pet_task_wakeup() after pet_time */
    struct completion pet_complete; /* completed by pet_task_wakeup(), waited on by the pet thread */
};

1.2.watchdog_kthread

static __ref int watchdog_kthread(void *arg)
{
    struct msm_watchdog_data *wdog_dd =
        (struct msm_watchdog_data *)arg;
    unsigned long delay_time = 0;
    struct sched_param param = {.sched_priority = MAX_RT_PRIO-1};
​
    sched_setscheduler(current, SCHED_FIFO, ¶m);
    while (!kthread_should_stop()) {
        while (wait_for_completion_interruptible(
            &wdog_dd->pet_complete) != 0)
            ;
        reinit_completion(&wdog_dd->pet_complete);
        if (enable) {
            delay_time = msecs_to_jiffies(wdog_dd->pet_time);
            if (wdog_dd->do_ipi_ping)
                ping_other_cpus(wdog_dd);
喂狗,复位定时器
            pet_watchdog(wdog_dd);
        }
        /* Check again before scheduling *
         * Could have been changed on other cpu */
         
如果喂完狗,重新启动定时器,pet_timer到时会调用其function pet_task_wakeup
        mod_timer(&wdog_dd->pet_timer, jiffies + delay_time);
    }
    return 0;
}

①pet_watchdog,所谓的喂狗就是每隔pet-time复位一次watchdog计数器(必须在bark-time之内完成)

static void pet_watchdog(struct msm_watchdog_data *wdog_dd)
{
    int slack, i, count, prev_count = 0;
    unsigned long long time_ns;
    unsigned long long slack_ns;
    unsigned long long bark_time_ns = wdog_dd->bark_time * 1000000ULL;
​
    for (i = 0; i < 2; i++) {
        count = (__raw_readl(wdog_dd->base + WDT0_STS) >> 1) & 0xFFFFF;
        if (count != prev_count) {
            prev_count = count;
            i = 0;
        }
    }
    slack = ((wdog_dd->bark_time * WDT_HZ) / 1000) - count;
    if (slack < wdog_dd->min_slack_ticks)
        wdog_dd->min_slack_ticks = slack;
    __raw_writel(1, wdog_dd->base + WDT0_RST);
    time_ns = sched_clock();
    slack_ns = (wdog_dd->last_pet + bark_time_ns) - time_ns;
    if (slack_ns < wdog_dd->min_slack_ns)
        wdog_dd->min_slack_ns = slack_ns;
    wdog_dd->last_pet = time_ns;
}

②pet_task_wakeup

/*
 * pet_task_wakeup() - pet_timer callback.
 * @data: the struct msm_watchdog_data pointer stored in pet_timer.data.
 *
 * Completes pet_complete so that the waiting pet thread runs.
 */
static void pet_task_wakeup(unsigned long data)
{
    struct msm_watchdog_data *wdog = (struct msm_watchdog_data *)data;

    complete(&wdog->pet_complete);
}

pet_complete完成后,msm_watchdog thread被唤醒,重新初始化completion后继续调用pet_watchdog喂狗

如果CPU死掉,无法调用pet_watchdog喂狗复位,bark-time时间后便会触发bark中断

/*
 * wdog_ppi_bark() - bark interrupt entry point for the per-CPU IRQ case.
 * @irq: interrupt number.
 * @dev_id: per-CPU cookie; points at a slot that holds the
 *          struct msm_watchdog_data pointer.
 *
 * Unwraps the per-CPU slot and forwards to wdog_bark_handler().
 *
 * Return: whatever wdog_bark_handler() returns.
 */
static irqreturn_t wdog_ppi_bark(int irq, void *dev_id)
{
    struct msm_watchdog_data *wdog_dd =
            *(struct msm_watchdog_data **)(dev_id);

    return wdog_bark_handler(irq, wdog_dd);
}

⑤wdog_bark_handler

/*
 * wdog_bark_handler() - bark interrupt handler.
 * @irq: interrupt number (unused).
 * @dev_id: the struct msm_watchdog_data for this watchdog.
 *
 * Logs the current time and the time of the last pet, optionally dumps
 * the CPU alive mask, then forces a watchdog bite. If the bite does not
 * happen, falls back to panic().
 *
 * Return: IRQ_HANDLED (written after the panic() call).
 */
static irqreturn_t wdog_bark_handler(int irq, void *dev_id)
{
    struct msm_watchdog_data *wdog_dd = (struct msm_watchdog_data *)dev_id;
    unsigned long long now = sched_clock();
    unsigned long rem_ns;

    /* do_div() leaves whole seconds in 'now' and returns the remainder. */
    rem_ns = do_div(now, 1000000000);
    printk(KERN_INFO "Watchdog bark! Now = %lu.%06lu\n",
        (unsigned long) now, rem_ns / 1000);

    /* Same split for the last pet time; last_pet itself is rewritten to seconds. */
    rem_ns = do_div(wdog_dd->last_pet, 1000000000);
    printk(KERN_INFO "Watchdog last pet at %lu.%06lu\n",
        (unsigned long) wdog_dd->last_pet, rem_ns / 1000);

    if (wdog_dd->do_ipi_ping)
        dump_cpu_alive_mask(wdog_dd);

    msm_trigger_wdog_bite();
    panic("Failed to cause a watchdog bite! - Falling back to kernel panic!");
    return IRQ_HANDLED;
}

⑥msm_trigger_wdog_bite

/*
 * msm_trigger_wdog_bite() - force an immediate watchdog bite.
 *
 * Does nothing when no watchdog has been registered in wdog_data.
 * Otherwise sets the bite time to 1, resets the counter, and busy-waits
 * for the bite. If execution continues past the wait, the watchdog
 * registers are logged.
 */
void msm_trigger_wdog_bite(void)
{
    if (!wdog_data)
        return;

    pr_info("Causing a watchdog bite!");

    /* Shortest bite timeout first, then restart the counter. */
    __raw_writel(1, wdog_data->base + WDT0_BITE_TIME);
    mb();
    __raw_writel(1, wdog_data->base + WDT0_RST);
    mb();

    /* Delay to make sure bite occurs */
    mdelay(10000);

    /* Still running: the bite did not happen, so log the register state. */
    pr_err("Wdog - STS: 0x%x, CTL: 0x%x, BARK TIME: 0x%x, BITE TIME: 0x%x",
        __raw_readl(wdog_data->base + WDT0_STS),
        __raw_readl(wdog_data->base + WDT0_EN),
        __raw_readl(wdog_data->base + WDT0_BARK_TIME),
        __raw_readl(wdog_data->base + WDT0_BITE_TIME));
}

系统初始化后注册watchdog中断

static void init_watchdog_data(struct msm_watchdog_data *wdog_dd)
{
    unsigned long delay_time;
    uint32_t val;
    int error;
    u64 timeout;
    int ret;
​
    /*
     * Disable the watchdog for cluster 1 so that cluster 0 watchdog will
     * be mapped to the entire sub-system.
     */
    if (wdog_dd->wdog_absent_base)
        __raw_writel(2, wdog_dd->wdog_absent_base + WDOG_ABSENT);
​
    if (wdog_dd->irq_ppi) {
        wdog_dd->wdog_cpu_dd = alloc_percpu(struct msm_watchdog_data *);
        if (!wdog_dd->wdog_cpu_dd) {
            dev_err(wdog_dd->dev, "fail to allocate cpu data\n");
            return;
        }
        *raw_cpu_ptr(wdog_dd->wdog_cpu_dd) = wdog_dd;
        ret = request_percpu_irq(wdog_dd->bark_irq, wdog_ppi_bark,
                    "apps_wdog_bark",
                    wdog_dd->wdog_cpu_dd);
        if (ret) {
            dev_err(wdog_dd->dev, "failed to request bark irq\n");
            free_percpu(wdog_dd->wdog_cpu_dd);
            return;

你可能感兴趣的:(kernel watchdog)