kernel watchdog跟硬件相关,主要检查当前是否有频繁的中断发生导致整个内核调度都出现问题
1>开关:(kernel/arch/arm64/configs/ZD552KL-Phoenix-perf_defconfig)config档中
CONFIG_MSM_WATCHDOG_V2=y
编译条件:obj-$(CONFIG_MSM_WATCHDOG_V2) += watchdog_v2.o
2>对应device tree
// Watchdog device-tree node. The bark/pet times below are in milliseconds:
// the driver feeds pet-time to msecs_to_jiffies() and scales bark-time by
// WDT_HZ / 1000 before writing it to the hardware.
qcom,wdt@b017000 {
compatible = "qcom,msm-watchdog";
reg = <0xb017000 0x1000>;
reg-names = "wdt-base";
interrupts = <0 3 0>, <0 4 0>;
qcom,bark-time = <15000>;// bark timeout: 15 s; if the watchdog is not petted within 15 s the bark interrupt fires
qcom,pet-time = <10000>; // pet interval: 10 s; the watchdog is petted once every 10 s
// NOTE(review): presumably parsed into do_ipi_ping by msm_wdog_dt_to_pdata() - parsing code not shown here
qcom,ipi-ping;
// NOTE(review): presumably parsed into wakeup_irq_enable - parsing code not shown here
qcom,wakeup-enable;
};
3>watchdog_v2.c(kernel/drivers/soc/qcom/watchdog_v2.c)
①probe
static int msm_watchdog_probe(struct platform_device *pdev)
{
int ret;
1.1msm_watchdog_data结构体
struct msm_watchdog_data *wdog_dd;
if (!pdev->dev.of_node || !enable)
return -ENODEV;
wdog_dd = kzalloc(sizeof(struct msm_watchdog_data), GFP_KERNEL);
if (!wdog_dd)
return -EIO;
解析device tree中相应内容填到msm_watchdog_data结构体
ret = msm_wdog_dt_to_pdata(pdev, wdog_dd);
if (ret)
goto err;
wdog_data = wdog_dd;
wdog_dd->dev = &pdev->dev;
platform_set_drvdata(pdev, wdog_dd);
cpumask_clear(&wdog_dd->alive_mask);
1.2watchdog_kthread,创建watchdog线程
wdog_dd->watchdog_task = kthread_create(watchdog_kthread, wdog_dd,
"msm_watchdog");
if (IS_ERR(wdog_dd->watchdog_task)) {
ret = PTR_ERR(wdog_dd->watchdog_task);
goto err;
}
2>
init_watchdog_data(wdog_dd);
return 0;
err:
kzfree(wdog_dd);
return ret;
}
2>init_watchdog_data
static void init_watchdog_data(struct msm_watchdog_data *wdog_dd)
{
unsigned long delay_time;
uint32_t val;
int error;
u64 timeout;
int ret;
/*
* Disable the watchdog for cluster 1 so that cluster 0 watchdog will
* be mapped to the entire sub-system.
*/
if (wdog_dd->wdog_absent_base)
__raw_writel(2, wdog_dd->wdog_absent_base + WDOG_ABSENT);
if (wdog_dd->irq_ppi) {
wdog_dd->wdog_cpu_dd = alloc_percpu(struct msm_watchdog_data *);
if (!wdog_dd->wdog_cpu_dd) {
dev_err(wdog_dd->dev, "fail to allocate cpu data\n");
return;
}
*raw_cpu_ptr(wdog_dd->wdog_cpu_dd) = wdog_dd;
注册中断irq,handler=>wdog_ppi_bark
ret = request_percpu_irq(wdog_dd->bark_irq, wdog_ppi_bark,
"apps_wdog_bark",
wdog_dd->wdog_cpu_dd);
if (ret) {
dev_err(wdog_dd->dev, "failed to request bark irq\n");
free_percpu(wdog_dd->wdog_cpu_dd);
return;
}
} else {
ret = devm_request_irq(wdog_dd->dev, wdog_dd->bark_irq,
wdog_bark_handler, IRQF_TRIGGER_RISING,
"apps_wdog_bark", wdog_dd);
if (ret) {
dev_err(wdog_dd->dev, "failed to request bark irq\n");
return;
}
}
delay_time = msecs_to_jiffies(wdog_dd->pet_time);
wdog_dd->min_slack_ticks = UINT_MAX;
wdog_dd->min_slack_ns = ULLONG_MAX;
configure_bark_dump(wdog_dd);
timeout = (wdog_dd->bark_time * WDT_HZ)/1000;
将bark-time换算成tick数后写入watchdog的WDT0_BARK_TIME寄存器;bite-time比bark-time多3秒(3*WDT_HZ),写入WDT0_BITE_TIME寄存器
__raw_writel(timeout, wdog_dd->base + WDT0_BARK_TIME);
__raw_writel(timeout + 3*WDT_HZ, wdog_dd->base + WDT0_BITE_TIME);
wdog_dd->panic_blk.notifier_call = panic_wdog_handler;
atomic_notifier_chain_register(&panic_notifier_list,
&wdog_dd->panic_blk);
mutex_init(&wdog_dd->disable_lock);
init_completion(&wdog_dd->pet_complete);
唤醒watchdog_task,即probe中创建的msm_watchdog线程
wake_up_process(wdog_dd->watchdog_task);
init_timer(&wdog_dd->pet_timer);
wdog_dd->pet_timer.data = (unsigned long)wdog_dd;
wdog_dd->pet_timer.function = pet_task_wakeup;
wdog_dd->pet_timer.expires = jiffies + delay_time;
add_timer(&wdog_dd->pet_timer);
1.1.msm_watchdog_data
/*
 * Driver state for the MSM watchdog. One instance is allocated in
 * msm_watchdog_probe() and also published through the global wdog_data.
 */
struct msm_watchdog_data {
/* NOTE(review): presumably physical base/size of the register block - not used in this excerpt */
unsigned int __iomem phys_base;
size_t size;
/* Register base; the WDT0_* offsets are added to it for every register access. */
void __iomem *base;
/* Optional; when non-NULL, init_watchdog_data() writes 2 to WDOG_ABSENT at this base. */
void __iomem *wdog_absent_base;
/* Set to &pdev->dev in probe; used for dev_err() and devm_request_irq(). */
struct device *dev;
/* Pet interval in milliseconds (converted with msecs_to_jiffies()). */
unsigned int pet_time;
/* Bark timeout in milliseconds (scaled by WDT_HZ / 1000 into ticks). */
unsigned int bark_time;
/* IRQ number requested with the bark handler. */
unsigned int bark_irq;
/* NOTE(review): not used in this excerpt - presumably the bite IRQ number */
unsigned int bite_irq;
/* When true, the kthread calls ping_other_cpus() before petting and the bark handler calls dump_cpu_alive_mask(). */
bool do_ipi_ping;
/* NOTE(review): not used in this excerpt - presumably set from qcom,wakeup-enable */
bool wakeup_irq_enable;
/* sched_clock() value at the last pet; wdog_bark_handler() divides it in place to seconds. */
unsigned long long last_pet;
/* Smallest margin before bark seen so far, in watchdog ticks; starts at UINT_MAX. */
unsigned min_slack_ticks;
/* Smallest margin before bark seen so far, in nanoseconds; starts at ULLONG_MAX. */
unsigned long long min_slack_ns;
/* NOTE(review): not used in this excerpt */
void *scm_regsave;
/* Cleared in probe; NOTE(review): presumably filled by ping_other_cpus() - confirm */
cpumask_t alive_mask;
/* Initialised in init_watchdog_data(); its users are not shown in this excerpt. */
struct mutex disable_lock;
/* Selects request_percpu_irq() (true) or devm_request_irq() (false) for the bark IRQ. */
bool irq_ppi;
/* Per-CPU pointer back to this struct; passed as dev_id to the per-CPU bark IRQ. */
struct msm_watchdog_data __percpu **wdog_cpu_dd;
/* Notifier registered on panic_notifier_list with panic_wdog_handler. */
struct notifier_block panic_blk;
/* NOTE(review): not used in this excerpt */
bool enabled;
/* The "msm_watchdog" kthread created in probe. */
struct task_struct *watchdog_task;
/* Timer whose callback pet_task_wakeup() signals pet_complete. */
struct timer_list pet_timer;
/* Completion the kthread waits on before each pet. */
struct completion pet_complete;
};
1.2.watchdog_kthread
static __ref int watchdog_kthread(void *arg)
{
struct msm_watchdog_data *wdog_dd =
(struct msm_watchdog_data *)arg;
unsigned long delay_time = 0;
struct sched_param param = {.sched_priority = MAX_RT_PRIO-1};
sched_setscheduler(current, SCHED_FIFO, ¶m);
while (!kthread_should_stop()) {
while (wait_for_completion_interruptible(
&wdog_dd->pet_complete) != 0)
;
reinit_completion(&wdog_dd->pet_complete);
if (enable) {
delay_time = msecs_to_jiffies(wdog_dd->pet_time);
if (wdog_dd->do_ipi_ping)
ping_other_cpus(wdog_dd);
喂狗,复位定时器
pet_watchdog(wdog_dd);
}
/* Check again before scheduling *
* Could have been changed on other cpu */
如果喂完狗,重新启动定时器,pet_timer到时会调用其function pet_task_wakeup
mod_timer(&wdog_dd->pet_timer, jiffies + delay_time);
}
return 0;
}
①pet_watchdog:所谓喂狗,就是每隔pet-time复位一次watchdog计数器,使其不会累计到bark-time
/*
 * pet_watchdog() - reset the watchdog counter and update the slack statistics.
 * @wdog_dd: watchdog driver state.
 *
 * Reads the count field of WDT0_STS (bits 20:1) until the value repeats,
 * records the smallest margin before bark seen so far (in ticks and in
 * nanoseconds), writes 1 to WDT0_RST and stores the pet timestamp.
 */
static void pet_watchdog(struct msm_watchdog_data *wdog_dd)
{
	unsigned long long bark_time_ns = wdog_dd->bark_time * 1000000ULL;
	unsigned long long now_ns;
	unsigned long long margin_ns;
	int last_seen = 0;
	int ticks;
	int margin;
	int pass = 0;

	/* Any read that differs from the previous one restarts the pass count. */
	while (pass < 2) {
		ticks = (__raw_readl(wdog_dd->base + WDT0_STS) >> 1) & 0xFFFFF;
		if (ticks != last_seen) {
			last_seen = ticks;
			pass = 0;
		}
		pass++;
	}

	/* Ticks that were still left before the bark threshold. */
	margin = ((wdog_dd->bark_time * WDT_HZ) / 1000) - ticks;
	if (margin < wdog_dd->min_slack_ticks)
		wdog_dd->min_slack_ticks = margin;

	/* The actual pet. */
	__raw_writel(1, wdog_dd->base + WDT0_RST);

	/* Same margin measured with sched_clock() instead of the hardware counter. */
	now_ns = sched_clock();
	margin_ns = (wdog_dd->last_pet + bark_time_ns) - now_ns;
	if (margin_ns < wdog_dd->min_slack_ns)
		wdog_dd->min_slack_ns = margin_ns;

	wdog_dd->last_pet = now_ns;
}
②pet_task_wakeup
/*
 * pet_task_wakeup() - pet_timer callback.
 * @data: the struct msm_watchdog_data pointer that init_watchdog_data()
 *        stored in pet_timer.data, carried as an unsigned long.
 *
 * Signals pet_complete, which watchdog_kthread() waits on before petting.
 */
static void pet_task_wakeup(unsigned long data)
{
	struct msm_watchdog_data *wdog = (void *)data;

	complete(&wdog->pet_complete);
}
③pet_task_wakeup通过complete(&pet_complete)唤醒msm_watchdog线程,线程reinit_completion之后继续调用pet_watchdog喂狗
④如果CPU卡死,msm_watchdog线程无法调用pet_watchdog喂狗,bark-time超时后便会触发bark中断
/*
 * wdog_ppi_bark() - per-CPU bark interrupt handler.
 * @irq: interrupt number.
 * @dev_id: this CPU's slot of wdog_cpu_dd, which holds a pointer to the
 *          driver state.
 *
 * Unwraps the per-CPU pointer and forwards to wdog_bark_handler().
 *
 * Return: whatever wdog_bark_handler() returns.
 */
static irqreturn_t wdog_ppi_bark(int irq, void *dev_id)
{
	struct msm_watchdog_data *wdog_dd =
		*(struct msm_watchdog_data **)(dev_id);

	return wdog_bark_handler(irq, wdog_dd);
}
⑤wdog_bark_handler
/*
 * wdog_bark_handler() - bark interrupt handler.
 * @irq: interrupt number (unused).
 * @dev_id: the struct msm_watchdog_data for this watchdog.
 *
 * Logs the current time and the time of the last pet as seconds.microseconds,
 * dumps the CPU alive mask when IPI pinging is enabled, then forces a
 * watchdog bite and panics if the bite does not happen.
 *
 * Return: IRQ_HANDLED (only reached if panic() returns).
 */
static irqreturn_t wdog_bark_handler(int irq, void *dev_id)
{
	struct msm_watchdog_data *wdog = dev_id;
	unsigned long long now = sched_clock();
	unsigned long rem_ns;

	/* do_div() leaves whole seconds in 'now' and returns the leftover ns. */
	rem_ns = do_div(now, 1000000000);
	printk(KERN_INFO "Watchdog bark! Now = %lu.%06lu\n", (unsigned long) now,
		rem_ns / 1000);

	/* Note: this divides last_pet in place, so it holds seconds afterwards. */
	rem_ns = do_div(wdog->last_pet, 1000000000);
	printk(KERN_INFO "Watchdog last pet at %lu.%06lu\n",
		(unsigned long) wdog->last_pet, rem_ns / 1000);

	if (wdog->do_ipi_ping)
		dump_cpu_alive_mask(wdog);

	msm_trigger_wdog_bite();
	panic("Failed to cause a watchdog bite! - Falling back to kernel panic!");
	return IRQ_HANDLED;
}
⑥msm_trigger_wdog_bite
/*
 * msm_trigger_wdog_bite() - force an immediate watchdog bite.
 *
 * Does nothing when no watchdog has been registered in wdog_data. Otherwise
 * writes 1 to WDT0_BITE_TIME, then 1 to WDT0_RST, and busy-waits 10 seconds
 * for the bite. If execution continues past the wait, the watchdog registers
 * are logged.
 */
void msm_trigger_wdog_bite(void)
{
	unsigned int sts, ctl, bark, bite;

	if (!wdog_data)
		return;

	pr_info("Causing a watchdog bite!");

	/* mb() after each write keeps the two register writes ordered. */
	__raw_writel(1, wdog_data->base + WDT0_BITE_TIME);
	mb();
	__raw_writel(1, wdog_data->base + WDT0_RST);
	mb();

	/* Delay to make sure bite occurs */
	mdelay(10000);

	/* Still running: snapshot the registers for the log. */
	sts = __raw_readl(wdog_data->base + WDT0_STS);
	ctl = __raw_readl(wdog_data->base + WDT0_EN);
	bark = __raw_readl(wdog_data->base + WDT0_BARK_TIME);
	bite = __raw_readl(wdog_data->base + WDT0_BITE_TIME);
	pr_err("Wdog - STS: 0x%x, CTL: 0x%x, BARK TIME: 0x%x, BITE TIME: 0x%x",
		sts, ctl, bark, bite);
}
系统初始化后注册watchdog中断
static void init_watchdog_data(struct msm_watchdog_data *wdog_dd)
{
unsigned long delay_time;
uint32_t val;
int error;
u64 timeout;
int ret;
/*
* Disable the watchdog for cluster 1 so that cluster 0 watchdog will
* be mapped to the entire sub-system.
*/
if (wdog_dd->wdog_absent_base)
__raw_writel(2, wdog_dd->wdog_absent_base + WDOG_ABSENT);
if (wdog_dd->irq_ppi) {
wdog_dd->wdog_cpu_dd = alloc_percpu(struct msm_watchdog_data *);
if (!wdog_dd->wdog_cpu_dd) {
dev_err(wdog_dd->dev, "fail to allocate cpu data\n");
return;
}
*raw_cpu_ptr(wdog_dd->wdog_cpu_dd) = wdog_dd;
ret = request_percpu_irq(wdog_dd->bark_irq, wdog_ppi_bark,
"apps_wdog_bark",
wdog_dd->wdog_cpu_dd);
if (ret) {
dev_err(wdog_dd->dev, "failed to request bark irq\n");
free_percpu(wdog_dd->wdog_cpu_dd);
return;