记一次开机卡死

新的项目,编译代码,烧写到主板后,主板能起来,但就是进入不了主界面(通过vysor同屏查看)。这是比较少见的,怀疑是代码上哪里弄错,但再三检查也没发现问题,将固件烧写到其他项目的主板上能正常起来,那就不是代码的问题了。

先看内核log,init进程一直在启动camera服务,但到了600s都没启动成功。

记一次开机卡死_第1张图片

查看处于D(不可中断睡眠)、Z(僵尸)、R(运行)状态的线程(注意grep是按整行匹配字母,所以名字里带D/Z/R的S状态线程也会被列出来)

root@G480:/home/w# adb  shell  ps  -AT|grep -e "D" -e "Z" -e  "R"
USER           PID   TID  PPID     VSZ    RSS WCHAN            ADDR S CMD            
root            69    69     2       0      0 mbox_send_thread    0 D mbox-send-threa
root            72    72     2       0      0 monitor_irqs_change 0 D irqs_change
root           232   232     2       0      0 0                   0 R sugov:0
root           419   689     1 2424672   8708 poll_schedule_timeout 0 S SkDestroyListen
root           420  4678     1 5456024 177276 futex_wait_queue_me 0 S HeapTaskDaemon
root           420  4679     1 5456024 177276 futex_wait_queue_me 0 S ReferenceQueueD
root           420  4680     1 5456024 177276 futex_wait_queue_me 0 S FinalizerDaemon
root           421  1451     1 1768180 161488 futex_wait_queue_me 0 S HeapTaskDaemon
root           421  1452     1 1768180 161488 futex_wait_queue_me 0 S ReferenceQueueD
root           421  1453     1 1768180 161488 futex_wait_queue_me 0 S FinalizerDaemon
cameraserver   427   427     1   71428  18752 sprd_i2c_handle_msg 0 D android.hardwar

刚好cameraserver用户下有个线程处在D状态,WCHAN列的sprd_i2c_handle_msg应该是该线程正阻塞在其中的函数。一下子明白了,i2c卡死导致无法进入系统(之前有遇到类似的case)。查看摄像头代码默认使用了i2c0和i2c1,我们的主板i2c1还接了其他外设,也就可能是设备没上电导致i2c信号被拉低,导致系统初始化摄像头的时候,引起系统卡死。修改代码,屏蔽i2c1后,系统能进入主界面了。调过qcom,mtk的主板,都没有遇到类似的情况,只有展讯平台才遇到过,遇到过几次了,于是提个cq问下展讯。他们回复是符合协议的,非平台特有。直觉告诉我哪里不对,如果是i2c设备共性,那所有的平台都有类似的情况,但调的其他平台就没有遇到,难道是运气好?

找代码看下

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-sprd.c?h=v5.8-rc3

看下sprd_i2c_handle_msg(ps -AT有看到该函数),看到wait_for_completion函数没

/*
 * Program the controller for one I2C message, start the transfer, and block
 * until i2c_dev->complete is signalled.
 *
 * @i2c_adap:    adapter whose algo_data points at the struct sprd_i2c state
 * @msg:         message to transfer; I2C_M_RD in msg->flags selects a read
 * @is_last_msg: for writes, passed (as 0/1) to sprd_i2c_send_stop()
 *
 * Returns i2c_dev->err as it stands once the wait has finished.
 *
 * NOTE: the wait at the end has no timeout. If complete() is never called on
 * i2c_dev->complete, the calling thread never leaves this function.
 */
static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap,
			       struct i2c_msg *msg, bool is_last_msg)
{
	struct sprd_i2c *i2c_dev = i2c_adap->algo_data;

	/* Record the message, its buffer and its length in the device state. */
	i2c_dev->msg = msg;
	i2c_dev->buf = msg->buf;
	i2c_dev->count = msg->len;

	/* Re-arm the completion before anything can signal it for this message. */
	reinit_completion(&i2c_dev->complete);
	sprd_i2c_reset_fifo(i2c_dev);
	sprd_i2c_set_devaddr(i2c_dev, msg);
	sprd_i2c_set_count(i2c_dev, msg->len);

	if (msg->flags & I2C_M_RD) {
		/* Read: mode 1, and the stop flag is always set. */
		sprd_i2c_opt_mode(i2c_dev, 1);
		sprd_i2c_send_stop(i2c_dev, 1);
	} else {
		/* Write: mode 0, stop flag set only when this is the last message. */
		sprd_i2c_opt_mode(i2c_dev, 0);
		sprd_i2c_send_stop(i2c_dev, !!is_last_msg);
	}

	/*
	 * We should enable rx fifo full interrupt to get data when receiving
	 * full data.
	 */
	if (msg->flags & I2C_M_RD)
		sprd_i2c_set_fifo_full_int(i2c_dev, 1);
	else
		sprd_i2c_data_transfer(i2c_dev);

	sprd_i2c_opt_start(i2c_dev);

	/* Unbounded wait: returns only after complete(&i2c_dev->complete). */
	wait_for_completion(&i2c_dev->complete);

	return i2c_dev->err;
}

那什么时候发complete信号呢 

/*
 * Interrupt thread handler for the I2C controller (judging by its name and
 * irqreturn_t signature). The body is elided in this excerpt; only the call
 * that signals i2c_dev->complete is shown.
 */
static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id)
{
	....
	/* Signals the completion that sprd_i2c_handle_msg() waits on. */
	complete(&i2c_dev->complete);
	...
}

也就是i2c控制器中断来了,就发出complete信号,如果中断一直不来,那sprd_i2c_handle_msg就一直卡住。

看下wait_for_completion函数实现,也就是没有信号过来,就一直schedule(主动让出cpu),直到信号过来。由于睡眠时使用的状态是TASK_UNINTERRUPTIBLE,所以在ps里看到的就是D状态。

/* Timeout value that schedule_timeout() treats as "no timeout". */
#define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
/*
 * Wait for completion @x with no time limit.
 *
 * Delegates to wait_for_common() with MAX_SCHEDULE_TIMEOUT and the
 * TASK_UNINTERRUPTIBLE task state.
 */
void __sched wait_for_completion(struct completion *x)
{
	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}

/*
 * Wait on completion @x for up to @timeout, sleeping in task state @state.
 *
 * Thin wrapper that selects schedule_timeout() as the sleeping primitive and
 * returns whatever __wait_for_common() returns.
 */
static long __sched wait_for_common(struct completion *x, long timeout, int state)
{
	return __wait_for_common(x, schedule_timeout, timeout, state);
}

/*
 * Wait on completion @x using @action as the sleeping primitive.
 *
 * @x:       completion to wait on
 * @action:  function called to sleep; takes the remaining timeout and
 *           returns the new remaining timeout
 * @timeout: initial timeout passed to @action
 * @state:   task state to sleep in
 *
 * Returns the value produced by do_wait_for_common().
 *
 * NOTE(review): do_wait_for_common() drops and re-takes x->wait.lock around
 * the sleep, so the full kernel version presumably holds that lock around
 * this call; the locking is omitted in this abridged excerpt.
 */
static inline long __sched
__wait_for_common(struct completion *x,
		  long (*action)(long), long timeout, int state)
{
	timeout = do_wait_for_common(x, action, timeout, state);

	/* Propagate the result; wait_for_common() returns this value. */
	return timeout;
}


/*
 * Core wait loop for a completion.
 *
 * If @x is not yet done, queue the current task on x->wait and repeatedly
 * call @action (with x->wait.lock released) until x->done becomes non-zero,
 * @action returns 0, or a signal is pending for @state.
 *
 * Returns:
 *   - the last timeout value (0 or -ERESTARTSYS) if @x is still not done;
 *   - otherwise the remaining timeout, or 1 if that value is 0, after
 *     decrementing x->done.
 */
static inline long __sched do_wait_for_common(struct completion *x,
		   long (*action)(long), long timeout, int state)
{
	if (!x->done) {
		DECLARE_WAITQUEUE(wait, current);

		__add_wait_queue_tail_exclusive(&x->wait, &wait);
		do {
			/* Give up with -ERESTARTSYS if a signal is pending for this state. */
			if (signal_pending_state(state, current)) {
				timeout = -ERESTARTSYS;
				break;
			}
			__set_current_state(state);
			/* Release the lock across the sleep, then re-take it. */
			spin_unlock_irq(&x->wait.lock);
			timeout = action(timeout);
			spin_lock_irq(&x->wait.lock);
		} while (!x->done && timeout);
		__remove_wait_queue(&x->wait, &wait);
		/* Left the loop without completion: report why (0 or error). */
		if (!x->done)
			return timeout;
	}
	/* Consume one completion. */
	x->done--;
	/* Never return 0 on success, so callers can tell it from a timeout. */
	return timeout ?: 1;
}

/*
 * Sleep primitive used by the completion wait path (abridged excerpt: only
 * the MAX_SCHEDULE_TIMEOUT case is shown).
 *
 * With @timeout == MAX_SCHEDULE_TIMEOUT it calls schedule() with no timer
 * armed in the code shown here, then returns @timeout unchanged (negative
 * values are clamped to 0).
 */
signed long __sched schedule_timeout(signed long timeout)
{

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it want with the retval.
		 */
		schedule();
		goto out;
	}
 out:
	return timeout < 0 ? 0 : timeout;
}

再看下mtk或qcom的驱动

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-mt65xx.c?h=v5.8-rc3

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-qcom-geni.c?h=v5.8-rc3

使用的是wait_for_completion_timeout函数,如

/*
 * Qualcomm GENI I2C transmit path (abridged excerpt: only the wait and its
 * timeout handling are shown).
 *
 * Unlike an unbounded wait_for_completion(), the wait here is limited to
 * XFER_TIMEOUT; when it expires the transfer is aborted and the function
 * still returns.
 *
 * Returns gi2c->err.
 */
static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,u32 m_param)
{
	unsigned long time_left;
	/* Bounded wait: a return value of 0 means the timeout elapsed. */
	time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
	if (!time_left)
		geni_i2c_abort_xfer(gi2c);

	return gi2c->err;
}

即使没有信号过来,i2c也不会卡住。

当然,这个bug已提给他们,后续版本会加入超时机制。

你可能感兴趣的:(死机)