新的项目,编译代码,烧写到主板后,主板能起来,但就是进入不了主界面(通过vysor同屏查看)。这是比较少见的,怀疑是代码上哪里弄错,但再三检查也没发现问题,将固件烧写到其他项目的主板上能正常起来,那就不是代码的问题了。
先看内核log,init进程一直在启动camera服务,但到了600s都没启动成功。
查看卡住的线程:过滤出处于D(不可中断睡眠)、Z(僵尸)、R(运行)状态的线程(grep按字母匹配整行,所以线程名里带D/Z/R的S状态线程也会被列出来)
root@G480:/home/w# adb shell ps -AT|grep -e "D" -e "Z" -e "R"
USER PID TID PPID VSZ RSS WCHAN ADDR S CMD
root 69 69 2 0 0 mbox_send_thread 0 D mbox-send-threa
root 72 72 2 0 0 monitor_irqs_change 0 D irqs_change
root 232 232 2 0 0 0 0 R sugov:0
root 419 689 1 2424672 8708 poll_schedule_timeout 0 S SkDestroyListen
root 420 4678 1 5456024 177276 futex_wait_queue_me 0 S HeapTaskDaemon
root 420 4679 1 5456024 177276 futex_wait_queue_me 0 S ReferenceQueueD
root 420 4680 1 5456024 177276 futex_wait_queue_me 0 S FinalizerDaemon
root 421 1451 1 1768180 161488 futex_wait_queue_me 0 S HeapTaskDaemon
root 421 1452 1 1768180 161488 futex_wait_queue_me 0 S ReferenceQueueD
root 421 1453 1 1768180 161488 futex_wait_queue_me 0 S FinalizerDaemon
cameraserver 427 427 1 71428 18752 sprd_i2c_handle_msg 0 D android.hardwar
刚好有个cameraserver用户的进程处在D状态,WCHAN列的sprd_i2c_handle_msg应该是它正在执行(卡住)的内核函数。一下子明白了,i2c卡死导致无法进入系统(之前有遇到类似的case)。查看摄像头代码默认使用了i2c0和i2c1,我们的主板i2c1还接了其他外设,也就可能是设备没上电导致i2c信号被拉低,导致系统初始化摄像头的时候,引起系统卡死。修改代码,屏蔽i2c1后,系统能进入主界面了。调过qcom,mtk的主板,都没有遇到类似的情况,只有展讯平台才遇到过,遇到过几次了,于是提个cq问下展讯。他们回复是符合协议的,非平台特有。直觉告诉我哪里不对,如果是i2c设备共性,那所有的平台都有类似的情况,但调的其他平台就没有遇到,难道是运气好?
找代码看下
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-sprd.c?h=v5.8-rc3
看下sprd_i2c_handle_msg(ps -AT的WCHAN列有看到该函数),看到wait_for_completion函数没?
/*
 * Program the controller for a single I2C message, start the transfer and
 * block until the completion in i2c_dev->complete is signalled.
 *
 * @i2c_adap:    adapter; its algo_data is used as the struct sprd_i2c state
 * @msg:         message to transfer (buf, len and the I2C_M_RD flag are read)
 * @is_last_msg: for write messages, passed (normalized to 0/1) to
 *               sprd_i2c_send_stop()
 *
 * Returns i2c_dev->err as it stands after the wait returns.
 */
static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap,
struct i2c_msg *msg, bool is_last_msg)
{
struct sprd_i2c *i2c_dev = i2c_adap->algo_data;
/* Record the message being transferred in the per-device state. */
i2c_dev->msg = msg;
i2c_dev->buf = msg->buf;
i2c_dev->count = msg->len;
/* Re-arm the completion before starting, so the wait below starts fresh. */
reinit_completion(&i2c_dev->complete);
sprd_i2c_reset_fifo(i2c_dev);
sprd_i2c_set_devaddr(i2c_dev, msg);
sprd_i2c_set_count(i2c_dev, msg->len);
/*
 * Reads use mode 1 and always pass 1 to sprd_i2c_send_stop(); writes use
 * mode 0 and pass 1 only when this is the last message.
 */
if (msg->flags & I2C_M_RD) {
sprd_i2c_opt_mode(i2c_dev, 1);
sprd_i2c_send_stop(i2c_dev, 1);
} else {
sprd_i2c_opt_mode(i2c_dev, 0);
sprd_i2c_send_stop(i2c_dev, !!is_last_msg);
}
/*
* We should enable rx fifo full interrupt to get data when receiving
* full data.
*/
if (msg->flags & I2C_M_RD)
sprd_i2c_set_fifo_full_int(i2c_dev, 1);
else
sprd_i2c_data_transfer(i2c_dev);
sprd_i2c_opt_start(i2c_dev);
/*
 * NOTE(review): wait_for_completion() takes no timeout. If nothing ever
 * calls complete() on i2c_dev->complete, this call never returns and the
 * calling task stays blocked here.
 */
wait_for_completion(&i2c_dev->complete);
return i2c_dev->err;
}
那什么时候发complete信号呢
/*
 * Interrupt handler excerpt; the parts replaced by dots are elided in this
 * quote and are not shown here. The relevant step is the complete() call on
 * i2c_dev->complete, the same completion that sprd_i2c_handle_msg() waits on.
 */
static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id)
{
....
complete(&i2c_dev->complete);
...
}
也就是i2c控制器中断来了,就发出complete信号,如果中断信号不过来,那sprd_i2c_handle_msg就一直卡住。
看下wait_for_completion函数实现,也就是没有信号过来,一直schedule(主动让出cpu),直到信号过来。注意超时时间传的是MAX_SCHEDULE_TIMEOUT(相当于永不超时),睡眠状态是TASK_UNINTERRUPTIBLE(不可中断睡眠),这也正是ps里看到该线程处于D状态的原因。
/* Timeout value used below for waits that have no upper bound. */
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
/*
 * Wait for completion @x with timeout MAX_SCHEDULE_TIMEOUT and task state
 * TASK_UNINTERRUPTIBLE. The return value of wait_for_common() is discarded.
 */
void __sched wait_for_completion(struct completion *x)
{
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
/*
 * Wait on @x for up to @timeout in task state @state, using
 * schedule_timeout() as the sleep action. Returns whatever
 * __wait_for_common() returns.
 */
static long __sched wait_for_common(struct completion *x, long timeout, int state)
{
return __wait_for_common(x, schedule_timeout, timeout, state);
}
/*
 * Run the wait loop for completion @x and hand its result back to the caller.
 *
 * @x:       completion to wait on
 * @action:  sleep function called with the remaining timeout
 * @timeout: maximum time to wait, as understood by @action
 * @state:   task state to sleep in
 *
 * Returns the value produced by do_wait_for_common().
 *
 * NOTE(review): do_wait_for_common() drops and re-takes x->wait.lock around
 * the sleep, so the caller is expected to hold that lock; the locking is not
 * shown in this abridged excerpt.
 */
static inline long __sched
__wait_for_common(struct completion *x,
		  long (*action)(long), long timeout, int state)
{
	timeout = do_wait_for_common(x, action, timeout, state);

	/* The function returns long and wait_for_common() uses the result. */
	return timeout;
}
/*
 * Core wait loop for a completion.
 *
 * If @x is not yet done, queue the current task on x->wait and repeatedly
 * call @action(timeout) until x->done becomes non-zero, the timeout reaches
 * zero, or a signal is pending for @state.
 *
 * Returns the remaining timeout (0, or -ERESTARTSYS when interrupted) if @x
 * is still not done; otherwise consumes one "done" count and returns the
 * remaining timeout, or 1 if that would be 0.
 */
static inline long __sched do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
__add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
/* Bail out if a signal is pending for this sleep state. */
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
/* x->wait.lock is released while sleeping and re-taken afterwards. */
spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
/* Left the loop without completion: report timeout/error as-is. */
if (!x->done)
return timeout;
}
x->done--;
/* GNU "?:" shorthand: timeout if non-zero, else 1, so success is never 0. */
return timeout ?: 1;
}
/*
 * Abridged excerpt: only the MAX_SCHEDULE_TIMEOUT case is shown. In that
 * case the function just calls schedule() and then jumps to the return, so
 * nothing in this path bounds how long the task sleeps.
 *
 * Returns 0 if @timeout is negative, otherwise @timeout unchanged.
 */
signed long __sched schedule_timeout(signed long timeout)
{
switch (timeout)
{
case MAX_SCHEDULE_TIMEOUT:
/*
* These two special cases are useful to be comfortable
* in the caller. Nothing more. We could take
* MAX_SCHEDULE_TIMEOUT from one of the negative value
* but I'd like to return a valid offset (>=0) to allow
* the caller to do everything it want with the retval.
*/
schedule();
goto out;
}
out:
return timeout < 0 ? 0 : timeout;
}
再看下mtk或qcom的驱动
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-mt65xx.c?h=v5.8-rc3
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/i2c/busses/i2c-qcom-geni.c?h=v5.8-rc3
使用的是wait_for_completion_timeout函数,如
/*
 * Abridged excerpt of the Qualcomm GENI write path: @msg and @m_param are
 * not used in the lines quoted here.
 *
 * Waits on gi2c->done with wait_for_completion_timeout() bounded by
 * XFER_TIMEOUT; when that returns 0 the transfer is aborted via
 * geni_i2c_abort_xfer(). Returns gi2c->err.
 */
static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,u32 m_param)
{
unsigned long time_left;
time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
/* A zero return is treated as "timed out": abort instead of waiting on. */
if (!time_left)
geni_i2c_abort_xfer(gi2c);
return gi2c->err;
}
即使没有信号过来,i2c也不会卡住。
当然,这个bug已提给他们,后续版本会加入超时机制。