内核出现panic,kdump搜集到了vmcore。vmcore中直接导致panic的log信息为(包含相应CPU上的堆栈):
Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 18
Pid: 12410, comm: xxxx Not tainted 2.6.32-220.el6.x86_64 #1
Call Trace:
[] ? panic+0x8b/0x156
[] ? watchdog_overflow_callback+0x1fa/0x200
[] ? __perf_event_overflow+0x9d/0x230
[] ? perf_event_overflow+0x14/0x20
[] ? intel_pmu_handle_irq+0x336/0x550
[] ? kprobe_exceptions_notify+0x16/0x450
[] ? perf_event_nmi_handler+0x39/0xb0
[] ? notifier_call_chain+0x55/0x80
[] ? atomic_notifier_call_chain+0x1a/0x20
[] ? notify_die+0x2e/0x30
[] ? do_nmi+0x173/0x2c0
[] ? nmi+0x20/0x30
[] ? _spin_lock_irq+0x25/0x40
<> [] ? wait_for_common+0x3c/0x180
[] ? _spin_unlock_irqrestore+0x1d/0x20
[] ? wait_for_completion_timeout+0x13/0x20
[] ? _ctl_do_mpt_command+0x3be/0xce0 [mpt2sas]
[] ? avc_has_perm_noaudit+0x9b/0x470
[] ? _spin_unlock_irqrestore+0x17/0x20
[] ? _ctl_ioctl_main+0xdb9/0x12d0 [mpt2sas]
[] ? _ctl_ioctl+0x35/0x50 [mpt2sas]
[] ? vfs_ioctl+0x22/0xa0
[] ? do_vfs_ioctl+0x84/0x580
[] ? sys_ioctl+0x81/0xa0
[] ? system_call_fastpath+0x16/0x1b
2、深入分析
1)思维误区
接下来要分析为什么spinlock获取不到,这个问题要分析清楚,就比较复杂了。
通常来说,对于类似的死锁问题分析,都会有这种思路:死锁,那就肯定有进程持有相应的锁而一直不释放,导致本进程一直获取不到锁。
那就需要寻找持有锁的进程了,持有该锁的进程可能处于如下几种状态(按可能性大小排列):
a、处于RUNNING状态,且正在其它某CPU上运行。
b、处于RUNNING状态,但暂时没有得到调度运行。
c、处于D状态,等待某任务完成。
d、处于S状态
e、已经运行结束,进程已经不存在。
最可能的肯定是a和b,c、d和e属于明显的内核bug,对于c和d来说,因为spinlock持有者是不能sleep的,内核中对于持有spinlock再进行sleep的情况应该有判断和告警;对于e来说,那就是有进程持有spin_lock锁,但没有释放就退出了(比如某些异常分支),相当于泄露,这种情况内核中有静态代码检查工具,对于一般的用户态程序这种错误可能容易出现,但内核中这种可能性也极小。先不论可能性大小,对于本问题,这种想法将导致进入误区。照这种思路,分析过程大致为(这也是一种常用的分析思路,不能说不对,只能说不宜于本问题的分析):
(1)看看所有CPU上都在运行什么任务
crash> bt -a
PID: 18176 TASK: ffff8802b37100c0 CPU: 0 COMMAND: "monitor.s"
[exception RIP: lock_kernel+46]
...
PID: 18371 TASK: ffff880f5e5460c0 CPU: 1 COMMAND: "bash"
[exception RIP: lock_kernel+53]
...
PID: 18334 TASK: ffff880f5f219540 CPU: 2 COMMAND: "monitor.sh"
[exception RIP: lock_kernel+46]
...
PID: 15042 TASK: ffff880ec0015540 CPU: 3 COMMAND: "xxx"
[exception RIP: __bitmap_empty+115]
...
--- ---
#6 [ffff880eba59ddd8] __bitmap_empty at ffffffff81281f93
#7 [ffff880eba59dde0] flush_tlb_others_ipi at ffffffff810480d8
#8 [ffff880eba59de30] native_flush_tlb_others at ffffffff81048156
#9 [ffff880eba59de60] flush_tlb_mm at ffffffff8104832c
#10 [ffff880eba59de80] unmap_region at ffffffff8114744f
#11 [ffff880eba59def0] do_munmap at ffffffff81147aa6
#12 [ffff880eba59df50] sys_munmap at ffffffff81147be6
#13 [ffff880eba59df80] system_call_fastpath at ffffffff8100b0f2
...
PID: 12410 TASK: ffff8818681cd540 CPU: 18 COMMAND: "xxxx"(最终触发nmi watchdog的CPU)
[exception RIP: _spin_lock_irq+37]
...
--- ---
#13 [ffff88186635bba8] _spin_lock_irq at ffffffff814fb465
#14 [ffff88186635bbb0] wait_for_common at ffffffff814f95ec
#15 [ffff88186635bc40] wait_for_completion_timeout at ffffffff814f97c3
#16 [ffff88186635bc50] _ctl_do_mpt_command at ffffffffa006e8ee [mpt2sas]
#17 [ffff88186635bd30] _ctl_ioctl_main at ffffffffa0070159 [mpt2sas]
#18 [ffff88186635be30] _ctl_ioctl at ffffffffa0070725 [mpt2sas]
#19 [ffff88186635be60] vfs_ioctl at ffffffff8118fb72
#20 [ffff88186635bea0] do_vfs_ioctl at ffffffff8118fd14
#21 [ffff88186635bf30] sys_ioctl at ffffffff81190291
PID: 18122 TASK: ffff880e9f5c94c0 CPU: 19 COMMAND: "xxxxx"
[exception RIP: _spin_lock+33]...
--- ---
#6 [ffff8802b4e05d58] _spin_lock at ffffffff814fb541
#7 [ffff8802b4e05d60] flush_tlb_others_ipi at ffffffff81048019
#8 [ffff8802b4e05db0] native_flush_tlb_others at ffffffff81048156
#9 [ffff8802b4e05de0] flush_tlb_mm at ffffffff8104832c
#10 [ffff8802b4e05e00] mprotect_fixup at ffffffff811491b0
#11 [ffff8802b4e05f20] sys_mprotect at ffffffff811494e5
#12 [ffff8802b4e05f80] system_call_fastpath at ffffffff8100b0f2
...
可以看出,除CPU3、18、19外,其它CPU都是阻塞在lock_kernel(大内核锁,老版本内核中,通常文件操作都需要持有大内核锁,对内核性能影响极大,新版本中已经逐渐去除)上,而CPU18正是触发nmi watchdog的CPU,从其堆栈流程中可以看出,由ioctl进入,而ioctl的流程中正需要持有大内核锁:
crash> l vfs_ioctl
...
crash> l
44
45 if (filp->f_op->unlocked_ioctl) {
46 error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
47 if (error == -ENOIOCTLCMD)
48 error = -EINVAL;
49 goto out;
50 } else if (filp->f_op->ioctl) {
51 lock_kernel();
52 error = filp->f_op->ioctl(filp->f_path.dentry->d_inode,
53 filp, cmd, arg);
所以除CPU3、18和19外,其它的所有CPU都是因为CPU18而阻塞。
再看看CPU19,该CPU也阻塞在spinlock上,但是位于flush_tlb_others_ipi流程中,结合CPU3一起看,可以看出CPU3也是在这个流程中,但阻塞地方不一样(__bitmap_empty ),看看flush_tlb_others_ipi 的流程,可以确认__bitmap_empty是在获取spinlock后的流程了:
crash> l flush_tlb_others_ipi
...
185 /*
186 * Could avoid this lock when
187 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
188 * probably not worth checking this for a cache-hot lock.
189 */
190 spin_lock(&f->tlbstate_lock);
191
crash> l
192 f->flush_mm = mm;
193 f->flush_va = va;
194 if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
195 /*
196 * We have to send the IPI only to
197 * CPUs affected.
198 */
199 apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
200 INVALIDATE_TLB_VECTOR_START + sender);
crash> bt 27685
PID: 27685 TASK: ffff88185936e080 CPU: 3 COMMAND: "java"
#0 [ffff880d29d49d38] schedule at ffffffff814f8b42
#1 [ffff880d29d49e00] schedule_timeout at ffffffff814f99d4
#2 [ffff880d29d49eb0] sys_epoll_wait at ffffffff811c1019
#3 [ffff880d29d49f80] system_call_fastpath at ffffffff8100b0f2
crash> bt
PID: 12410 TASK: ffff8818681cd540 CPU: 18 COMMAND: "xxxx"
#0 [ffff88109c6c7af0] machine_kexec at ffffffff8103237b
#1 [ffff88109c6c7b50] crash_kexec at ffffffff810ba552
#2 [ffff88109c6c7c20] panic at ffffffff814f846b
#3 [ffff88109c6c7ca0] watchdog_overflow_callback at ffffffff810dac1a
#4 [ffff88109c6c7cd0] __perf_event_overflow at ffffffff8110cb3d
#5 [ffff88109c6c7d70] perf_event_overflow at ffffffff8110d0f4
#6 [ffff88109c6c7d80] intel_pmu_handle_irq at ffffffff8101e396
#7 [ffff88109c6c7e90] perf_event_nmi_handler at ffffffff814fcc39
#8 [ffff88109c6c7ea0] notifier_call_chain at ffffffff814fe7a5
#9 [ffff88109c6c7ee0] atomic_notifier_call_chain at ffffffff814fe80a
#10 [ffff88109c6c7ef0] notify_die at ffffffff81097dce
#11 [ffff88109c6c7f20] do_nmi at ffffffff814fc3c3
#12 [ffff88109c6c7f50] nmi at ffffffff814fbcd0
[exception RIP: _spin_lock_irq+37]
RIP: ffffffff814fb465 RSP: ffff88186635bba8 RFLAGS: 00000002
RAX: 0000000000000000 RBX: 0000000000002710 RCX: 000000000000fc92
RDX: 0000000000000001 RSI: 0000000000002710 RDI: ffff881054610960
RBP: ffff88186635bba8 R8: 0000000000000000 R9: ffff881055c438c0
R10: 0000000000000000 R11: 0000000000000006 R12: ffff881054610958
R13: ffff881054610960 R14: 0000000000000002 R15: ffff881054610938
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- ---
#13 [ffff88186635bba8] _spin_lock_irq at ffffffff814fb465
#14 [ffff88186635bbb0] wait_for_common at ffffffff814f95ec
#15 [ffff88186635bc40] wait_for_completion_timeout at ffffffff814f97c3
#16 [ffff88186635bc50] _ctl_do_mpt_command at ffffffffa006e8ee [mpt2sas]
#17 [ffff88186635bd30] _ctl_ioctl_main at ffffffffa0070159 [mpt2sas]
#18 [ffff88186635be30] _ctl_ioctl at ffffffffa0070725 [mpt2sas]
#19 [ffff88186635be60] vfs_ioctl at ffffffff8118fb72
#20 [ffff88186635bea0] do_vfs_ioctl at ffffffff8118fd14
#21 [ffff88186635bf30] sys_ioctl at ffffffff81190291
#22 [ffff88186635bf80] system_call_fastpath at ffffffff8100b0f2
RIP: 0000003d5f8dd847 RSP: 00007f4d34fda528 RFLAGS: 00003202
RAX: 0000000000000010 RBX: ffffffff8100b0f2 RCX: 0000003d5f8dd847
RDX: 00007f4ce0002120 RSI: 00000000c0484c14 RDI: 0000000000000047
crash> dis -l wait_for_common
0xffffffff814f95e4 : mov %r13,%rdi
0xffffffff814f95e7 : callq 0xffffffff814fb440 <_spin_lock_irq>
crash> l _spin_lock_irq
68 EXPORT_SYMBOL(_spin_lock_irqsave);
69 #endif
70
71 #ifndef _spin_lock_irq
72 void __lockfunc _spin_lock_irq(spinlock_t *lock)
73 {
74 __spin_lock_irq(lock);
75 }
76 EXPORT_SYMBOL(_spin_lock_irq);
77 #endif
crash> spinlock_t ffff881054610960
struct spinlock_t {
raw_lock = {
slock = 65537
}
}
crash> eval 65537
hexadecimal: 10001
decimal: 65537
octal: 200001
binary: 0000000000000000000000000000000000000000000000010000000000000001
crash> dis -l ffffffff814fb465
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/arch/x86/include/asm/spinlock.h: 127
0xffffffff814fb465 <_spin_lock_irq+37>: movzwl (%rdi),%edx
crash> l /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/arch/x86/include/asm/spinlock.h: 127
122 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
123 {
124 int inc;
125 int tmp;
126
127 asm volatile("1:\t\n"
128 "mov $0x10000, %0\n\t"
129 LOCK_PREFIX "xaddl %0, %1\n"
130 "movzwl %w0, %2\n\t"
131 "shrl $16, %0\n\t"
crash> dis -l ffffffff814f95ec
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/kernel/sched.c: 6228
0xffffffff814f95ec : mov (%r12),%eax
crash> l /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/kernel/sched.c: 6228
6223 timeout = schedule_timeout(timeout);
6224 spin_lock_irq(&x->wait.lock);
6225 } while (!x->done && timeout);
6226 __remove_wait_queue(&x->wait, &wait);
6227 if (!x->done)
6228 return timeout;
6229 }
6230 x->done--;
6231 return timeout ?: 1;
6232 }
继续看代码
6270 unsigned long __sched
6271 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
6272 {
6273 return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
6274 }
6275 EXPORT_SYMBOL(wait_for_completion_timeout);
crash> completion
struct completion {
unsigned int done;
wait_queue_head_t wait;
}
SIZE: 32
crash> wait_queue_head_t
typedef struct __wait_queue_head {
spinlock_t lock;
struct list_head task_list;
} wait_queue_head_t;
SIZE: 24
crash> dis -l ffffffffa006e8ee
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/drivers/scsi/mpt2sas/mpt2sas_ctl.c: 909
0xffffffffa006e8ee <_ctl_do_mpt_command+958>: movzbl 0x3(%r14),%eax
crash> l /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/drivers/scsi/mpt2sas/mpt2sas_ctl.c: 909
904 else
905 timeout = karg.timeout;
906 init_completion(&ioc->ctl_cmds.done);
907 timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
908 timeout*HZ);
909 if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
910 Mpi2SCSITaskManagementRequest_t *tm_request =
911 (Mpi2SCSITaskManagementRequest_t *)mpi_request;
912 mpt2sas_scsih_clear_tm_flag(ioc, le16_to_cpu(
crash> mod -s mpt2sas usr/lib/debug/lib/modules/2.6.32-220.el6.x86_64/kernel/drivers/scsi/mpt2sas/mpt2sas.ko.debug
MODULE NAME SIZE OBJECT FILE
ffffffffa007a460 mpt2sas 173472 usr/lib/debug/lib/modules/2.6.32-220.el6.x86_64/kernel/drivers/scsi/mpt2sas/mpt2sas.ko.debug
crash> MPT2SAS_ADAPTER
struct MPT2SAS_ADAPTER {
struct list_head list;
struct Scsi_Host *shost;
u8 id;
u32 pci_irq;
...
struct _internal_cmd ctl_cmds;
...
}
crash> _internal_cmd
struct _internal_cmd {
struct mutex mutex;
struct completion done;
void *reply;
void *sense;
u16 status;
u16 smid;
}
SIZE: 88
crash> l /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/drivers/scsi/mpt2sas/mpt2sas_ctl.c: 899
894 mpt2sas_base_put_smid_default(ioc, smid);
895 break;
896 }
897 default:
898 mpt2sas_base_put_smid_default(ioc, smid);
899 break;
900 }
901
902 if (karg.timeout < MPT2_IOCTL_DEFAULT_TIMEOUT)
903 timeout = MPT2_IOCTL_DEFAULT_TIMEOUT;
crash> l /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/drivers/scsi/mpt2sas/mpt2sas_ctl.c: 909
904 else
905 timeout = karg.timeout;
906 init_completion(&ioc->ctl_cmds.done);
907 timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
908 timeout*HZ);
909 if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
910 Mpi2SCSITaskManagementRequest_t *tm_request =
911 (Mpi2SCSITaskManagementRequest_t *)mpi_request;
912 mpt2sas_scsih_clear_tm_flag(ioc, le16_to_cpu(
913 tm_request->DevHandle));
[PATCH 06/07] [SCSI] mpt2sas : Rearrange the code so that the completion queues are initialized prior to sending the request to controller firmware
Rearrange the code so that the completion queues are initialized prior
to sending the request to controller firmware.
Signed-off-by: Nagalakshmi Nandigama
---
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index d0a36c9..e78733f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -3200,8 +3200,8 @@ mpt2sas_base_sas_iounit_control(struct MPT2SAS_ADAPTER *ioc,
if (mpi_request->Operation == MPI2_SAS_OP_PHY_HARD_RESET ||
mpi_request->Operation == MPI2_SAS_OP_PHY_LINK_RESET)
ioc->ioc_link_reset_in_progress = 1;
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->base_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->base_cmds.done,
msecs_to_jiffies(10000));
if ((mpi_request->Operation == MPI2_SAS_OP_PHY_HARD_RESET ||
@@ -3302,8 +3302,8 @@ mpt2sas_base_scsi_enclosure_processor(struct MPT2SAS_ADAPTER *ioc,
request = mpt2sas_base_get_msg_frame(ioc, smid);
ioc->base_cmds.smid = smid;
memcpy(request, mpi_request, sizeof(Mpi2SepReply_t));
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->base_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->base_cmds.done,
msecs_to_jiffies(10000));
if (!(ioc->base_cmds.status & MPT2_CMD_COMPLETE)) {
@@ -3810,8 +3810,8 @@ _base_event_notification(struct MPT2SAS_ADAPTER *ioc, int sleep_flag)
for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
mpi_request->EventMasks[i] =
cpu_to_le32(ioc->event_masks[i]);
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->base_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->base_cmds.done, 30*HZ);
if (!(ioc->base_cmds.status & MPT2_CMD_COMPLETE)) {
printk(MPT2SAS_ERR_FMT "%s: timeout\n",
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index cffed28..cb8290b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -819,6 +819,7 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
_ctl_display_some_debug(ioc, smid, "ctl_request", NULL);
#endif
+ init_completion(&ioc->ctl_cmds.done);
switch (mpi_request->Function) {
case MPI2_FUNCTION_SCSI_IO_REQUEST:
case MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH:
@@ -904,7 +905,6 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
timeout = MPT2_IOCTL_DEFAULT_TIMEOUT;
else
timeout = karg.timeout;
- init_completion(&ioc->ctl_cmds.done);
timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
timeout*HZ);
if (mpi_request->Function == MPI2_FUNCTION_SCSI_TASK_MGMT) {
@@ -1478,8 +1478,8 @@ _ctl_diag_register_2(struct MPT2SAS_ADAPTER *ioc,
mpi_request->ProductSpecific[i] =
cpu_to_le32(ioc->product_specific[buffer_type][i]);
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->ctl_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
MPT2_IOCTL_DEFAULT_TIMEOUT*HZ);
@@ -1822,8 +1822,8 @@ _ctl_send_release(struct MPT2SAS_ADAPTER *ioc, u8 buffer_type, u8 *issue_reset)
mpi_request->VF_ID = 0; /* TODO */
mpi_request->VP_ID = 0;
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->ctl_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
MPT2_IOCTL_DEFAULT_TIMEOUT*HZ);
@@ -2096,8 +2096,8 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
mpi_request->VF_ID = 0; /* TODO */
mpi_request->VP_ID = 0;
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->ctl_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->ctl_cmds.done,
MPT2_IOCTL_DEFAULT_TIMEOUT*HZ);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 322285c..d0750eb 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -398,8 +398,8 @@ _transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
dtransportprintk(ioc, printk(MPT2SAS_INFO_FMT "report_manufacture - "
"send to sas_addr(0x%016llx)\n", ioc->name,
(unsigned long long)sas_address));
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->transport_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->transport_cmds.done,
10*HZ);
@@ -1186,8 +1186,8 @@ _transport_get_expander_phy_error_log(struct MPT2SAS_ADAPTER *ioc,
dtransportprintk(ioc, printk(MPT2SAS_INFO_FMT "phy_error_log - "
"send to sas_addr(0x%016llx), phy(%d)\n", ioc->name,
(unsigned long long)phy->identify.sas_address, phy->number));
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->transport_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->transport_cmds.done,
10*HZ);
@@ -1511,8 +1511,9 @@ _transport_expander_phy_control(struct MPT2SAS_ADAPTER *ioc,
"send to sas_addr(0x%016llx), phy(%d), opcode(%d)\n", ioc->name,
(unsigned long long)phy->identify.sas_address, phy->number,
phy_operation));
- mpt2sas_base_put_smid_default(ioc, smid);
+
init_completion(&ioc->transport_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->transport_cmds.done,
10*HZ);
@@ -1951,8 +1952,8 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
dtransportprintk(ioc, printk(MPT2SAS_INFO_FMT "%s - "
"sending smp request\n", ioc->name, __func__));
- mpt2sas_base_put_smid_default(ioc, smid);
init_completion(&ioc->transport_cmds.done);
+ mpt2sas_base_put_smid_default(ioc, smid);
timeleft = wait_for_completion_timeout(&ioc->transport_cmds.done,
10*HZ);
问题就此定位。
仔细想想,其实该问题的本质在于对completion结构的访问没有进行保护,由于可能在多CPU上并发访问,按理应该有相应的机制进行保护才对(比如锁),而这里没有,补丁中采用的方法是“串行化”,保证这种场景下的串行执行。但理论上应无法杜绝其它场景的并发,也许在mpt2sas驱动中没有其它的并发场景,其具体机制没有深入研究,不能妄下结论。