// Dialog class of the thread test application (abridged listing).
// It owns the two critical sections that the worker-thread procedures
// ThreadProc and ThreadProcSecond acquire.
class CTestThreadDlg : public CDialog
{
    // Presumably the BN_CLICKED handler for Button1 (name follows the MFC
    // wizard convention) -- confirm against the message map.
    afx_msg void OnBnClickedButton1();

public:
    // ThreadProc / ThreadProcSecond are free functions that receive a
    // CTestThreadDlg* and touch these members directly. Class members are
    // private by default, so they must be declared public for that access
    // to compile.
    //
    // NOTE(review): both objects have to be set up with
    // InitializeCriticalSection before first use and released with
    // DeleteCriticalSection; that code is not part of this listing -- confirm.
    CRITICAL_SECTION _section;      // acquired first by ThreadProc, second by ThreadProcSecond
    CRITICAL_SECTION _section_sec;  // acquired first by ThreadProcSecond, second by ThreadProc
};
// Forward declarations of the two worker-thread entry points defined below.
// Both receive an opaque pointer that the definitions cast to CTestThreadDlg*.
// The signatures differ: ThreadProc is DWORD WINAPI (LPVOID), while
// ThreadProcSecond is unsigned int __stdcall (void *). Presumably they are
// started through CreateThread and _beginthreadex respectively (the call
// stacks in the analysis show kernel32!BaseThreadStart and
// MSVCR90D!_threadstartex) -- confirm against the code that creates them.
DWORD WINAPI ThreadProc(LPVOID parame);
unsigned int __stdcall ThreadProcSecond(void * parame);
// Button1 click handler. The body is empty in this listing.
// NOTE(review): presumably this is where the two worker threads are created
// with `this` as their argument; that code is not shown here -- confirm.
void CTestThreadDlg::OnBnClickedButton1()
{}
// Worker-thread entry point.
//
// parame: pointer to the CTestThreadDlg whose critical sections are used.
// Returns 0.
//
// Lock order here is _section first, then (after a 2-second sleep)
// _section_sec. ThreadProcSecond takes the same two locks in the opposite
// order, so when both run concurrently against the same dialog each can end
// up holding one lock while waiting for the other. That deadlock is the
// scenario this listing is meant to reproduce; the order is intentional.
DWORD WINAPI ThreadProc( LPVOID parame )
{
    CTestThreadDlg * const dialog = static_cast<CTestThreadDlg*>(parame);
    CRITICAL_SECTION * const firstLock  = &dialog->_section;
    CRITICAL_SECTION * const secondLock = &dialog->_section_sec;

    EnterCriticalSection(firstLock);
    // Hold the first lock long enough for the other thread to take its own
    // first lock before we ask for the second one.
    Sleep(2 * 1000);
    EnterCriticalSection(secondLock);

    // Released in acquisition order (first, then second), as in the original.
    LeaveCriticalSection(firstLock);
    LeaveCriticalSection(secondLock);
    return 0;
}
// Second worker-thread entry point.
//
// parame: pointer to the CTestThreadDlg whose critical sections are used.
// Returns 100.
//
// Lock order here is _section_sec first, then (after a 2-second sleep)
// _section -- the reverse of ThreadProc. Running both procedures
// concurrently against the same dialog therefore can deadlock; the inverted
// order is intentional, it is what this listing demonstrates.
unsigned int __stdcall ThreadProcSecond( void * parame )
{
    CTestThreadDlg * const dialog = static_cast<CTestThreadDlg*>(parame);
    CRITICAL_SECTION * const outerLock = &dialog->_section_sec;
    CRITICAL_SECTION * const innerLock = &dialog->_section;

    EnterCriticalSection(outerLock);
    // Keep the outer lock held while the other thread grabs its first lock.
    Sleep(2 * 1000);
    EnterCriticalSection(innerLock);

    // Released innermost-first.
    LeaveCriticalSection(innerLock);
    LeaveCriticalSection(outerLock);
    return 100;
}
如上程序在执行之后出现线程死锁,经过WinDbg分析后得到如下信息
0:001> ~*
0 Id: 11e4.11d4 Suspend: 1 Teb: 7ffdf000 Unfrozen
Start: *** WARNING: Unable to verify checksum for F:\Test\TestThread\Debug\TestThread.exe
TestThread!ILT+2030(_wWinMainCRTStartup) (004117f3)
Priority: 0 Priority class: 32 Affinity: 3
. 1 Id: 11e4.1354 Suspend: 1 Teb: 7ffde000 Unfrozen
Start: ntdll!DbgUiRemoteBreakin (7c9720ec)
Priority: 0 Priority class: 32 Affinity: 3
3 Id: 11e4.15cc Suspend: 1 Teb: 7ffdb000 Unfrozen
Start: kernel32!BaseThreadStartThunk (7c810729)
Priority: 0 Priority class: 32 Affinity: 3
4 Id: 11e4.138c Suspend: 1 Teb: 7ffdd000 Unfrozen
Start: kernel32!BaseThreadStartThunk (7c810729)
Priority: 0 Priority class: 32 Affinity: 3
5 Id: 11e4.10cc Suspend: 1 Teb: 7ffda000 Unfrozen
Start: kernel32!BaseThreadStartThunk (7c810729)
Priority: 0 Priority class: 32 Affinity: 3
0:001> ~4kv
ChildEBP RetAddr Args to Child
055cfe3c 7c92df5a 7c939b23 00000208 00000000 ntdll!KiFastSystemCallRet (FPO: [0,0,0])
055cfe40 7c939b23 00000208 00000000 00000000 ntdll!NtWaitForSingleObject+0xc (FPO: [3,0,0])
055cfec8 7c921046 0012fe88 00413b1f 0012fe88 ntdll!RtlpWaitForCriticalSection+0x132 (FPO: [1,26,4])
055cfed0 00413b1f 0012fe88 00dafda0 010003f0 ntdll!RtlEnterCriticalSection+0x46 (FPO: [1,0,0])
055cffb4 7c80b729 0012fdf8 00dafda0 010003f0 TestThread!ThreadProc+0x5f (FPO: [Non-Fpo]) (CONV: stdcall) [f:\test\testthread\testthread\testthreaddlg.cpp @ 183]
055cffec 00000000 0041185c 0012fdf8 00000000 kernel32!BaseThreadStart+0x37 (FPO: [Non-Fpo])
0:001> ~5kv
ChildEBP RetAddr Args to Child
057cfddc 7c92df5a 7c939b23 00000210 00000000 ntdll!KiFastSystemCallRet (FPO: [0,0,0])
057cfde0 7c939b23 00000210 00000000 00000000 ntdll!NtWaitForSingleObject+0xc (FPO: [3,0,0])
057cfe68 7c921046 0012fe70 00412f7f 0012fe70 ntdll!RtlpWaitForCriticalSection+0x132 (FPO: [1,26,4])
057cfe70 00412f7f 0012fe70 10299358 0012f390 ntdll!RtlEnterCriticalSection+0x46 (FPO: [1,0,0])
057cff6c 1023dfd3 0012fdf8 cef52b3d 10299358 TestThread!ThreadProcSecond+0x5f (FPO: [Non-Fpo]) (CONV: stdcall) [f:\test\testthread\testthread\testthreaddlg.cpp @ 197]
057cffa8 1023df69 0039fd40 057cffec 7c80b729 MSVCR90D!_callthreadstartex+0x53 (FPO: [Non-Fpo]) (CONV: cdecl) [f:\dd\vctools\crt_bld\self_x86\crt\src\threadex.c @ 348]
057cffb4 7c80b729 0039fd40 10299358 0012f390 MSVCR90D!_threadstartex+0x89 (FPO: [Non-Fpo]) (CONV: stdcall) [f:\dd\vctools\crt_bld\self_x86\crt\src\threadex.c @ 331]
057cffec 00000000 1023dee0 0039fb00 00000000 kernel32!BaseThreadStart+0x37 (FPO: [Non-Fpo])
0:001> dt RTL_CRITICAL_SECTION 0012fe70
TestThread!RTL_CRITICAL_SECTION
+0x000 DebugInfo : 0x00161ff8 _RTL_CRITICAL_SECTION_DEBUG
+0x004 LockCount : 1
+0x008 RecursionCount : 1
+0x00c OwningThread : 0x0000138c
+0x010 LockSemaphore : 0x00000210
+0x014 SpinCount : 0
0:001> dt RTL_CRITICAL_SECTION 0012fe88
TestThread!RTL_CRITICAL_SECTION
+0x000 DebugInfo : 0x00162a58 _RTL_CRITICAL_SECTION_DEBUG
+0x004 LockCount : 1
+0x008 RecursionCount : 1
+0x00c OwningThread : 0x000010cc
+0x010 LockSemaphore : 0x00000208
+0x014 SpinCount : 0
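(从上面的数据(各线程栈中 NtWaitForSingleObject 等待的句柄,以及两个 RTL_CRITICAL_SECTION 的 OwningThread 和 LockSemaphore 字段)得知:两个线程各自进入一个临界区之后,又在等待对方持有的另一个临界区,因此形成死锁;同时可以看出等待的对象是 CRITICAL_SECTION。)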
结论一: 线程4的线程ID是 138c(11e4.138c 的格式为"进程ID.线程ID",对应 OwningThread 0x0000138c),它首先占用了临界区对象 0012fe70,该临界区的 LockSemaphore 句柄为 0x00000210
4 Id: 11e4.138c Suspend: 1 Teb: 7ffdd000 Unfrozen
Start: kernel32!BaseThreadStartThunk (7c810729)
Priority: 0 Priority class: 32 Affinity: 3
0:001> dt RTL_CRITICAL_SECTION 0012fe70
TestThread!RTL_CRITICAL_SECTION
+0x000 DebugInfo : 0x00161ff8 _RTL_CRITICAL_SECTION_DEBUG
+0x004 LockCount : 1
+0x008 RecursionCount : 1
+0x00c OwningThread : 0x0000138c
+0x010 LockSemaphore : 0x00000210
+0x014 SpinCount : 0
然后线程4,占用互斥对象0012fe70之后,尝试占用互斥对象0012fe88
0:001> ~4kv
ChildEBP RetAddr Args to Child
055cfe3c 7c92df5a 7c939b23 00000208 00000000 ntdll!KiFastSystemCallRet (FPO: [0,0,0])
055cfe40 7c939b23 00000208 00000000 00000000 ntdll!NtWaitForSingleObject+0xc (FPO: [3,0,0])
055cfec8 7c921046 0012fe88 00413b1f 0012fe88 ntdll!RtlpWaitForCriticalSection+0x132 (FPO: [1,26,4])
055cfed0 00413b1f 0012fe88 00dafda0 010003f0 ntdll!RtlEnterCriticalSection+0x46 (FPO: [1,0,0])
但是此时临界区对象 0012fe88 已被线程5占用(线程5的ID为 11e4.10cc,即 OwningThread 0x000010cc),并且一直没有释放:
5 Id: 11e4.10cc Suspend: 1 Teb: 7ffda000 Unfrozen
Start: kernel32!BaseThreadStartThunk (7c810729)
Priority: 0 Priority class: 32 Affinity: 3
0:001> dt RTL_CRITICAL_SECTION 0012fe88
TestThread!RTL_CRITICAL_SECTION
+0x000 DebugInfo : 0x00162a58 _RTL_CRITICAL_SECTION_DEBUG
+0x004 LockCount : 1
+0x008 RecursionCount : 1
+0x00c OwningThread : 0x000010cc
+0x010 LockSemaphore : 0x00000208
+0x014 SpinCount : 0
此时,线程5也正在等待线程4所占用的互斥对象,所以出现了死锁
0:001> ~5kv
ChildEBP RetAddr Args to Child
057cfddc 7c92df5a 7c939b23 00000210 00000000 ntdll!KiFastSystemCallRet (FPO: [0,0,0])
057cfde0 7c939b23 00000210 00000000 00000000 ntdll!NtWaitForSingleObject+0xc (FPO: [3,0,0])
057cfe68 7c921046 0012fe70 00412f7f 0012fe70 ntdll!RtlpWaitForCriticalSection+0x132 (FPO: [1,26,4])
057cfe70 00412f7f 0012fe70 10299358 0012f390 ntdll!RtlEnterCriticalSection+0x46 (FPO: [1,0,0])
解决这类线程死锁的办法是:所有线程都按照同一个顺序获取互斥对象。例如在本例中,让 ThreadProcSecond 也和 ThreadProc 一样先进入 _section、再进入 _section_sec,就不会再出现互相等待的情况。