tag: 信号 signal sigchld 死锁 堆栈
我们的程序需要捕获信号自己处理,所以尝试对1-32的信号处理(后面33-64的信号不处理)。
但是在调试代码时,发现一个线程死锁的问题。
程序目的:捕获信号,然后打印堆栈。
伪代码如下:
设置捕获信号函数()
{
//设置信号处理函数
sigact.sa_sigaction = TsSigHandler;
//
//这里捕获了很多信号,包括SIGCHLD:子进程结束,父进程会收到该信号
sigaction( SIGSEGV, &sigact, NULL );
....
sigaction( SIGCHLD, &sigact, NULL );
}
信号处理函数:TsSigHandler
{
//调用打印堆栈函数
PrintStack();
}
打印堆栈函数PrintStack
{
//打印堆栈
backtrace();
backtrace_symbols();
//调用system函数执行一些命令
system
(
"xxxxxx"
);
}
|
Thread 12 (Thread 0xf7dd2b90 (LWP 5770)):
以下是一个让我觉得奇怪的堆栈,奇怪之处:
1.死锁了:__lll_lock_wait_private
2.获得了2个信号:堆栈里出现了两个<signal handler called>,为什么不是一个一个地处理信号?
堆栈如下:
#0 0xffffe410 in __kernel_vsyscall ()
#1 0x002a0783 in __lll_lock_wait_private () from /lib/libc.so.6
#2 0x001f8448 in _L_lock_124 () from /lib/libc.so.6
#3 0x001f7f8b in do_system () from /lib/libc.so.6
#4 0x001f8412 in system () from /lib/libc.so.6
#5 0x00317ead in system () from /lib/libpthread.so.0
#6 0x080f95c1 in PrintStack() ()
#7 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
#8 <signal handler called>
#9 0xffffe410 in __kernel_vsyscall ()
#10 0x001eb1a9 in sigprocmask () from /lib/libc.so.6
#11 0x001f8132 in do_system () from /lib/libc.so.6
#12 0x001f8412 in system () from /lib/libc.so.6
#13 0x00317ead in system () from /lib/libpthread.so.0
#14 0x080f95c1 in PrintStack() ()
#15 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
#16 <signal handler called>
#17 0x002338ec in memcpy () from /lib/libc.so.6
#18 0x0804fa02 in boom ()
#19 0x080dbd9c in RunCmd ()
#20 0x080dbf12 in CmdParse ()
#21 0x080dc705 in OspTeleDaemon ()
#22 0x080f8817 in OspTaskTemplateFunc(void*) ()
#23 0x0030f832 in start_thread () from /lib/libpthread.so.0
#24 0x00293e0e in clone () from /lib/libc.so.6
|
#18 0x0804fa02 in boom ()
boom()是我写的一个制造崩溃的函数:
char *pBoom = NULL;
memcpy( pBoom, "aaaa", 100 );
#16 <signal handler called>
触发信号
#15 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
TsSigHandler是信号处理函数。通过以下代码设置:
struct sigaction sigact;
sigemptyset( &sigact.sa_mask );
sigact.sa_flags = SA_ONESHOT | SA_SIGINFO;
sigact.sa_sigaction = TsSigHandler;
信号触发后,由TsSigHandler函数处理
#14 0x080f95c1 in PrintStack() ()
TsSigHandler函数中调用PrintStack函数打印堆栈。
#13 0x00317ead in system () from /lib/libpthread.so.0
PrintStack函数中调用了system函数做一些额外的事情,例如执行gcore(事实证明,这种方法是有点问题的)。
#11 0x001f8132 in do_system () from /lib/libc.so.6
system调用了do_system
#10 0x001eb1a9 in sigprocmask () from /lib/libc.so.6
do_system调用sigprocmask
#8 <signal handler called>
关键来了:这是获取到了另外一个信号:SIGCHLD。
#7 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
又调用信号处理函数TsSigHandler
#3 0x001f7f8b in do_system () from /lib/libc.so.6
system调用do_system,调用流程和上面当然是一样的
#2 0x001f8448 in _L_lock_124 () from /lib/libc.so.6
#1 0x002a0783 in __lll_lock_wait_private () from /lib/libc.so.6
死锁了:第二次进入的do_system在开头的DO_LOCK()处等待,而这把锁正被同一线程外层的do_system(#11)持有,外层要等信号处理函数返回后才能解锁,所以永远等不到。
分析(以下是glibc中do_system()的实现):
/* Execute LINE as a shell command, returning its status.

   The function forks; the child execs SHELL_PATH with the argument
   vector { SHELL_NAME, "-c", LINE, NULL } and the parent waits for it
   with __waitpid().  Around that, SIGINT and SIGQUIT are set to SIG_IGN
   (only when ADD_REF () returns 0) and SIGCHLD is added to the blocked
   signal mask; the saved dispositions and the saved mask are put back
   before returning.

   Returns the status stored by __waitpid(), or -1 when the signal
   setup, the fork, the wait, or the final restore fails.

   DO_LOCK, DO_UNLOCK, INIT_LOCK, ADD_REF and SUB_REF are macros defined
   outside this excerpt.  NOTE(review): presumably a lock plus a usage
   counter protecting the saved intr/quit dispositions -- confirm
   against the full file.  */
static
int
do_system (
const
char
*line)
{
int
status, save;
pid_t pid;
struct
sigaction sa;
/* intr/quit are locals only in this configuration; they are used
   unconditionally below, so otherwise they must be declared outside
   this function (not visible in this excerpt).  */
#ifndef _LIBC_REENTRANT
struct
sigaction intr, quit;
#endif
/* Signal mask in effect on entry, filled in by the SIG_BLOCK call below
   and restored at the end.  */
sigset_t omask;
/* 'sa' describes "ignore this signal"; it is installed for SIGINT and
   SIGQUIT below.  */
sa.sa_handler = SIG_IGN;
sa.sa_flags = 0;
__sigemptyset (&sa.sa_mask);
DO_LOCK ();
/* Save and replace the SIGINT/SIGQUIT dispositions only when ADD_REF ()
   yields 0.  On failure, undo the ADD_REF and jump to the shared cleanup
   labels located inside the error block further down.  */
if
(ADD_REF () == 0)
{
if
(__sigaction (SIGINT, &sa, &intr) < 0)
{
(
void
) SUB_REF ();
goto
out;
}
if
(__sigaction (SIGQUIT, &sa, &quit) < 0)
{
/* Keep the errno of the failed call; out_restore_sigint restores it
   after putting SIGINT back.  */
save =
errno
;
(
void
) SUB_REF ();
goto
out_restore_sigint;
}
}
DO_UNLOCK ();
/* We reuse the bitmap in the 'sa' structure. */
__sigaddset (&sa.sa_mask, SIGCHLD);
save =
errno
;
/* Block SIGCHLD and remember the previous mask in omask.  From here
   until the mask is restored, a SIGCHLD for the child stays pending
   instead of being delivered.  */
if
(__sigprocmask (SIG_BLOCK, &sa.sa_mask, &omask) < 0)
{
#ifndef _LIBC
/* Outside libc an ENOSYS failure is tolerated: errno is put back and
   execution continues with the fork below.  */
if
(
errno
== ENOSYS)
__set_errno (save);
else
#endif
{
/* Error path: drop our reference and, if SUB_REF () yields 0, restore
   the saved SIGQUIT and SIGINT dispositions.  The two labels below are
   also the targets of the gotos in the setup code above, which arrive
   here with the lock still taken.  */
DO_LOCK ();
if
(SUB_REF () == 0)
{
save =
errno
;
(
void
) __sigaction (SIGQUIT, &quit, (
struct
sigaction *) NULL);
out_restore_sigint:
(
void
) __sigaction (SIGINT, &intr, (
struct
sigaction *) NULL);
__set_errno (save);
}
out:
DO_UNLOCK ();
return
-1;
}
}
/* Optional hook supplied by the including file; not defined in this
   excerpt.  */
#ifdef CLEANUP_HANDLER
CLEANUP_HANDLER;
#endif
/* FORK, when defined, replaces the plain __fork () call.  */
#ifdef FORK
pid = FORK ();
#else
pid = __fork ();
#endif
if
(pid == (pid_t) 0)
{
/* Child side. */
const
char
*new_argv[4];
new_argv[0] = SHELL_NAME;
new_argv[1] =
"-c"
;
new_argv[2] = line;
new_argv[3] = NULL;
/* Restore the signals. */
(
void
) __sigaction (SIGINT, &intr, (
struct
sigaction *) NULL);
(
void
) __sigaction (SIGQUIT, &quit, (
struct
sigaction *) NULL);
(
void
) __sigprocmask (SIG_SETMASK, &omask, (sigset_t *) NULL);
INIT_LOCK ();
/* Exec the shell. */
(
void
) __execve (SHELL_PATH, (
char
*
const
*) new_argv, __environ);
/* Only reached when __execve () returned, i.e. the exec failed.  */
_exit (127);
}
else
if
(pid < (pid_t) 0)
/* The fork failed. */
status = -1;
else
/* Parent side. */
{
/* Note the system() is a cancellation point. But since we call
waitpid() which itself is a cancellation point we do not
have to do anything here. */
if
(TEMP_FAILURE_RETRY (__waitpid (pid, &status, 0)) != pid)
status = -1;
}
#ifdef CLEANUP_HANDLER
CLEANUP_RESET;
#endif
save =
errno
;
/* Final restore.  If SUB_REF () yields 0, both dispositions are put
   back; the two __sigaction results are combined with bitwise '|', so
   both calls are always made.  The signal mask is then reset to omask.

   Note that the __sigprocmask () call sits between DO_LOCK () and
   DO_UNLOCK ().  If omask does not block SIGCHLD, a pending SIGCHLD can
   be delivered at that point, so a SIGCHLD handler installed by the
   application runs before DO_UNLOCK () is reached; a handler that calls
   system () again would then arrive at the DO_LOCK () at the top of this
   function.  NOTE(review): whether that blocks depends on how DO_LOCK
   is defined -- confirm.  */
DO_LOCK ();
if
((SUB_REF () == 0
&& (__sigaction (SIGINT, &intr, (
struct
sigaction *) NULL)
| __sigaction (SIGQUIT, &quit, (
struct
sigaction *) NULL)) != 0)
|| __sigprocmask (SIG_SETMASK, &omask, (sigset_t *) NULL) != 0)
{
#ifndef _LIBC
/* glibc cannot be used on systems without waitpid. */
if
(
errno
== ENOSYS)
__set_errno (save);
else
#endif
status = -1;
}
DO_UNLOCK ();
return
status;
}
|
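system()函数执行的大体过程是:fork()->exec()->waitpid(),
waitpid用于等待子进程执行完毕。
do_system在fork之前用sigprocmask(SIG_BLOCK)阻塞了SIGCHLD,所以子进程执行完毕时产生的SIGCHLD信号不会马上递达,而是处于未决(pending)状态。
从堆栈和源码看,等到do_system最后在DO_LOCK()之后调用sigprocmask(SIG_SETMASK, &omask)恢复信号掩码时(堆栈#10),SIGCHLD被解除阻塞并立即递达;由于注册处理函数时sa_mask是空的,处理SIGSEGV期间并不会屏蔽SIGCHLD,于是第一个信号处理函数还没返回,就又进入了一次TsSigHandler(堆栈#8),这就是看到了2个信号的原因。
第二次进入的TsSigHandler又调用system(),走到do_system开头的DO_LOCK();而这把锁正被同一线程外层的do_system持有,外层要等信号处理函数返回后才会执行DO_UNLOCK(),于是线程自己把自己锁死了。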
解决方法:
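1.不要捕获SIGCHLD信号:不为它安装处理函数,保持默认处理(SIG_DFL,默认动作就是忽略)即可,除非你的程序需要对子进程退出做非常特殊的处理。注意不要显式设置成SIG_IGN:那样子进程结束后会被内核直接回收,system()内部的waitpid会以ECHILD失败,system()返回-1。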
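2.不要在这里调用system():system()不是异步信号安全(async-signal-safe)的函数,不应该在信号处理函数里调用;backtrace_symbols()内部会调用malloc,同样不安全,可以改用backtrace_symbols_fd()。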
to do: 有空了记得补详细些