信号是操作系统中一种很重要的通信方式.近几个版本中,信号处理这部分很少有大的变动.我们从用户空间的信号应用来分析Linux内核的信号实现方式.
一:信号有关的数据结构
在task_struct中有关的信号结构:
// Signal-related members of task_struct (excerpt; other members elided).
struct task_struct {
……
// Pointer to the process's signal descriptor
struct signal_struct *signal;
// Pointer to the signal-handler descriptor
struct sighand_struct *sighand;
// Masks of blocked signals
sigset_t blocked, real_blocked;
// Saved signal mask; restored when TIF_RESTORE_SIGMASK is set
sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */
// Signals pending for this task
struct sigpending pending;
// Address of the stack used by signal handlers
unsigned long sas_ss_sp;
// Size of the signal-handler stack
size_t sas_ss_size;
// Function pointer that device drivers use to block some of the process's signals
int (*notifier)(void *priv);
// Argument passed to notifier()
void *notifier_data;
// Bitmap of the signals a driver blocks through notifier()
sigset_t *notifier_mask;
……
}
Sigset_t的数据结构如下:
// Signal bitmap: one bit per signal.
typedef struct {
// On 32-bit x86 the 64-bit mask is an array of two 32-bit words
unsigned long sig[_NSIG_WORDS];
} sigset_t;
// Total number of signals
#define _NSIG 64
// Bits per word of the bitmap: 32 on i386, 64 otherwise
#ifdef __i386__
# define _NSIG_BPW 32
#else
# define _NSIG_BPW 64
#endif
// Number of words needed to hold _NSIG bits
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
在linux中共有64个信号.前32个为常规信号.后32个为实时信号.实时信号与常规信号的唯一区别就是实时信号会排队等候.
struct sigpending结构如下:
// Queue of pending signals
struct sigpending {
// List of queued sigqueue entries
struct list_head list;
// Bitmap: the bit for a signal is set while that signal is pending
sigset_t signal;
};
Struct sighand_struct的结构如下:
// Signal-handler descriptor.
struct sighand_struct {
// Reference count
atomic_t count;
// Signal vector table: one action per signal
struct k_sigaction action[_NSIG];
// Lock taken around accesses to the handler table and pending queues
// in the code shown in this article
spinlock_t siglock;
wait_queue_head_t signalfd_wqh;
}
同中断处理一样,每一个信号都对应action中的一个处理函数.
struct k_sigaction只是对struct sigaction的一层封装(其成员sa即为struct sigaction,见后面代码中的new_sa.sa.sa_handler).struct sigaction结构如下所示:
// Per-signal action description.
struct sigaction {
// Signal handler function
__sighandler_t sa_handler;
// Flags controlling how the signal is handled (SA_*)
unsigned long sa_flags;
__sigrestore_t sa_restorer;
// Signals to block while the handler for this signal runs
sigset_t sa_mask; /* mask last for extensibility */
};
Struct signal_struct结构如下:
// Signal descriptor (excerpt; remaining members elided).
struct signal_struct {
// Share count
atomic_t count;
// Live count for the thread group
// NOTE(review): the original note said "live signals"; presumably this
// counts live threads - confirm against the kernel source
atomic_t live;
// wait_chldexit: wait queue used when waiting for children
wait_queue_head_t wait_chldexit; /* for wait4() */
/* current thread group signal load-balancing target: */
// Task in the thread group that most recently received a signal
struct task_struct *curr_target;
/* shared signal handling: */
// Pending queue for signals shared by the thread group
struct sigpending shared_pending;
/* thread group exit support */
// Exit code of the thread group
int group_exit_code;
/* overloaded:
* - notify group_exit_task when ->count is equal to notify_count
* - everyone except group_exit_task is stopped during signal delivery
* of fatal signals, group_exit_task processes the signal.
*/
// Used when the whole thread group is being killed
struct task_struct *group_exit_task;
// Used when the whole thread group is being killed
int notify_count;
/* thread group stop support, overloads group_exit_code too */
// Used when the whole thread group is being stopped
int group_stop_count;
unsigned int flags; /* see SIGNAL_* flags below */
……
}
上述所讨论的数据结构可以用下图表示(摘自<<understanding the linux kernel>>):
二:更改信号的处理函数
在用户空间编程的时候,我们常用的注册信号处理函数的API有:
typedef void (*sighandler_t)(int);
sighandler_t signal(int signum, sighandler_t handler);
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact);
两者都可以更改信号.sigaction是Unix后期才出现的接口.这个接口较signal()更为健壮也更为强大:
Signal()只能为指定的信号设置信号处理函数.而sigaction()不仅可以设置信号处理函数,还可以设置进程的信号掩码.返回设置之前的sigaction结构.sigaction结构在上面已经分析过了.
这两个用户空间的接口对应的系统调用分别是:
sys_signal(int sig, __sighandler_t handler)
sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact)
我们来分析一下内核是怎么样处理的.sys_signal()代码如下:
// sys_signal - system call behind signal(): install `handler` for `sig`
// with SA_ONESHOT | SA_NOMASK and return the previous handler.
asmlinkage unsigned long
sys_signal(int sig, __sighandler_t handler)
{
struct k_sigaction new_sa, old_sa;
int ret;
new_sa.sa.sa_handler = handler;
// SA_ONESHOT: once the handler has been used, the action is reset to SIG_DFL
// SA_NOMASK: no signal masking is applied while the handler runs
new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
// Empty sa_mask: no additional signals are blocked while handling this one
sigemptyset(&new_sa.sa.sa_mask);
ret = do_sigaction(sig, &new_sa, &old_sa);
// On failure return the error code; on success return the previous handler
return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
}
sys_sigaction()的代码如下:
// sys_sigaction - system call behind sigaction() (old_sigaction layout).
// Copies the new action in from user space, calls do_sigaction(), and
// copies the previous action back out when `oact` is given.
// Returns do_sigaction()'s result, or -EFAULT on a bad user pointer.
asmlinkage int
sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
struct k_sigaction new_ka, old_ka;
int ret;
// Copy the user-space sigaction into kernel space
if (act) {
old_sigset_t mask;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
__get_user(new_ka.sa.sa_flags, &act->sa_flags);
__get_user(mask, &act->sa_mask);
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
// On success, hand the previous sigaction back to user space;
// on failure ret already holds the error code
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
// Only the first word of the mask fits in the old layout
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
}
return ret;
}
由此可以看出,两个函数最终都会调用do_sigaction()进行处理.该函数代码如下:
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct k_sigaction *k;
sigset_t mask;
//sig_kernel_only:判断sig是否为SIGKILL SIGSTOP
//不能为KILL, STOP信号重设处理函数
if (!valid_signal(sig) || sig
return -EINVAL;
//取进程的旧k_sigaction
k = ¤t->sighand->action[sig-1];
spin_lock_irq(¤t->sighand->siglock);
// 如果oact不为空,则将其赋给oact .oact参数返回旧的k_sigaction
if (oact)
*oact = *k;
if (act) {
//使SIGKILL SIGSTOP不可屏弊
sigdelsetmask(&act->sa.sa_mask,
sigmask(SIGKILL) | sigmask(SIGSTOP));
//将新的k_siaction赋值到k
*k = *act;
/*
* POSIX 3.3.1.3:
* "Setting a signal action to SIG_IGN for a signal that is
* pending shall cause the pending signal to be discarded,
* whether or not it is blocked."
*
* "Setting a signal action to SIG_DFL for a signal that is
* pending and whose default action is to ignore the signal
* (for example, SIGCHLD), shall cause the pending signal to
* be discarded, whether or not it is blocked"
*/
//POSIX标准:
//如果设置的处理为SIG_IGN 或者是SIG_DEL而且是对SIGCONT SIGCHILD SIGWINCH
//进行重设时
//如果有一个或者几个这样的信号在等待,则删除之
if (act->sa.sa_handler == SIG_IGN ||
(act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) {
struct task_struct *t = current;
sigemptyset(&mask);
sigaddset(&mask, sig);
rm_from_queue_full(&mask, &t->signal->shared_pending);
//如果不是共享信号,在线程中的线程等待队列中将该信号
//删除
do {
rm_from_queue_full(&mask, &t->pending);
t = next_thread(t);
} while (t != current);
}
}
spin_unlock_irq(¤t->sighand->siglock);
return 0;
}
rm_from_queue_full()用来将等待队列中的信号删除,并清除等待位图中对应的位.代码如下:
// rm_from_queue_full - drop every pending signal named in `mask` from `s`.
// Returns 0 when none of them was pending, 1 otherwise.
static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
// m = signals that are both in mask and marked pending in the bitmap;
// if that set is empty there is nothing to remove
sigandsets(&m, mask, &s->signal);
if (sigisemptyset(&m))
return 0;
// Clear the requested signals from the pending bitmap
signandsets(&s->signal, &s->signal, mask);
list_for_each_entry_safe(q, n, &s->list, list) {
// Queue entry belongs to one of the signals in mask
if (sigismember(mask, q->info.si_signo)) {
// Unlink it and reinitialise its list node
list_del_init(&q->list);
// Free the entry
__sigqueue_free(q);
}
}
return 1;
}
上面有关POSIX标准,请自行查阅相关资料.
三:发送信号
在用户空间中,我们可以用kill()给指定进程发送相应信号.它在用户空间的定义如下所示:
int kill(pid_t pid, int signo)
pid的含义如下所示:
pid > 0 将信号发送给进程ID为pid的进程。
pid == 0 将信号发送给其进程组ID等于发送进程的进程组ID,而且发送进程有许可权向
其发送信号的所有进程。
这里用的术语“所有进程”不包括实现定义的系统进程集。对于大多数U N I X系统,系统
进程集包括:交换进程(pid 0),init (pid 1)以及页精灵进程(pid 2)。
pid == -1 将信号发送给除swapper(0)、init(1)和当前进程之外的所有进程.
pid < -1 将信号发送给其进程组ID等于pid绝对值,而且发送进程有许可权向其发送信号
的所有进程。如上所述一样,“所有进程”并不包括系统进程集中的进程.
Kill()的系统调用接口为sys_kill():
// sys_kill - system call behind kill(): build a SI_USER siginfo describing
// the caller and pass it to kill_something_info().
asmlinkage long
sys_kill(int pid, int sig)
{
struct siginfo info;
// Fill in the siginfo for a user-originated signal
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
info.si_uid = current->uid;
return kill_something_info(sig, &info, pid);
}
转到kill_something_info():
/*
 * kill_something_info - dispatch a kill() request according to pid.
 * @sig:  signal to send
 * @info: siginfo describing the signal
 * @pid:  0   -> the caller's process group
 *        -1  -> every process with pid > 1 outside the caller's thread group
 *        < 0 -> the process group whose id is -pid
 *        > 0 -> the single process with that pid
 *
 * Returns the result of the selected helper; for pid == -1 returns -ESRCH
 * when no process was eligible.
 */
static int kill_something_info(int sig, struct siginfo *info, int pid)
{
int ret;
rcu_read_lock();
if (!pid) {
// Send the signal to the caller's process group
ret = kill_pgrp_info(sig, info, task_pgrp(current));
} else if (pid == -1) {
// Send the signal to every process with pid > 1, except the caller's
// own thread group
int retval = 0, count = 0;
struct task_struct * p;
read_lock(&tasklist_lock);
for_each_process(p) {
if (p->pid > 1 && !same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p);
++count;
// -EPERM from one target does not overwrite the overall result
if (err != -EPERM)
retval = err;
}
}
read_unlock(&tasklist_lock);
ret = count ? retval : -ESRCH;
} else if (pid < 0) {
// Send the signal to every process in process group -pid
ret = kill_pgrp_info(sig, info, find_vpid(-pid));
} else {
// Send the signal to the process whose id is pid
ret = kill_pid_info(sig, info, find_vpid(pid));
}
rcu_read_unlock();
return ret;
}
假设pid > 0.转入kill_pid_info().即把信号发送到pid的进程
// kill_pid_info - send `sig` to the task identified by `pid`.
// Returns -ESRCH when no such task exists, otherwise the result of
// group_send_sig_info().
int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
{
int error;
struct task_struct *p;
rcu_read_lock();
// Some signals additionally need tasklist_lock held
if (unlikely(sig_needs_tasklist(sig)))
read_lock(&tasklist_lock);
// Look up the task whose process id is pid
p = pid_task(pid, PIDTYPE_PID);
error = -ESRCH;
if (p)
error = group_send_sig_info(sig, info, p);
if (unlikely(sig_needs_tasklist(sig)))
read_unlock(&tasklist_lock);
rcu_read_unlock();
return error;
}
在这里将pid转化为对应的task_struct.然后调用group_send_sig_info().代码如下:
// group_send_sig_info - permission-check and send `sig` to task `p`.
// Returns the permission-check error, -ESRCH if the sighand lock cannot be
// taken, or the result of __group_send_sig_info(). With sig == 0 only the
// permission check is performed.
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
unsigned long flags;
int ret;
// Check that the sender is allowed to signal p
ret = check_kill_permission(sig, info, p);
if (!ret && sig) {
ret = -ESRCH;
// Take the sighand lock to avoid races
if (lock_task_sighand(p, &flags)) {
// Send the signal
ret = __group_send_sig_info(sig, info, p);
// Release the lock
unlock_task_sighand(p, &flags);
}
}
return ret;
}
首先,要给进程发送信号,应该先判断它是否具有这样的权限.判断的依据为:
如果是用户空间发送的信号,检查其是否有相应的权限
必须要满足以下几个条件中的任一个才可以发送:
1:发送信号者必须拥有相关的权能
2: 如果是发送SIGCONT且发送进程与目标进程处于同一个注册会话中
3:属于同一个用户的进程
转入__group_send_sig_info():
// __group_send_sig_info - queue `sig` on p's shared pending queue and wake
// a suitable thread. Caller must hold p->sighand->siglock.
// Returns 0 on success (including ignored or already-queued signals) or the
// error from send_signal().
int
__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
int ret = 0;
assert_spin_locked(&p->sighand->siglock);
// Special handling for signals related to stopping the process
handle_stop_signal(sig, p);
/* Short-circuit ignored signals. */
// Nothing to do if the signal is ignored
if (sig_ignored(p, sig))
return ret;
// A non-RT signal that is already pending is not queued again:
// regular signals do not queue
if (LEGACY_QUEUE(&p->signal->shared_pending, sig))
/* This is a non-RT signal and we already have one queued. */
return ret;
/*
* Put this signal on the shared-pending queue, or fail with EAGAIN.
* We always use the shared queue for process-wide signals,
* to avoid several races.
*/
ret = send_signal(sig, info, p, &p->signal->shared_pending);
if (unlikely(ret))
return ret;
// Wake the process so it handles the signal; if p itself blocks the
// signal, a suitable task in the thread group is chosen instead
__group_complete_signal(sig, p);
return 0;
}
具体的信号发送过程是在send_signal()完成的.它的代码如下:
/*
 * send_signal - record `sig` as pending on the queue `signals`.
 * @sig:     signal number
 * @info:    siginfo, or one of the special SEND_SIG_* markers
 * @t:       target task
 * @signals: pending queue to use (private or shared)
 *
 * Tries to allocate a sigqueue entry carrying the siginfo; the pending bit
 * is set even when no entry could be allocated, except for the RT overflow
 * case below. Returns 0 on success or -EAGAIN on that overflow.
 */
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
struct sigpending *signals)
{
struct sigqueue * q = NULL;
int ret = 0;
/*
* Deliver the signal to listening signalfds. This must be called
* with the sighand lock held.
*/
// (conditionally compiled helper)
signalfd_notify(t, sig);
/*
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
*/
if (info == SEND_SIG_FORCED)
goto out_set;
/* Real-time signals must be queued if sent by sigqueue, or
some other real-time mechanism. It is implementation
defined whether kill() does so. We attempt to do so, on
the principle of least surprise, but since kill is not
allowed to fail with EAGAIN when low on memory we just
make sure at least one signal gets delivered and don't
pass on the info struct. */
// Allocate a sigqueue entry
q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
(is_si_special(info) ||
info->si_code >= 0)));
if (q) {
// Append the new entry to the pending queue
list_add_tail(&q->list, &signals->list);
switch ((unsigned long) info) {
case (unsigned long) SEND_SIG_NOINFO:
// No siginfo supplied: describe the current task as a user sender
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_USER;
q->info.si_pid = task_pid_vnr(current);
q->info.si_uid = current->uid;
break;
case (unsigned long) SEND_SIG_PRIV:
// Signal originates from the kernel
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_KERNEL;
q->info.si_pid = 0;
q->info.si_uid = 0;
break;
default:
copy_siginfo(&q->info, info);
break;
}
} else if (!is_si_special(info)) {
if (sig >= SIGRTMIN && info->si_code != SI_USER)
/*
* Queue overflow, abort. We may abort if the signal was rt
* and sent by user using something other than kill().
*/
return -EAGAIN;
}
out_set:
// Mark the signal in the pending bitmap: received but not yet handled
sigaddset(&signals->signal, sig);
return ret;
}
经过这个过程,我们看到了进程怎么将信号发送到另外的进程.特别要注意的是,目标进程接收到信号之后会将其唤醒.这时如果目标进程是系统调用阻塞状态就会将它的系统调用中断.
另外,内核经常使用force_sig_info()/force_sig()来给进程发送信号.这样的信号经常不可以忽略,不可以阻塞.我们来看一下它的处理.代码如下:
// force_sig_info - deliver `sig` to `t` even if it is currently ignored or
// blocked: in that case the action is reset to SIG_DFL and the signal is
// unblocked before sending. Returns the result of specific_send_sig_info().
int
force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
unsigned long int flags;
int ret, blocked, ignored;
struct k_sigaction *action;
spin_lock_irqsave(&t->sighand->siglock, flags);
// Look up the task's action for this signal
action = &t->sighand->action[sig-1];
// Is the signal ignored, or blocked?
ignored = action->sa.sa_handler == SIG_IGN;
blocked = sigismember(&t->blocked, sig);
if (blocked || ignored) {
// Reset the handler to the default action
action->sa.sa_handler = SIG_DFL;
// If the signal is blocked
if (blocked) {
// Clear its bit in the blocked mask
sigdelset(&t->blocked, sig);
// Recompute whether the task has unhandled signals
recalc_sigpending_and_wake(t);
}
}
// Send through the task-specific path
ret = specific_send_sig_info(sig, info, t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
return ret;
}
当进程的信号阻塞标志被更改时,就会引起TIF_SIGPENDING标志的变化.对于TIF_SIGPENDING标志的检测是在 recalc_sigpending_and_wake()调用recalc_sigpending_tsk()来完成的.它实际是判断等待队列中是否有 没有被阻塞的信号.如果有,则设置TIF_SIGPENDING标志.
specific_send_sig_info()内核用于将信号发送到进程.我们比较一下它跟用户空间的发送有什么不同.它的代码如下:
// specific_send_sig_info - queue `sig` on t's private pending queue.
// Must be called with interrupts disabled and t->sighand->siglock held.
// Returns 0, or the error from send_signal().
static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
int ret = 0;
BUG_ON(!irqs_disabled());
assert_spin_locked(&t->sighand->siglock);
/* Short-circuit ignored signals. */
// Signal is ignored: nothing to do
if (sig_ignored(t, sig))
goto out;
/* Support queueing exactly one non-rt signal, so that we
can get more detailed information about the cause of
the signal. */
// A non-RT signal that is already pending is not queued a second time
if (LEGACY_QUEUE(&t->pending, sig))
goto out;
// Queue the signal on the target task
ret = send_signal(sig, info, t, &t->pending);
// Wake the task directly with signal_wake_up() unless it blocks the signal
if (!ret && !sigismember(&t->blocked, sig))
signal_wake_up(t, sig == SIGKILL);
out:
return ret;
}
这样,内核就将信号传送给目标进程.无论进程用什么样的方式,都不能阻止对此信号的处理.
四:信号的处理
信号处理的时机:每次从内核空间返回用户空间时,都会检查当前进程是否有未处理的信号.如果有,则对信号进行处理
信号的处理函数如下:
/*
 * do_signal - deliver pending signals on the way back to user mode.
 * @regs: saved user-mode register state of the current task
 *
 * If a signal with a user-installed handler is dequeued, a handler frame is
 * set up via handle_signal(). Otherwise, an interrupted system call is
 * restarted when its return value asks for it, and a saved signal mask
 * (TIF_RESTORE_SIGMASK) is put back.
 */
static void fastcall do_signal(struct pt_regs *regs)
{
siginfo_t info;
int signr;
struct k_sigaction ka;
sigset_t *oldset;
// Signals are only delivered when returning to user mode; if regs
// describe kernel mode there is nothing to do
if (!user_mode(regs))
return;
// With TIF_RESTORE_SIGMASK set, the mask to restore is saved_sigmask
if (test_thread_flag(TIF_RESTORE_SIGMASK))
oldset = &current->saved_sigmask;
else
oldset = &current->blocked;
// Process pending signals; this only returns when it finds a signal
// with a user-installed handler or when nothing is left to dequeue
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
// A signal with a user-installed handler was found
if (unlikely(current->thread.debugreg[7]))
set_debugreg(current->thread.debugreg[7], 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
if (test_thread_flag(TIF_RESTORE_SIGMASK))
clear_thread_flag(TIF_RESTORE_SIGMASK);
}
return;
}
// No handler to run: restart the interrupted system call if required
/* Did we come from a system call? */
if (regs->orig_eax >= 0) {
/* Restart the system call - no handlers present */
switch (regs->eax) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
// Reload the syscall number and step eip back by 2 bytes
regs->eax = regs->orig_eax;
regs->eip -= 2;
break;
// For -ERESTART_RESTARTBLOCK the call is reissued after returning to
// user space with syscall number __NR_restart_syscall
// (typically used by timer-related system calls)
case -ERESTART_RESTARTBLOCK:
regs->eax = __NR_restart_syscall;
regs->eip -= 2;
break;
}
}
/* if there's no signal to deliver, we just put the saved sigmask
* back */
if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
// Clear TIF_RESTORE_SIGMASK and restore the signal mask
clear_thread_flag(TIF_RESTORE_SIGMASK);
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
}
正如我们在上节发送信号中所论述的一样,信号可能会引起系统调用中断.这里必须要采取必要的措施来使系统调用重启.
关于返回值与重启还是忽略如下表所示(摘自<<understanding the linux kernel>>):
Signal Action | EINTR     | ERESTARTSYS | ERESTARTNOHAND / ERESTART_RESTARTBLOCK [a] | ERESTARTNOINTR
Default       | Terminate | Reexecute   | Reexecute                                  | Reexecute
Ignore        | Terminate | Reexecute   | Reexecute                                  | Reexecute
Catch         | Terminate | Depends     | Terminate                                  | Reexecute
[a] ERESTARTNOHAND与ERESTART_RESTARTBLOCK重启系统调用的机制不同(见上面do_signal()中对应的两个case分支).
有必要关注一下上面的系统调用重启过程:
regs参数表示用户空间的硬件环境.regs->eax是表示返回用户空间后的eax寄存器的值.regs->eip是返回用户空间后执行的指令地址. regs->orig_eax是表示系统调用时eax的值,里面存放着系统调用号.请参阅本站的有关中断初始化的文档.
regs->eip -= 2 ,为什么eip要减2呢?因为发起系统调用的是int 0x80指令.中断后,eip会指向int 0x80后面的一条指令.int 0x80指令的机器码(0xcd 0x80)长度为2个字节,这样,如果要重新执行int 0x80,就必须把eip回退2个字节.
转入get_signal_to_deliver():
/*
 * get_signal_to_deliver - dequeue pending signals until one needs a user
 * handler.
 * @info:      receives the siginfo of the dequeued signal
 * @return_ka: receives a copy of the action when a user handler must run
 * @regs:      saved user-mode registers
 * @cookie:    passed through to ptrace_signal_deliver()
 *
 * Ignored signals are skipped, stop signals stop the task, and any other
 * default action terminates the thread group (possibly with a core dump).
 * Returns the signal number whose handler must run, or 0 when no signal is
 * left.
 */
int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
struct pt_regs *regs, void *cookie)
{
sigset_t *mask = &current->blocked;
int signr = 0;
// (conditionally compiled helper)
try_to_freeze();
relock:
spin_lock_irq(&current->sighand->siglock);
for (;;) {
struct k_sigaction *ka;
if (unlikely(current->signal->group_stop_count > 0) &&
handle_group_stop())
goto relock;
// Take the next signal from the pending queues
signr = dequeue_signal(current, mask, info);
// Nothing pending: leave the loop
if (!signr)
break; /* will return 0 */
// The current process is being traced
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
ptrace_signal_deliver(regs, cookie);
/* Let the debugger run. */
ptrace_stop(signr, signr, info);
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
if (signr == 0)
continue;
current->exit_code = 0;
/* Update the siginfo structure if the signal has
changed. If the debugger wanted something
specific in the siginfo structure then it should
have updated *info via PTRACE_SETSIGINFO. */
if (signr != info->si_signo) {
info->si_signo = signr;
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = task_pid_vnr(current->parent);
info->si_uid = current->parent->uid;
}
/* If the (new) signal is now blocked, requeue it. */
if (sigismember(&current->blocked, signr)) {
specific_send_sig_info(signr, info, current);
continue;
}
}
ka = &current->sighand->action[signr-1];
// Signal is ignored: nothing to do
if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */
continue;
// Not the default action, i.e. the user installed a handler:
// break out of the loop so the caller can run it
if (ka->sa.sa_handler != SIG_DFL) {
/* Run the handler. */
*return_ka = *ka;
// With SA_ONESHOT the action reverts to the default after this delivery
if (ka->sa.sa_flags & SA_ONESHOT)
ka->sa.sa_handler = SIG_DFL;
break; /* will return non-zero "signr" value */
}
/*
* Now we are doing the default action for this signal.
*/
// Signals whose default action is to be ignored need no work
// (the original note cites SIGCHLD as an example)
if (sig_kernel_ignore(signr)) /* Default is nothing. */
continue;
/*
* Global init gets no signals it doesn't want.
*/
// Skip the default action for the init process
if (is_global_init(current))
continue;
// Signals that stop the process
if (sig_kernel_stop(signr)) {
// SIGSTOP is handled slightly differently from the other stop
// signals: SIGSTOP always stops the process, the others only stop
// processes that are not in an orphaned process group
if (signr != SIGSTOP) {
spin_unlock_irq(&current->sighand->siglock);
/* signals can be posted during this window */
if (is_current_pgrp_orphaned())
goto relock;
spin_lock_irq(&current->sighand->siglock);
}
// Stop the process
if (likely(do_signal_stop(signr))) {
/* It released the siglock. */
goto relock;
}
/*
* We didn't actually stop, due to a race
* with SIGCONT or something like that.
*/
continue;
}
spin_unlock_irq(&current->sighand->siglock);
// Every signal other than the kernel-ignored and stop signals
// terminates the process
/*
* Anything else is fatal, maybe with a core dump.
*/
// PF_SIGNALED records that the process was terminated by a signal
current->flags |= PF_SIGNALED;
if ((signr != SIGKILL) && print_fatal_signals)
print_fatal_signal(regs, signr);
// Signals that request a core dump: write the core file before exiting
if (sig_kernel_coredump(signr)) {
/*
* If it was able to dump core, this kills all
* other threads in the group and synchronizes with
* their demise. If we lost the race with another
* thread getting here, it set group_exit_code
* first and our do_group_exit call below will use
* that value and ignore the one we pass it.
*/
do_coredump((long)signr, signr, regs);
}
/*
* Death signals, no core dump.
*/
// Exit the whole thread group
do_group_exit(signr);
/* NOTREACHED */
}
spin_unlock_irq(&current->sighand->siglock);
return signr;
}
这个函数比较简单,基本上就是遍历信号等待队列.然后处理信号.一直遇到信号处理被重设或者没有等待信号之后才会返回.
信号出列函数为dequeue_signal():
// dequeue_signal - take the next deliverable signal for `tsk`, trying the
// private queue first and then the shared one. Fills *info and returns the
// signal number, or 0 if nothing could be dequeued.
// The caller appears to hold tsk->sighand->siglock (it is dropped and
// retaken around do_schedule_next_timer below) - TODO confirm.
int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
{
int signr = 0;
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
// Try the task's private pending queue first
signr = __dequeue_signal(&tsk->pending, mask, info);
// Nothing there: fall back to the shared_pending queue
if (!signr) {
signr = __dequeue_signal(&tsk->signal->shared_pending,
mask, info);
// For SIGALRM, re-arm the interval timer when it is not queued
// and a non-zero interval is configured
if (unlikely(signr == SIGALRM)) {
struct hrtimer *tmr = &tsk->signal->real_timer;
if (!hrtimer_is_queued(tmr) &&
tsk->signal->it_real_incr.tv64 != 0) {
hrtimer_forward(tmr, tmr->base->get_time(),
tsk->signal->it_real_incr);
hrtimer_restart(tmr);
}
}
}
// Recompute whether unhandled signals remain and update TIF_SIGPENDING
recalc_sigpending();
// A stop signal was dequeued: set SIGNAL_STOP_DEQUEUED unless the group
// is already exiting
// NOTE(review): the original note described these as "terminating"
// signals and said the flag forbids further dequeueing; the code tests
// sig_kernel_stop() and only records the flag - confirm intended meaning
if (signr && unlikely(sig_kernel_stop(signr))) {
if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
// __SI_TIMER: the signal comes from an expired timer
if (signr &&
((info->si_code & __SI_MASK) == __SI_TIMER) &&
info->si_sys_private){
// siglock is dropped around the timer rescheduling call
spin_unlock(&tsk->sighand->siglock);
do_schedule_next_timer(info);
spin_lock(&tsk->sighand->siglock);
}
return signr;
}
__dequeue_signal()用于从等待队列中取出信号.代码如下:
// __dequeue_signal - pick one signal from `pending` and collect its siginfo.
// Returns the signal number, or 0 if none was taken.
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
siginfo_t *info)
{
// Choose the next signal from the pending bitmap
// (presumably the first set bit not excluded by mask - TODO confirm)
int sig = next_signal(pending, mask);
if (sig) {
// task->notifier: function pointer that device drivers use to block
// certain signals. If one is installed and covers this signal, it is
// asked first; a zero return suppresses delivery for now
if (current->notifier) {
if (sigismember(current->notifier_mask, sig)) {
if (!(current->notifier)(current->notifier_data)) {
clear_thread_flag(TIF_SIGPENDING);
return 0;
}
}
}
// Remove the signal from the pending queue and update the pending bitmap
if (!collect_signal(sig, pending, info))
sig = 0;
}
return sig;
}
collect_signal()代码如下:
// collect_signal - remove one instance of `sig` from `list` and copy its
// siginfo into *info. Returns 0 if sig is not marked pending, 1 otherwise.
static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
{
struct sigqueue *q, *first = NULL;
int still_pending = 0;
// The signal is not marked in the pending bitmap: nothing to collect
if (unlikely(!sigismember(&list->signal, sig)))
return 0;
/*
* Collect the siginfo appropriate to this signal. Check if
* there is another siginfo for the same signal.
*/
// Walk the queue; still_pending becomes 1 when more than one entry
// for sig is queued
list_for_each_entry(q, &list->list, list) {
if (q->info.si_signo == sig) {
if (first) {
still_pending = 1;
break;
}
first = q;
}
}
if (first) {
// An entry for this signal exists: unlink it from the queue
list_del_init(&first->list);
// Copy its siginfo to the caller
copy_siginfo(info, &first->info);
// Free the entry
__sigqueue_free(first);
if (!still_pending)
// That was the only queued instance, so this signal is fully
// consumed: clear its bit in the pending bitmap
sigdelset(&list->signal, sig);
} else {
/* Ok, it wasn't in the queue. This must be
a fast-pathed signal or we must have been
out of queue space. So zero out the info.
*/
// No queue entry: clear the pending bit and hand back an
// otherwise empty siginfo
sigdelset(&list->signal, sig);
info->si_signo = sig;
info->si_errno = 0;
info->si_code = 0;
info->si_pid = 0;
info->si_uid = 0;
}
return 1;
}
返回do_signal()中看看如果信号处理函数被重设会怎么样处理.这也是信号处理中比较难理解的部份.转入具体的处理代码之前,先思考一下:
用户空间的函数地址传递给内核空间之后,可不可以在内核直接运行呢?(即设置好内核堆栈,再把eip设为function address)?
是有可能运行的.因为陷入内核时不会切换CR3,只有进程切换时才会切换CR3.因此可以保证进程陷入内核后可以正常的对用户空间的地址进行寻址.但是基于以下几点原因.不建议直接在内核空间运行
1:安全因素.陷入内核空间后,对内核地址空间具有全部访问权限,没有内存保护机制
2:内核堆栈过小,最大只有8KB.
3:用户空间的函数在运行的时候可能会发出系统调用.由于在最高特权级下,导致系统调用/异常处理失败.
既然这样,那怎么运行信号处理函数呢?
我们只需要让它在返回用户空间后马上运行信号处理函数,运行信号处理函数再系统调用返回内核就可以了.
先分析一下有关的数据结构:
// Frame built on the user stack before a signal handler runs.
struct sigframe
{
// Return address for the signal handler
// (the retcode field of this same frame, per the original note)
char __user *pretcode;
// Signal number
int sig;
// Saved copy of the current register state (regs)
struct sigcontext sc;
// Saved FPU, MMX, XMM state
struct _fpstate fpstate;
// Remaining words of the blocked-signal mask (the real-time signals)
unsigned long extramask[_NSIG_WORDS-1];
// Instructions executed after the signal handler returns
char retcode[8];
}
现在我们转入代码看是如何处理的:
/*
 * handle_signal - prepare user space to run the handler for `sig`.
 * @sig:    signal number being delivered
 * @info:   siginfo of the signal
 * @ka:     action (handler, flags, mask) to use
 * @oldset: signal mask to save in the frame
 * @regs:   saved user-mode registers, rewritten to enter the handler
 *
 * Adjusts the return value of an interrupted system call, builds the signal
 * frame, then blocks ka's mask (and the signal itself unless SA_NODEFER).
 * Returns 0 on success or the error from the frame setup.
 */
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs * regs)
{
int ret;
// A system call may have been interrupted by this signal; decide from
// its return value whether it is restarted or fails with -EINTR
/* Are we from a system call? */
if (regs->orig_eax >= 0) {
/* If so, check system call restarting.. */
switch (regs->eax) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->eax = -EINTR;
break;
case -ERESTARTSYS:
if (!(ka->sa.sa_flags & SA_RESTART)) {
regs->eax = -EINTR;
break;
}
/* fallthrough */
case -ERESTARTNOINTR:
regs->eax = regs->orig_eax;
regs->eip -= 2;
}
}
/*
* If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
* that register information in the sigcontext is correct.
*/
// When being single-stepped by a debugger, clear the trap flag
// (comparable to disabling interrupts on entry to an interrupt handler)
if (unlikely(regs->eflags & TF_MASK)
&& likely(current->ptrace & PT_DTRACE)) {
current->ptrace &= ~PT_DTRACE;
regs->eflags &= ~TF_MASK;
}
/* Set up the stack frame */
// SA_SIGINFO: the handler receives extra information, which needs
// the rt frame layout
if (ka->sa.sa_flags & SA_SIGINFO)
ret = setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = setup_frame(sig, ka, oldset, regs);
if (ret == 0) {
spin_lock_irq(&current->sighand->siglock);
// Block the signals listed in the action's sa_mask
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
// SA_NODEFER: do not block the signal while its own handler runs
if (!(ka->sa.sa_flags & SA_NODEFER))
// Without SA_NODEFER the signal being delivered is blocked too
sigaddset(&current->blocked,sig);
// Update the TIF_SIGPENDING flag
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
}
return ret;
}
首先,也要处理被中断的系统调用的重启.然后,再调用setup_frame()或者是setup_rt_frame().当信号处理设置了SA_SIGINFO标志时使用setup_rt_frame().在这里以setup_frame()为例进行分析.代码如下:
/*
 * setup_frame - build a struct sigframe on the user stack and redirect the
 * saved user registers so the task enters the signal handler.
 * @sig:  signal number
 * @ka:   action holding the handler, flags and optional restorer
 * @set:  signal mask to save in the frame (the mask in force before delivery)
 * @regs: saved user-mode registers; overwritten to start the handler
 *
 * Returns 0 on success. On any failure writing the frame it forces SIGSEGV
 * on the current task and returns -EFAULT.
 */
static int setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
void __user *restorer;
struct sigframe __user *frame;
int err = 0;
int usig;
// Work out where on the user stack the frame goes
frame = get_sigframe(ka, regs, sizeof(*frame));
// Make sure that area is writable
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
// Some execution domains number signals differently, so translate
// through signal_invmap (the signal conversion table) when present
usig = current_thread_info()->exec_domain
&& current_thread_info()->exec_domain->signal_invmap
&& sig < 32
? current_thread_info()->exec_domain->signal_invmap[sig]
: sig;
err = __put_user(usig, &frame->sig);
if (err)
goto give_sigsegv;
// Save the user-space hardware context currently held on the kernel stack
err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
// set is the process's previous signal mask; its remaining words are
// kept in extramask
if (_NSIG_WORDS > 1) {
// Store the upper 32 bits of set->sig in extramask
err = __copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask));
if (err)
goto give_sigsegv;
}
// Choose the return trampoline: the vDSO one when available, else the
// retcode bytes in the frame; an explicit SA_RESTORER overrides both
if (current->binfmt->hasvdso)
restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
else
restorer = (void *)&frame->retcode;
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
/* Set up to return from userspace. */
// frame->pretcode is the address the handler returns to
err |= __put_user(restorer, &frame->pretcode);
/*
* This is popl %eax ; movl $,%eax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
// frame->retcode: instructions run after the handler returns;
// they issue a system call with number __NR_sigreturn
err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
if (err)
goto give_sigsegv;
/* Set up registers for signal handler */
// regs has already been saved in the frame, so it can be modified freely.
// Point the user stack pointer at the frame
regs->esp = (unsigned long) frame;
// Next user-space instruction: the signal handler itself
regs->eip = (unsigned long) ka->sa.sa_handler;
regs->eax = (unsigned long) sig;
regs->edx = (unsigned long) 0;
regs->ecx = (unsigned long) 0;
// Force the user segment registers to the standard user selectors
// in case they were changed
regs->xds = __USER_DS;
regs->xes = __USER_DS;
regs->xss = __USER_DS;
regs->xcs = __USER_CS;
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
// Clear the trap flag, much as the interrupt flag is cleared when
// entering an interrupt handler
regs->eflags &= ~TF_MASK;
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
#if DEBUG_SIG
printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
current->comm, current->pid, frame, regs->eip, frame->pretcode);
#endif
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
get_sigframe()代码如下:
// get_sigframe - compute the user-stack address at which a signal frame of
// frame_size bytes is placed, honouring an alternate signal stack.
static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
{
unsigned long esp;
/* Default to using normal stack */
esp = regs->esp;
/* This is the X/Open sanctioned signal stack switching. */
// The handler asked for the alternate stack (SA_ONSTACK).
// sas_ss_flags(esp) == 0 is taken to mean the alternate stack is usable
// and esp is not already on it - TODO confirm against sas_ss_flags()
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(esp) == 0)
// Start from the top of the alternate stack
esp = current->sas_ss_sp + current->sas_ss_size;
}
/* This is the legacy signal stack switching. */
// Legacy mechanism inherited from Unix, kept for compatibility;
// its use is discouraged
else if ((regs->xss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
esp = (unsigned long) ka->sa.sa_restorer;
}
// Reserve room for the frame
esp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0. */
// Align the stack pointer as the i386 ABI requires
esp = ((esp + 4) & -16ul) - 4;
return (void __user *) esp;
}
setup_sigcontext()代码如下:
/*
 * setup_sigcontext - save the interrupted user context into a sigcontext.
 * @sc:      user-space sigcontext to fill
 * @fpstate: user-space area receiving the FPU state
 * @regs:    saved user-mode registers to record
 * @mask:    first word of the signal mask to store as oldmask
 *
 * Returns 0 on success and non-zero if any user-space write failed or the
 * FPU state could not be saved.
 */
static int
setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
int tmp, err = 0;
// Save the registers
err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs);
savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
err |= __put_user(regs->edi, &sc->edi);
err |= __put_user(regs->esi, &sc->esi);
err |= __put_user(regs->ebp, &sc->ebp);
err |= __put_user(regs->esp, &sc->esp);
err |= __put_user(regs->ebx, &sc->ebx);
err |= __put_user(regs->edx, &sc->edx);
err |= __put_user(regs->ecx, &sc->ecx);
err |= __put_user(regs->eax, &sc->eax);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->eip, &sc->eip);
err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->eflags, &sc->eflags);
err |= __put_user(regs->esp, &sc->esp_at_signal);
err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss);
// Save the FPU, MMX and XMM state; a negative result is an error,
// zero means no state was saved (sc->fpstate is then set to NULL)
tmp = save_i387(fpstate);
if (tmp < 0)
err = 1;
else
err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
/* non-iBCS2 extensions.. */
// mask is the low 32 bits of set->sig
err |= __put_user(mask, &sc->oldmask);
err |= __put_user(current->thread.cr2, &sc->cr2);
return err;
}
用下图表示上述的操作:
注意到代码中有以下两条指令:
regs->esp = (unsigned long) frame;
regs->eip = (unsigned long) ka->sa.sa_handler;
第一条把用户的栈指针指向了frame
第二条把返回用户空间的eip设为了信号的处理函数.
这样返回到用户空间后就会执行ka->sa.sa_handler这个函数.注意到上面的堆栈结构,其实它模拟了一次函数调用.函数调用时,先把参 数压栈,再把返回地址压栈.在上面的栈中,函数的参数为sig.返回地址为pretcode.这样,在信号处理函数返回之后.就会把pretcode装入 eip.而pretcode又是指向retcode.也就是说函数返回之后,会运行retcode对应的指令.
Retcode在上面的代码中是这样被设置的:
err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
代码中的0xb858和0x80cd就是指令的机器码(按小端字节序存放).它相当于如下指令:
popl %eax ;
movl $__NR_sigreturn,%eax ;
int $0x80
即会产生一个系统调用号为__NR_sigreturn的系统调用.它对应的入口是:
/*
 * sys_sigreturn - system call issued by the return trampoline once a signal
 * handler finishes: restores the blocked mask and the register context that
 * setup_frame() saved in the user-stack sigframe.
 *
 * Returns the restored eax value, so the interrupted context sees its
 * original eax. On a bad frame it forces SIGSEGV on the current task and
 * returns 0.
 */
asmlinkage int sys_sigreturn(unsigned long __unused)
{
// The address of the first argument is where the saved pt_regs begin
struct pt_regs *regs = (struct pt_regs *) &__unused;
// esp - 8: two stack slots were already popped in user space
// (pretcode by the handler's return, sig by the trampoline's popl)
struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8);
sigset_t set;
int eax;
// Check that the frame is readable
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
// Restore the low 32 bits of set.sig from frame->sc.oldmask
if (__get_user(set.sig[0], &frame->sc.oldmask)
|| (_NSIG_WORDS > 1
// and the upper 32 bits from frame->extramask
&& __copy_from_user(&set.sig[1], &frame->extramask,
sizeof(frame->extramask))))
goto badframe;
// Drop signals that are not blockable from the restored mask
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
current->blocked = set;
// Recompute whether unhandled signals remain
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
// Restore the hardware context saved in frame->sc
if (restore_sigcontext(regs, &frame->sc, &eax))
goto badframe;
return eax;
badframe:
if (show_unhandled_signals && printk_ratelimit())
printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
" esp:%lx oeax:%lx\n",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->eip,
regs->esp, regs->orig_eax);
force_sig(SIGSEGV, current);
return 0;
}
至此,内核栈又恢复到以前的样子了.
五:小结
本节中,在Linux内核中跟踪了信号处理函数的设置,信号的发送.信号的处理.涉及到的代码都不是很难理解.在理解了用户自定义的信号函数的运行机制之 后,我们也很容易调用用户空间的一个特定操作.另外,虽然内核涉及到的信号处理比较简单,但要在用户空间使用好信号就要看一个人的程序设计功底了.