首先看sys/ptrace.h
我们在调用的时候使用的是PTRACE_的导出符号,glibc也导出了PT_开头的符号。PTRACE_开头的符号定义在/bionic/libc/kernel/uapi/linux/ptrace.h中
都是int型的数字,回到ptrace函数的定义,头文件中写的是一个不定参数的形式
long ptrace(int, ...);
继续跟进__ptrace函数
__ptrace函数使用汇编实现,其中__NR_ptrace是系统调用号,根据调用号进入kernel层即linux内核的逻辑。此处__NR_ptrace的调用号为26,swi #0 用于产生软中断,切换到内核模式
内核的实现是在/kernel/ptrace.c中
PTRACE_TRACEME调用
static int ptrace_traceme(void)
382{
383 int ret = -EPERM;
384
385 write_lock_irq(&tasklist_lock);
386 /* Are we already being traced? */
387 if (!current->ptrace) {
388 ret = security_ptrace_traceme(current->parent);
389 /*
390 * Check PF_EXITING to ensure ->real_parent has not passed
391 * exit_ptrace(). Otherwise we don't report the error but
392 * pretend ->real_parent untraces us right after return.
393 */
394 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
395 current->ptrace = PT_PTRACED;
396 __ptrace_link(current, current->real_parent);
397 }
398 }
399 write_unlock_irq(&tasklist_lock);
400
401 return ret;
402}
__ptrace_link函数
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
38{
39 BUG_ON(!list_empty(&child->ptrace_entry));
40 list_add(&child->ptrace_entry, &new_parent->ptraced);
41 child->parent = new_parent;
42}
函数首先判断当前进程是否已经被trace,如果没有,则进行一系列安全检查,将当前进程设置为PT_PTRACED,并调用__ptrace_link函数将子进程链接到父进程的ptrace链表中。之后再进行系统调用的时候,内核就会判断当前进程是否被设置为traced,如果是,内核将向该进程发送SIGTRAP信号。该信号将导致当前进程停止,并将控制权交给父进程。
PTRACE_ATTACH调用ptrace_attach
static int ptrace_attach(struct task_struct *task, long request,
274 unsigned long addr,
275 unsigned long flags)
276{
277 bool seize = (request == PTRACE_SEIZE);
278 int retval;
279
280 retval = -EIO;
281 if (seize) {
282 if (addr != 0)
283 goto out;
284 if (flags & ~(unsigned long)PTRACE_O_MASK)
285 goto out;
286 flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
287 } else {
288 flags = PT_PTRACED;
289 }
290
291 audit_ptrace(task);
292
293 retval = -EPERM;
294 if (unlikely(task->flags & PF_KTHREAD))
295 goto out;
296 if (same_thread_group(task, current))
297 goto out;
298
299 /*
300 * Protect exec's credential calculations against our interference;
301 * SUID, SGID and LSM creds get determined differently
302 * under ptrace.
303 */
304 retval = -ERESTARTNOINTR;
305 if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
306 goto out;
307
308 task_lock(task);
309 retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
310 task_unlock(task);
311 if (retval)
312 goto unlock_creds;
313
314 write_lock_irq(&tasklist_lock);
315 retval = -EPERM;
316 if (unlikely(task->exit_state))
317 goto unlock_tasklist;
318 if (task->ptrace)
319 goto unlock_tasklist;
320
321 if (seize)
322 flags |= PT_SEIZED;
323 rcu_read_lock();
324 if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
325 flags |= PT_PTRACE_CAP;
326 rcu_read_unlock();
327 task->ptrace = flags;
328
329 __ptrace_link(task, current);
330
331 /* SEIZE doesn't trap tracee on attach */
332 if (!seize)
333 send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
334
335 spin_lock(&task->sighand->siglock);
336
337 /*
338 * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
339 * TRAPPING, and kick it so that it transits to TRACED. TRAPPING
340 * will be cleared if the child completes the transition or any
341 * event which clears the group stop states happens. We'll wait
342 * for the transition to complete before returning from this
343 * function.
344 *
345 * This hides STOPPED -> RUNNING -> TRACED transition from the
346 * attaching thread but a different thread in the same group can
347 * still observe the transient RUNNING state. IOW, if another
348 * thread's WNOHANG wait(2) on the stopped tracee races against
349 * ATTACH, the wait(2) may fail due to the transient RUNNING.
350 *
351 * The following task_is_stopped() test is safe as both transitions
352 * in and out of STOPPED are protected by siglock.
353 */
354 if (task_is_stopped(task) &&
355 task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
356 signal_wake_up_state(task, __TASK_STOPPED);
357
358 spin_unlock(&task->sighand->siglock);
359
360 retval = 0;
361 unlock_tasklist:
362 write_unlock_irq(&tasklist_lock);
363 unlock_creds:
364 mutex_unlock(&task->signal->cred_guard_mutex);
365 out:
366 if (!retval) {
367 wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT,
368 TASK_UNINTERRUPTIBLE);
369 proc_ptrace_connector(task, PTRACE_ATTACH);
370 }
371
372 return retval;
373}
整个函数的流程总结如下(分析来自linux3.5.4 ptrace源码分析二 新增部分修改)
1. 判断请求是PTRACE_SEIZE还是PTRACE_ATTACH,如果ptrace请求为PTRACE_SEIZE,则检查其参数是否正确,参数有误则退出
2. 判断task进程是否为kernel thread(PF_KTHREAD),调用same_thread_group(task,current),判断task是否和current进程在同一个线程组,查看current进程是否有权限追踪task进程,不符合要求则退出
3. 设置子进程task->ptrace = PT_PTRACED,即被跟踪状态;如果当前进程拥有CAP_SYS_PTRACE能力,则设置task->ptrace |= PT_PTRACE_CAP
4. 调用__ptrace_link(task, current),将task->ptrace_entry链接到current->ptraced链表中,并设置当前进程为被跟踪进程的新的父进程。
5. 如果是PTRACE_ATTACH请求(PTRACE_SEIZE请求不会停止被追踪进程),则调用send_sig_info(SIGSTOP,SEND_SIG_FORCED, task);发送SIGSTOP信号,中止task运行,设置task->state为TASK_STOPPED
6. 等待task->jobctl的JOBCTL_TRAPPING_BIT位被清零,阻塞时进程状态被设置为TASK_UNINTERRUPTIBLE并引发进程调度
PTRACE_ATTACH处理的方式与PTRACE_TRACEME处理的方式不同,PTRACE_ATTACH会使父进程向子进程发送SIGSTOP信号,如果子进程停止,父进程的wait操作则会被唤醒,从而成功attach。
而PTRACE_TRACEME只是表明该进程(child)想被trace的意愿。如果一个进程调用了PTRACE_TRACEME,那么该进程处理信号的方式将会变得不同。比如:一个进程正在运行,此时输入ctrl+c(SIGINT),该进程会直接退出;但是,如果该进程中调用了ptrace(PTRACE_TRACEME,0,NULL,NULL),即该进程主动要求被跟踪,那么当输入CTRL+C时,该进程将会处于stopped的状态。
PTRACE_PEEKDATA/PEEKTEXT
PTRACE_POKEDATA/POKETEXT
int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
1076 unsigned long data)
1077 {
1078 unsigned long tmp;
1079 int copied;
1080
1081 copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
1082 if (copied != sizeof(tmp))
1083 return -EIO;
1084 return put_user(tmp, (unsigned long __user *)data);
1085 }
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
1088 unsigned long data)
1089{
1090 int copied;
1091
1092 copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
1093 return (copied == sizeof(data)) ? 0 : -EIO;
1094}
1095
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
2058 {
2059 struct mm_struct *mm;
2060
2061 if (addr + len < addr)
2062 return 0;
2063
2064 mm = get_task_mm(tsk);
2065 if (!mm)
2066 return 0;
2067
2068 len = __access_remote_vm(tsk, mm, addr, buf, len, write);
2069
2070 mmput(mm);
2071 return len;
2072 }
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
2006 unsigned long addr, void *buf, int len, int write)
2007 {
2008 struct vm_area_struct *vma;
2009
2010 down_read(&mm->mmap_sem);
2011
2012 /* the access must start within one of the target process's mappings */
2013 vma = find_vma(mm, addr);
2014 if (vma) {
2015 /* don't overrun this mapping */
2016 if (addr + len >= vma->vm_end)
2017 len = vma->vm_end - addr;
2018
2019 /* only read or write mappings where it is permitted */
2020 if (write && vma->vm_flags & VM_MAYWRITE)
2021 copy_to_user_page(vma, NULL, addr,
2022 (void *) addr, buf, len);
2023 else if (!write && vma->vm_flags & VM_MAYREAD)
2024 copy_from_user_page(vma, NULL, addr,
2025 buf, (void *) addr, len);
2026 else
2027 len = 0;
2028 } else {
2029 len = 0;
2030 }
2031
2032 up_read(&mm->mmap_sem);
2033
2034 return len;
2035 }
#undef copy_to_user_page
83static inline void copy_to_user_page(struct vm_area_struct *vma,
84 struct page *page,
85 unsigned long vaddr,
86 void *dst, void *src, int len)
87{
88 memcpy(dst, src, len);
89 if (vma->vm_flags & VM_EXEC) {
90 flush_icache_range((unsigned long) dst,
91 (unsigned long) dst + len);
92 }
93}
获取内存的值和写入内存值逻辑上差不多:内核通过write参数区分是写入还是读取,并根据目标内存区域(VMA)的vm_flags属性(VM_MAYWRITE/VM_MAYREAD)检查该操作是否被允许。最终由memcpy函数实现内存的拷贝(如上面的copy_to_user_page所示)。