[原帖地址: http://hi.baidu.com/higkoo/blog/item/36ab6cf3c47eefc60b46e003.html]
今天,我们来了解一下怎样使用ltrace、strace来跟踪Linux进程事件。
首先,瞄一眼ltrace、strace的说明:
ltrace - A library call tracer
strace - trace system calls and signals
下面我用C和C++的一个简单例子来演示一下它们的用法,同时使用C和C++也有点对比的意味。
main.c
#include <stdio.h>
/* Entry point of the C sample: prints "Hello World !" (no trailing newline)
 * through printf and returns 0 as the exit status. */
int main(){
printf("Hello World !");
return 0;
}
main.cpp
#include <iostream>
// Entry point of the C++ sample: streams "Hello World !" followed by
// std::endl (newline + flush) to std::cout and returns 0 as the exit status.
int main(){
std::cout<<"Hello World !"<<std::endl;
return 0;
}
有兴趣的朋友可以先比较一下两者生成的汇编代码,汇编的内容和trace的结果有一定关联。
编译汇编码方式如下(由于百度博客长度限制,不能传附件,就不贴出来了):
gcc -S ./main.c -o main.c.s
g++ -S ./main.cpp -o main.cpp.s
下一步,编译二进制文件:
gcc ./main.c -o ./main_c
g++ ./main.cpp -o ./main_cpp
使用file命令查看文件main_c、main_cpp的属性(两者输出相同):
ELF 64-bit LSB executable, AMD x86-64, version 1 (SYSV), for GNU/Linux 2.6.9, dynamically linked (uses shared libs), for GNU/Linux 2.6.9, not stripped
两者运行结果基本相同,都是打印“Hello World !”后退出(C++版本因为使用了std::endl,会多输出一个换行符)。
下面,我们来看下trace结果。
ltrace ./main_c
__libc_start_main(0x400498, 1, 0x7fffeb89e8a8, 0x4004d0, 0x4004c0 <unfinished ...>
printf("Hello World !") = 13
Hello World !+++ exited (status 0) +++
ltrace ./main_cpp
__libc_start_main(0x400844, 1, 0x7fff01389398, 0x400880, 0x400870 <unfinished ...>
_ZNSt8ios_base4InitC1Ev(0x600e2c, 65535, 0x7fff013893a8, 3, 0x31e1752350) = 2
__cxa_atexit(0x40082c, 0, 0x400960, 3, 0x31e1752350) = 0
_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(0x600d10, 0x400968, 0x7fff013893a8, 4, 0x31e1752370) = 0x600d10
_ZNSolsEPFRSoS_E(0x600d10, 0x4006e0, 0, 0xfbad2a84, 0xffffffff <unfinished ...>
_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(0x600d10, 0x4006e0, 0, 0xfbad2a84, 0xffffffffHello World !
) = 0x600d10
_ZNSt8ios_base4InitD1Ev(0x600e2c, 0, 0x31e1752370, -1, 0x2b2e79280530) = 3
+++ exited (status 0) +++
相比而言,C语言真简洁!
下面再看一下strace的结果(由于篇幅限制,省略了部分数据):
strace ./main_c
execve("./main_c", ["./main_c"], [/* 30 vars */]) = 0
brk(0) = 0x1a9d4000
uname({sys="Linux", node="performance106", ...}) = 0
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=68624, ...}) = 0
mmap(NULL, 68624, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2afefc5f6000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\332A\3411\0\0\0"..., 832) = 832
mprotect(0x31e154d000, 2097152, PROT_NONE) = 0
close(3) = 0
arch_prctl(ARCH_SET_FS, 0x2afefc608210) = 0
write(1, "Hello World !", 13Hello World !) = 13
exit_group(0) = ?
strace ./main_cpp
execve("./main_cpp", ["./main_cpp"], [/* 30 vars */]) = 0
brk(0) = 0x1d954000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b4f71efd000
uname({sys="Linux", node="performance106", ...}) = 0
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=68624, ...}) = 0
close(3) = 0
open("/usr/lib64/libstdc++.so.6", O_RDONLY) = 3
arch_prctl(ARCH_SET_FS, 0x2b4f71f11530) = 0
mprotect(0x31e121b000, 4096, PROT_READ) = 0
munmap(0x2b4f71efe000, 68624) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b4f71efe000
write(1, "Hello World !\n", 14Hello World !
) = 14
exit_group(0) = ?
从系统调用过程来看,最终方式是相同的,但C++过程繁琐一些。
上述示例是捕捉可执行程序的执行过程,同理也可以捕捉服务程序内部正在执行的事件。
譬如Nginx、Apache、Python等服务当前做了哪些事情,当遇到异常时非常有参考价值。
由于Java使用了自己的虚拟机,必须使用Java对应的监控工具才行(如JProfiler、JConsole、VisualVM等)。用ltrace进行跟踪,会收到“Cannot attach to pid 18663: Operation not permitted”。
分析过程一般还需要用到 truss、gdb、pstack、pfiles、top、lsof、pmap、netstat、ps 等指令共同分析。