问题场景:
测试人员报告,业务 modb 作为 RabbitMQ 的消费者,消费消息的速度非常慢,慢到大约每秒 2 条左右,从而导致 RabbitMQ 的队列中积压了 4000+ 条消息。
排查过程:
先通过 top 命令定位一下
[root@upucore_1 ~]# top -Hp 8177
top - 09:50:33 up 34 min, 8 users, load average: 4.76, 4.32, 3.65
Tasks: 2 total, 0 running, 2 sleeping, 0 stopped, 0 zombie
Cpu0 : 11.8%us, 11.8%sy, 0.0%ni, 76.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu1 : 11.1%us, 11.1%sy, 0.0%ni, 77.8%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu2 : 5.9%us, 11.8%sy, 0.0%ni, 82.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu3 : 23.5%us, 29.4%sy, 0.0%ni, 47.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu4 : 5.6%us, 5.6%sy, 0.0%ni, 88.9%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu5 : 11.1%us, 16.7%sy, 0.0%ni, 72.2%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu6 : 5.3%us, 5.3%sy, 0.0%ni, 84.2%id, 0.0%wa, 0.0%hi, 5.3%si, 0.0%st
Cpu7 : 16.7%us, 33.3%sy, 0.0%ni, 50.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu8 : 5.6%us, 0.0%sy, 0.0%ni, 94.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu9 : 5.6%us, 94.4%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu10 : 10.5%us, 31.6%sy, 0.0%ni, 57.9%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu11 : 12.5%us, 25.0%sy, 0.0%ni, 62.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu12 : 10.5%us, 15.8%sy, 0.0%ni, 73.7%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu13 : 0.0%us, 70.6%sy, 0.0%ni, 29.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu14 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu15 : 5.6%us, 0.0%sy, 0.0%ni, 94.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Mem: 32823540k total, 9229512k used, 23594028k free, 214588k buffers
Swap: 0k total, 0k used, 0k free, 809616k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
8177 root 20 0 110m 3268 1544 S 0.0 0.0 0:00.66 modb
13784 root 20 0 110m 3268 1544 S 0.0 0.0 0:00.00 modb
看起来 CPU 和内存使用没有什么异常。通过 pstack 进行观察
[root@upucore_1 ~]# pstack 8177
Thread 2 (Thread 0x7f26f132e700 (LWP 13784)):
#0 0x00000032698e8ef3 in epoll_wait () from /lib64/libc.so.6
#1 0x00007f26f1f62a96 in epoll_dispatch () from /opt/mcu/modbcore/libevent_core.so
#2 0x00007f26f1f529f6 in event_base_loop () from /opt/mcu/modbcore/libevent_core.so
#3 0x0000000000408fcc in sql_event_thread_loop(event_thread_t*) ()
#4 0x0000003269c079d1 in start_thread () from /lib64/libpthread.so.0
#5 0x00000032698e88fd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7f26f1330720 (LWP 8177)):
#0 0x00000032698db53d in write () from /lib64/libc.so.6
#1 0x0000003269871ad3 in _IO_new_file_write () from /lib64/libc.so.6
#2 0x000000326987199a in _IO_new_file_xsputn () from /lib64/libc.so.6
#3 0x0000003269849681 in buffered_vfprintf () from /lib64/libc.so.6
#4 0x000000326984421e in vfprintf () from /lib64/libc.so.6
#5 0x000000326984f0e8 in fprintf () from /lib64/libc.so.6
#6 0x00007f26f1d37f26 in fsm(amqp_connection_state_t_*) () from /opt/mcu/modbcore/librabbitmq_r.so
#7 0x00007f26f1f52dbc in event_base_loop () from /opt/mcu/modbcore/libevent_core.so
#8 0x0000000000406835 in main ()
发现 pid 为 8177 的 modb 调用栈确实会卡在 write() 系统调用上。对应到业务代码,则是 modb(内部链接的共享库 librabbitmq.so)中通过 fprintf 输出日志到 stderr 上的情况。
继续通过 strace 确认一下
[root@upucore_1 ~]# strace -t -p 8177
Process 8177 attached - interrupt to quit
09:53:30 write(2, "110000199\",null,null,null,null,n"..., 1667) = 1667
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7607275, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7607275, ...}) = 0
09:53:30 lseek(18, 7607275, SEEK_SET) = 7607275
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 \n[Modb] l"..., 4096) = 4096
09:53:30 write(18, "values(?,?)\"],\"src...\n", 22) = 22
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 write(2, "[Modb] ####### curBMC(1755) > l"..., 54) = 54
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611393, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611393, ...}) = 0
09:53:30 lseek(18, 7611393, SEEK_SET) = 7611393
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] ##"..., 77) = 77
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 write(2, "[Modb] ####### modbKey = 26277d"..., 90) = 90
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611470, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611470, ...}) = 0
09:53:30 lseek(18, 7611470, SEEK_SET) = 7611470
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] ##"..., 113) = 113
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 sendto(8, "\f\0\0\0\3use modb_db", 16, 0, NULL, 0) = 16
09:53:30 recvfrom(8, "\7\0\0\1\0\0\0\2\0\0\0", 16384, 0, NULL, NULL) = 11
09:53:30 write(2, "[Modb] 'use modb_db' Success!\n", 30) = 30
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611583, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611583, ...}) = 0
09:53:30 lseek(18, 7611583, SEEK_SET) = 7611583
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] 'u"..., 53) = 53
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 sendto(8, "\305\0\0\0\3insert into modbVer(bmc_ver"..., 201, 0, NULL, 0) = 201
09:53:30 recvfrom(8, "\7\0\0\1\0\1\0\2\0\0\0", 16384, 0, NULL, NULL) = 11
09:53:30 write(2, "[Modb] 'insert into modbVer(bmc_"..., 215
(此处会卡住,人为回车进行分隔)
) = 215
09:53:34 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611636, ...}) = 0
09:53:34 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611636, ...}) = 0
09:53:34 lseek(18, 7611636, SEEK_SET) = 7611636
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(18, "[8177] 17 Sep 09:53:34 [Modb] 'i"..., 238) = 238
09:53:34 close(18) = 0
09:53:34 munmap(0x7f26f1553000, 4096) = 0
09:53:34 write(2, "\n[Modb] local_comsume_cb: not DE"..., 83) = 83
09:53:34 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611874, ...}) = 0
09:53:34 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611874, ...}) = 0
09:53:34 lseek(18, 7611874, SEEK_SET) = 7611874
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(18, "[8177] 17 Sep 09:53:34 \n[Modb] l"..., 106) = 106
09:53:34 close(18) = 0
...
09:53:34 munmap(0x7f26f1553000, 4096) = 0
09:53:34 sendto(16, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\1\251\0\316", 21, MSG_NOSIGNAL, NULL, 0) = 21
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] --> send"..., 43) = 43
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16][fsm:conn_"..., 56) = 56
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {}, 32, 0) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] mq_pop => timestamp ="..., 53) = 53
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 79) = 79
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 96) = 96
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 60) = 60
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 recvfrom(16, "\1\0\1\0\0\0.\0<\0<\vCON_f_LOCAL\0\0\0\0\0\0\1\252\1"..., 131072, 0, NULL, NULL) = 4168
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 47) = 47
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {{EPOLLOUT, {u32=17, u64=17}}}, 32, 0) = 1
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\1\0\1\0\0\0B\0<\0(\0\0\25modb.transfer.ex:c"..., 74, MSG_NOSIGNAL, NULL, 0) = 74
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] send Basic.Publish =>"..., 65) = 65
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 47) = 47
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\2\0\1\0\0\0 \0<\0\0\0\0\0\0\0\0\2\371\220\0\20applicatio"..., 40, MSG_NOSIGNAL, NULL, 0) = 40
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 55) = 55
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\3\0\1\0\0\2\371{\"keys\": [], \"app\": \"movi"..., 769, MSG_NOSIGNAL, NULL, 0) = 769
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] send Content-Body => "..., 64) = 64
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 53) = 53
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLIN, {u32=17, u64=17}}) = 0
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 55) = 55
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {}, 32, 0) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 53) = 53
09:53:34 write(2, "\n[Modb] local_comsume_cb: Recv C"..., 4115
(此处会卡住,人为回车进行分隔)
) = 4115
09:53:42 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:42 fstat(18, {st_mode=S_IFREG|0644, st_size=7613621, ...}) = 0
09:53:42 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:42 fstat(18, {st_mode=S_IFREG|0644, st_size=7613621, ...}) = 0
09:53:42 lseek(18, 7613621, SEEK_SET) = 7613621
09:53:42 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:42 write(18, "[8177] 17 Sep 09:53:42 \n[Modb] l"..., 4096) = 4096
09:53:42 write(18, "values(?,?)\"],\"src...\n", 22) = 22
09:53:42 close(18) = 0
...
09:53:51 write(2, "[MQ][s:17] send Content-Body => "..., 64
^C <unfinished ...>
Process 8177 detached
[root@upucore_1 ~]#
通过 strace 发现,每次卡住的情况都发生在进行 write 系统调用时,且为向 fd 2 上输出日志信息。可以看到,输出的日志内容包括 modb 自身的日志和其所链接的 librabbitmq.so 中的日志。
怀疑是否因为硬盘被写满导致
,查看系统硬盘和内存情况,发现均无异常。
[root@upucore_1 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 39G 9.8G 27G 27% /
tmpfs 16G 0 16G 0% /dev/shm
/dev/sda1 58G 7.9G 47G 15% /opt/mcu
/dev/sda5 4.9G 138M 4.5G 3% /opt/sync
/dev/sda3 9.7G 150M 9.0G 2% /usr/mcu/log
gfsd1:/web_app 39G 9.8G 27G 27% /mnt/files
[root@upucore_1 ~]#
[root@upucore_1 ~]#
[root@upucore_1 ~]# free -m
total used free shared buffers cached
Mem: 32054 9081 22972 0 221 818
-/+ buffers/cache: 8042 24012
Swap: 0 0 0
[root@upucore_1 ~]#
怀疑是否由于有其他业务程序写磁盘,导致 I/O 能力不够。查看发现只有 mysql 线程的磁盘写比较高,但也在合理范围内。
[root@upucore_1 data]# iotop -o
Total DISK READ: 3.06 K/s | Total DISK WRITE: 1344.33 K/s
TID PRIO USER DISK READ DISK WRITE SWAPIN IO> COMMAND
540 be/3 root 0.00 B/s 0.00 B/s 0.00 % 25.83 % [jbd2/sda2-8]
7917 be/4 root 0.00 B/s 45.93 K/s 0.00 % 6.60 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
4806 be/4 mysql 0.00 B/s 58.18 K/s 0.00 % 4.74 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
7934 be/4 root 0.00 B/s 30.62 K/s 0.00 % 4.69 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
7916 be/4 root 0.00 B/s 27.56 K/s 0.00 % 2.47 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
1229 be/3 root 0.00 B/s 79.62 K/s 0.00 % 1.08 % [jbd2/sda1-8]
6923 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.80 % run_erl -daemon /tmp//opt/mcu/upucore/ /opt/mcu/upucore/log exec /opt/mcu/upucore/bin/upu console ''
9449 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.61 % run_erl -daemon /tmp//opt/mcu/upu/ /opt/mcu/upu/log exec /opt/mcu/upu/bin/upu console ''
4933 be/4 mysql 0.00 B/s 857.43 K/s 0.00 % 0.48 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4935 be/4 mysql 0.00 B/s 6.12 K/s 0.00 % 0.31 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
21292 be/4 mysql 3.06 K/s 9.19 K/s 0.00 % 0.27 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4921 be/4 mysql 0.00 B/s 3.06 K/s 0.00 % 0.22 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4803 be/4 mysql 0.00 B/s 0.00 B/s 0.00 % 0.02 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4804 be/4 mysql 0.00 B/s 0.00 B/s 0.00 % 0.00 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
1602 be/4 root 0.00 B/s 21.44 K/s 0.00 % 0.00 % ./sa
1612 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.00 % ./sa
1638 be/4 root 0.00 B/s 9.19 K/s 0.00 % 0.00 % ./sa
13119 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.00 % nms_adapter
1 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % init
2 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [kthreadd]
3 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/0]
4 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/0]
5 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/0]
6 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/0]
7 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/1]
8 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/1]
9 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/1]
10 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/1]
11 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/2]
12 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/2]
13 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/2]
14 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/2]
15 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/3]
16 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/3]
17 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/3]
18 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/3]
19 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/4]
20 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/4]
21 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/4]
[root@upucore_1 data]#
经过一番分析,发现之前的排查方向存在问题。日志输出到 stderr 上,但 stderr 却没有重定向到文件上。所以应该和磁盘 I/O 没直接关系。
查看下 stderr 的指向
[root@upucore_1 ~]# ps aux|grep modb
root 4179 0.0 0.0 111744 2080 ? Sl 16:57 0:00 /opt/mcu/modb/modb /etc/modb.conf
root 4189 0.0 0.0 110952 2040 ? Sl 16:57 0:00 /opt/mcu/modbcore/modb /etc/modbcore.conf
root 4413 0.0 0.0 108200 1300 ? S 16:57 0:00 /bin/sh /usr/local/keepalived/etc/keepalived/script_master.sh mysql nds modbcore zookeeper
root 4750 0.0 0.0 103256 872 pts/0 S+ 16:57 0:00 grep modb
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 4179 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4179 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 4189 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4189 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof /dev/console
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4179 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 2u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
结果发现,确实两个 modb 业务的 fd 0,1,2 均指向了 /dev/console 。并且系统中只有 modb 业务的 fd 0,1,2 是指向 /dev/console 的。
那么 /dev/console 是什么呢?
=== 我是伪装者真好看的分隔线 ===
- tty 是一类 char 设备的通称;
- /dev/console 是一个虚拟的 tty ,它将映射到真正的 tty 上;
- console 有多种含义,简单理解,可以认为特指 printk 输出的设备;
- console 和 tty 有很大区别:console 是个只输出的设备,功能很简单,只能在内核中访问;tty 是 char 设备,可以被用户程序访问。
- 实际的驱动,比如串口驱动会对物理设备(串口)注册两次,一个是 tty ,一个是 console ,并通过在 console 的结构中记录 tty 的主次设备号建立了联系。
- 在内核中,tty 和 console 都可以注册多个。
- 当内核命令行上指定 console=ttyS0 之类的参数时,首先确定了 printk 实际使用哪个 console 作为输出,其次由于 console 和 tty 之间的对应关系,打开 /dev/console 时,就会映射到相应的 tty 上。用一句话说:/dev/console 将映射到默认 console 对应的 tty 上。
=== 我是伪装者真好看的分隔线 ===
The /dev/console device is not necessarily available to you. The reason for that is because some system messages are written to the system console. For Linux that device would only show any result if you happen to look at the current virtual terminal.
From the documentation:
- /dev/tty Current TTY device
- /dev/console System console
- /dev/tty0 Current virtual console
In the good old days /dev/console was System Administrator console.
And TTYs were users' serial devices attached to a server.
Now /dev/console and /dev/tty0 represent current display and usually are the same.
You can override it for example by adding console=ttyS0 to grub.conf. After that your /dev/tty0 is a monitor and /dev/console is /dev/ttyS0.
/dev/console is a virtual set of devices which can be set as a parameter at boot time.
It might be redirected to a serial device or a virtual console and by default points to /dev/tty0.
/dev/tty0 is the current virtual console
/dev/tty[1-x] is one of the virtual consoles you switch to with control-alt-F1 and so on.
/dev/tty is the console used by the process querying it. Unlike the other devices, you do not need root privileges to write to it. Note also that processes like the ones launched by cron and similar batch processes have no usable /dev/tty, as they aren't associated with any. These processes have a ? in the TTY column of ps -ef output.When multiple console= options are passed to the kernel, the console output will go to more than one device.
=== 我是伪装者真好看的分隔线 ===
tty devices are among the most numerous and the most confusing of all the files in /dev. Every time you log in or launch a new xterm, a tty device is assigned to the shell. This device is responsible for displaying the shell's output and providing its input. It is known as the controlling tty for that shell and its child processes. The device /dev/tty is a magic device that refers to whichever tty device is the controlling tty for that process.
Besides /dev/tty, there are three types of tty devices: consoles, serial ports and pseudo devices.
Console ttys are used when the keyboard and monitor are directly connected to the system without running the X Window System. Since you can have several virtual consoles, the devices are tty0 through tty63. In theory you can have 64 virtual consoles, but most people use only a few. The device /dev/console is identical to tty0 and is needed for historical reasons. If your system lets you log in on consoles 1 through 6, then when you run X Windows System, X uses console 7, so you'll need /dev/tty1 through /dev/tty7 on your system. I recommend having files up through /dev/tty12. For more information on using virtual consoles, see the article Keyboards, Consoles and VT Cruising by John Fisk in the November 1996 issue of Linux Journal.
=== 我是伪装者真好看的分隔线 ===
从上面的信息可以知道,/dev/console 的值和内核命令行参数设置有关。查看当前内核命令行设置,如下
[root@upucore_1 ~]# cat /proc/cmdline
ro root=UUID=ef97984a-07ef-4cdb-9825-cb845c23e7ec rd_NO_LUKS LANG=en_US.UTF-8 KEYTABLE=us rd_NO_MD SYSFONT=latarcyrheb-sun16 crashkernel=131M@0M console=ttyS0,9600n8 rd_NO_LVM rd_NO_DM
[root@upucore_1 ~]#
可以看到 console=ttyS0,9600n8 这个设置,意思是输出到 /dev/console 的内容,实际是发送给了串口,而串口波特率默认值为 9600 b/s 。虽然后续会调整到 115200 ,那也不过才 115 kB/s 的输出速度。
所以,在日志输出频繁的情况下,将导致串口输出速度达到饱和,进而卡住业务进程中的 write 系统调用,最终导致业务行为上“慢半拍”。
虽然已经知道问题的原因了,但为什么 modb 这个业务的 fd 0,1,2 会指向 /dev/console 呢?
这和两方面有关:
- 业务 modb 已经被添加为系统服务,所以在系统启动时会被 init 进程所启动;
- 为了排查问题的方便,在 /proc/cmdline 中配置了 console=ttyS0,9600n8
那么是否其他业务作为系统服务被 init 进程启动时也有这个问题呢?查看信息如下
[root@upucore_1 ~]# chkconfig --list
...
haproxy 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
keepalived 0:off 1:off 2:off 3:off 4:off 5:off 6:off
...
memcached 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
modb 0:off 1:off 2:on 3:on 4:on 5:on 6:off
modbcore 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
nginx 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
rabbitmq 0:off 1:off 2:off 3:off 4:off 5:off 6:off
...
redis 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
zookeeper 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
[root@upucore_1 ~]#
选择其中几个进行查看
[root@upucore_1 ~]# lsof -c redis -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
redis-ser 4159 root 0u CHR 1,3 0t0 3968 /dev/null
redis-ser 4159 root 1u CHR 1,3 0t0 3968 /dev/null
redis-ser 4159 root 2u CHR 1,3 0t0 3968 /dev/null
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -c memcached -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
memcached 1976 root 0u CHR 1,3 0t0 3968 /dev/null
memcached 1976 root 1u CHR 1,3 0t0 3968 /dev/null
memcached 1976 root 2u CHR 1,3 0t0 3968 /dev/null
[root@upucore_1 ~]#
[root@upucore_1 ~]# ps ajxf|grep nginx
1 1994 1994 1994 ? -1 Ss 0 0:00 nginx: master process /usr/local/nginx/sbin/nginx -c /usr/local/nginx/conf/nginx.conf
1994 1995 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1996 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1997 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1998 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2000 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2001 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2002 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2003 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2004 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2005 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2007 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2008 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2009 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2010 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2011 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2012 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2013 1994 1994 ? -1 S 0 0:02 \_ nginx: cache manager process
8264 17252 17251 8264 pts/0 17251 S+ 0 0:00 | \_ grep nginx
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 1994 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
nginx 1994 root 0u CHR 1,3 0t0 3968 /dev/null
nginx 1994 root 1u CHR 1,3 0t0 3968 /dev/null
nginx 1994 root 2w REG 8,2 9623092 1051452 /usr/local/nginx/logs/error.log
[root@upucore_1 ~]#
[root@upucore_1 ~]#
可以看到,fd 0,1,2 要么被重定向到 /dev/null ,要么被重定向到文件中了。
好吧,这说明要想作为系统服务运行的话,还是应该“洁身自好”才行~~
解决办法:
不再将业务 modb 添加为系统服务,只通过自定义脚本控制其启动和停止。并且在启动脚本中做重定向。
因为自定义脚本对应的进程为 -bash 的子进程,而业务程序以常规命令行形式启动,所以 fd 0,1,2 的重定向取决于和当前业务相关的进程树中是否有重定向动作。而通常的实现方式为,在启动业务程序的时候直接通过 >/xxx/xxx.log 2>&1 的方式进行处理。
最后确定一下,输出的日志数据是否过大,以及是否存在由于代码编写错误导致的无用输出。结果发现,消息内容是合法的,消息体确实比较大。
[root@upucore_1 modbcore]# strace -s 4096 -p 8177
Process 8177 attached - interrupt to quit
...
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19540496, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19540496, ...}) = 0
lseek(18, 19540496, SEEK_SET) = 19540496
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 \n[Modb] local_comsume_cb: Recv Content-Body ==>\n {\"app\":\"movision\",\"data\":{\"account\":\"0512110000195\",\"accountType\":1,\"binded\":false,\"brief\":\"\",\"cmsApproval\":false,\"createdAt\":null,\"dateOfBirth\":null,\"dcsAdmin\":false,\"defaultServiceDomainAdmin\":false,\"defaultUserDomainAdmin\":false,\"deviceGuid\":\"10252200000000000000512110000195\",\"deviceType\":\"522\",\"e164\":\"0512110000195\",\"email\":\"\",\"enable\":true,\"enableBMC\":false,\"enableCall\":false,\"enableDCS\":true,\"enableHD\":true,\"enableHDMeeting\":true,\"enableIncoming\":false,\"enableLive\":false,\"enableMeeting\":true,\"enableMeetingSMS\":false,\"enableNM\":false,\"enableOut\":false,\"enablePlay\":false,\"enableRoam\":false,\"enableSatellite\":false,\"enableSatelliteP2P\":false,\"enableUMC\":false,\"enableVRS\":true,\"enableVenueMonitor\":false,\"enableVideo\":false,\"enableWeibo\":true,\"extNum\":\"\",\"fax\":\"\",\"fullPy\":\"\",\"id\":643,\"ipRegion\":\"\",\"jid\":\"b9da57fa-c384-4247-a797-05fa19537730@hello\",\"jobNum\":\"\",\"limited\":false,\"meetingAdmin\":false,\"mobile\":\"\",\"moid\":\"b9da57fa-c384-4247-a797-05fa19537730\",\"name\":\"0512110000195\",\"nmAdmin\":false,\"nuServerId\":\"3b7cfbb3-b5c5-42b2-a759-209557a1ecf3\",\"officeLocation\":\"\",\"password\":\"21218cca77804d2ba1922c33e0151105\",\"portrait128\":\"\",\"portrait256\":\"\",\"portrait32\":\"\",\"portrait40\":\"\",\"portrait64\":\"\",\"restrictStrategy\":0,\"restrictUsedOn\":\"HD100;HD100S;HD1000;WD1000;TS6610;TS5610;TrueLink;iPad;iPhone;Android_Phone;SKY X500 1080P60;SKY X500 1080P30;SKY X500 720P60;TrueSens X500 1080P;TrueSens X500 720P;SKY X700;SKY X900;SKY for Windows;SKY for Mac;SKY for Android TV;SKY for Android Pad;HD100-TP\",\"seat\":\"\",\"serviceDomainAdmin\":false,\"serviceDomainMoid\":\"26277da4-b84d-4cad-8c69-fc977c9397d0\",\"serviceDomainName\":\"keda\",\"sex\":\"\",\"userDomainAdmin\":false,\"userDomainMoid\":\"zzeq2smvxtq687vgzno1vbco\",\"userDomainName\":\"admin\",\"validityPeriod\":{\"date\":29,\"day\":5,\"hours\":0,\"minutes\":0,\"month\":11,\"seconds\":0,\"time\":1514476800000,\"timezoneOffset\":-480,\"year\":117},\"vrsAdmin\":false,\"weiboAdmin\":false},\"db\":\"use ap\",\"event\":\"createAccount\",\"init\":true,\"keys\":[\"26277da4-b84d-4cad-8c69-fc977c9397d0\"],\"sqlargs\":[[\"b9da57fa-c384-4247-a797-05fa19537730\",\"0512110000195\",null,\"0512110000195\",null,null,\"b9da57fa-c384-4247-a797-05fa19537730@hello\",\"21218cca77804d2ba1922c33e0151105\",\"zzeq2smvxtq687vgzno1vbco\",\"26277da4-b84d-4cad-8c69-fc977c9397d0\",\"10252200000000000000512110000195\",\"3b7cfbb3-b5c5-42b2-a759-209557a1ecf3\",\"522\",1,false,true,false,null],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"0512110000195\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",null,\"\",\"2017-12-29\",\"HD100;HD100S;HD1000;WD1000;TS6610;TS5610;TrueLink;iPad;iPhone;Android_Phone;SKY X500 1080P60;SKY X500 1080P30;SKY X500 720P60;TrueSens X500 1080P;TrueSens X500 720P;SKY X700;SKY X900;SKY for Windows;SKY for Mac;SKY for Android TV;SKY for Android Pad;HD100-TP\",null,null,null,null,null,null],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableMeeting\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableHD\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableVRS\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableHDMeeting\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableDCS\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableWeibo\"]],\"sqls\":[\"insert into user_info(moid,account,email,e164,mobile,ip_region,jid,password,user_domain_moid,service_domain_moid,device_guid,nu_server_id,device_type,account_type,binded,enable,created_at,limited,manage_user_domain_moid) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,now(),?,?)\",\"insert into user_profile(moid,name,seat,ext_num,fax,job_num,brief,full_py,sex,date_of_birth,office_location,validity_period,restrict_used_on,restrict_strategy,portrait_32,portrait_40,portrait_64,portrait_128,portrait_256) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\"],\"sr", 4096) = 4096
write(18, "c\":\"\",\"version\":\"\"}\n\n", 21) = 21
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
write(2, "\n[Modb] local_comsume_cb: not DEST Platform, do TRANSFER and BROADCAST if config!\n\n", 83) = 83
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19544613, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19544613, ...}) = 0
lseek(18, 19544613, SEEK_SET) = 19544613
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 \n[Modb] local_comsume_cb: not DEST Platform, do TRANSFER and BROADCAST if config!\n\n", 106) = 106
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(2, "[MQ] mq_push => timestamp = 17 Sep 15:06:45.307\n", 48) = 48
epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLOUT, {u32=16, u64=16}}) = 0
epoll_wait(3, {{EPOLLIN, {u32=17, u64=17}}, {EPOLLOUT, {u32=16, u64=16}}}, 32, 10) = 2
epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLIN, {u32=17, u64=17}}) = 0
epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLOUT, {u32=16, u64=16}}) = 0
recvfrom(17, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\16h\0\316", 131072, 0, NULL, NULL) = 21
write(2, "[MQ][PRO_t_TRANS s:17] <-- recv Basic.Ack\n", 43) = 43
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(2, "[MQ][s:17] recv Basic.Ack => timestamp = 17 Sep 15:06:45.308\n", 61) = 61
write(2, "[MoDB] ### CB: Publisher Confirm -- [Basic.Ack] Tag:[PRO_t_TRANS] Sock[17] Delivery_Tag:[3688] multiple:[0]\n", 109) = 109
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19544719, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19544719, ...}) = 0
lseek(18, 19544719, SEEK_SET) = 19544719
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 [MoDB] ### CB: Publisher Confirm -- [Basic.Ack] Tag:[PRO_t_TRANS] Sock[17] Delivery_Tag:[3688] multiple:[0]\n", 132) = 132
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
sendto(16, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\22\26\0\316", 21, MSG_NOSIGNAL, NULL, 0) = 21
write(2, "[MQ][CON_f_LOCAL s:16] --> send Basic.Ack\n", 43) = 43
write(2, "[MQ][CON_f_LOCAL s:16][fsm:conn_idle][C] Start to recv!\n", 56
(卡住)
) = 56
...
^C <unfinished ...>
Process 8177 detached
[root@upucore_1 modbcore]#
参考:
- 《Understanding /dev》
- 《Text-Terminal-HOWTO》