一次linux的groupadd hang住处理


         某日从同事那里要了一台linux服务器,准备来安装timesten。在执行groupadd ttadmin时,命令一直没有响应,卡住了

 使用top命令查看系统资源,发现系统处于一个很空闲的状态,没有任何异常

Cpu(s):  0.5%us,  0.0%sy,  0.0%ni, 99.5%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st


通过 top -p 7621 命令可以看到该进程并没有在运行,状态一直处于S(睡眠)

top - 17:14:09 up 1 day, 22:03,  7 users,  load average: 0.06, 0.25, 0.34
Tasks:   1 total,   0 running,   1 sleeping,   0 stopped,   0 zombie
Cpu(s):  0.0%us,  0.0%sy,  0.0%ni,100.0%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   2395192k total,  2217776k used,   177416k free,   202580k buffers
Swap:  4456440k total,        0k used,  4456440k free,  1263728k cached

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND                                                                                                  
 7621 root      17   0 10160  796  656 S  0.0  0.0   0:00.00 groupadd

于是使用了pstack 发现最后调用的函数是一个sleep函数.进程为什么会引起睡眠呢?后面有调用ldap相关的函数,但是当时我没太在意,以为是要进行用户验证之类的.
[root@IP119 ~]# pstack 7621
#1  0x00000038b3299fc4 in sleep () from /lib64/libc.so.6
#2  0x00002adbc79039e8 in ?? () from /lib64/libnss_ldap.so.2
#3  0x00002adbc7904494 in _nss_ldap_search_s () from /lib64/libnss_ldap.so.2
#4  0x00002adbc790505b in _nss_ldap_getbyname () from /lib64/libnss_ldap.so.2
#5  0x00002adbc79068cb in _nss_ldap_getgrnam_r () from /lib64/libnss_ldap.so.2
#6  0x00000038b3298105 in getgrnam_r@@GLIBC_2.2.5 () from /lib64/libc.so.6
#7  0x00000038b3297760 in getgrnam () from /lib64/libc.so.6
#8  0x000000000040208d in fflush ()
#9  0x00000038b321d994 in __libc_start_main () from /lib64/libc.so.6
#10 0x0000000000401d19 in fflush ()
#11 0x00007fff05e305c8 in ?? ()
#12 0x0000000000000000 in ?? ()


再次使用strace对这个命令进行跟踪:
[root@IP119 7559]# strace groupadd ttadmin

munmap(0x2b6ab75a9000, 4096)            = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
fcntl(5, F_SETFD, FD_CLOEXEC)           = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_TCP, TCP_NODELAY, [1], 4) = 0
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr("172.28.10.117")}, 16) = -1 EINPROGRESS (Operation now in progress)
poll([{fd=5, events=POLLOUT|POLLERR|POLLHUP}], 1, 120000) = 1 ([{fd=5, revents=POLLERR|POLLHUP}])
getpeername(5, 0x7fff28b3b7a0, [4294967424]) = -1 ENOTCONN (Transport endpoint is not connected)
read(5, 0x7fff28b3b798, 1)              = -1 ECONNREFUSED (Connection refused)
shutdown(5, 2 /* send and receive */)   = -1 ENOTCONN (Transport endpoint is not connected)
close(5)                                = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0
sendto(4, "<86>Jun  9 12:38:50 groupadd[777"..., 97, MSG_NOSIGNAL, NULL, 0) = 97
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({8, 0}, {8, 0})               = 0
stat("/etc/ldap.conf", {st_mode=S_IFREG|0644, st_size=9168, ...}) = 0
geteuid()                               = 0
open("/etc/hosts", O_RDONLY)            = 5
fcntl(5, F_GETFD)                       = 0
fcntl(5, F_SETFD, FD_CLOEXEC)           = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=258, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b6ab75a9000
read(5, "# Do not remove the following li"..., 4096) = 258
read(5, "", 4096)                       = 0
close(5)                                = 0
munmap(0x2b6ab75a9000, 4096)            = 0
open("/etc/hosts", O_RDONLY)            = 5
fcntl(5, F_GETFD)                       = 0
fcntl(5, F_SETFD, FD_CLOEXEC)           = 0
fstat(5, {st_mode=S_IFREG|0644, st_size=258, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b6ab75a9000
read(5, "# Do not remove the following li"..., 4096) = 258
close(5)                                = 0
munmap(0x2b6ab75a9000, 4096)            = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
fcntl(5, F_SETFD, FD_CLOEXEC)           = 0
setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
setsockopt(5, SOL_TCP, TCP_NODELAY, [1], 4) = 0
fcntl(5, F_GETFL)                       = 0x2 (flags O_RDWR)
fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK)    = 0
connect(5, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr("172.28.10.117")}, 16) = -1 EINPROGRESS (Operation now in progress)
poll([{fd=5, events=POLLOUT|POLLERR|POLLHUP}], 1, 120000) = 1 ([{fd=5, revents=POLLERR|POLLHUP}])
getpeername(5, 0x7fff28b3b7a0, [4294967424]) = -1 ENOTCONN (Transport endpoint is not connected)
read(5, 0x7fff28b3b798, 1)              = -1 ECONNREFUSED (Connection refused)
shutdown(5, 2 /* send and receive */)   = -1 ENOTCONN (Transport endpoint is not connected)
close(5)                                = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0

stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=118, ...}) = 0
sendto(4, "<86>Jun  9 12:38:58 groupadd[777"..., 98, MSG_NOSIGNAL, NULL, 0) = 98
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({16, 0}, 0x7fff28b3ba00)      = ? ERESTART_RESTARTBLOCK (To be restarted)

发现这个进程会一直尝试连接172.28.10.117的389端口,但是连接不成功,从而导致进程进入睡眠。进一步确认该服务器原来是一台ldap服务器,不过被移除了.
应该是groupadd、useradd、groupdel之类的命令在执行时,如果配置了ldap,需要向ldap进行一些确认。ldap服务器没响应,从而使这些命令进入睡眠状态.
最后移除这台服务器的ldap之后,命令执行正常了.

你可能感兴趣的:(一次linux的groupadd hang住处理)