记一次postgres导致cpu100%

周末想打两把训练赛,没想到朋友发来一个截图
记一次postgres导致cpu100%_第1张图片
我:嗯??wtf
于是我上服务器看了一下日志,诶我超,还真的
记一次postgres导致cpu100%_第2张图片
记一次postgres导致cpu100%_第3张图片

查看进程详情

[root@iZ7xv7q4im4c48qen2do2bZ project]# pstree -tp postgres
memory(904475)─┬─{memory}(904476)
               ├─{memory}(904477)
               ├─{memory}(904478)
               ├─{memory}(904479)
               ├─{memory}(904480)
               ├─{memory}(904856)
               └─{memory}(904857)

pg_mem(904437)─┬─{pg_mem}(904440)
               ├─{pg_mem}(904441)
               ├─{pg_mem}(904442)
               └─{pg_mem}(904444)

psql(61378)

systemd(61206)───(sd-pam)(61208)
[root@iZ7xv7q4im4c48qen2do2bZ project]# ps -aux
postgres  904437  0.1  2.3 730512 40128 ?        Ssl  12:09   0:01 postgres
postgres  904475 23.6 15.6 310828 265260 ?       Sl   12:09   3:32 postgres: autovacuum reader

查看是否有人ssh登录

[root@iZ7xv7q4im4c48qen2do2bZ project]# w
 12:26:20 up 8 days,  1:26,  0 users,  load average: 0.03, 0.15, 0.33
USER     TTY        LOGIN@   IDLE   JCPU   PCPU WHAT

查看登录记录

[root@iZ7xv7q4im4c48qen2do2bZ project]# last
root     pts/3        127.0.0.1        Sat Sep  2 12:08 - 12:08  (00:00)
root     pts/1        127.0.0.1        Mon Aug 28 10:09 - 12:02  (01:53)
root     pts/1        127.0.0.1        Fri Aug 25 17:41 - 17:41  (00:00)
root     pts/0        127.0.0.1        Fri Aug 25 11:50 - 14:04  (02:13)
root     pts/0        127.0.0.1        Fri Aug 25 11:01 - 11:50  (00:49)
reboot   system boot  5.14.0-350.el9.x Fri Aug 25 10:59   still running
root     pts/0        127.0.0.1        Fri Aug 25 09:54 - 10:59  (01:05)
admin    pts/0        100.104.200.30   Fri Aug 25 09:53 - 09:53  (00:00)
admin    pts/0        100.104.200.30   Fri Aug 25 09:53 - 09:53  (00:00)
reboot   system boot  5.14.0-350.el9.x Fri Aug 25 09:49 - 10:59  (01:10)
root     pts/0        127.0.0.1        Fri Aug 25 01:28 - 01:28  (00:00)
root     pts/0        127.0.0.1        Fri Aug 25 01:01 - 01:19  (00:17)
root     pts/1        127.0.0.1        Fri Aug 25 00:03 - 00:04  (00:01)
root     pts/0        127.0.0.1        Thu Aug 24 21:13 - 21:15  (00:01)
root     pts/0        127.0.0.1        Thu Aug 24 20:54 - 21:12  (00:18)
root     pts/0        127.0.0.1        Thu Aug 24 20:52 - 20:53  (00:01)
root     pts/0        127.0.0.1        Thu Aug 24 20:41 - 20:51  (00:10)
root     pts/0        127.0.0.1        Thu Aug 24 00:31 - 08:16  (07:44)
root     pts/2        127.0.0.1        Wed Aug 23 17:46 - 17:46  (00:00)
root     pts/2        127.0.0.1        Wed Aug 23 17:38 - 17:38  (00:00)
root     pts/2        127.0.0.1        Wed Aug 23 17:16 - 17:16  (00:00)
root     pts/2        127.0.0.1        Wed Aug 23 16:08 - 16:08  (00:00)
root     pts/2        127.0.0.1        Wed Aug 23 16:07 - 16:07  (00:00)
root     pts/2        127.0.0.1        Wed Aug 23 15:46 - 15:47  (00:00)
root     pts/1        127.0.0.1        Wed Aug 23 15:10 - 17:50  (02:39)
root     pts/1        127.0.0.1        Wed Aug 23 12:58 - 12:58  (00:00)
root     pts/1        127.0.0.1        Wed Aug 23 12:58 - 12:58  (00:00)
root     pts/1        127.0.0.1        Wed Aug 23 12:56 - 12:56  (00:00)
root     pts/1        127.0.0.1        Wed Aug 23 11:51 - 11:51  (00:00)
root     pts/1        127.0.0.1        Wed Aug 23 11:49 - 11:49  (00:00)
admin    pts/0        100.104.200.36   Wed Aug 23 11:41 - 17:57  (06:15)
admin    pts/0        100.104.200.36   Wed Aug 23 11:41 - 11:41  (00:00)
root     pts/0        127.0.0.1        Wed Aug 23 01:32 - 07:29  (05:57)
root     pts/0        127.0.0.1        Wed Aug 23 01:26 - 01:32  (00:05)
root     pts/0        127.0.0.1        Wed Aug 23 01:16 - 01:17  (00:01)
reboot   system boot  5.14.0-350.el9.x Wed Aug 23 01:06 - 10:59 (2+09:53)
admin    pts/0        100.104.200.27   Wed Aug 23 00:35 - crash  (00:30)
admin    pts/0        100.104.200.27   Wed Aug 23 00:35 - 00:35  (00:00)
reboot   system boot  5.14.0-350.el9.x Wed Aug 23 00:33 - 10:59 (2+10:26)
admin    pts/0        100.104.200.41   Wed Aug 23 00:08 - 00:32  (00:23)
admin    pts/0        100.104.200.41   Wed Aug 23 00:08 - 00:08  (00:00)
reboot   system boot  5.14.0-350.el9.x Wed Aug 23 00:08 - 10:59 (2+10:51)
admin    pts/0        100.104.200.30   Tue Aug 22 23:04 - 23:38  (00:33)
admin    pts/0        100.104.200.30   Tue Aug 22 23:04 - 23:04  (00:00)
reboot   system boot  5.14.0-350.el9.x Tue Aug 22 23:03 - 10:59 (2+11:56)
root     pts/1        127.0.0.1        Tue Aug 22 22:29 - 22:29  (00:00)
root     pts/1        127.0.0.1        Tue Aug 22 22:26 - 22:26  (00:00)
admin    pts/0        100.104.200.23   Tue Aug 22 21:50 - crash  (01:12)
admin    pts/0        100.104.200.23   Tue Aug 22 21:50 - 21:50  (00:00)
reboot   system boot  5.14.0-134.el9.x Tue Aug 22 21:45 - 10:59 (2+13:13)
reboot   system boot  5.14.0-134.el9.x Tue Aug  9 15:38 - 15:39  (00:00)

wtmp begins Wed Jul 27 14:03:31 2022

查看进程详情

 [root@iZ7xv7q4im4c48qen2do2bZ project]top -p 904475
top - 12:34:17 up 8 days,  1:34,  0 users,  load average: 2.27, 1.78, 1.04
Tasks:   1 total,   0 running,   1 sleeping,   0 stopped,   0 zombie
%Cpu(s): 97.0 us,  1.8 sy,  0.0 ni,  0.2 id,  0.0 wa,  0.8 hi,  0.2 si,  0.0 st
MiB Mem :   1660.5 total,     74.1 free,   1510.4 used,    239.6 buff/cache
MiB Swap:   1025.0 total,   1025.0 free,      0.0 used.    150.1 avail Mem 

    PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND                                                             
 904475 postgres  20   0  325500 273364      0 S 189.0  16.1  15:46.91 memory    
 [root@iZ7xv7q4im4c48qen2do2bZ project]# ll /proc/904475
total 0
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 arch_status
dr-xr-xr-x  2 postgres postgres 0 Sep  2 12:34 attr
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 autogroup
-r--------  1 postgres postgres 0 Sep  2 12:34 auxv
-r--r--r--  1 postgres postgres 0 Sep  2 12:32 cgroup
--w-------  1 postgres postgres 0 Sep  2 12:34 clear_refs
-r--r--r--  1 postgres postgres 0 Sep  2 12:09 cmdline
-rw-r--r--  1 postgres postgres 0 Sep  2 12:09 comm
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 coredump_filter
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 cpu_resctrl_groups
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 cpuset
lrwxrwxrwx  1 postgres postgres 0 Sep  2 12:09 cwd -> /tmp
-r--------  1 postgres postgres 0 Sep  2 12:34 environ
lrwxrwxrwx  1 postgres postgres 0 Sep  2 12:09 exe -> /var/tmp/other_project/memory
dr-x------  2 postgres postgres 0 Sep  2 12:09 fd
dr-xr-xr-x  2 postgres postgres 0 Sep  2 12:34 fdinfo
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 gid_map
-r--------  1 postgres postgres 0 Sep  2 12:09 io
-r--------  1 postgres postgres 0 Sep  2 12:34 ksm_merging_pages
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 limits
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 loginuid
dr-x------  2 postgres postgres 0 Sep  2 12:34 map_files
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 maps
-rw-------  1 postgres postgres 0 Sep  2 12:34 mem
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 mountinfo
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 mounts
-r--------  1 postgres postgres 0 Sep  2 12:34 mountstats
dr-xr-xr-x 56 postgres postgres 0 Sep  2 12:34 net
dr-x--x--x  2 postgres postgres 0 Sep  2 12:34 ns
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 numa_maps
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 oom_adj
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 oom_score
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 oom_score_adj
-r--------  1 postgres postgres 0 Sep  2 12:34 pagemap
-r--------  1 postgres postgres 0 Sep  2 12:34 patch_state
-r--------  1 postgres postgres 0 Sep  2 12:34 personality
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 projid_map
lrwxrwxrwx  1 postgres postgres 0 Sep  2 12:34 root -> /
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 sched
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 schedstat
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 sessionid
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 setgroups
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 smaps
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 smaps_rollup
-r--------  1 postgres postgres 0 Sep  2 12:34 stack
-r--r--r--  1 postgres postgres 0 Sep  2 12:09 stat
-r--r--r--  1 postgres postgres 0 Sep  2 12:09 statm
-r--r--r--  1 postgres postgres 0 Sep  2 12:09 status
-r--------  1 postgres postgres 0 Sep  2 12:34 syscall
dr-xr-xr-x 10 postgres postgres 0 Sep  2 12:34 task
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 timens_offsets
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 timers
-rw-rw-rw-  1 postgres postgres 0 Sep  2 12:34 timerslack_ns
-rw-r--r--  1 postgres postgres 0 Sep  2 12:34 uid_map
-r--r--r--  1 postgres postgres 0 Sep  2 12:34 wchan

这postgres在干啥,用了99%的cpu

查看系统服务

[root@iZ7xv7q4im4c48qen2do2bZ project]#systemctl  status 
 └─user-26.slice
               ├─session-5922.scope
               │ ├─904437 postgres
               │ └─904475 "postgres: autovacuum reader"
               ├─session-c7.scope
               │ └─61378 psql
               └─[email protected]
                 └─init.scope
                   ├─61206 /usr/lib/systemd/systemd --user
                   └─61208 "(sd-pam)"
[root@iZ7xv7q4im4c48qen2do2bZ project]# systemctl -t slice
  UNIT                                 LOAD   ACTIVE SUB    DESCRIPTION                    
  -.slice                              loaded active active Root Slice
  system-code\x2dserver.slice          loaded active active Slice /system/code-server
  UNIT                                 LOAD   ACTIVE SUB    DESCRIPTION                    
  -.slice                              loaded active active Root Slice
  system-code\x2dserver.slice          loaded active active Slice /system/code-server
  system-code\x2dserver\x2dafter.slice loaded active active Slice /system/code-server-after
  system-getty.slice                   loaded active active Slice /system/getty
  system-modprobe.slice                loaded active active Slice /system/modprobe
  system-serial\x2dgetty.slice         loaded active active Slice /system/serial-getty
  system-sshd\x2dkeygen.slice          loaded active active Slice /system/sshd-keygen
  system-systemd\x2dcoredump.slice     loaded active active Slice /system/systemd-coredump
  system-systemd\x2dfsck.slice         loaded active active Slice /system/systemd-fsck
  system.slice                         loaded active active System Slice
  user-26.slice                        loaded active active User Slice of UID 26
  user.slice                           loaded active active User and Session Slice

LOAD   = Reflects whether the unit definition was properly loaded.
ACTIVE = The high-level unit activation state, i.e. generalization of SUB.

[root@iZ7xv7q4im4c48qen2do2bZ project]# systemctl status user-26.slice 
● user-26.slice - User Slice of UID 26
     Loaded: loaded
    Drop-In: /usr/lib/systemd/system/user-.slice.d
             └─10-defaults.conf
     Active: active since Fri 2023-08-25 17:32:40 CST; 1 week 0 days ago
      Until: Fri 2023-08-25 17:32:40 CST; 1 week 0 days ago
       Docs: man:[email protected](5)
      Tasks: 17 (limit: 4281)
     Memory: 337.6M
        CPU: 36min 34.062s
     CGroup: /user.slice/user-26.slice
             ├─session-5922.scope
             │ ├─904437 postgres
             │ └─904475 "postgres: autovacuum reader"
             ├─session-c7.scope
             │ └─61378 psql
Sep 02 12:50:40 iZ7xv7q4im4c48qen2do2bZ systemd[910257]: Created slice User Background Tasks Slice.
Sep 02 12:50:40 iZ7xv7q4im4c48qen2do2bZ systemd[910257]: Starting Cleanup of User's Temporary Files and Directories...
Sep 02 12:50:40 iZ7xv7q4im4c48qen2do2bZ systemd[910257]: Finished Cleanup of User's Temporary Files and Directories.
Sep 02 12:51:01 iZ7xv7q4im4c48qen2do2bZ CROND[911181]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:52:01 iZ7xv7q4im4c48qen2do2bZ CROND[911305]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:53:01 iZ7xv7q4im4c48qen2do2bZ CROND[911380]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:54:01 iZ7xv7q4im4c48qen2do2bZ CROND[911454]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:55:02 iZ7xv7q4im4c48qen2do2bZ CROND[911537]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:56:01 iZ7xv7q4im4c48qen2do2bZ CROND[911877]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>
Sep 02 12:57:01 iZ7xv7q4im4c48qen2do2bZ CROND[912045]: (postgres) CMD (/var/lib/pgsql/data/pg_mem nLTlZMOfS1ekfDPMUgBASqHkjsz1bMc/WqOiBI8v>

[root@iZ7xv7q4im4c48qen2do2bZ project]# systemctl cat user-26.slice 
# /usr/lib/systemd/system/user-.slice.d/10-defaults.conf
#  SPDX-License-Identifier: LGPL-2.1-or-later
#
#  This file is part of systemd.
#
#  systemd is free software; you can redistribute it and/or modify it
#  under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 2.1 of the License, or
#  (at your option) any later version.

[Unit]
Description=User Slice of UID %j
Documentation=man:[email protected](5)
StopWhenUnneeded=yes

[Slice]
TasksMax=33%


[root@iZ7xv7q4im4c48qen2do2bZ project]#find /usr/lib/systemd/system -name *user-26*

[root@iZ7xv7q4im4c48qen2do2bZ project]# find / -name *user-26*
/sys/fs/cgroup/user.slice/user-26.slice

# 查看网络访问
[root@iZ7xv7q4im4c48qen2do2bZ user-26.slice]# netstat -antpu |grep post
tcp        0      1 172.18.42.125:35246     157.240.8.36:13333      SYN_SENT    10084/postgres: aut 

 systemd-cgtop
[root@iZ7xv7q4im4c48qen2do2bZ cgroup]# find / -name *session-122.scope*
/run/systemd/transient/session-122.scope
/run/systemd/units/invocation:session-122.scope
/sys/fs/cgroup/user.slice/user-26.slice/session-122.scope



[root@iZ7xv7q4im4c48qen2do2bZ cgroup]# systemd-cgls
├─user.slice (#1213)
│ → user.invocation_id: 3c49f5127e624ba4865bad4633078564
│ → trusted.invocation_id: 3c49f5127e624ba4865bad4633078564
│ └─user-26.slice (#15590)
│   → user.invocation_id: ffc42d8b229c438a9bde46d2ca97e78b
│   → trusted.invocation_id: ffc42d8b229c438a9bde46d2ca97e78b
│   ├─[email protected](#15660)
│   │ → user.delegate: 1
│   │ → trusted.delegate: 1
│   │ → user.invocation_id: 1178a7bb2d6c47d7b00ccc2d8e13776a
│   │ → trusted.invocation_id: 1178a7bb2d6c47d7b00ccc2d8e13776a
│   │ └─init.scope (#15695)
│   │   ├─18728 /usr/lib/systemd/systemd --user
│   │   └─18729 (sd-pam)
│   └─session-170.scope (#15835)
│     → user.invocation_id: 09dfca512d4e4535a23c270e4ce2df41
│     → trusted.invocation_id: 09dfca512d4e4535a23c270e4ce2df41
│     ├─18740 postgres
│     └─18776 postgres: autovacuum reader

确定了是利用了user-26.slice进行自启动[email protected][email protected]又启动了18776 postgres: autovacuum reader

#终止session又会自启动
systemctl kill session-122.scope

有知道怎么终止user-26.slice的跟我说下,网上找一圈没找到方法,去redhat也搜不到。

最终解决方案

# 因为这个账户是dnf install postgresql 创建的,目前postgresql服务已经停了就直接删除这个用户,阻止他自启动
userdel postgres -f

其实锁定用户应该也行,至于问题的原因是postgresql的bug还是被入侵了后面有空再看

其他

https://help.aliyun.com/knowledge_list/60787.html
https://help.aliyun.com/knowledge_list/60792.html

你可能感兴趣的:(运维,postgres,slice)