系统版本centos7.9
postgresql版本13.4
repmgr版本5.2.1
主机:192.168.101.132 node1
备机:192.168.101.133 node2
[root@node1 ~]# tar -zxvf repmgr-5.2.1.tar.gz
[root@node1 ~]# cd repmgr-5.2.1
[root@node1 repmgr-5.2.1]# ./configure --prefix=/home/postgresql
//这里编译会报错,因为没有flex
[root@node1 repmgr-5.2.1]# yum install -y flex
//安装之后在执行上述编译
[root@node1 repmgr-5.2.1]# make && make install
2.1、基本信息
[root@node1 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.101.132 node1
192.168.101.133 node2
[root@node1 ~]# cat /etc/profile
... ...
unset i
unset -f pathmunge
PATH=$PATH:/home/postgresql/bin
[root@node1 ~]# su - postgres
Last login: Wed Sep 22 17:19:08 CST 2021 on pts/0
[postgres@node1 ~]$ pwd
/home/postgres
[postgres@node1 ~]$ cat .pgpass
192.168.101.132:5432:repmgr:repmgr:repmgr
192.168.101.133:5432:repmgr:repmgr:repmgr
[postgres@node1 ~]$ cat .bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
PATH=$PATH:$HOME/.local/bin:$HOME/bin
export PATH
PGHOME=/home/postgresql
export PGHOME
PGDATA=$PGHOME/data
export PGDATA
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$PGHOME/bin
export PATH
[postgres@node1 ~]$ vim /home/postgresql/data/postgresql.conf
unix_socket_directories='/home/postgresql/data'
wal_log_hints = on
archive_mode = on
archive_command = 'arch.sh %f %p'
#archive_command = 'test ! -f /home/postgres/pgarch/%f && cp %p /home/postgres/pgarch/%f'
//arch.sh脚本位置及内容
[postgres@node1 pgdata]$ pwd
/home/postgres/pgdata
[postgres@node1 pgdata]$ ll
total 4
drwxrwxr-x. 2 postgres postgres 6 Sep 23 11:15 arch
-rw-rw-r--. 1 postgres postgres 178 Sep 23 11:13 arch.sh
[postgres@node1 pgdata]$ cat arch.sh //删除7天内的归档日志。
test ! -f /home/postgresql/pgdata/arch/$1 && cp --preserve=timestamps $2 /home/postgresql/pgdata/arch/$1 ; find /home/postgresql/pgdata/arch/ -type f -mtime +7 -exec rm -f {} \;
//创建复制用户
[postgres@node1 ~]$ createuser -s -P repmgr //-P设置密码,与用户名一样即可,并且对应上述.pgpass文件中的配置信息
[postgres@node1 ~]$ createdb repmgr -O repmgr
[postgres@node1 ~]$ vim /home/postgresql/data/pg_hba.conf
# replication privilege.
host replication repmgr 192.168.101.0/24 trust
2.2、备库连接测试
[root@node2 ~]# psql 'host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2'
2.3、创建repmgr.conf文件
[root@node1 ~]# vim /etc/repmgr.conf
node_id=1
node_name=node1
conninfo='host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/home/postgresql/data'
pg_bindir='/home/postgresql/bin'
2.4、注册主库
[postgres@node1 ~]# repmgr -f /etc/repmgr.conf primary register
2.5、查看信息
[postgres@node1 ~]# repmgr cluster show
[postgres@node1 ~]# psql -d repmgr
psql (13.4)
Type "help" for help.
repmgr=# SELECT * FROM repmgr.nodes;
3.1、基本配置
[root@node2 ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.101.132 node1
192.168.101.133 node2
[root@node2 ~]# su - postgres
Last login: Wed Sep 22 17:19:08 CST 2021 on pts/0
[postgres@node2 ~]$ pwd
/home/postgres
[postgres@node2 ~]$ cat .pgpass
192.168.101.132:5432:repmgr:repmgr:repmgr
192.168.101.133:5432:repmgr:repmgr:repmgr
[postgres@node2 ~]$ cat .bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
PATH=$PATH:$HOME/.local/bin:$HOME/bin
export PATH
PGHOME=/home/postgresql
export PGHOME
PGDATA=$PGHOME/data
export PGDATA
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$PGHOME/bin
export PATH
3.2、创建repmgr.conf文件
[root@node2 ~]# vim /etc/repmgr.conf
node_id=2
node_name=node2
conninfo='host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/home/postgresql/data'
pg_bindir='/home/postgresql/bin'
3.3、克隆主库data目录
[postgres@node2 ~]$ repmgr -h 192.168.101.132 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
[postgres@node2 ~]$ repmgr -h 192.168.101.132 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone
3.4、启动pg库并注册备库
[postgres@node2 ~]$ repmgr -f /etc/repmgr.conf standby register
3.5、查看信息
[postgres@node2 ~]# repmgr cluster show
4.1、主机
[postgres@node1 ~]$ ssh-keygen -t rsa //一直回车就好
[postgres@node1 ~]$ ssh-copy-id -i .ssh/id_rsa.pub postgres@node2
//这里报错是因为node2节点postgres用户没有设置密码
/bin/ssh-copy-id: INFO: Source of key(s) to be installed: ".ssh/id_rsa.pub"
/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
postgres@node2's password:
Permission denied, please try again.
postgres@node2's password:
[root@node2 ~]# passwd postgres //设置postgres密码
//成功再次执行上述命令
[postgres@node1 ~]$ ssh node2 date //不提示需要密码,返回时间即为正确
4.2、备机
[postgres@node2 ~]$ ssh-keygen -t rsa //一直回车就好
[postgres@node2 ~]$ ssh-copy-id -i .ssh/id_rsa.pub postgres@node1
//这里报错是因为node1节点postgres用户没有设置密码
/bin/ssh-copy-id: INFO: Source of key(s) to be installed: ".ssh/id_rsa.pub"
/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
postgres@node1's password:
Permission denied, please try again.
postgres@node1's password:
[root@node1 ~]# passwd postgres //设置postgres密码
//成功再次执行上述命令
[postgres@node2 ~]$ ssh node1 date //不提示需要密码,返回时间即为正确
5.1、switchover
[postgres@node2 ~]$ repmgr standby switchover
NOTICE: executing switchover on node "node2" (ID: 2)
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
NOTICE: stopping current primary node "node1" (ID: 1)
NOTICE: issuing CHECKPOINT
DETAIL: executing server command "pg_ctl -D '/home/postgresql/data' -W -m fast stop"
INFO: checking for primary shutdown; 1 of 60 attempts ("shutdown_check_timeout")
INFO: checking for primary shutdown; 2 of 60 attempts ("shutdown_check_timeout")
INFO: checking for primary shutdown; 3 of 60 attempts ("shutdown_check_timeout")
NOTICE: current primary has been cleanly shut down at location 0/14000028
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -w -D '/home/postgresql/data' promote"
waiting for server to promote.... done
server promoted
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
INFO: local node 1 can attach to rejoin target node 2
DETAIL: local node's recovery point: 0/14000028; rejoin target node's fork point: 0/14000098
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "pg_ctl -w -D '/home/postgresql/data' start"
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2
NOTICE: switchover was successful
DETAIL: node "node2" is now primary and node "node1" is attached as standby
NOTICE: STANDBY SWITCHOVER has completed successfully
5.2、promote
模仿主机宕机
[postgres@node1 ~]$ /home/postgresql/bin/pg_ctl stop -D /home/postgresql/data
waiting for server to shut down.... done
server stopped
[postgres@node2 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+---------------+----------+----------+----------+----------+------------------------------------------------------------------
1 | node1 | primary | ? unreachable | ? | default | 100 | | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2 | node2 | standby | running | ? node1 | default | 100 | 2 | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2
WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)
- node "node1" (ID: 1) is registered as an active primary but is unreachable
- unable to connect to node "node2" (ID: 2)'s upstream node "node1" (ID: 1)
- unable to determine if node "node2" (ID: 2) is attached to its upstream node "node1" (ID: 1)
HINT: execute with --verbose option to see connection error messages
[postgres@node2 ~]$ repmgr standby promote
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
[postgres@node2 ~]$ repmgr standby promote
ERROR: STANDBY PROMOTE can only be executed on a standby node
[postgres@node2 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------------
1 | node1 | primary | - failed | ? | default | 100 | | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2 | node2 | primary | * running | | default | 100 | 4 | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2
WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)
HINT: execute with --verbose option to see connection error messages
[postgres@node1 ~]$ repmgr node rejoin -d 'host=192.168.101.133 port=5432 user=repmgr dbname=repmgr' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose
INFO: looking for configuration file in /etc
INFO: configuration file found at: "/etc/repmgr.conf"
INFO: prerequisites for using pg_rewind are met
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
NOTICE: executing pg_rewind
DETAIL: pg_rewind command is "/home/postgresql/bin/pg_rewind -D '/home/postgresql/data' --source-server='host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2'"
NOTICE: 2 files copied to /home/postgresql/data
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "/home/postgresql/bin/pg_ctl -w -D '/home/postgresql/data' start"
INFO: node "node1" (ID: 1) is pingable
INFO: node "node1" (ID: 1) has attached to its upstream node
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2
[postgres@node1 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------------
1 | node1 | standby | running | node2 | default | 100 | 3 | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2 | node2 | primary | * running | | default | 100 | 4 | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2
切换成功,主备恢复