主从搭建请参考
一主一从
192.168.67.100 #主
192.168.67.101 #从
cd /usr/local/redis6/
vim sentinel.conf
#绑定授权IP
bind 0.0.0.0
#sentinel 监听端口
port 26379
#开启后台运行
daemonize yes
#日志等文件保存目录
dir "/tmp"
#将运行日志保存在 /tmp 目录下
logfile 'sentinel.log'
#配置监控的主服务器:sentinel monitor 代表监控,mymaster 代表主服务器的名称(自定义),192.168.67.100 代表被监控的 master 的 IP,6379 为端口,2 为 quorum,代表至少有 2 个哨兵认为主服务器不可用时,才会判定其客观宕机(odown)并进行 failover(故障转移)操作。注意:下文示例日志中显示的 quorum 为 1,与此处的取值不同。
sentinel monitor mymaster 192.168.67.100 6379 2
#定义服务的密码,mymaster是服务名称,123456是Redis服务器密码
#sentinel auth-pass <master-name> <password>
sentinel auth-pass mymaster 123456
redis-sentinel sentinel.conf
正常启动日志打印信息
70012:X 24 Apr 2021 05:00:10.249 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
70012:X 24 Apr 2021 05:00:10.249 # Redis version=6.0.6, bits=64, commit=00000000, modified=0, pid=70012, just started
70012:X 24 Apr 2021 05:00:10.249 # Configuration loaded
70013:X 24 Apr 2021 05:00:10.251 * Increased maximum number of open files to 10032 (it was originally set to 1024).
70013:X 24 Apr 2021 05:00:10.253 * Running mode=sentinel, port=26379.
70013:X 24 Apr 2021 05:00:10.254 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 128.
70013:X 24 Apr 2021 05:00:10.261 # Sentinel ID is ac4896f197ae98b2a557771dda7b6e24f01cecbc
70013:X 24 Apr 2021 05:00:10.261 # +monitor master mymaster 192.168.67.100 6379 quorum 1
70013:X 24 Apr 2021 05:00:10.908 * +sentinel-address-switch master mymaster 192.168.67.100 6379 ip 192.168.67.100 port 26379 for 1ac8d7c442b6f6bbc03f33045def2fa17960423e
#192.168.67.100 redis-server stop
[root@192 tmp]# service redisd stop
Stopping ...
Waiting for Redis to shutdown ...
Redis stopped
[root@192 tmp]#
127.0.0.1:6379> info replication
#Replication
role:slave
master_host:192.168.67.100
master_port:6379
master_link_status:down #master 连接断开
master_last_io_seconds_ago:-1
master_sync_in_progress:0
slave_repl_offset:270000
master_link_down_since_seconds:21
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:11aadc2d0dbc86e746d1a0a0236cfe805bf5f9ce
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:270000
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:103598
repl_backlog_histlen:166403
127.0.0.1:6379>
65100:X 24 Apr 2021 05:14:33.172 * +sentinel-address-switch master mymaster 192.168.67.100 6379 ip 192.168.67.136 port 26379 for ac4896f197ae98b2a557771dda7b6e24f01cecbc
65100:X 24 Apr 2021 05:14:33.829 # +sdown master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:33.829 # +odown master mymaster 192.168.67.100 6379 #quorum 1/1
65100:X 24 Apr 2021 05:14:33.829 # +new-epoch 12
65100:X 24 Apr 2021 05:14:33.829 # +try-failover master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:33.831 # +vote-for-leader 1ac8d7c442b6f6bbc03f33045def2fa17960423e 12
65100:X 24 Apr 2021 05:14:33.842 # ac4896f197ae98b2a557771dda7b6e24f01cecbc voted for 1ac8d7c442b6f6bbc03f33045def2fa17960423e 12
65100:X 24 Apr 2021 05:14:33.895 # +elected-leader master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:33.895 # +failover-state-select-slave master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:33.949 # +selected-slave slave 192.168.67.101:6379 192.168.67.101 6379 @ mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:33.949 * +failover-state-send-slaveof-noone slave 192.168.67.101:6379 192.168.67.101 6379 @ mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:34.006 * +failover-state-wait-promotion slave 192.168.67.101:6379 192.168.67.101 6379 @ mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:34.101 * +sentinel-address-switch master mymaster 192.168.67.100 6379 ip 192.168.67.101 port 26379 for ac4896f197ae98b2a557771dda7b6e24f01cecbc
65100:X 24 Apr 2021 05:14:34.743 # +promoted-slave slave 192.168.67.101:6379 192.168.67.101 6379 @ mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:34.743 # +failover-state-reconf-slaves master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:34.810 # +failover-end master mymaster 192.168.67.100 6379
65100:X 24 Apr 2021 05:14:34.810 # +switch-master mymaster 192.168.67.100 6379 192.168.67.101 6379 #选举成功
service redisd start
sdown 是主观宕机:如果某一个哨兵自己认为 master 宕机了,那么就是主观宕机
odown 是客观宕机:如果有 quorum 数量的哨兵都认为 master 宕机了,那么就是客观宕机
选举新的 master 时,哨兵会考虑 slave 的以下信息
(1)跟master断开连接的时长
(2)slave优先级
(3)复制offset
(4)run id
sentinel down-after-milliseconds <master-name> <milliseconds>
这个配置项指定了需要多少失效时间,一个master才会被这个sentinel主观地认为是不可用的。 单位是毫秒,默认为30秒
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs <master-name> <numslaves>
当 Sentinel 节点集合对主节点故障判定达成一致时,Sentinel 领导者节点会执行故障转移操作,选出新的主节点,原来的从节点会向新的主节点发起复制操作。parallel-syncs 就是用来限制在一次故障转移之后,同时向新的主节点发起复制操作的从节点个数,即决定从节点是并行还是串行地进行复制。1 代表每次只能有一个从节点进行复制,可以减轻 Master 的压力;
sentinel parallel-syncs mymaster 1
127.0.0.1:26379> sentinel masters
1) 1) "name"
2) "mymaster"
3) "ip"
4) "192.168.148.131"
5) "port"
6) "6379"
7) "runid"
8) "ae3c13ce22779eb97e81350a1606a8456338a330"
9) "flags"
10) "master"
..................
127.0.0.1:26379> sentinel master mymaster
1) "name"
2) "mymaster"
3) "ip"
4) "192.168.148.131"
5) "port"
6) "6379"
7) "runid"
8) "ae3c13ce22779eb97e81350a1606a8456338a330"
9) "flags"
10) "master"
....................
127.0.0.1:26379> sentinel slaves mymaster
1) 1) "name"
2) "192.168.148.133:6379"
3) "ip"
4) "192.168.148.133"
5) "port"
6) "6379"
7) "runid"
8) "32d652b47a94c49042d73d5e99516c6e1bc2d208"
9) "flags"
10) "slave"
.................
127.0.0.1:26379> sentinel get-master-addr-by-name mymaster
1) "192.168.148.131"
2) "6379"
127.0.0.1:26379> sentinel failover mymaster
OK
127.0.0.1:26379> info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=192.168.148.133:6379,slaves=2,sentinels=3
1、异步复制导致数据丢失 因为master->slave的复制是异步,所以可能有部分还没来得及复制到slave就宕机了,此时这些部分数据就丢失了。
2、集群脑裂导致数据丢失 脑裂,也就是说,某个master所在机器突然脱离了正常的网络,跟其它slave机器不能连接,但是实际上master还运行着。
- 造成的问题
此时哨兵可能就会认为 master 宕机了,然后开始选举,将其它 slave 切换成 master。这时候集群里就会有 2 个 master,也就是所谓的脑裂。此时虽然某个 slave 被切换成了 master,但是 client 可能还没来得及切换到新的 master,仍继续向旧的 master 写数据,这部分数据可能就会丢失:因为旧 master 再次恢复的时候,会被作为一个 slave 挂到新的 master 上去,自己的数据会被清空,重新从新的 master 复制数据。
redis.conf 加入以下配置
要求至少有 1 个 slave 的数据复制和同步延迟不超过 10 秒。一旦所有 slave 的数据复制和同步延迟都超过了 10 秒,master 就不会再接收任何写请求了
# 以下两项即 min-slaves 配置选项(Redis 5 及以上版本中的新名称为 min-replicas-to-write / min-replicas-max-lag,旧名称仍可使用)
min-slaves-to-write 1
min-slaves-max-lag 10