一、环境说明
192.168.5.65 master(redis、sentinel)
192.168.5.66 slave1(redis、sentinel)
192.168.5.71 slave2(redis、sentinel)
#hosts文件配置
cat >> /etc/hosts << EOF
192.168.5.65 redis65
192.168.5.66 redis66
192.168.5.71 redis71
EOF
二、修改系统参数
#修改最大可打开文件数
cat >> /etc/security/limits.conf << EOF
* soft nofile 102400
* hard nofile 102400
EOF
#TCP监听队列大小
echo "net.core.somaxconn = 32767" >> /etc/sysctl.conf
sysctl -p
#OOM相关:vm.overcommit_memory
echo "vm.overcommit_memory=1" >> /etc/sysctl.conf
sysctl -p
#关闭内核的“Transparent Huge Pages (THP)”特性(Redis建议禁用THP,以避免延迟和内存占用问题)
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >>/etc/rc.local
chmod +x /etc/rc.local
三、安装redis
[root@redis65 /]# cd /opt
[root@redis65 opt]# wget http://download.redis.io/releases/redis-5.0.0.tar.gz
[root@redis65 opt]# tar -zxvf redis-5.0.0.tar.gz
[root@redis65 opt]# cd redis-5.0.0/
[root@redis65 redis-5.0.0]# make
[root@redis65 redis-5.0.0]# make install PREFIX=/usr/local/redis
#创建实例目录
[root@redis65 /]# mkdir -p /data/redis/redis_7001
##redis65 7001配置文件(master)
cat >> /data/redis/redis_7001/redis_7001.conf <##redis66 7001配置文件(slave)
cat >> /data/redis/redis_7001/redis_7001.conf <##redis启动文件
cat >> /etc/init.d/redis_7001 </dev/null sleep 2 while [ -x $PIDFILE ] do echo "Waiting for Redis to shutdown..." sleep 1 done echo "Redis stopped" fi ;; restart|force-reload) ${0} stop ${0} start ;; *) echo "Usage: /etc/init.d/redis_7001 {start|stop|restart|force-reload}" >&2 exit 1 esac EOF
chmod +x /etc/init.d/redis_7001
/etc/init.d/redis_7001 start
chkconfig redis_7001 on
# 在redis-cli -h 127.0.0.1 -a 'password'这里会有一个问题,当在shell中输入以上命令时,控制台总会输出一串“Warning: Using a password with '-a' option on the command line interface may not be safe.”
#解决办法将标准错误去除即可,加了2>/dev/null,将标准错误丢弃即可,如:redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
# 同步确认
[root@redis65 data]# redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.65:7001> info replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1
slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1
slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0
master_replid:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:14919
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:14919
[root@redis66 /]# redis-cli -h 192.168.5.66 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.66:7001> INFO replication
# Replication
role:slave
master_host:192.168.5.65
master_port:7001
master_link_status:up
master_last_io_seconds_ago:9
master_sync_in_progress:0
slave_repl_offset:112
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:c8cbd4ae635da25193060cacaaebf8a696227476
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:112
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:112
同步正常时:
master_link_status:up
master_repl_offset #和slave_repl_offset相等,
master_last_io_seconds_ago #在10秒内。
#shell脚本随机插入数据测试
cat >> /data/11.sh </dev/null done EOF
chmod +x /data/11.sh
time sh /data/11.sh
# slave只读不允许写
192.168.5.66:7001> set yaya 465464654
(error) READONLY You can't write against a read only replica.
#停止slave后往master里写数据
cat >> /data/11.sh </dev/null done EOF
#master写完数据再开启slave,看slave是否有同步数据
192.168.5.65:7001> info keyspace
# Keyspace
db0:keys=149,expires=0,avg_ttl=0
192.168.5.65:7001> DBSIZE
(integer) 149
192.168.5.65:7001> get name112
"64we64f54f4qwe464cvw6e4"
192.168.5.65:7001> INFO replication
# Replication
role:master
connected_slaves:0
master_replid:c8cbd4ae635da25193060cacaaebf8a696227476
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:14549
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:14549
192.168.5.65:7001> INFO replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.5.66,port=7001,state=online,offset=14563,lag=1
slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1
slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0
master_replid:c8cbd4ae635da25193060cacaaebf8a696227476
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:14563
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:14563
192.168.5.66:7001> info keyspace
# Keyspace
db0:keys=149,expires=0,avg_ttl=0
192.168.5.66:7001> DBSIZE
(integer) 149
192.168.5.66:7001> get name112
"64we64f54f4qwe464cvw6e4"
192.168.5.66:7001> INFO replication
# Replication
role:slave
master_host:192.168.5.65
master_port:7001
master_link_status:up
master_last_io_seconds_ago:1
master_sync_in_progress:0
slave_repl_offset:14633
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:c8cbd4ae635da25193060cacaaebf8a696227476
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:14633
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:14550
repl_backlog_histlen:84
# 从上面可以看到数据已同步
四、redis哨兵模式
Redis哨兵为Redis提供了高可用性。实际上这意味着你可以使用哨兵模式创建一个可以不用人为干预而应对各种故障的Redis部署,哨兵模式还提供了其他的附加功能,如监控,通知,为客户端提供配置。
监控:哨兵不断的检查master和slave是否正常的运行。
通知:当监控的某台Redis实例发生问题时,可以通过API通知系统管理员和其他的应用程序。
自动故障转移:如果一个master不正常运行了,哨兵可以启动一个故障转移进程,将一个slave升级成为master,其他的slave被重新配置使用新的master,并且应用程序使用Redis服务端通知的新地址。
配置提供者:哨兵作为Redis客户端发现的权威来源:客户端连接到哨兵请求当前可靠的master的地址。如果发生故障,哨兵将报告新地址。
1、sentinel.conf配置文件(主从都一样)
[root@redis65 data]# mkdir -p /data/redis/sentinel_27001
[root@redis65 data]# cat >> /data/redis/sentinel_27001/sentinel_27001.conf << EOF
port 27001
#2表示在sentinel集群中只要有两个节点检测到redis主节点出故障就进行切换
#如果3s内mymaster无响应,则认为mymaster宕机了
#failover-timeout:故障转移的超时时间,这里设为10秒,超过该时间仍未完成则认为本次failover失败
sentinel monitor mymaster 192.168.5.65 7001 2
sentinel down-after-milliseconds mymaster 3000
sentinel failover-timeout mymaster 10000
daemonize yes
#指定工作目录
dir "/data/redis/sentinel_27001/"
protected-mode no
logfile "/data/redis/sentinel_27001/sentinel_27001.log"
#redis主节点密码
sentinel auth-pass mymaster zxc789
# Generated by CONFIG REWRITE
EOF
2、启动所有的sentinel实例
[root@redis65 data]# redis-sentinel /data/redis/sentinel_27001/sentinel_27001.conf
[root@redis65 data]# netstat -tnlp|grep 27001
tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 17731/redis-sentine
tcp6 0 0 :::27001 :::* LISTEN 17731/redis-sentine
[root@redis66 redis_7001]# netstat -tnlp|grep 27001
tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 15100/redis-sentine
tcp6 0 0 :::27001 :::* LISTEN 15100/redis-sentine
[root@db71 /]# netstat -tnlp|grep 27001
tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 13831/redis-sentine
tcp6 0 0 :::27001 :::* LISTEN 13831/redis-sentine
#sentinel监控日志,通过日志可以看到一个master、三个slave
15100:X 11 Jan 2019 16:33:19.813 * +slave slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001
15100:X 11 Jan 2019 16:33:19.998 * +slave slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001
15100:X 11 Jan 2019 16:33:20.042 * +slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001
15100:X 11 Jan 2019 16:33:21.797 * +sentinel sentinel bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 192.168.5.65 27001 @ mymaster 192.168.5.65 7001
15100:X 11 Jan 2019 16:34:00.915 * +sentinel sentinel 8437221283819083fba5f2f07f9fb3ec98169af4 192.168.5.71 27001 @ mymaster 192.168.5.65 7001
3、sentinel常用命令
[root@db71 aa]# redis-cli -p 27001
127.0.0.1:27001> sentinel masters
1) 1) "name"
   2) "mymaster"
   3) "ip"
   4) "192.168.5.65"
   5) "port"
   6) "7001"
   7) "runid"
   8) "29541866b0235fb9d8b11ca9e8e0a334cb547e47"
127.0.0.1:27001> sentinel slaves mymaster
1) 1) "name"
   2) "192.168.5.71:7001"
   3) "ip"
   4) "192.168.5.71"
   5) "port"
   6) "7001"
2) 1) "name"
   2) "192.168.5.65:7002"
   3) "ip"
   4) "192.168.5.65"
   5) "port"
   6) "7002"
   7) "runid"
   8) "a27281370ab282263e9e38766d3743112b52ae99"
   9) "flags"
   10) "slave"
3) 1) "name"
   2) "192.168.5.66:7001"
   3) "ip"
   4) "192.168.5.66"
   5) "port"
   6) "7001"
   7) "runid"
   8) "496bc3511d0ecc4e8d4965d699f851f053c94f37"
   9) "flags"
   10) "slave"
sentinel masters 列出所有监视的主节点
sentinel slaves <master-name> 获取某主节点的所有从节点信息
sentinel get-master-addr-by-name <master-name> 根据主节点名称来获取其对应的ip地址
sentinel reset <pattern> 清除所有操作状态,包括故障转移
sentinel failover <master-name> 手动将主节点转移到某节点
4、master宕机,故障转移
[root@redis65 data]# redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.65:7001> info replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1
slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1
slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0
#关闭master
[root@redis65 /]# /etc/init.d/redis_7001 stop
#通过sentinel日志查看sentinel选举新的master的过程
cat /data/redis/sentinel_27001/sentinel_27001.log
17799:X 11 Jan 2019 22:51:51.383 # +sdown master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.383 # +odown master mymaster 192.168.5.65 7001 #quorum 1/1
17799:X 11 Jan 2019 22:51:51.383 # +new-epoch 1
17799:X 11 Jan 2019 22:51:51.383 # +try-failover master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.423 # +vote-for-leader bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 1
17799:X 11 Jan 2019 22:51:51.443 # 0d39de811fec35f16192a0143b4099822837b34b voted for 0d39de811fec35f16192a0143b4099822837b34b 1
17799:X 11 Jan 2019 22:51:51.462 # 8437221283819083fba5f2f07f9fb3ec98169af4 voted for bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 1
17799:X 11 Jan 2019 22:51:51.489 # +elected-leader master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.489 # +failover-state-select-slave master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.579 # +selected-slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.579 * +failover-state-send-slaveof-noone slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:51.650 * +failover-state-wait-promotion slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:52.701 # +promoted-slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:52.701 # +failover-state-reconf-slaves master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:52.702 * +slave-reconf-sent slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:53.462 * +slave-reconf-inprog slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:53.462 * +slave-reconf-done slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:53.520 * +slave-reconf-sent slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.469 * +slave-reconf-inprog slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.469 * +slave-reconf-done slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.568 # +failover-end master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.568 # +switch-master mymaster 192.168.5.65 7001 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:57.648 # +sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
// 通过日志可以看到选举192.168.5.71 7001为新的master。为什么是192.168.5.71 7001成为新的master呢?那是因为192.168.5.71 7001这个配置文件中replica-priority参数设的最小
#查看新的主从信息
[root@db71 aa]# redis-cli -h 192.168.5.71 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.71:7001> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=192.168.5.65,port=7002,state=online,offset=3341911,lag=0
slave1:ip=192.168.5.66,port=7001,state=online,offset=3341911,lag=1
master_replid:b9e65b745379e49560ae5647a2b35f59417e6451
master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0
master_repl_offset:3341911
second_repl_offset:3305650
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2293336
repl_backlog_histlen:1048576
#查看其它slave的配置文件,发现自动修改为同步新的master
#192.168.5.71 7001配置文件中连接master的配置已删除
#开启原来的master 192.168.5.65 7001
[root@redis65 data]# /etc/init.d/redis_7001 start
Starting Redis server...
Redis is running...
#通过sentinel日志查看原master的启动变化(-sdown:说明恢复服务)
cat /data/redis/sentinel_27001/sentinel_27001.log
17799:X 12 Jan 2019 10:22:17.351 * +reboot slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
17799:X 12 Jan 2019 10:22:17.402 # -sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
#在新的master实例上看看原来的master是否变成slave
192.168.5.71:7001> info Replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.5.65,port=7002,state=online,offset=11868218,lag=0
slave1:ip=192.168.5.66,port=7001,state=online,offset=11868079,lag=1
slave2:ip=192.168.5.65,port=7001,state=online,offset=11868218,lag=0
master_replid:b9e65b745379e49560ae5647a2b35f59417e6451
master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0
master_repl_offset:11868218
second_repl_offset:3305650
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:10819643
repl_backlog_histlen:1048576
// 可以看到原来的master变成slave了,原master 192.168.5.65:7001配置文件也发生了变化,自动添加了连接新master的命令
#如果原master 192.168.5.65:7001重启后,因为配置文件或者网络的原因,没有变成新master下面的一个slave,客户端仍然向里面写数据,一旦恢复新的主从关系,这部分写入的数据将会丢失,为了避免这种情况出现,就要做以下的设置
min-slaves-to-write 1
min-slaves-max-lag 10
// 通过上面的配置,当一个redis是master时,如果它不能向至少一个slave写数据(上面的min-slaves-to-write指定了slave的数量),它将会拒绝接受客户端的写请求。由于复制是异步的,master无法向slave写数据意味着slave要么断开连接了,要么不在指定时间内向master发送同步数据的请求了(上面的min-slaves-max-lag指定了这个时间)。