NFS高可用(NFS+keepalive+Sersync)

作者: 张首富
QQ: 18163201(请备注姓名)
个人博客: www.zhangshoufu.com
QQ群: 895291458

背景

NFS这样古老的共享存储技术,被众多小公司和预算有限的公司采用。现在我司需要出一套给客户使用的离线版本方案,客户们既想保证数据安全又不想花钱,所以我采用了NFS做后端数据存储。

NFS目前数据同步的方式主要两种:

  • 使用Sersync来实现主从同步
  • 第二种借助DRBD实现主从同步

但是这两种方案都只是实现了数据的主从同步,对NFS服务的高可用没有任何实现,网上大部分是采用heartbeat来实现,我这边想采用不一样的keepalive来实现这个

网络拓扑

NFS高可用(NFS+keepalive+Sersync)_第1张图片

安装前准备

服务器信息:

IP 角色/HOSTNAME
192.168.1.101 VIP(keepalive的虚拟IP)
192.168.1.112 NFS-Master
192.168.1.111 NFS-Slave
192.168.1.120 NFS-Client

系统版本信息:

# cat /etc/redhat-release
CentOS Linux release 7.5.1804 (Core)
# uname  -r
3.10.0-862.el7.x86_64

共享的目录:

nfs master 和slave 都创建一个/test_nfs 目录来当做共享目录

初始化环境安装

在三台机器上同时执行
服务器基本优化:

# Switch the base yum repository to the Aliyun mirror.
# wget is installed first because it is used to fetch the repo files;
# the stock repo file is kept as a .backup copy.
yum install wget telnet -y
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo

# Add the Aliyun EPEL repository.
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
# rpm -ivh http://dl.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-8.noarch.rpm

# Rebuild the yum metadata cache against the new repositories.
yum clean all
yum makecache

# Time synchronisation: sync once now, then once a day via root's crontab.
yum -y install ntp
/usr/sbin/ntpdate cn.pool.ntp.org
# The minute field must be 0: "* 4 * * *" would run ntpdate every minute
# between 04:00 and 04:59 instead of once a day at 04:00.
cron_job='0 4 * * * /usr/sbin/ntpdate cn.pool.ntp.org > /dev/null 2>&1'
# Only append the entry when it is not already present, so re-running this
# section does not create duplicate cron lines.
grep -qxF -- "$cron_job" /var/spool/cron/root 2> /dev/null \
    || echo "$cron_job" >> /var/spool/cron/root
systemctl restart crond.service

# Install vim.
yum -y install vim

# Raise the maximum number of open file descriptors:
# one line appended to rc.local, two nofile limits appended to limits.conf.
printf '%s\n' 'ulimit -SHn 102400' >> /etc/rc.local
printf '%s\n' \
    '*           soft   nofile       655350' \
    '*           hard   nofile       655350' >> /etc/security/limits.conf

# Disable SELinux: rewrite the config file, then switch the running
# system to permissive mode.
sed -i -e 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0

# Turn off firewalld: remove it from boot first, then stop the running service.
for action in disable stop; do
    systemctl "$action" firewalld.service
done

# sshd: switch GSSAPIAuthentication to "no" and enable "UseDNS no",
# then restart the daemon to pick up the changes.
sed -i \
    -e 's/^GSSAPIAuthentication yes$/GSSAPIAuthentication no/' \
    -e 's/#UseDNS yes/UseDNS no/' \
    /etc/ssh/sshd_config
systemctl restart sshd.service

# Kernel parameter tuning: append the settings to /etc/sysctl.conf and load
# them with sysctl -p.
#
# Notes on values that differ from the commonly copied list:
#   * net.ipv4.ip_local_port_range ends at 65535; 65536 is not a valid port
#     number.
#   * net.ipv4.tcp_tw_recycle is intentionally not set. It depends on TCP
#     timestamps (disabled below, so it would have no effect), it breaks
#     clients behind NAT when timestamps are on, and it was removed from
#     Linux in 4.12.
#   * net.ipv4.netfilter.ip_conntrack_max is intentionally not set. It is
#     the legacy name for net.nf_conntrack_max, which is set below; keeping
#     both with different values is contradictory.
#   * The conntrack keys only exist while the nf_conntrack module is loaded;
#     sysctl -p reports them as missing otherwise.
# The here-document delimiter is quoted so nothing inside is ever expanded.
cat >> /etc/sysctl.conf << 'EOF'
vm.overcommit_memory = 1
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_fin_timeout = 1
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_synack_retries = 1
net.ipv4.tcp_syn_retries = 1
net.ipv4.tcp_abort_on_overflow = 0
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.netdev_max_backlog = 262144
net.core.somaxconn = 262144
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.tcp_max_syn_backlog = 262144
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.nf_conntrack_max = 655360
net.netfilter.nf_conntrack_tcp_timeout_established = 1200
EOF
/sbin/sysctl -p

安装nfs

yum -y install nfs-utils rpcbind

配置nfs共享目录

NFS-Master:

[root@NFS-Master ~]# echo '/test_nfs 192.168.1.0/24(rw,sync,all_squash)' >> /etc/exports
[root@NFS-Master ~]# systemctl start rpcbind && systemctl start nfs
[root@NFS-Master ~]# systemctl enable rpcbind && systemctl enable nfs
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-server.service to /usr/lib/systemd/system/nfs-server.service.

NFS-Slave:

[root@NFS-Slave ~]# echo '/test_nfs 192.168.1.0/24(rw,sync,all_squash)' >> /etc/exports
[root@NFS-Slave ~]# systemctl start rpcbind && systemctl start nfs
[root@NFS-Slave ~]# systemctl enable rpcbind && systemctl enable nfs
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-server.service to /usr/lib/systemd/system/nfs-server.service.

测试挂载是否成功:

#测试NFS-Master端
[root@NFS-Client /]# mount -t nfs 192.168.1.112:/test_nfs /mnt
[root@NFS-Client /]# df -Th
文件系统                类型      容量  已用  可用 已用% 挂载点
192.168.1.112:/test_nfs nfs4      921G   69G  852G    8% /mnt
[root@NFS-Client /]# umount /mnt

#测试NFS-Slave端
[root@NFS-Client /]# mount -t nfs 192.168.1.111:/test_nfs /mnt
[root@NFS-Client /]# df -Th
文件系统                类型      容量  已用  可用 已用% 挂载点
192.168.1.111:/test_nfs nfs4      931G   53G  878G    6% /mnt
[root@NFS-Client /]# umount /mnt

安装配置rsync + Sersync

在NFS-Slave端安装rsync,因为我们在NFS-Master上写入数据要备份到NFS-Slave服务器上,所以我们在NFS-Slave上启动rsync,如果不清楚可以看rsync服务介绍这里不多讲

[root@NFS-Slave ~]# yum -y install rsync.x86_64
[root@NFS-Slave ~]# cat /etc/rsyncd.conf
uid = nfsnobody
gid = nfsnobody
port = 873
pid file = /var/rsyncd.pid
log file = /var/log/rsyncd.log
use chroot = no
max connections = 200
read only = false
list = false
fake super = yes
ignore errors
[test_nfs]
path = /test_nfs
auth users = test_nfs
secrets file = /etc/rsync.pass
hosts allow = 192.168.1.0/24
[root@NFS-Slave ~]# systemctl start rsyncd && systemctl enable rsyncd
[root@NFS-Slave ~]# echo 'test_nfs:zsf123' > /etc/rsync.pass
[root@NFS-Slave ~]# chmod 600 /etc/rsync.pass
[root@NFS-Slave ~]# chown nfsnobody:nfsnobody /test_nfs/

NFS-Master测试

[root@NFS-Master ~]# yum -y install rsync.x86_64
[root@NFS-Master ~]# chown nfsnobody:nfsnobody /test_nfs/
[root@NFS-Master ~]# echo "zsf123" > /etc/rsync.pass
[root@NFS-Master ~]# chmod 600 /etc/rsync.pass
#创建测试文件,测试推送
[root@NFS-Master ~]# cd /test_nfs/
[root@NFS-Master test_nfs]# echo "This is test file" > file.txt
[root@NFS-Master test_nfs]# rsync -arv /test_nfs/ test_nfs@192.168.1.111::test_nfs --password-file=/etc/rsync.pass
sending incremental file list
./
file.txt

sent 155 bytes  received 38 bytes  386.00 bytes/sec
total size is 18  speedup is 0.09

#到NFS-Slave上查看文件
[root@NFS-Slave ~]# ls /test_nfs/
file.txt
[root@NFS-Slave ~]# cat /test_nfs/file.txt
This is test file

NFS-Master安装Sersync
因为Sersync只有安装在NFS-Master上的时候才能检测到/test_nfs目录是否有文件写入,才能触发推送

[root@NFS-Master test_nfs]# cd /usr/local/
[root@NFS-Master local]# yum -y install wget.x86_64
#下载Sersync的安装包
[root@NFS-Master local]# wget https://raw.githubusercontent.com/wsgzao/sersync/master/sersync2.5.4_64bit_binary_stable_final.tar.gz
[root@NFS-Master local]# tar xvf sersync2.5.4_64bit_binary_stable_final.tar.gz
GNU-Linux-x86/
GNU-Linux-x86/sersync2
GNU-Linux-x86/confxml.xml
[root@NFS-Master local]# mv GNU-Linux-x86/ sersync
[root@NFS-Master local]# cd sersync/
更改sersync的配置文件
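# 注意:原文中 sed 表达式里的 XML 标签在转载时丢失(只剩下 's###g')。
# 下面第24、25、31行的替换内容是根据 sersync 默认的 confxml.xml 和上文的 rsync 配置(模块名 test_nfs、NFS-Slave 地址 192.168.1.111、密码文件 /etc/rsync.pass)重建的,执行前请先核对 confxml.xml 中对应行的实际内容。
# (原文第一条 sed 命令没有行号也没有文件名,内容已丢失,无法还原)
[root@NFS-Master local]# sed -ri '24s#<localpath watch="/opt/tongbu">#<localpath watch="/test_nfs">#g' confxml.xml
[root@NFS-Master local]# sed -ri '25s#<remote ip="127.0.0.1" name="tongbu1"/>#<remote ip="192.168.1.111" name="test_nfs"/>#g' confxml.xml
# (原文第30行的替换内容已丢失;默认文件中该行应为 rsync 的 commonParams 参数,请按需调整)
[root@NFS-Master local]# sed -ri '31s#<auth start="false" users="root" passwordfile="/etc/rsync.pas"/>#<auth start="true" users="test_nfs" passwordfile="/etc/rsync.pass"/>#g' confxml.xml
# (原文第33行的替换内容已丢失;默认文件中该行应为 rsync 的 timeout 设置,请按需调整)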

#启动Sersync
[root@NFS-Master sersync]# /usr/local/sersync/sersync2 -dro /usr/local/sersync/confxml.xml

测试:

[root@NFS-Master test_nfs]# echo "This is two test file" > two.file.txt
[root@NFS-Slave test_nfs]# ls
file.txt  two.file.txt
[root@NFS-Slave test_nfs]# cat two.file.txt
This is two test file

看到上面结果说明Sersync实时同步我们已经完成了,

安装配置keepalive

NFS-Master

[root@NFS-Master test_nfs]# yum -y install keepalived.x86_64
[root@NFS-Master test_nfs]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

global_defs {
   router_id NFS-Master
}

vrrp_instance VI_1 {
    state MASTER
    interface enp0s31f6
    virtual_router_id 51
    priority 150
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass zhangshoufu
    }
    virtual_ipaddress {
        192.168.1.101
    }
}
[root@NFS-Master test_nfs]# systemctl start  keepalived.service && systemctl enable keepalived.service

NFS-Slave

[root@NFS-Slave test_nfs]# yum -y install keepalived.x86_64
[root@NFS-Slave test_nfs]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

global_defs {
   router_id NFS-Slave
}

vrrp_instance VI_1 {
    state MASTER
    interface enp0s31f6
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass zhangshoufu
    }
    virtual_ipaddress {
        192.168.1.101
    }
}
[root@NFS-Slave test_nfs]# systemctl start  keepalived.service && systemctl enable keepalived.service

查看虚拟IP是否存在

[root@NFS-Master test_nfs]# ip a | grep  192.168.1.101
    inet 192.168.1.101/32 scope global enp0s31f6

看到上图说明keepalive启动成功,
配置文件解读

//全局定义模块
global_defs {
   router_id NFS-Master
   //运行keepalive机器的标识
}

vrrp_instance VI_1 {
    //初始状态;实际的主从由priority选举决定(优先级高的节点持有VIP),所以两台都写MASTER也能工作
    state MASTER
    //实例绑定的网卡
    interface enp0s31f6
    //组ID,值一样为一个组
    virtual_router_id 51
    //优先级,优先级高的为master
    priority 150
    //检查时间间隔
    advert_int 1
    //认证模式为密码认证
    authentication {
        auth_type PASS
        auth_pass zhangshoufu
    }
    //设置的VIP
    virtual_ipaddress {
        192.168.1.101
    }
}

NFS-Client通过VIP挂载测试

[root@NFS-Client ~]# mount -t nfs 192.168.1.101:/test_nfs /mnt
[root@NFS-Client ~]# ls /mnt/
file.txt  two.file.txt
[root@NFS-Client ~]# umount /mnt/

模拟机器Down机,测试虚拟IP地址是否会漂移

[root@NFS-Master scripts]# ip a | grep 101
    inet 192.168.1.101/32 scope global enp0s31f6
[root@NFS-Slave ~]# ip a | grep 101
[root@NFS-Slave ~]#

//关闭NFS-Master上的keepalive服务
[root@NFS-Master scripts]# systemctl stop keepalived.service
[root@NFS-Master scripts]# ip a | grep 101
[root@NFS-Master scripts]#

[root@NFS-Slave ~]# ip a | grep 101
    inet 192.168.1.101/32 scope global enp0s31f6

成功漂移
keepalive脚本
因为keepalive的漂移机制是根据keepalive这个服务是否存活来判断IP地址是否漂移的,如果是机器宕机,此方法可以直接使用;但如果是网络出现问题导致服务不可用,keepalive进程本身仍然存活,VIP就不会漂移。所以需要用下面的脚本检测网络(ping网关),网络异常时主动停止keepalive来触发漂移。

[root@NFS-Master scripts]# pwd
/usr/local/scripts
[root@NFS-Master scripts]# cat check_keepalive.sh
#!/bin/bash
# check_keepalive.sh - network watchdog for the keepalived node.
#
# Meant to be started from cron once per minute. It pings the gateway every
# INTERVAL seconds for ROUNDS rounds (29 x 2s, roughly one minute). Once
# MAX_FAILURES pings have failed within a run, keepalived is stopped so the
# VIP moves to the peer node.
#
# Fixes over the previous version:
#   * the dangling "||" in the if condition (a syntax error) is removed;
#   * the failure counter uses shell arithmetic instead of an awk call that
#     had no input and therefore read stdin;
#   * a successful ping no longer skips the sleep, so the checks are spread
#     over the whole minute instead of finishing immediately;
#   * ping gets a 1s reply timeout so a dead network cannot stretch one run
#     far beyond the cron interval;
#   * the script exits once keepalived has been stopped.

# cron provides a minimal PATH; make sure ping and systemctl are found.
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$PATH

readonly GATEWAY=192.168.1.1
readonly ROUNDS=29
readonly INTERVAL=2
readonly MAX_FAILURES=3

failures=0
for ((round = 0; round < ROUNDS; round++)); do
    if ! ping -c 1 -W 1 "$GATEWAY" &> /dev/null; then
        failures=$((failures + 1))
        if ((failures >= MAX_FAILURES)); then
            systemctl stop keepalived.service
            exit 0
        fi
    fi
    sleep "$INTERVAL"
done

加到定时任务里:

[root@NFS-Master ~]# chmod 777 /usr/local/scripts/check_keepalive.sh
[root@NFS-Master ~]# crontab -e
* * * * * /usr/local/scripts/check_keepalive.sh &> /dev/null

客户端检测脚本

#!/bin/bash
# Client-side watchdog for the NFS mount on the VIP.
#
# Runs 29 rounds with a 2s pause (about one minute when the mount is
# healthy). Whenever the mount point cannot be queried, it is lazily
# force-unmounted and mounted again from the VIP.
#
# Fixes over the previous version:
#   * mount is no longer chained behind "umount &&": if the umount fails
#     (for example because nothing is mounted) the remount still happens;
#   * df is limited to the mount point and wrapped in timeout, so a dead
#     server cannot block the script indefinitely;
#   * the exit status is tested directly instead of through `echo $?`.

# Keep the caller's PATH but make sure the system directories are included.
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$PATH

readonly NFS_EXPORT=192.168.1.101:/test_nfs
readonly MOUNT_POINT=/mnt
readonly ROUNDS=29
readonly INTERVAL=2
readonly CHECK_TIMEOUT=5

for ((round = 0; round < ROUNDS; round++)); do
    # SIGKILL is used because a process blocked on NFS I/O may not react to
    # the default SIGTERM.
    if ! timeout -s KILL "$CHECK_TIMEOUT" df -Th "$MOUNT_POINT" &> /dev/null; then
        # The umount result is deliberately ignored: the mount below must run
        # whether or not something was still mounted.
        umount -lf "$MOUNT_POINT" &> /dev/null
        mount -t nfs "$NFS_EXPORT" "$MOUNT_POINT"
    fi
    sleep "$INTERVAL"
done

不足:
这个方案会存在几秒的数据丢失,如果真想保持数据强一致,还是不要省钱,采用分布式存储吧。