Commonly used distributed file systems
Lustre, Hadoop, FastDFS, Ceph, GlusterFS
1. Use the physical host as the yum repository for all nodes
[root@room9pc01 soft]# mkdir /var/ftp/ceph
[root@room9pc01 soft]# ls
rhcs2.0-rhosp9-20161113-x86_64.iso
[root@room9pc01 soft]# mount rhcs2.0-rhosp9-20161113-x86_64.iso /var/ftp/ceph/
[root@room9pc01 soft]# ls /var/ftp/ceph/
rhceph-2.0-rhel-7-x86_64 rhel-7-server-openstack-9-rpms rhscon-2.0-rhel-7-x86_64
[root@room9pc01 soft]# cd /var/ftp/ceph/rhceph-2.0-rhel-7-x86_64/
[root@room9pc01 rhceph-2.0-rhel-7-x86_64]# ls
EULA GPL MON OSD README RPM-GPG-KEY-redhat-release Tools TRANS.TBL
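The loop mount above does not survive a reboot. A minimal sketch to make the repository persistent, assuming the ISO lives in /root/soft and vsftpd already exports /var/ftp (both paths are assumptions):
[root@room9pc01 soft]# echo "/root/soft/rhcs2.0-rhosp9-20161113-x86_64.iso /var/ftp/ceph iso9660 defaults,loop 0 0" >> /etc/fstab
[root@room9pc01 soft]# mount -a //re-mounts everything in fstab, including the ISO
[root@room9pc01 soft]# systemctl enable --now vsftpd //the FTP service must be running for ftp://192.168.4.254 to answer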
############################################################################
[root@client10 ~]# vim /etc/yum.repos.d/dvd.repo //configure the yum repositories on the client
[Centos]
name=Centos7.4
baseurl=ftp://192.168.4.254/rhel7
enabled=1
gpgcheck=0
[MON]
name=MON
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/MON
enabled=1
gpgcheck=0
[OSD]
name=OSD
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/OSD
enabled=1
gpgcheck=0
[Tools]
name=Tools
baseurl=ftp://192.168.4.254/ceph/rhceph-2.0-rhel-7-x86_64/Tools
enabled=1
gpgcheck=0
[root@client10 ~]# yum repolist
Loaded plugins: fastestmirror
Centos | 4.1 kB 00:00:00
MON | 4.1 kB 00:00:00
OSD | 4.1 kB 00:00:00
Tools | 3.8 kB 00:00:00
(1/8): MON/primary_db | 40 kB 00:00:00
(2/8): OSD/group_gz | 447 B 00:00:00
(3/8): Centos/primary_db | 4.0 MB 00:00:00
(4/8): OSD/primary_db | 31 kB 00:00:00
(5/8): MON/group_gz | 489 B 00:00:00
(6/8): Tools/group_gz | 459 B 00:00:00
(7/8): Centos/group_gz | 137 kB 00:00:00
(8/8): Tools/primary_db | 31 kB 00:00:00
Determining fastest mirrors
repo id                 repo name               status
Centos Centos7.4 4,986
MON MON 41
OSD OSD 28
Tools Tools 33
repolist: 5,088
[root@client10 ~]# for i in 11 12 13
> do
> scp /etc/yum.repos.d/dvd.repo 192.168.4.$i:/etc/yum.repos.d/
> done //copy the yum repo file to the remaining node hosts
2. Edit /etc/hosts and push it to the other hosts
[root@client10 ~]# vim /etc/hosts
192.168.4.10 client10
192.168.4.11 node1
192.168.4.12 node2
192.168.4.13 node3
[root@client10 ~]# for i in 11 12 13; do scp /etc/hosts 192.168.4.$i:/etc/; done
3. Set up passwordless SSH
[root@node1 ~]# ssh-keygen -N '' -f /root/.ssh/id_rsa //generate the key pair on node1
[root@node1 ~]# for i in 10 11 12 13
> do
> ssh-copy-id 192.168.4.$i
> done //push the public key to every host (including node1 itself)
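A quick check (not part of the original transcript) that passwordless login really works before continuing:
[root@node1 ~]# for i in 10 11 12 13; do ssh 192.168.4.$i hostname; done //should print each hostname without asking for a password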
4. Configure NTP time synchronization (the physical host acts as the NTP server)
[root@room9pc01 ~]# vim /etc/chrony.conf //edit the time-sync configuration on the physical host
server 0.centos.pool.ntp.org iburst
allow 192.168.4.0/24
local stratum 10
[root@room9pc01 ~]# systemctl restart chronyd
#################################################################
[root@node1 ~]# vim /etc/chrony.conf //edit the time-sync configuration on the node
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
server 192.168.4.254 iburst //use the physical host as the time server
[root@node1 ~]# systemctl restart chronyd
[root@node1 ~]# for i in 10 12 13
> do
> scp /etc/chrony.conf 192.168.4.$i:/etc/
> done
[root@node1 ~]# for i in client10 node2 node3
> do
> ssh $i "systemctl restart chronyd"
> done
[root@node1 ~]# chronyc sources -v //check the synchronization status
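To verify every machine in one pass (a sketch that relies on the passwordless SSH configured in step 3):
[root@node1 ~]# for i in client10 node2 node3
> do
> ssh $i "chronyc sources | tail -1" //the 192.168.4.254 line should be marked ^* once the host is synchronized
> done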
5. Add three 20 GB virtual disks to each node host
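If the nodes are KVM guests on the physical host, the disks can be created and attached roughly like this (a sketch; the domain names node1-node3 and the image path are assumptions about the lab environment):
[root@room9pc01 ~]# for n in node1 node2 node3
> do
> for d in vdb vdc vdd
> do
> qemu-img create -f qcow2 /var/lib/libvirt/images/$n-$d.qcow2 20G
> virsh attach-disk $n /var/lib/libvirt/images/$n-$d.qcow2 $d --subdriver qcow2 --persistent
> done
> done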
1. Install the deployment software
[root@node1 ~]# yum -y install ceph-deploy //node1 acts as the deployment host; install the ceph-deploy tool
[root@node1 ~]# ceph-deploy --help
[root@node1 ~]# mkdir ceph-cluster //create a working directory for ceph-deploy
[root@node1 ~]# cd ceph-cluster/
2. Create the cluster configuration
[root@node1 ceph-cluster]# ls
[root@node1 ceph-cluster]# ceph-deploy new node1 node2 node3 //the three monitors are placed on the node hosts here; in production they should run on dedicated machines
[root@node1 ceph-cluster]# ls
ceph.conf ceph-deploy-ceph.log ceph.mon.keyring
[root@node1 ceph-cluster]# cat ceph.conf
[global]
fsid = 8b8cc6ac-7792-4541-82d1-e0e6bcfb640a
mon_initial_members = node1, node2, node3
mon_host = 192.168.4.11,192.168.4.12,192.168.4.13
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
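Optionally (an addition to the original procedure), the public network can be pinned in this working copy of ceph.conf before the monitors are created; with a single NIC per node it is not strictly required:
[root@node1 ceph-cluster]# echo "public_network = 192.168.4.0/24" >> ceph.conf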
3. Install the packages on all nodes
[root@node1 ceph-cluster]# ceph-deploy install node1 node2 node3
[root@node1 ceph-cluster]# rpm -qa |grep ceph
libcephfs1-10.2.2-38.el7cp.x86_64
ceph-osd-10.2.2-38.el7cp.x86_64
ceph-deploy-1.5.33-1.el7cp.noarch
ceph-base-10.2.2-38.el7cp.x86_64
ceph-mds-10.2.2-38.el7cp.x86_64
ceph-common-10.2.2-38.el7cp.x86_64
ceph-mon-10.2.2-38.el7cp.x86_64
ceph-selinux-10.2.2-38.el7cp.x86_64
python-cephfs-10.2.2-38.el7cp.x86_64
ceph-radosgw-10.2.2-38.el7cp.x86_64
4. Initialize the mon service on all nodes (hostname resolution must be correct)
[root@node1 ceph-cluster]# ceph-deploy mon create-initial
[root@node2 ~]# ceph -s //check the cluster status from any host
cluster e865557c-60fd-4f40-af88-7cfa7457e3e2
health HEALTH_ERR
no osds
monmap e1: 3 mons at {node1=192.168.4.11:6789/0,node2=192.168.4.12:6789/0,node3=192.168.4.13:6789/0}
election epoch 4, quorum 0,1,2 node1,node2,node3
osdmap e1: 0 osds: 0 up, 0 in
flags sortbitwise
pgmap v2: 64 pgs, 1 pools, 0 bytes data, 0 objects
0 kB used, 0 kB / 0 kB avail
64 creating
5. Partition the cache disk (node1 is shown here; node2 and node3 need the same steps, see the sketch after the udev rules below)
[root@node1 ceph-cluster]# lsblk //check the disk layout before partitioning
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
vdb 252:16 0 20G 0 disk
vdc 252:32 0 20G 0 disk
vdd 252:48 0 20G 0 disk
[root@node1 ceph-cluster]# parted /dev/vdb mklabel gpt //write a GPT partition table
[root@node1 ceph-cluster]# parted /dev/vdb mkpart primary 1 50% //first partition: the first 50% of vdb
[root@node1 ceph-cluster]# parted /dev/vdb mkpart primary 50% 100% //second partition: the remaining 50% of vdb
[root@node1 ceph-cluster]# lsblk //check the layout after partitioning
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
vdb 252:16 0 20G 0 disk
├─vdb1 252:17 0 10G 0 part //vdb1 and vdb2 will serve as journal (cache) partitions for vdc and vdd
└─vdb2 252:18 0 10G 0 part
vdc 252:32 0 20G 0 disk
vdd 252:48 0 20G 0 disk
[root@node1 ceph-cluster]# chown ceph.ceph /dev/vdb1 //change the partition owner (takes effect immediately but does not survive a reboot)
[root@node1 ceph-cluster]# chown ceph.ceph /dev/vdb2
[root@node1 ceph-cluster]# vim /etc/udev/rules.d/70-vdb.rules //make the ownership change permanent via udev
ENV{DEVNAME}=="/dev/vdb1",OWNER="ceph",GROUP="ceph"
ENV{DEVNAME}=="/dev/vdb2",OWNER="ceph",GROUP="ceph"
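node2 and node3 need the same partitioning and ownership changes. A sketch that replays them from node1 over SSH (assuming identical disk names on all nodes):
[root@node1 ceph-cluster]# for n in node2 node3
> do
> ssh $n "parted /dev/vdb mklabel gpt"
> ssh $n "parted /dev/vdb mkpart primary 1 50%"
> ssh $n "parted /dev/vdb mkpart primary 50% 100%"
> ssh $n "chown ceph.ceph /dev/vdb1 /dev/vdb2"
> scp /etc/udev/rules.d/70-vdb.rules $n:/etc/udev/rules.d/
> done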
6. Zap (wipe) the data disks (run on node1)
[root@node1 ceph-cluster]# ceph-deploy disk zap node1:vdc node1:vdd
[root@node1 ceph-cluster]# ceph-deploy disk zap node2:vdc node2:vdd
[root@node1 ceph-cluster]# ceph-deploy disk zap node3:vdc node3:vdd
7. Create the OSD storage (run on node1; the argument format is host:data-disk:journal-device, so vdc and vdd hold the data while vdb1 and vdb2 serve as their journals)
[root@node1 ceph-cluster]# ceph-deploy osd create node1:vdc:/dev/vdb1 node1:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph-deploy osd create node2:vdc:/dev/vdb1 node2:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph-deploy osd create node3:vdc:/dev/vdb1 node3:vdd:/dev/vdb2
[root@node1 ceph-cluster]# ceph -s //check the cluster status
cluster e865557c-60fd-4f40-af88-7cfa7457e3e2
health HEALTH_OK //health is OK: the cluster was created successfully
monmap e1: 3 mons at {node1=192.168.4.11:6789/0,node2=192.168.4.12:6789/0,node3=192.168.4.13:6789/0}
election epoch 4, quorum 0,1,2 node1,node2,node3
osdmap e35: 6 osds: 6 up, 6 in //six OSD storage devices
flags sortbitwise
pgmap v83: 64 pgs, 1 pools, 0 bytes data, 0 objects
202 MB used, 119 GB / 119 GB avail //the cluster provides roughly 120 GB of raw capacity
64 active+clean
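An additional check (not in the original transcript) that every OSD came up on the expected host:
[root@node1 ceph-cluster]# ceph osd tree //all six OSDs should be listed as up under node1, node2 and node3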
1. List the pools
[root@node1 ceph-cluster]# ceph osd lspools
0 rbd, //the default rbd pool, backed by the cluster's 120 GB
2. Create and list images (can be run on any node; all nodes see the same images)
[root@node2 ~]# rbd create demo-image --image-feature layering --size 10G //create a 10 GB image named demo-image in the default pool (rbd)
[root@node2 ~]# rbd create rbd/image --image-feature layering --size 10G //create a 10 GB image named image, explicitly specifying the rbd pool
[root@node2 ~]# rbd list //list the images
demo-image
image
[root@node2 ~]# rbd info image //show the image details
rbd image 'image':
size 10240 MB in 2560 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
3. Resize an image on the fly
[root@node2 ~]# rbd resize --size 5G image --allow-shrink //shrink the image to 5 GB
Resizing image: 100% complete...done.
[root@node2 ~]# rbd info image
rbd image 'image':
size 5120 MB in 1280 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
[root@node2 ~]# rbd resize --size 15G image //grow the image to 15 GB
Resizing image: 100% complete...done.
[root@node2 ~]# rbd info image
rbd image 'image':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.1037238e1f29
format: 2
features: layering
flags:
4. Client access via KRBD (the kernel RBD client)
[root@client10 ~]# yum -y install ceph-common
[root@node1 ceph-cluster]# cd /etc/ceph/
[root@node1 ceph]# ls
ceph.client.admin.keyring ceph.conf rbdmap tmpgIWUMA
[root@node1 ceph]# scp ceph.conf 192.168.4.10:/etc/ceph/ //copy the cluster configuration file (it contains the cluster information) to the client
[root@node1 ceph]# scp ceph.client.admin.keyring client10:/etc/ceph/ //copy the admin keyring to the client; without it the client has no permission to access the cluster
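Handing the client the admin keyring is fine for a lab, but a restricted cephx user is safer in practice; a sketch (the user name and capabilities are assumptions):
[root@node1 ceph]# ceph auth get-or-create client.rbduser mon 'allow r' osd 'allow rwx pool=rbd' -o /etc/ceph/ceph.client.rbduser.keyring
[root@node1 ceph]# scp /etc/ceph/ceph.client.rbduser.keyring client10:/etc/ceph/ //the client would then map images with: rbd map image --id rbduser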
[root@client10 ~]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf rbdmap
[root@client10 ~]# rbd map image //map the cluster image to a local block device on the client
/dev/rbd0
[root@client10 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
rbd0 251:0 0 15G 0 disk //the client gains a 15 GB block device
[root@client10 ~]# rbd showmapped //show which images are mapped
id pool image snap device
0 rbd image - /dev/rbd0
[root@client10 ~]# rbd unmap /dev/rbd0 //unmap the device
[root@client10 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 20G 0 disk
├─vda1 252:1 0 1G 0 part /boot
└─vda2 252:2 0 19G 0 part
├─rhel-root 253:0 0 17G 0 lvm /
└─rhel-swap 253:1 0 2G 0 lvm [SWAP]
[root@client10 ~]# rbd map image //map the image again as a local block device
5. Format and mount the device on the client
[root@client10 ~]# mkfs.xfs /dev/rbd0 //create an XFS file system
[root@client10 ~]# mount /dev/rbd0 /mnt/
[root@client10 ~]# echo "test" > /mnt/test.txt //write a test file
6. Create an image snapshot
[root@node1 ceph]# rbd snap ls image //list the snapshots of image (none yet)
[root@node1 ceph]# rbd snap create image --snap image-snap1 //create a snapshot of image named image-snap1
[root@node1 ceph]# rbd snap ls image
SNAPID NAME SIZE
4 image-snap1 15360 MB
7. Roll back to a snapshot
[root@client10 ~]# rm -rf /mnt/test.txt
[root@client10 ~]# ls /mnt/test.txt
ls: cannot access /mnt/test.txt: No such file or directory
[root@client10 ~]# umount /mnt //unmount /mnt before rolling back
[root@node1 ceph]# rbd snap rollback image --snap image-snap1 //roll back to the snapshot
Rolling back to snapshot: 100% complete...done.
[root@client10 ~]# mount /dev/rbd0 /mnt //mount /mnt again
[root@client10 ~]# ls /mnt //verify the file is back
test.txt
8. Create and delete a cloned snapshot image
[root@node1 ceph]# rbd snap protect image --snap image-snap1 //protect the snapshot before cloning; otherwise, if it is deleted by mistake, clones based on it become unusable
[root@node1 ceph]# rbd snap rm image --snap image-snap1 //deleting the snapshot now fails, as expected
rbd: snapshot 'image-snap1' is protected from removal.
2019-06-03 11:45:06.908919 7fabef8bcd80 -1 librbd::Operations: snapshot is protected
[root@node1 ceph]# rbd clone image --snap image-snap1 image-clone --image-feature layering //clone a new image, image-clone, from the snapshot image-snap1
[root@node1 ceph]# rbd list //list the images
demo-image
image
image-clone
[root@node1 ceph]# rbd info image-clone
rbd image 'image-clone':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.104b238e1f29
format: 2
features: layering
flags:
parent: rbd/image@image-snap1 //the clone's data comes from the snapshot image-snap1
overlap: 15360 MB
[root@node1 ceph]# rbd flatten image-clone //to make the clone independent, copy all data from the parent snapshot into it; this can take a while
[root@node1 ceph]# rbd info image-clone
rbd image 'image-clone':
size 15360 MB in 3840 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.104b238e1f29
format: 2
features: layering
flags:
[root@node1 ceph]# rbd snap unprotect image --snap image-snap1 //remove the snapshot protection
[root@node1 ceph]# rbd snap rm image --snap image-snap1 //delete the snapshot
9. Block storage use case: the physical host, acting as a client, deploys a virtual machine whose disk is a Ceph block device
(1) Ceph authentication account
[root@room9pc01 ~]# yum -y install ceph-common
[root@room9pc01 ~]# scp 192.168.4.11:/etc/ceph/ceph.conf /etc/ceph/
[root@room9pc01 ~]# scp 192.168.4.11:/etc/ceph/ceph.client.admin.keyring /etc/ceph/
[root@room9pc01 ~]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf rbdmap
[root@room9pc01 ~]# vim secret.xml //create a secret definition so that KVM virtual machines can also access the Ceph block storage
<secret ephemeral='no' private='no'>
        <usage type='ceph'>
                <name>client.admin secret</name>
        </usage>
</secret>
[root@room9pc01 ~]# virsh secret-define --file secret.xml //define the secret and generate its UUID
Secret 4a126092-b631-43d0-b70f-2a5eb628a4ae created
[root@room9pc01 ~]# virsh secret-list
UUID                                  Usage
--------------------------------------------------------------------------------
4a126092-b631-43d0-b70f-2a5eb628a4ae ceph client.admin secret
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@room9pc01 ~]# virsh secret-undefine 4a126092-b631-43d0-b70f-2a5eb628a4ae //delete a secret by UUID (shown only as an example)
Secret 4a126092-b631-43d0-b70f-2a5eb628a4ae deleted
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@room9pc01 ~]# virsh secret-define --file secret.xml //define the secret again
Secret 328305b0-a53c-4134-b3b4-490fe7af4657 created
[root@room9pc01 ~]# cat /etc/ceph/ceph.client.admin.keyring
[client.admin]
key = AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw==
[root@room9pc01 ~]# virsh secret-set-value \
> --secret 328305b0-a53c-4134-b3b4-490fe7af4657 \
> --base64 AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw== //store the cephx admin key in the secret
Secret value set
(2) Create a new virtual machine (new) and edit its configuration file new.xml
[root@node1 ceph]# rbd create vm1-image --image-feature layering --size 20G //create a 20 GB image named vm1-image for the VM
[root@node1 ceph]# rbd list
demo-image
image
image-clone
vm1-image
[root@room9pc01 ~]# virsh edit new //edit the VM definition; see the sketch below
//in the disk section, change the disk type to network
//reference the secret UUID defined above for cephx authentication
//adjust the bus type of the target device
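A sketch of what the resulting disk element might look like after editing; the monitor address, secret UUID and image name are taken from this setup, while the target device is an assumption:
<disk type='network' device='disk'>
  <driver name='qemu' type='raw'/>
  <auth username='admin'>
    <secret type='ceph' uuid='328305b0-a53c-4134-b3b4-490fe7af4657'/>
  </auth>
  <source protocol='rbd' name='rbd/vm1-image'>
    <host name='192.168.4.11' port='6789'/>
  </source>
  <target dev='vda' bus='virtio'/>
</disk>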
(3) Viewed in KVM, the disk of the virtual machine new is now a volume on the Ceph cluster
(1) Deploy node3 as the MDS node (note: in production the MDS should normally be a dedicated host, configured with NTP, the same yum repositories and passwordless SSH, and with ceph-mds installed)
[root@node3 ~]# rpm -qa |grep ceph-mds
ceph-mds-10.2.2-38.el7cp.x86_64
[root@node1 ~]# cd ceph-cluster/
[root@node1 ceph-cluster]# ceph-deploy mds create node3 //start the mds service on node3
[root@node3 ~]# systemctl status [email protected]
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy admin mds_node //sync the configuration file and keyring to the mds node; skipped here because node3 already has them
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(2) Create the storage pools
[root@node3 ~]# ceph osd lspools //list the existing pools
0 rbd,
[root@node3 ~]# ceph osd pool create cephfs_data 128 //create the data pool (file contents)
[root@node3 ~]# ceph osd pool create cephfs_metadata 128 //create the metadata pool (inode information)
[root@node3 ~]# ceph osd lspools
0 rbd,1 cephfs_data,2 cephfs_metadata,
[root@node3 ~]# ceph mds stat //check the mds status
e2:, 1 up:standby
(3) Create the Ceph file system
[root@node3 ~]# ceph fs new myfs1 cephfs_metadata cephfs_data //create the file system; note that the metadata pool is listed first, then the data pool
[root@node3 ~]# ceph mds stat
e5: 1/1/1 up {0=node3=up:active}
[root@node3 ~]# ceph fs ls //list the file systems
name: myfs1, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
(4) Mount on the client
[root@client10 ~]# mount -t ceph 192.168.4.11:6789:/ /mnt/cephfs/ \
> -o name=admin,secret=AQAV3vNczPvQCBAA9UHZSC3jiL3NblRULDEcVw== //mount the root of the Ceph file system on the local directory /mnt/cephfs; secret is the key value from the Ceph keyring file
[root@client10 ~]# df -h /mnt/cephfs/ //check the mounted file system
Filesystem           Size  Used Avail Use% Mounted on
192.168.4.11:6789:/  120G  500M  120G   1% /mnt/cephfs
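Passing the key on the command line leaves it in the shell history; the kernel client also accepts a secret file (a sketch; the file path is an assumption):
[root@client10 ~]# awk '/key/ {print $3}' /etc/ceph/ceph.client.admin.keyring > /etc/ceph/admin.secret
[root@client10 ~]# mount -t ceph 192.168.4.11:6789:/ /mnt/cephfs/ -o name=admin,secretfile=/etc/ceph/admin.secret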
Deploy node3 as the RGW node (note: in production the RGW node should normally be a dedicated host, configured with NTP, the same yum repositories and passwordless SSH, and with ceph-radosgw installed)
[root@node3 ~]# rpm -qa |grep ceph-radosgw
ceph-radosgw-10.2.2-38.el7cp.x86_64
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy install --rgw rgw_node //a dedicated rgw node host needs ceph-radosgw installed
[root@node1 ceph-cluster]# ceph-deploy admin rgw_node //sync the configuration file and keyring to the rgw node; skipped here because node3 already has them
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
[root@node1 ceph-cluster]# ceph-deploy rgw create node3 //start the rgw service on node3
[root@node3 ~]# systemctl status [email protected] //check that the rgw service started on node3
[root@node3 ~]# vim /etc/ceph/ceph.conf
#add the following section to change the rgw listening port
[client.rgw.node3]
host = node3
rgw_frontends = "civetweb port=8000"
[root@node3 ~]# systemctl restart [email protected]
[root@node3 ~]# netstat -ntulp |grep :8000
Once the object storage gateway above is deployed, an application has to be written against it to actually use the object store (RGW exposes S3- and Swift-compatible APIs).
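As a minimal sketch of that last step (the user name and the test below are assumptions, not part of the original lab): create an S3 user with radosgw-admin and point any S3-compatible client at node3:8000 with the generated keys.
[root@node3 ~]# radosgw-admin user create --uid="testuser" --display-name="Test User" //prints the access_key and secret_key for the new user
[root@client10 ~]# curl http://node3:8000 //an anonymous request should return an empty ListAllMyBucketsResult XML document, proving the gateway answers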