1. Install a time service and synchronize the clocks
Run this on every node of the Kubernetes cluster
[root@master ~]# yum -y install ntpdate
[root@master ~]# ntpdate ntp2.aliyun.com
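This one-off ntpdate run only fixes the clock once; if the nodes should stay in sync, one option is a periodic re-sync from cron. A minimal sketch, assuming crond is running on every node (the 30-minute interval is illustrative):
# append a re-sync job against the same NTP server to root's crontab
(crontab -l 2>/dev/null; echo "*/30 * * * * /usr/sbin/ntpdate ntp2.aliyun.com") | crontab -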
2. Load the rbd kernel module
[root@master ~]# modprobe rbd
[root@master ~]# cat > /etc/rc.sysinit << EOF
#!/bin/bash
for file in /etc/sysconfig/modules/*.modules
do
[ -x \$file ] && \$file
done
EOF
[root@master ~]# cat > /etc/sysconfig/modules/rbd.modules << EOF
modprobe rbd
EOF
[root@master ~]# chmod 755 /etc/sysconfig/modules/rbd.modules
[root@master ~]# lsmod |grep rbd
rbd 83889 0
libceph 282661 1 rbd
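The rc.sysinit plus /etc/sysconfig/modules approach above keeps rbd loaded across reboots; on a systemd-based system such as CentOS 7, an alternative sketch is to let systemd-modules-load do the same job (only one of the two mechanisms is needed):
# systemd loads every module listed in /etc/modules-load.d/*.conf at boot
echo rbd > /etc/modules-load.d/rbd.conf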
1. Pull the images on every node
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/ceph:v1.2.6
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/ceph:v14.2.8
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-node-driver-registrar:v1.2.0
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-provisioner:v1.4.0
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-attacher:v1.2.0
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-snapshotter:v1.2.2
[root@k8s-master01 ~]# docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/cephcsi:v1.2.2
2. Manually re-tag the images
[root@k8s-master01 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-node-driver-registrar:v1.2.0 quay.io/k8scsi/csi-node-driver-registrar:v1.2.0
[root@k8s-master01 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-provisioner:v1.4.0 quay.io/k8scsi/csi-provisioner:v1.4.0
[root@k8s-master01 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-attacher:v1.2.0 quay.io/k8scsi/csi-attacher:v1.2.0
[root@k8s-master01 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/csi-snapshotter:v1.2.2 quay.io/k8scsi/csi-snapshotter:v1.2.2
[root@k8s-master01 ~]# docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/cephcsi:v1.2.2 quay.io/cephcsi/cephcsi:v1.2.2
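The pull and re-tag steps can also be driven by one loop so that every node runs exactly the same commands; a sketch over the image list used above:
# pull each mirror image and re-tag it with the name the CSI manifests expect
# (the two ceph images from step 1 are referenced directly by the edited manifests and need no re-tag)
for img in csi-node-driver-registrar:v1.2.0 csi-provisioner:v1.4.0 csi-attacher:v1.2.0 csi-snapshotter:v1.2.2; do
  docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/${img}
  docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/${img} quay.io/k8scsi/${img}
done
docker pull registry.cn-hangzhou.aliyuncs.com/vinc-auto/cephcsi:v1.2.2
docker tag registry.cn-hangzhou.aliyuncs.com/vinc-auto/cephcsi:v1.2.2 quay.io/cephcsi/cephcsi:v1.2.2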
1. Download Rook on the master node to deploy the Ceph cluster
Note: clone the repository locally here (into /tmp)
[root@k8s-master01 ~]# cd /tmp
[root@k8s-master01 ~]# git clone --single-branch --branch release-1.2 https://github.com/rook/rook.git
2. Configure the Ceph cluster environment
[root@master ~]# cd /tmp/rook/cluster/examples/kubernetes/ceph/
[root@master ceph]# kubectl create -f common.yaml
[root@master ceph]# sed -i 's#rook/ceph:v1.2.7#registry.cn-hangzhou.aliyuncs.com/vinc-auto/ceph:v1.2.6#g' operator.yaml
[root@master ceph]# kubectl apply -f operator.yaml
[root@master ceph]# kubectl -n rook-ceph get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
rook-ceph-operator-c78d48dd8-vpzcv 1/1 Running 0 2m54s 10.244.1.3 node-1 <none> <none>
rook-discover-bt2lc 1/1 Running 0 2m36s 10.244.2.4 node-2 <none> <none>
rook-discover-ht9gg 1/1 Running 0 2m36s 10.244.3.5 node-3 <none> <none>
rook-discover-r27t4 1/1 Running 0 2m36s 10.244.1.4 node-1 <none> <none>
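Rather than eyeballing the pod list, the next step can wait until the operator is actually Ready; a sketch, assuming the default app=rook-ceph-operator label from operator.yaml:
# block until the operator pod reports Ready, or give up after 5 minutes
kubectl -n rook-ceph wait --for=condition=Ready pod -l app=rook-ceph-operator --timeout=300s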
3. Ceph cluster deployment configuration
cluster.yaml is the production storage cluster configuration and requires at least three nodes
cluster-test.yaml is the test cluster configuration and requires only one node
cluster-minimal.yaml configures only one ceph-mon and one ceph-mgr
Edit the cluster configuration file: replace the image, disable automatic selection of all nodes and all devices, and specify the nodes and devices manually
[root@master ceph]# sed -i 's|ceph/ceph:v14.2.9|registry.cn-hangzhou.aliyuncs.com/vinc-auto/ceph:v14.2.8|g' cluster.yaml
[root@master ceph]# sed -i 's|useAllNodes: true|useAllNodes: false|g' cluster.yaml
[root@master ceph]# sed -i 's|useAllDevices: true|useAllDevices: false|g' cluster.yaml
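A quick grep confirms all three substitutions landed before the manifest is applied:
# expect the aliyuncs mirror image plus useAllNodes/useAllDevices set to false
grep -nE 'image:|useAllNodes|useAllDevices' cluster.yaml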
1. Identify the name of the second disk
[root@node-1 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 1024M 0 rom
vda 252:0 0 40G 0 disk
├─vda1 252:1 0 500M 0 part /boot
└─vda2 252:2 0 39.5G 0 part
└─centos-root 253:0 0 39.5G 0 lvm /
vdb 252:16 0 8G 0 disk
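Rook will only prepare a device as an OSD if it is empty, so it is worth confirming that vdb carries no filesystem or partition table before referencing it in cluster.yaml; a minimal check:
# FSTYPE must be empty and the disk must have no child partitions
lsblk -f /dev/vdb
# if a leftover signature shows up it can be wiped (destructive):
# wipefs -a /dev/vdb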
2. Get the cluster node names
[root@master ceph]# kubectl get node
NAME STATUS ROLES AGE VERSION
master Ready master 10h v1.19.4
node-1 Ready <none> 10h v1.19.4
node-2 Ready <none> 10h v1.19.4
node-3 Ready <none> 10h v1.19.4
[root@master ceph]# kubectl describe node master|grep kubernetes.io/hostname
kubernetes.io/hostname=master
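The same label can be listed for every node in one go, which is handy when filling in the nodes: section below:
# -L adds a column showing the value of the kubernetes.io/hostname label
kubectl get nodes -L kubernetes.io/hostname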
3. Add the Ceph cluster node configuration
Add the following under config: in the storage section
Note: name must not be an IP address; it must match the value of the node's kubernetes.io/hostname label
    config:
      metadataDevice:
      databaseSizeMB: "1024"
      journalSizeMB: "1024"
    nodes:
    - name: "node-1"
      devices:
      - name: "vdb"
        config:
          storeType: bluestore
    - name: "node-2"
      devices:
      - name: "vdb"
        config:
          storeType: bluestore
    - name: "node-3"
      devices:
      - name: "vdb"
        config:
          storeType: bluestore
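Broken YAML indentation is the most common mistake in this block; a client-side dry run at least confirms the edited manifest still parses (it does not validate the CephCluster schema on the server):
# parse check only, nothing is created
kubectl create -f cluster.yaml --dry-run=client -o yaml > /dev/null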
4. Create the Ceph cluster
[root@master ceph]# kubectl create -f cluster.yaml
cephcluster.ceph.rook.io/rook-ceph created
[root@master ceph]# kubectl -n rook-ceph get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
csi-cephfsplugin-hmnbl 3/3 Running 0 6m44s 10.0.1.11 node-1 <none> <none>
csi-cephfsplugin-lj6mp 3/3 Running 0 6m44s 10.0.1.12 node-2 <none> <none>
csi-cephfsplugin-provisioner-558b4777b-vl892 4/4 Running 0 6m44s 10.244.1.5 node-1 <none> <none>
csi-cephfsplugin-provisioner-558b4777b-zfpl5 4/4 Running 0 6m43s 10.244.2.6 node-2 <none> <none>
csi-cephfsplugin-vncpk 3/3 Running 0 6m44s 10.0.1.13 node-3 <none> <none>
csi-rbdplugin-2cmjr 3/3 Running 0 6m48s 10.0.1.11 node-1 <none> <none>
csi-rbdplugin-55xwl 3/3 Running 0 6m48s 10.0.1.13 node-3 <none> <none>
csi-rbdplugin-provisioner-55494cc8b4-hqjbz 5/5 Running 0 6m48s 10.244.3.6 node-3 <none> <none>
csi-rbdplugin-provisioner-55494cc8b4-k974x 5/5 Running 0 6m48s 10.244.2.5 node-2 <none> <none>
csi-rbdplugin-t78nf 3/3 Running 0 6m48s 10.0.1.12 node-2 <none> <none>
rook-ceph-crashcollector-node-1-f6df45cd5-dtjmb 1/1 Running 0 3m56s 10.244.1.8 node-1 <none> <none>
rook-ceph-crashcollector-node-2-546b678bbb-dxfpr 1/1 Running 0 4m38s 10.244.2.11 node-2 <none> <none>
rook-ceph-crashcollector-node-3-579889779-n2p9t 1/1 Running 0 87s 10.244.3.14 node-3 <none> <none>
rook-ceph-mgr-a-5ff48dbfbb-qd6cw 1/1 Running 0 3m29s 10.244.3.9 node-3 <none> <none>
rook-ceph-mon-a-559ccdd4c-6rnsz 1/1 Running 0 4m39s 10.244.2.9 node-2 <none> <none>
rook-ceph-mon-b-699cb56d4f-kn4fb 1/1 Running 0 4m21s 10.244.3.8 node-3 <none> <none>
rook-ceph-mon-c-7b79ff8bb4-t6zsn 1/1 Running 0 3m56s 10.244.1.7 node-1 <none> <none>
rook-ceph-operator-c78d48dd8-vpzcv 1/1 Running 0 27m 10.244.1.3 node-1 <none> <none>
rook-ceph-osd-0-5d7b89c8d5-28snp 1/1 Running 0 105s 10.244.1.10 node-1 <none> <none>
rook-ceph-osd-1-79959f5b49-6qnth 1/1 Running 0 102s 10.244.2.12 node-2 <none> <none>
rook-ceph-osd-2-676cc4df65-zms9n 1/1 Running 0 90s 10.244.3.13 node-3 <none> <none>
rook-discover-bt2lc 1/1 Running 0 27m 10.244.2.4 node-2 <none> <none>
rook-discover-ht9gg 1/1 Running 0 27m 10.244.3.5 node-3 <none> <none>
rook-discover-r27t4 1/1 Running 0 27m 10.244.1.4 node-1 <none> <none>
1. Deploy the Ceph toolbox hosted on Kubernetes
[root@master ceph]# sed -i 's|rook/ceph:v1.2.7|registry.cn-hangzhou.aliyuncs.com/vinc-auto/ceph:v1.2.6|g' toolbox.yaml
[root@master ceph]# kubectl apply -f toolbox.yaml
2. Check the Ceph cluster status
[root@master ceph]# kubectl -n rook-ceph get pod -l "app=rook-ceph-tools"
NAME READY STATUS RESTARTS AGE
rook-ceph-tools-7476c966b7-5f5kg 1/1 Running 0 40s
[root@master ceph]# NAME=$(kubectl -n rook-ceph get pod -l "app=rook-ceph-tools" -o jsonpath='{.items[0].metadata.name}')
[root@master ceph]# kubectl -n rook-ceph exec -it ${NAME} sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
sh-4.2# ceph status
  cluster:
    id:     fb3cdbc2-8fea-4346-b752-131fd1eb2baf
    health: HEALTH_WARN
            clock skew detected on mon.b, mon.c

  services:
    mon: 3 daemons, quorum a,b,c (age 10m)
    mgr: a(active, since 9m)
    osd: 3 osds: 3 up (since 7m), 3 in (since 7m)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 21 GiB / 24 GiB avail
    pgs:
sh-4.2# ceph osd status
+----+--------+-------+-------+--------+---------+--------+---------+-----------+
| id | host | used | avail | wr ops | wr data | rd ops | rd data | state |
+----+--------+-------+-------+--------+---------+--------+---------+-----------+
| 0 | node-1 | 1025M | 7162M | 0 | 0 | 0 | 0 | exists,up |
| 1 | node-2 | 1025M | 7162M | 0 | 0 | 0 | 0 | exists,up |
| 2 | node-3 | 1025M | 7162M | 0 | 0 | 0 | 0 | exists,up |
+----+--------+-------+-------+--------+---------+--------+---------+-----------+
sh-4.2# ceph osd df
ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS
0 hdd 0.00780 1.00000 8.0 GiB 1.0 GiB 1.4 MiB 0 B 1 GiB 7.0 GiB 12.52 1.00 0 up
1 hdd 0.00780 1.00000 8.0 GiB 1.0 GiB 1.4 MiB 0 B 1 GiB 7.0 GiB 12.52 1.00 0 up
2 hdd 0.00780 1.00000 8.0 GiB 1.0 GiB 1.4 MiB 0 B 1 GiB 7.0 GiB 12.52 1.00 0 up
TOTAL 24 GiB 3.0 GiB 4.1 MiB 0 B 3 GiB 21 GiB 12.52
MIN/MAX VAR: 1.00/1.00 STDDEV: 0
sh-4.2# ceph osd pool stats
there are no pools!
sh-4.2# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02339 root default
-3 0.00780 host node-1
0 hdd 0.00780 osd.0 up 1.00000 1.00000
-5 0.00780 host node-2
1 hdd 0.00780 osd.1 up 1.00000 1.00000
-7 0.00780 host node-3
2 hdd 0.00780 osd.2 up 1.00000 1.00000
sh-4.2# ceph pg stat
0 pgs: ; 0 B data, 4.3 MiB used, 21 GiB / 24 GiB avail
sh-4.2# ceph df
RAW STORAGE:
CLASS SIZE AVAIL USED RAW USED %RAW USED
hdd 24 GiB 21 GiB 4.3 MiB 3.0 GiB 12.52
TOTAL 24 GiB 21 GiB 4.3 MiB 3.0 GiB 12.52
POOLS:
POOL ID STORED OBJECTS USED %USED MAX AVAIL
sh-4.2# rados df
POOL_NAME USED OBJECTS CLONES COPIES MISSING_ON_PRIMARY UNFOUND DEGRADED RD_OPS RD WR_OPS WR USED COMPR UNDER COMPR
total_objects 0
total_used 3.0 GiB
total_avail 21 GiB
total_space 24 GiB
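The toolbox does not have to be used interactively; any of the ceph commands above can be run through kubectl exec, which is handier for scripting. A sketch reusing the ${NAME} variable set earlier (the HEALTH_WARN clock skew shown above usually clears after re-running the time synchronization from step 1 on the affected nodes):
# run one-off ceph commands in the toolbox pod without opening a shell
kubectl -n rook-ceph exec ${NAME} -- ceph status
kubectl -n rook-ceph exec ${NAME} -- ceph health detail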
The dashboard is enabled in the Ceph cluster configuration file, but it still needs some configuration before you can log in.
1. Expose the Ceph Dashboard
The dashboard Service defaults to type ClusterIP, which cannot be reached from hosts outside the Kubernetes nodes, so change ClusterIP to NodePort
[root@master ceph]# kubectl edit service rook-ceph-mgr-dashboard -n rook-ceph
service/rook-ceph-mgr-dashboard edited
[root@master ceph]# kubectl -n rook-ceph get service rook-ceph-mgr-dashboard -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
rook-ceph-mgr-dashboard NodePort 10.101.215.33 <none> 8443:30624/TCP 60m app=rook-ceph-mgr,rook_cluster=rook-ceph
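Instead of editing the Service interactively, the type can also be switched with a one-line patch, which is easier to script; a sketch:
# change the dashboard Service from ClusterIP to NodePort non-interactively
kubectl -n rook-ceph patch service rook-ceph-mgr-dashboard -p '{"spec":{"type":"NodePort"}}'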
2. Get the login password
[root@master ceph]# Ciphertext=$(kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}")
[root@master ceph]# Pass=$(echo ${Ciphertext}|base64 --decode)
[root@master ceph]# echo ${Pass}
IJpf1nsvcY
3. Log in and verify
The username is admin
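The dashboard URL can be assembled from any node address plus the NodePort shown above; a sketch, assuming the first node's InternalIP is reachable from the browser:
NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')
PORT=$(kubectl -n rook-ceph get service rook-ceph-mgr-dashboard -o jsonpath='{.spec.ports[0].nodePort}')
echo "https://${NODE_IP}:${PORT}"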
Removing the Ceph cluster configuration
[root@master ceph]# kubectl -n rook-ceph delete cephcluster rook-ceph
[root@master ceph]# kubectl -n rook-ceph get cephcluster
Cleaning up the system environment
On every node of the Ceph cluster, clear the corresponding configuration directory, wipe the data on the storage device, and reboot;
otherwise the next deployment will run into all kinds of problems and fail
[root@master ceph]# yum -y install gdisk
[root@master ceph]# sgdisk --zap-all /dev/vdb
[root@master ceph]# rm -rvf /var/lib/rook/*
[root@master ceph]# reboot
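Since every node needs the same cleanup, a small loop saves repeating it by hand; a sketch, assuming passwordless SSH as root and that vdb is the OSD device on every node (adjust both to your environment):
# wipe the Rook state directory and the OSD disk on each node, then reboot it
for node in node-1 node-2 node-3; do
  ssh root@${node} "rm -rf /var/lib/rook/* && sgdisk --zap-all /dev/vdb && reboot"
done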