Installing and Deploying a Rook Ceph Storage Cluster on a Kubernetes 1.24 Cluster

I. Initialize the base environment
1. Host environment information

OS                                  Hostname   IP address     Notes
AlmaLinux release 8.6 (Sky Tiger)   master01   192.168.3.31   k8s control-plane node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)   master02   192.168.3.32   k8s control-plane node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)   master03   192.168.3.33   k8s control-plane node, VIP 192.168.3.30
AlmaLinux release 8.6 (Sky Tiger)   node01     192.168.3.41   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)   node02     192.168.3.42   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)   node03     192.168.3.43   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)   node04     192.168.3.44   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2
AlmaLinux release 8.6 (Sky Tiger)   node05     192.168.3.45   k8s worker node, system disk + 10G data disk 1 + 10G data disk 2

2. Deploy the Kubernetes cluster
See: https://blog.csdn.net/lic95/article/details/125044136

II. Deploy Rook Ceph
1. Rook Ceph reference documentation
https://rook.github.io/docs/rook/latest/Getting-Started/quickstart/#deploy-the-rook-operator

2. Confirm the disks on the worker nodes; in this article the 5 worker nodes provide 10 data disks in total

[root@node01 ~]# lsblk -f
NAME               FSTYPE      LABEL UUID                                   MOUNTPOINT
nvme0n1                                                                     
├─nvme0n1p1        xfs               4f22cfd0-c208-4a72-a2d5-82ee32d7f956   /boot
└─nvme0n1p2        LVM2_member       2o3Cz0-u0vm-D81w-hysk-LwSv-cLGg-5YyA5c 
  ├─almalinux-root xfs               919eb2ea-14db-4105-b7fd-af85b1ec2dfd   /
  └─almalinux-swap swap              d246b8f0-1ee4-425b-9a37-d8d9b2781403   
nvme0n2                                                                     
nvme0n3                                                                     
[root@node01 ~]# 

# If the FSTYPE field is not empty, the disk cannot be used by a Rook OSD; clean it up manually first
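
If a disk still carries a filesystem or LVM signature, one way to clean it is sketched below (the device /dev/nvme0n2 is only an example from this environment; the commands are destructive, so double-check with lsblk first):

DISK=/dev/nvme0n2
sgdisk --zap-all ${DISK}                                       # remove any GPT/MBR partition table (gdisk package)
wipefs --all ${DISK}                                           # clear filesystem/LVM signatures
dd if=/dev/zero of=${DISK} bs=1M count=100 oflag=direct,dsync  # zero the first 100 MiB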

3. Deploy cert-manager

# Download the YAML manifest
wget https://github.com/cert-manager/cert-manager/releases/download/v1.8.0/cert-manager.yaml

# Create the cert-manager pods
kubectl apply -f cert-manager.yaml

# Check that the pods are running
[root@master01 ~]# kubectl get pods -n cert-manager
NAME                                       READY   STATUS    RESTARTS   AGE
cert-manager-6868fddcb4-kcvpp              1/1     Running   0          42s
cert-manager-cainjector-6d6bbc7965-f5trt   1/1     Running   0          42s
cert-manager-webhook-59f66d6c7b-wsw6f      1/1     Running   0          42s
[root@master01 ~]# 

4. Install the lvm2 package, which Ceph OSDs need to manage the data disks

yum install -y lvm2
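
lvm2 has to be present on every node that will run an OSD. A small sketch to install it on all worker nodes from master01, assuming passwordless SSH to the hosts listed above:

for n in node01 node02 node03 node04 node05; do
  ssh root@${n} "yum install -y lvm2"
done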

5. Deploy Rook Ceph

# Clone the specified release on a master node
git clone --single-branch --branch v1.9.4 https://github.com/rook/rook.git

# Deploy the Rook operator, then create the Ceph cluster
cd rook/deploy/examples
kubectl create -f crds.yaml -f common.yaml -f operator.yaml
kubectl create -f cluster.yaml
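
Note: by default the example cluster.yaml consumes every empty, unformatted disk on every node. If you prefer to pin OSDs to specific nodes and devices, edit the storage section of cluster.yaml before running the create command above; a sketch using this environment's device names as an example:

storage:
  useAllNodes: false
  useAllDevices: false
  nodes:
  - name: "node01"
    devices:
    - name: "nvme0n2"
    - name: "nvme0n3"
  # repeat the same block for node02 through node05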

# Before continuing, verify that rook-ceph-operator is in the "Running" state:
# Note: some images are hosted on k8s.gcr.io and may be unreachable without a proxy; alternatively, check the pod logs/events and pull the images manually onto each node
kubectl get pod -n rook-ceph
[root@master01 ~]# kubectl get pods -n rook-ceph
NAME                                               READY   STATUS      RESTARTS   AGE
csi-cephfsplugin-4bbbh                             3/3     Running     0          55m
csi-cephfsplugin-9zsjn                             3/3     Running     0          55m
csi-cephfsplugin-provisioner-5c6c4c7785-dlrfh      6/6     Running     0          55m
csi-cephfsplugin-provisioner-5c6c4c7785-fs6nz      6/6     Running     0          55m
csi-cephfsplugin-tvlxt                             3/3     Running     0          55m
csi-cephfsplugin-vj7s9                             3/3     Running     0          55m
csi-cephfsplugin-xg92l                             3/3     Running     0          55m
csi-rbdplugin-9s64s                                3/3     Running     0          55m
csi-rbdplugin-gvkbw                                3/3     Running     0          55m
csi-rbdplugin-provisioner-7c756d9bd7-9b9sm         6/6     Running     0          55m
csi-rbdplugin-provisioner-7c756d9bd7-cdlfd         6/6     Running     0          55m
csi-rbdplugin-rdtxb                                3/3     Running     0          55m
csi-rbdplugin-s9t2r                                3/3     Running     0          55m
csi-rbdplugin-x2ldf                                3/3     Running     0          55m
rook-ceph-crashcollector-node01-5c65c4845d-wtqgz   1/1     Running     0          51m
rook-ceph-crashcollector-node02-64fd8d97f7-w9mlv   1/1     Running     0          50m
rook-ceph-crashcollector-node03-675b749756-b9gjq   1/1     Running     0          49m
rook-ceph-crashcollector-node04-7dcb76b499-lc4td   1/1     Running     0          51m
rook-ceph-crashcollector-node05-79b4c99f86-sfvvf   1/1     Running     0          51m
rook-ceph-mgr-a-7dc64d847f-kzf26                   2/2     Running     0          51m
rook-ceph-mgr-b-5dc59949ff-fwkl4                   2/2     Running     0          51m
rook-ceph-mon-a-779dc5cd57-wlkhx                   1/1     Running     0          55m
rook-ceph-mon-b-b9bdf6486-t48ks                    1/1     Running     0          54m
rook-ceph-mon-c-776f7674b6-r29zr                   1/1     Running     0          51m
rook-ceph-operator-74c6447d5b-gmlmx                1/1     Running     0          58m
rook-ceph-osd-0-7d746b7b59-7zn58                   1/1     Running     0          51m
rook-ceph-osd-1-698b49669-5plgq                    1/1     Running     0          51m
rook-ceph-osd-2-777bb8bfc9-4zm56                   1/1     Running     0          51m
rook-ceph-osd-3-7568df5fd4-lgh25                   1/1     Running     0          51m
rook-ceph-osd-4-6fd6747d6-bxtxx                    1/1     Running     0          51m
rook-ceph-osd-5-868d874bc4-jpxjc                   1/1     Running     0          51m
rook-ceph-osd-6-d7d46949-fgxb2                     1/1     Running     0          50m
rook-ceph-osd-7-6bc688dcf6-t84g6                   1/1     Running     0          50m
rook-ceph-osd-8-6fb5cdb988-fcnd5                   1/1     Running     0          49m
rook-ceph-osd-9-7c595fd74d-khxdl                   1/1     Running     0          49m
rook-ceph-osd-prepare-node01-s6z6d                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node02-z6s5z                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node03-9sjtl                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node04-7bglr                 0/1     Completed   0          49m
rook-ceph-osd-prepare-node05-6rkgc                 0/1     Completed   0          49m
rook-ceph-tools-68f89f79f9-jqcg8                   1/1     Running     0          52m
[root@master01 ~]# 

6. Deploy the Rook toolbox

# Start the rook-ceph-tools pod
kubectl create -f deploy/examples/toolbox.yaml

# Wait for the toolbox pod to pull its image and reach the Running state
[root@master01 ~]# kubectl -n rook-ceph rollout status deploy/rook-ceph-tools
deployment "rook-ceph-tools" successfully rolled out
[root@master01 ~]# 

7. Check the cluster status

# Connect to the toolbox
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash

# Check the cluster
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph -s
  cluster:
    id:     da0ab8bb-e42f-42b1-b134-eb1d58888828
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum a,b,c (age 99m)
    mgr: a(active, since 98m), standbys: b
    osd: 10 osds: 10 up (since 97m), 10 in (since 97m)
 
  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   51 MiB used, 100 GiB / 100 GiB avail
    pgs:     1 active+clean
 
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph osd status
ID  HOST     USED  AVAIL  WR OPS  WR DATA  RD OPS  RD DATA  STATE      
 0  node01  5200k  9.99G      0        0       0        0   exists,up  
 1  node04  5260k  9.99G      0        0       0        0   exists,up  
 2  node05  5200k  9.99G      0        0       0        0   exists,up  
 3  node01  5264k  9.99G      0        0       0        0   exists,up  
 4  node04  5200k  9.99G      0        0       0        0   exists,up  
 5  node05  5264k  9.99G      0        0       0        0   exists,up  
 6  node02  5136k  9.99G      0        0       0        0   exists,up  
 7  node02  5136k  9.99G      0        0       0        0   exists,up  
 8  node03  5072k  9.99G      0        0       0        0   exists,up  
 9  node03  5072k  9.99G      0        0       0        0   exists,up  
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ ceph df
--- RAW STORAGE ---
CLASS     SIZE    AVAIL    USED  RAW USED  %RAW USED
ssd    100 GiB  100 GiB  51 MiB    51 MiB       0.05
TOTAL  100 GiB  100 GiB  51 MiB    51 MiB       0.05
 
--- POOLS ---
POOL                   ID  PGS  STORED  OBJECTS  USED  %USED  MAX AVAIL
device_health_metrics   1    1     0 B        0   0 B      0     32 GiB
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ rados df
POOL_NAME              USED  OBJECTS  CLONES  COPIES  MISSING_ON_PRIMARY  UNFOUND  DEGRADED  RD_OPS   RD  WR_OPS   WR  USED COMPR  UNDER COMPR
device_health_metrics   0 B        0       0       0                   0        0         0       0  0 B       0  0 B         0 B          0 B

total_objects    0
total_used       51 MiB
total_avail      100 GiB
total_space      100 GiB
[rook@rook-ceph-tools-68f89f79f9-jqcg8 /]$ 

8. Collect logs for debugging

kubectl create -f deploy/examples/toolbox-job.yaml
kubectl -n rook-ceph logs -l job-name=rook-ceph-toolbox-job

III. Dashboard configuration

# Deploy the Ceph Dashboard external service
[root@master01 examples]# kubectl apply -f dashboard-external-https.yaml
service/rook-ceph-mgr-dashboard-external-https created

# Get the dashboard admin password
[root@master01 examples]# kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath="{['data']['password']}" | base64 -d

# Password output:
}=1:6:@C>:NP!KVGId;r

# Check the new NodePort
[root@master01 examples]# kubectl get svc -n rook-ceph
NAME                                     TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)             AGE
csi-cephfsplugin-metrics                 ClusterIP   172.18.4.87      <none>        8080/TCP,8081/TCP   78m
csi-rbdplugin-metrics                    ClusterIP   172.18.187.253   <none>        8080/TCP,8081/TCP   78m
rook-ceph-admission-controller           ClusterIP   172.18.49.28     <none>        443/TCP             83m
rook-ceph-mgr                            ClusterIP   172.18.41.24     <none>        9283/TCP            80m
rook-ceph-mgr-dashboard                  ClusterIP   172.18.239.24    <none>        8443/TCP            80m
rook-ceph-mgr-dashboard-external-https   NodePort    172.18.66.56     <none>        8443:30044/TCP      11m
rook-ceph-mon-a                          ClusterIP   172.18.26.25     <none>        6789/TCP,3300/TCP   82m
rook-ceph-mon-b                          ClusterIP   172.18.147.238   <none>        6789/TCP,3300/TCP   80m
rook-ceph-mon-c                          ClusterIP   172.18.244.12    <none>        6789/TCP,3300/TCP   80m

# Access from a browser:
https://192.168.3.41:30044
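
A quick sanity check from the shell before opening a browser (sketch): -k skips verification of the dashboard's self-signed certificate, and any node IP works for the NodePort. In the browser, log in as the default user admin with the password retrieved above.

curl -k -I https://192.168.3.41:30044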

IV. Deploy RBD and CephFS storage support

# RBD:
# Create an RBD pool named replicapool and the rook-ceph-block StorageClass
[root@master01 examples]# kubectl apply -f csi/rbd/storageclass.yaml 
cephblockpool.ceph.rook.io/replicapool created
storageclass.storage.k8s.io/rook-ceph-block created

#cephfs:
[root@master01 examples]# kubectl apply -f filesystem.yaml
cephfilesystem.ceph.rook.io/myfs created
[root@master01 examples]# kubectl apply -f csi/cephfs/storageclass.yaml
storageclass.storage.k8s.io/rook-cephfs created
[root@master01 examples]# 

# Check the StorageClasses
[root@master01 examples]# kubectl get sc
NAME              PROVISIONER                     RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
rook-ceph-block   rook-ceph.rbd.csi.ceph.com      Delete          Immediate           true                   109s
rook-cephfs       rook-ceph.cephfs.csi.ceph.com   Delete          Immediate           true                   55s
[root@master01 examples]# 
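
To confirm that dynamic provisioning really works, a minimal test PVC against the rook-ceph-block StorageClass can be created; the name test-rbd-pvc below is arbitrary and only used for this check:

# test-rbd-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-rbd-pvc
spec:
  storageClassName: rook-ceph-block
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi

# Apply it; the PVC should reach the Bound state within a few seconds, then clean up
kubectl apply -f test-rbd-pvc.yaml
kubectl get pvc test-rbd-pvc
kubectl delete pvc test-rbd-pvc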

V. Deploy a Redis cluster on Ceph-backed storage as a test
1. Redis is a stateful application
  When Redis is deployed as pods in Kubernetes, each pod caches different data and pod IPs can change at any time. Deploying redis-cluster with a plain Deployment and Service therefore causes many problems, so StatefulSet + Headless Service is used instead.

2. Data persistence
  Although Redis is an in-memory cache, it still relies on disk to persist its data so that the cached data can be recovered when the service restarts after a failure.

3. Headless Service
  A headless Service is a Service without a cluster IP. Accordingly, in the Kubernetes DNS, a headless Service does not resolve to a single cluster IP but to the list of IPs of all the pods it selects.

4. StatefulSet
  A StatefulSet is the Kubernetes resource designed for stateful applications. Broadly speaking it can be seen as a variant of Deployment/ReplicationController, with the following properties:

  • Every pod managed by a StatefulSet has a unique, stable network identity generated from an ordinal index, rather than the random names and IPs a Deployment produces (for a StatefulSet named redis, the pods are redis-0, redis-1, ...)
  • Pods in a StatefulSet are started and stopped in strict order: pod N is only acted on after pods 0 through N-1 are done
  • Pods in a StatefulSet use stable persistent storage, and the corresponding PV is not destroyed when a pod is deleted
  • A StatefulSet must be used together with a Headless Service. It adds another layer on top of the DNS records the Headless Service provides, giving each pod its own DNS name of the form (see the lookup sketched below for the concrete names used in this article):
    (pod name).(headless service name)
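
For this article the per-pod names therefore look like redis-cluster-0.redis-cluster.redis-cluster.svc.cluster.local. Once the StatefulSet below is running, resolution can be checked from inside any pod (a sketch, assuming getent is available in the redis image):

# The headless Service name resolves to the IPs of all redis pods
kubectl exec -it redis-cluster-0 -n redis-cluster -- getent hosts redis-cluster.redis-cluster.svc.cluster.local
# A single pod's stable name resolves to that pod's current IP
kubectl exec -it redis-cluster-0 -n redis-cluster -- getent hosts redis-cluster-1.redis-cluster.redis-cluster.svc.cluster.local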

5. Generate the YAML manifests
  The Redis configuration file is mounted from a ConfigMap. The chage-pod-ip.sh script handles the case where a redis-cluster pod is rebuilt and its pod IP changes: it replaces the old pod IP with the new one in /data/nodes.conf, otherwise the cluster breaks. Create the files:

mkdir -p redis-cluster
cd redis-cluster

[root@master01 redis-cluster]# ll
total 8
-rw-r--r-- 1 root root 2374 Jun  2 20:48 redis-cluster-configmap.yaml
-rw-r--r-- 1 root root 1942 Jun  2 20:49 redis-cluster.yaml
[root@master01 redis-cluster]# 

[root@master01 redis-cluster]# cat redis-cluster-configmap.yaml 
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-cluster
  namespace: redis-cluster
data:
  chage-pod-ip.sh: |
    #!/bin/sh
    CLUSTER_CONFIG="/data/nodes.conf"
    if [ -f ${CLUSTER_CONFIG} ]; then
      if [ -z "${POD_IP}" ]; then
        echo "Unable to determine Pod IP address!"
        exit 1
      fi
      echo "Updating my IP to ${POD_IP} in ${CLUSTER_CONFIG}"
      sed -i.bak -e '/myself/ s/[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}/'${POD_IP}'/' ${CLUSTER_CONFIG}
    fi
    exec "$@"

  redis.conf: |
    bind 0.0.0.0
    protected-mode yes
    port 6379
    tcp-backlog 2048
    timeout 0
    tcp-keepalive 300
    daemonize no
    supervised no
    pidfile /var/run/redis.pid
    loglevel notice
    logfile /data/redis.log
    databases 16
    always-show-logo yes
    stop-writes-on-bgsave-error yes
    rdbcompression yes
    rdbchecksum yes
    dbfilename dump.rdb
    dir /data
    masterauth demo@2022
    replica-serve-stale-data yes
    replica-read-only no
    repl-diskless-sync no
    repl-diskless-sync-delay 5
    repl-disable-tcp-nodelay no
    replica-priority 100
    requirepass demo@2022
    maxclients 32768
    maxmemory-policy allkeys-lru
    lazyfree-lazy-eviction no
    lazyfree-lazy-expire no
    lazyfree-lazy-server-del no
    replica-lazy-flush no
    appendonly yes
    appendfilename "appendonly.aof"
    appendfsync everysec
    no-appendfsync-on-rewrite no
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    aof-load-truncated yes
    aof-use-rdb-preamble yes
    lua-time-limit 5000
    cluster-enabled yes
    cluster-config-file /data/nodes.conf
    cluster-node-timeout 15000
    slowlog-log-slower-than 10000
    slowlog-max-len 128
    latency-monitor-threshold 0
    notify-keyspace-events ""
    hash-max-ziplist-entries 512
    hash-max-ziplist-value 64
    list-max-ziplist-size -2
    list-compress-depth 0
    set-max-intset-entries 512
    zset-max-ziplist-entries 128
    zset-max-ziplist-value 64
    hll-sparse-max-bytes 3000
    stream-node-max-bytes 4096
    stream-node-max-entries 100
    activerehashing yes
    client-output-buffer-limit normal 0 0 0
    client-output-buffer-limit replica 256mb 64mb 60
    client-output-buffer-limit pubsub 32mb 8mb 60
    hz 10
    dynamic-hz yes
    aof-rewrite-incremental-fsync yes
    rdb-save-incremental-fsync yes


[root@master01 redis-cluster]# cat redis-cluster.yaml 
---
apiVersion: v1
kind: Service
metadata:
  namespace: redis-cluster
  name: redis-cluster
spec:
  clusterIP: None
  ports:
  - port: 6379
    targetPort: 6379
    name: client
  - port: 16379
    targetPort: 16379
    name: gossip
  selector:
    app: redis-cluster
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  namespace: redis-cluster
  name: redis-cluster
spec:
  serviceName: redis-cluster
  replicas: 6
  selector:
    matchLabels:
      app: redis-cluster
  template:
    metadata:
      labels:
        app: redis-cluster
    spec:
      terminationGracePeriodSeconds: 20
      # Pod anti-affinity: prefer spreading redis pods across different nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - redis-cluster
              topologyKey: kubernetes.io/hostname
      containers:
      - name: redis
        image: redis:5.0.13
        ports:
        - containerPort: 6379
          name: client
        - containerPort: 16379
          name: gossip
        command: ["/etc/redis/chage-pod-ip.sh", "redis-server", "/etc/redis/redis.conf"]
        env:
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        volumeMounts:
        - name: conf
          mountPath: /etc/redis/
          readOnly: false
        - name: data
          mountPath: /data
          readOnly: false
      volumes:
      - name: conf
        configMap:
          name: redis-cluster
          defaultMode: 0755
  # volumeClaimTemplates: dynamically provision one CephFS-backed PV per pod
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      storageClassName: "rook-cephfs"
      accessModes:
        - ReadWriteMany
      resources:
        requests:
          storage: 10Gi

6. Deploy

[root@master01 redis-cluster]# kubectl create ns redis-cluster
namespace/redis-cluster created
[root@master01 redis-cluster]# kubectl apply -f redis-cluster-configmap.yaml
configmap/redis-cluster created
[root@master01 redis-cluster]# kubectl apply -f redis-cluster.yaml 
service/redis-cluster created
statefulset.apps/redis-cluster created
[root@master01 redis-cluster]# 

# Check the deployment status
[root@master01 redis-cluster]# kubectl get pod -n redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE   IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          62s   10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          58s   10.244.196.149   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          50s   10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          40s   10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          32s   10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          22s   10.244.140.93    node02   <none>           <none>

[root@master01 redis-cluster]# kubectl get svc -n redis-cluster
NAME            TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)              AGE
redis-cluster   ClusterIP   None         <none>        6379/TCP,16379/TCP   94s

# Check the PVCs and PVs
[root@master01 redis-cluster]# kubectl get pvc,pv -n redis-cluster
NAME                                         STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
persistentvolumeclaim/data-redis-cluster-0   Bound    pvc-78309c22-463c-48c9-8e9f-00ed32fec2e6   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-1   Bound    pvc-6839daf7-53ed-42cf-961c-3a4aa403327f   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-2   Bound    pvc-58e79d62-415e-4bc1-9e2f-0572f9144c12   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-3   Bound    pvc-0dc7f552-4fd1-4f7a-831c-e30b2b11a27f   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-4   Bound    pvc-12532ea4-2347-4f7f-b2f5-26b5dd949a86   10Gi       RWX            rook-cephfs    16m
persistentvolumeclaim/data-redis-cluster-5   Bound    pvc-28fb6439-752e-461a-9600-13883a7bdd74   10Gi       RWX            rook-cephfs    75s

NAME                                                        CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                                STORAGECLASS   REASON   AGE
persistentvolume/pvc-0dc7f552-4fd1-4f7a-831c-e30b2b11a27f   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-3   rook-cephfs             16m
persistentvolume/pvc-12532ea4-2347-4f7f-b2f5-26b5dd949a86   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-4   rook-cephfs             16m
persistentvolume/pvc-28fb6439-752e-461a-9600-13883a7bdd74   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-5   rook-cephfs             75s
persistentvolume/pvc-58e79d62-415e-4bc1-9e2f-0572f9144c12   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-2   rook-cephfs             16m
persistentvolume/pvc-6839daf7-53ed-42cf-961c-3a4aa403327f   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-1   rook-cephfs             16m
persistentvolume/pvc-78309c22-463c-48c9-8e9f-00ed32fec2e6   10Gi       RWX            Delete           Bound    redis-cluster/data-redis-cluster-0   rook-cephfs             16m

7. Create the Redis cluster

# Get the pod IPs
[root@master01 redis-cluster]# kubectl get pod -n redis-cluster -o wide | awk '{print $6}'
IP
10.244.140.92
10.244.196.149
10.244.114.21
10.244.186.215
10.244.248.215
10.244.140.93

# Enter a redis container
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash

# Create the cluster; type "yes" at the prompt to complete creation
redis-cli -a demo@2022 --cluster create \
10.244.140.92:6379 \
10.244.196.149:6379 \
10.244.114.21:6379 \
10.244.186.215:6379 \
10.244.248.215:6379 \
10.244.140.93:6379 \
--cluster-replicas 1
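
After the cluster is created, redis-cli can also verify slot coverage and replica assignment; a sketch, run from inside any redis pod against one of the pod IPs above:

redis-cli -a demo@2022 --cluster check 10.244.140.92:6379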

8. Verify the cluster

# The cluster status is healthy
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash
root@redis-cluster-0:/data# redis-cli -c -h redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local -a 'demo@2022'
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:111
cluster_stats_messages_pong_sent:102
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:214
cluster_stats_messages_ping_received:102
cluster_stats_messages_pong_received:112
cluster_stats_messages_received:214
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:170
cluster_stats_messages_pong_sent:161
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:332
cluster_stats_messages_ping_received:161
cluster_stats_messages_pong_received:171
cluster_stats_messages_received:332
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster nodes
147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 10.244.114.21:6379@16379 myself,master - 0 1654177346000 3 connected 10923-16383
e805b85e338356615b7ad896f882d43e79281f47 10.244.186.215:6379@16379 slave 147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 0 1654177345000 4 connected
b98047a17cf7fcd144c94abac0e2576bafe9bb30 10.244.196.149:6379@16379 master - 0 1654177345674 2 connected 5461-10922
cebfdfbc97ef43d94d59cf5a87845c9b993d9954 10.244.140.92:6379@16379 master - 0 1654177343000 1 connected 0-5460
313081321f48ccae93f3a67bc43e2d6b0eae93a6 10.244.140.93:6379@16379 slave b98047a17cf7fcd144c94abac0e2576bafe9bb30 0 1654177346678 6 connected
94fbbe644f27519b348bfa6909d9bf44e680da20 10.244.248.215:6379@16379 slave cebfdfbc97ef43d94d59cf5a87845c9b993d9954 0 1654177345000 5 connected
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

9. Failure test
  Delete any pod (here the pod named redis-cluster-1). Kubernetes automatically recreates a pod with the same name (redis-cluster-1) and re-binds the original PVC and PV, and the chage-pod-ip.sh script automatically rewrites the pod IP in /data/nodes.conf to the new pod's IP.

# List the pods
[root@master01 redis-cluster]# kubectl get pods -n  redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          21s     10.244.196.151   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          9m59s   10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          9m51s   10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          9m41s   10.244.140.93    node02   <none>           <none>

# Delete the redis-cluster-1 pod
[root@master01 redis-cluster]# kubectl delete pod redis-cluster-1 -n redis-cluster
pod "redis-cluster-1" deleted

# The pod is recreated
[root@master01 redis-cluster]# kubectl get pods -n  redis-cluster -o wide
NAME              READY   STATUS              RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running             0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   0/1     ContainerCreating   0          2s      <none>           node01   <none>           <none>
redis-cluster-2   1/1     Running             0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running             0          10m     10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running             0          10m     10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running             0          9m54s   10.244.140.93    node02   <none>           <none>

# After the rebuild completes, the pod IP has changed from 10.244.196.151 to 10.244.196.152, and because pod anti-affinity is configured, the six redis pods are not all scheduled onto the same host
[root@master01 redis-cluster]# kubectl get pods -n  redis-cluster -o wide
NAME              READY   STATUS    RESTARTS   AGE     IP               NODE     NOMINATED NODE   READINESS GATES
redis-cluster-0   1/1     Running   0          10m     10.244.140.92    node02   <none>           <none>
redis-cluster-1   1/1     Running   0          4s      10.244.196.152   node01   <none>           <none>
redis-cluster-2   1/1     Running   0          10m     10.244.114.21    node05   <none>           <none>
redis-cluster-3   1/1     Running   0          10m     10.244.186.215   node03   <none>           <none>
redis-cluster-4   1/1     Running   0          10m     10.244.248.215   node04   <none>           <none>
redis-cluster-5   1/1     Running   0          9m56s   10.244.140.93    node02   <none>           <none>
[root@master01 redis-cluster]# 

# Inspect the cluster configuration files
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- ls
appendonly.aof  dump.rdb  nodes.conf  nodes.conf.bak  redis.log
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- cat nodes.conf
e805b85e338356615b7ad896f882d43e79281f47 10.244.186.215:6379@16379 slave 147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 0 1654177555790 4 connected
94fbbe644f27519b348bfa6909d9bf44e680da20 10.244.248.215:6379@16379 slave cebfdfbc97ef43d94d59cf5a87845c9b993d9954 0 1654177556797 5 connected
cebfdfbc97ef43d94d59cf5a87845c9b993d9954 10.244.140.92:6379@16379 myself,master - 0 1654177554000 1 connected 0-5460
313081321f48ccae93f3a67bc43e2d6b0eae93a6 10.244.140.93:6379@16379 slave b98047a17cf7fcd144c94abac0e2576bafe9bb30 0 1654177555000 6 connected
147ce6d4a6ece2a69c69ba62d9dcb0cc3fcd3252 10.244.114.21:6379@16379 master - 0 1654177555000 3 connected 10923-16383
b98047a17cf7fcd144c94abac0e2576bafe9bb30 10.244.196.152:6379@16379 master - 1654177555088 1654177553000 2 disconnected 5461-10922
vars currentEpoch 6 lastVoteEpoch 0
[root@master01 redis-cluster]# 

# Enter the cluster and check its status again; the cluster state has recovered to normal
[root@master01 redis-cluster]# kubectl exec -it redis-cluster-0 -n redis-cluster -- bash

root@redis-cluster-0:~# redis-cli -c -h redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local -a 'demo@2022'
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:3
cluster_stats_messages_ping_sent:1039
cluster_stats_messages_pong_sent:986
cluster_stats_messages_meet_sent:1
cluster_stats_messages_sent:2026
cluster_stats_messages_ping_received:986
cluster_stats_messages_pong_received:1034
cluster_stats_messages_received:2020
redis-cluster-2.redis-cluster.redis-cluster.svc.cluster.local:6379> 

VI. Summary
  Deploying Ceph with Rook on existing Kubernetes nodes is convenient, but because Rook automates the whole deployment and every Ceph daemon runs as a pod, maintaining the Ceph cluster afterwards is fairly difficult. I personally do not recommend deploying Ceph with Rook in production; a standalone Ceph cluster is easier to maintain. You can still reuse some of the Kubernetes nodes to run a standalone Ceph cluster, which keeps maintenance simple and lets the two clusters be maintained independently.
