一、准备工作

Ceph版本:v13.2.5 mimic稳定版

1、Ceph上准备存储池

[root@ceph-node1 ceph]# ceph osd pool create k8s 128 128
pool 'k8s' created
[root@ceph-node1 ceph]# ceph osd pool ls
k8s

2、Ceph上准备K8S客户端账号

本环境中直接使用了Ceph的admin账号,当然生产环境中还是要根据不同功能的客户端分配不同的账号,例如:
ceph auth get-or-create client.k8s mon 'allow r' osd 'allow rwx pool=k8s' -o ceph.client.k8s.keyring
获取账号的密钥:

[root@ceph-node1 ceph]# ceph auth get-key client.admin | base64
QVFDMmIrWmNEL3JTS2hBQWwwdmR3eGJGMmVYNUM3SjdDUGZZbkE9PQ==

3、为controller-manager提供rbd命令

使用StorageClass动态创建PV时,controller-manager会自动在Ceph上创建image,所以我们要为其准备好rbd命令。
(1) 如果集群是用kubeadm部署的,由于controller-manager官方镜像中没有rbd命令,所以我们需要部署外部的rbd-provisioner来创建image。将以下清单保存为rbd-provisioner.yaml:

# ClusterRole "rbd-provisioner": cluster-wide permissions for the provisioner.
# It may manage PersistentVolumes, read and update PersistentVolumeClaims,
# read StorageClasses, write Events, and get/list the Services named
# kube-dns and coredns.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: rbd-provisioner
rules:
  # PersistentVolumes are created and deleted by the provisioner.
  - apiGroups:
      - ""
    resources:
      - persistentvolumes
    verbs:
      - get
      - list
      - watch
      - create
      - delete
  # Claims are watched and updated.
  - apiGroups:
      - ""
    resources:
      - persistentvolumeclaims
    verbs:
      - get
      - list
      - watch
      - update
  # StorageClasses are read-only.
  - apiGroups:
      - storage.k8s.io
    resources:
      - storageclasses
    verbs:
      - get
      - list
      - watch
  # Events are written, never read.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
      - update
      - patch
  # Access to Services is restricted to the two DNS service names.
  - apiGroups:
      - ""
    resources:
      - services
    resourceNames:
      - kube-dns
      - coredns
    verbs:
      - list
      - get
--- 
# Binds the ClusterRole "rbd-provisioner" to the ServiceAccount
# "rbd-provisioner" in the "default" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: rbd-provisioner
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: rbd-provisioner
subjects:
  - kind: ServiceAccount
    namespace: default
    name: rbd-provisioner
--- 
# Namespaced Role "rbd-provisioner": read Secrets and manage Endpoints.
# No metadata.namespace is set, so the Role lands in whichever namespace the
# manifest is applied to.
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: rbd-provisioner
rules:
  # Secrets are only read.
  - apiGroups:
      - ""
    resources:
      - secrets
    verbs:
      - get
  # Endpoints are read and written.
  - apiGroups:
      - ""
    resources:
      - endpoints
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
--- 
# Binds the namespaced Role "rbd-provisioner" to the ServiceAccount
# "rbd-provisioner" in the "default" namespace.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: rbd-provisioner
subjects:
  - kind: ServiceAccount
    namespace: default
    name: rbd-provisioner
roleRef:
  kind: Role
  name: rbd-provisioner
  apiGroup: rbac.authorization.k8s.io
--- 
# Deployment running a single replica of the external rbd-provisioner.
# Uses apps/v1: the extensions/v1beta1 Deployment API was removed in
# Kubernetes 1.16, and apps/v1 requires an explicit spec.selector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rbd-provisioner
spec:
  replicas: 1
  # Required by apps/v1; must match the pod template labels below.
  selector:
    matchLabels:
      app: rbd-provisioner
  strategy:
    # Recreate terminates the old pod before a replacement is started.
    type: Recreate
  template:
    metadata:
      labels:
        app: rbd-provisioner
    spec:
      # serviceAccountName is the current field; serviceAccount is a deprecated alias.
      serviceAccountName: rbd-provisioner
      containers:
      - name: rbd-provisioner
        image: quay.io/external_storage/rbd-provisioner:latest
        env:
        # Must equal the `provisioner` field of the StorageClass served by this pod.
        - name: PROVISIONER_NAME
          value: ceph.com/rbd
--- 
# ServiceAccount referenced by the rbd-provisioner Deployment and by the
# subjects of the ClusterRoleBinding / RoleBinding.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: rbd-provisioner

kubectl apply -f rbd-provisioner.yaml
注意:rbd-provisioner的镜像要和ceph的版本适配,这里镜像使用最新的,根据官方提示已支持ceph mimic版。
K8S使用Ceph RBD作为后端存储_第1张图片
(2) 如果集群是用二进制方式部署的,直接在master节点安装ceph-common即可。
YUM源:

# Yum repository definitions for the Ceph mimic release on EL7.
# Packages are fetched over HTTPS (same host as the GPG key) and are
# signature-checked against the Ceph release key.
# NOTE: priority= is only honoured when the yum priorities plugin
# (yum-plugin-priorities) is installed.

# Architecture-specific binary packages.
[Ceph]
name=Ceph packages for $basearch
baseurl=https://download.ceph.com/rpm-mimic/el7/$basearch
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=https://download.ceph.com/keys/release.asc
priority=1

# Architecture-independent packages.
[Ceph-noarch]
name=Ceph noarch packages
baseurl=https://download.ceph.com/rpm-mimic/el7/noarch
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=https://download.ceph.com/keys/release.asc
priority=1

# Source RPMs.
[ceph-source]
name=Ceph source packages
baseurl=https://download.ceph.com/rpm-mimic/el7/SRPMS
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=https://download.ceph.com/keys/release.asc
priority=1

#安装客户端
yum -y install ceph-common-13.2.5
#拷贝keyring文件
将ceph的ceph.client.admin.keyring文件拷贝到master的/etc/ceph目录下。

4、为kubelet提供rbd命令

创建pod时,kubelet需要使用rbd命令去检测和挂载pv对应的ceph image,所以要在所有的worker节点安装ceph客户端ceph-common-13.2.5。

二、K8S上试用Ceph RBD存储

1、创建存储类

# StorageClass "ceph-sc": dynamically provisions Ceph RBD images from pool
# "k8s" through the external provisioner registered as ceph.com/rbd.
# StorageClass is a cluster-scoped resource, so metadata.namespace is omitted.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-sc
  annotations:
    # Not the cluster default: PVCs must name this class explicitly.
    storageclass.kubernetes.io/is-default-class: "false"
# Must equal the PROVISIONER_NAME configured on the rbd-provisioner Deployment.
provisioner: ceph.com/rbd
# PVs (and their RBD images) are kept after the PVC is deleted.
reclaimPolicy: Retain
parameters:
  # Comma-separated list of Ceph monitor addresses.
  monitors: 172.16.1.31:6789,172.16.1.32:6789,172.16.1.33:6789
  # Ceph client ID and Secret used to create images in the pool.
  adminId: admin
  adminSecretName: storage-secret
  adminSecretNamespace: default
  # Ceph pool in which images are created.
  pool: k8s
  # Filesystem created on the mapped image.
  fsType: xfs
  # Ceph client ID and Secret used to map the image.
  userId: admin
  userSecretName: storage-secret
  imageFormat: "2"
  imageFeatures: "layering"

kubectl apply -f storage_class.yaml

2、为存储类提供secret

# Secret "storage-secret" holding the Ceph key; it is referenced by the
# StorageClass parameters adminSecretName and userSecretName.
kind: Secret
apiVersion: v1
type: kubernetes.io/rbd
metadata:
  namespace: default
  name: storage-secret
data:
  # Base64-encoded output of `ceph auth get-key client.admin`.
  key: QVFDMmIrWmNEL3JTS2hBQWwwdmR3eGJGMmVYNUM3SjdDUGZZbkE9PQ==

kubectl apply -f storage_secret.yaml
注意:StorageClass中provisioner的值(ceph.com/rbd)要和rbd-provisioner中环境变量PROVISIONER_NAME设置的值一样

3、创建PVC

# PVC "ceph-pvc": requests a 1Gi ReadWriteOnce volume from StorageClass
# "ceph-sc".
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: default
  name: ceph-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: ceph-sc

kubectl apply -f storage_pvc.yaml
#创建完PVC后,PV会自动创建:

[root@k8s-master03 ceph]# kubectl get pv       
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                  STORAGECLASS   REASON   AGE
pvc-315991e9-7d4b-11e9-b6cc-0050569ba238   1Gi        RWO            Retain           Bound    default/ceph-pvc       ceph-sc                 13h

#正常情况PVC也处于Bound状态

[root@k8s-master03 ceph]# kubectl get pvc
NAME           STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
ceph-pvc       Bound    pvc-315991e9-7d4b-11e9-b6cc-0050569ba238   1Gi        RWO            ceph-sc        17s

4、创建测试应用

# Test pod "ceph-pod1": an nginx container with the PVC "ceph-pvc" mounted
# at its web root, pinned to node k8s-node02.
kind: Pod
apiVersion: v1
metadata:
  name: ceph-pod1
spec:
  # Pins the pod to one node by name.
  nodeName: k8s-node02
  volumes:
    # Volume backed by the claim "ceph-pvc".
    - name: ceph-rdb-vol1
      persistentVolumeClaim:
        claimName: ceph-pvc
  containers:
    - name: nginx
      image: nginx:1.14
      volumeMounts:
        - name: ceph-rdb-vol1
          readOnly: false
          mountPath: /usr/share/nginx/html

kubectl apply -f storage_pod.yaml
#查看pod状态

[root@k8s-master03 ceph]# kubectl get pods -o wide
NAME                             READY   STATUS    RESTARTS   AGE     IP            NODE         NOMINATED NODE   READINESS GATES
ceph-pod1                        1/1     Running   0          3d23h   10.244.4.75   k8s-node02              

#进入容器查看挂载情况,可以看到rbd已挂载到/usr/share/nginx/html目录。

[root@k8s-master03 ceph]# kubectl exec -it ceph-pod1 -- /bin/bash
root@ceph-pod1:/# df -hT
/dev/rbd0            xfs     1014M   33M  982M   4% /usr/share/nginx/html
#在挂载目录下添加一个测试文件,并查看其内容
root@ceph-pod1:/# echo 'hello ceph!' > /usr/share/nginx/html/index.html
root@ceph-pod1:/# cat /usr/share/nginx/html/index.html
hello ceph!

#在Ceph上检查对应image挂载的节点,目前在172.16.1.22即k8s-node02。

[root@ceph-node1 ~]# rbd status k8s/kubernetes-dynamic-pvc-2410765c-7dec-11e9-aa80-26a98c3bc9e4
Watchers:
        watcher=172.16.1.22:0/264870305 client.24553 cookie=18446462598732840961

#而后我们删掉这个pod

[root@k8s-master03 ceph]# kubectl delete -f  storage_pod.yaml   
pod "ceph-pod1" deleted

#修改清单文件storage_pod.yaml,将pod调度到k8s-node01上,并应用。
#稍后,查看pod的状态,该pod已部署在k8s-node01上了。

[root@k8s-master01 ~]# kubectl get pods -o wide
NAME                             READY   STATUS    RESTARTS   AGE    IP            NODE         NOMINATED NODE   READINESS GATES
ceph-pod1                        1/1     Running   0          34s    10.244.3.28   k8s-node01              

#在Ceph上再次检查image挂载节点,目前在172.16.1.21即k8s-node01

[root@ceph-node1 ~]# rbd status k8s/kubernetes-dynamic-pvc-2410765c-7dec-11e9-aa80-26a98c3bc9e4
Watchers:
        watcher=172.16.1.21:0/1812501701 client.114340 cookie=18446462598732840963

#进入容器,检查测试文件依然存在、没有丢失,说明pod切换节点后使用了原来的image。

[root@k8s-master03 ceph]# kubectl exec -it ceph-pod1 -- /bin/bash
root@ceph-pod1:/# cat /usr/share/nginx/html/index.html
hello ceph!