Main reference article
### Environment
centos: 7.5 Mini
kernel: 4.19.5-1.el7.elrepo.x86_64
docker-ce: 17.03.3-ce
kubernetes: 1.12.2
Deployed with kubeadm
Network: calico
DNS: coredns
Two-node VM cluster:
192.168.100.100 wh-01
192.168.100.101 localhost.localdomain (prepare a disk larger than 5 GB)
/dev/sdb 100GB (do NOT partition or format it!!!)
ip_forward must be enabled (net.ipv4.ip_forward = 1)
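For reference, a minimal sketch for enabling forwarding persistently on a stock CentOS 7 host (run on every node):
cat << EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
EOF
sysctl --system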
# Unless otherwise noted, all operations are performed on the master
# cd $HOME
# git clone https://github.com/rook/rook.git
# cd rook/cluster/examples/kubernetes/ceph
# kubectl apply -f operator.yaml
# kubectl get pod -n rook-ceph-system -o wide
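Note: the attached manifests pin the operator image to rook/ceph:v0.8.3, while the master branch keeps moving, so checking out the matching tag before applying is safer (this assumes the v0.8.3 tag exists in your clone):
# cd $HOME/rook
# git checkout v0.8.3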
Attached operator.yaml:
apiVersion: v1
kind: Namespace
metadata:
  name: rook-ceph-system
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: clusters.ceph.rook.io
spec:
  group: ceph.rook.io
  names:
    kind: Cluster
    listKind: ClusterList
    plural: clusters
    singular: cluster
    shortNames:
    - rcc
  scope: Namespaced
  version: v1beta1
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: filesystems.ceph.rook.io
spec:
  group: ceph.rook.io
  names:
    kind: Filesystem
    listKind: FilesystemList
    plural: filesystems
    singular: filesystem
    shortNames:
    - rcfs
  scope: Namespaced
  version: v1beta1
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: objectstores.ceph.rook.io
spec:
  group: ceph.rook.io
  names:
    kind: ObjectStore
    listKind: ObjectStoreList
    plural: objectstores
    singular: objectstore
    shortNames:
    - rco
  scope: Namespaced
  version: v1beta1
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: pools.ceph.rook.io
spec:
  group: ceph.rook.io
  names:
    kind: Pool
    listKind: PoolList
    plural: pools
    singular: pool
    shortNames:
    - rcp
  scope: Namespaced
  version: v1beta1
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: volumes.rook.io
spec:
  group: rook.io
  names:
    kind: Volume
    listKind: VolumeList
    plural: volumes
    singular: volume
    shortNames:
    - rv
  scope: Namespaced
  version: v1alpha2
---
# The cluster role for managing all the cluster-specific resources in a namespace
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: rook-ceph-cluster-mgmt
  labels:
    operator: rook
    storage-backend: ceph
rules:
- apiGroups:
  - ""
  resources:
  - secrets
  - pods
  - services
  - configmaps
  verbs:
  - get
  - list
  - watch
  - patch
  - create
  - update
  - delete
- apiGroups:
  - extensions
  resources:
  - deployments
  - daemonsets
  - replicasets
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - delete
---
# The role for the operator to manage resources in the system namespace
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: rook-ceph-system
  namespace: rook-ceph-system
  labels:
    operator: rook
    storage-backend: ceph
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - configmaps
  verbs:
  - get
  - list
  - watch
  - patch
  - create
  - update
  - delete
- apiGroups:
  - extensions
  resources:
  - daemonsets
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - delete
---
# The cluster role for managing the Rook CRDs
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: rook-ceph-global
  labels:
    operator: rook
    storage-backend: ceph
rules:
- apiGroups:
  - ""
  resources:
  # Pod access is needed for fencing
  - pods
  # Node access is needed for determining nodes where mons should run
  - nodes
  - nodes/proxy
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - events
  # PVs and PVCs are managed by the Rook provisioner
  - persistentvolumes
  - persistentvolumeclaims
  verbs:
  - get
  - list
  - watch
  - patch
  - create
  - update
  - delete
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - delete
- apiGroups:
  - ceph.rook.io
  resources:
  - "*"
  verbs:
  - "*"
- apiGroups:
  - rook.io
  resources:
  - "*"
  verbs:
  - "*"
---
# The rook system service account used by the operator, agent, and discovery pods
apiVersion: v1
kind: ServiceAccount
metadata:
  name: rook-ceph-system
  namespace: rook-ceph-system
  labels:
    operator: rook
    storage-backend: ceph
---
# Grant the operator, agent, and discovery agents access to resources in the rook-ceph-system namespace
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: rook-ceph-system
  namespace: rook-ceph-system
  labels:
    operator: rook
    storage-backend: ceph
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: rook-ceph-system
subjects:
- kind: ServiceAccount
  name: rook-ceph-system
  namespace: rook-ceph-system
---
# Grant the rook system daemons cluster-wide access to manage the Rook CRDs, PVCs, and storage classes
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: rook-ceph-global
  namespace: rook-ceph-system
  labels:
    operator: rook
    storage-backend: ceph
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: rook-ceph-global
subjects:
- kind: ServiceAccount
  name: rook-ceph-system
  namespace: rook-ceph-system
---
# The deployment for the rook operator
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: rook-ceph-operator
  namespace: rook-ceph-system
  labels:
    operator: rook
    storage-backend: ceph
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: rook-ceph-operator
    spec:
      serviceAccountName: rook-ceph-system
      containers:
      - name: rook-ceph-operator
        image: rook/ceph:v0.8.3
        args: ["ceph", "operator"]
        volumeMounts:
        - mountPath: /var/lib/rook
          name: rook-config
        - mountPath: /etc/ceph
          name: default-config-dir
        env:
        # To disable RBAC, uncomment the following:
        # - name: RBAC_ENABLED
        #   value: "false"
        # Rook Agent toleration. Will tolerate all taints with all keys.
        # Choose between NoSchedule, PreferNoSchedule and NoExecute:
        # - name: AGENT_TOLERATION
        #   value: "NoSchedule"
        # (Optional) Rook Agent toleration key. Set this to the key of the taint you want to tolerate
        # - name: AGENT_TOLERATION_KEY
        #   value: ""
        # Set the path where the Rook agent can find the flex volumes
        # - name: FLEXVOLUME_DIR_PATH
        #   value: ""
        # Rook Discover toleration. Will tolerate all taints with all keys.
        # Choose between NoSchedule, PreferNoSchedule and NoExecute:
        # - name: DISCOVER_TOLERATION
        #   value: "NoSchedule"
        # (Optional) Rook Discover toleration key. Set this to the key of the taint you want to tolerate
        # - name: DISCOVER_TOLERATION_KEY
        #   value: ""
        # Allow rook to create multiple file systems. Note: This is considered
        # an experimental feature in Ceph as described at
        # http://docs.ceph.com/docs/master/cephfs/experimental-features/#multiple-filesystems-within-a-ceph-cluster
        # which might cause mons to crash as seen in https://github.com/rook/rook/issues/1027
        - name: ROOK_ALLOW_MULTIPLE_FILESYSTEMS
          value: "false"
        # The logging level for the operator: INFO | DEBUG
        - name: ROOK_LOG_LEVEL
          value: "INFO"
        # The interval to check if every mon is in the quorum.
        - name: ROOK_MON_HEALTHCHECK_INTERVAL
          value: "45s"
        # The duration to wait before trying to failover or remove/replace the
        # current mon with a new mon (useful for compensating flapping network).
        - name: ROOK_MON_OUT_TIMEOUT
          value: "300s"
        # Whether to start pods as privileged that mount a host path, which includes the Ceph mon and osd pods.
        # This is necessary to workaround the anyuid issues when running on OpenShift.
        # For more details see https://github.com/rook/rook/issues/1314#issuecomment-355799641
        - name: ROOK_HOSTPATH_REQUIRES_PRIVILEGED
          value: "false"
        # The name of the node to pass with the downward API
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        # The pod name to pass with the downward API
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        # The pod namespace to pass with the downward API
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
      volumes:
      - name: rook-config
        emptyDir: {}
      - name: default-config-dir
        emptyDir: {}
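After applying operator.yaml, a quick way to confirm the five CRDs above were registered:
# kubectl get crd | grep rook.io
Expect clusters/filesystems/objectstores/pools.ceph.rook.io and volumes.rook.io in the output.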
# Still in the same directory
# rook/cluster/examples/kubernetes/ceph
# kubectl apply -f cluster.yaml
# kubectl get all -n rook-ceph -o wide -w
NAME READY STATUS RESTARTS AGE
rook-ceph-mon0-jtjb8 1/1 Running 0 17s
rook-ceph-mon1-8z8t9 1/1 Running 0 6s
rook-ceph-mon2-gtszc 0/1 Pending 0 0s
rook-ceph-mon2-gtszc 0/1 Pending 0 0s
rook-ceph-mon2-gtszc 0/1 ContainerCreating 0 0s
rook-ceph-mon2-gtszc 0/1 ContainerCreating 0 1s
rook-ceph-mon2-gtszc 1/1 Running 0 1s
rook-ceph-mgr-a-5b458b579f-wv5lz 0/1 Pending 0 0s
rook-ceph-mgr-a-5b458b579f-wv5lz 0/1 Pending 0 0s
rook-ceph-mgr-a-5b458b579f-wv5lz 0/1 ContainerCreating 0 0s
rook-ceph-mgr-a-5b458b579f-wv5lz 0/1 ContainerCreating 0 1s
rook-ceph-mgr-a-5b458b579f-wv5lz 1/1 Running 0 1s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 0/1 Pending 0 0s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 0/1 Pending 0 0s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 0/1 ContainerCreating 0 0s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 0/1 ContainerCreating 0 1s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 1/1 Running 0 1s
# Wait a moment, then check the pod status (mgr/mon/osd)
# kubectl get pod -n rook-ceph
NAME READY STATUS RESTARTS AGE
rook-ceph-mgr-a-5b458b579f-wv5lz 1/1 Running 0 13s
rook-ceph-mon0-jtjb8 1/1 Running 0 53s
rook-ceph-mon1-8z8t9 1/1 Running 0 42s
rook-ceph-mon2-gtszc 1/1 Running 0 31s
rook-ceph-osd-prepare-localhost.localdomain-dxfr4 1/1 Running 0 9s
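Optionally, deploy the Rook toolbox to inspect Ceph itself. A sketch assuming the toolbox.yaml shipped in the same examples directory and its default pod name rook-ceph-tools (verify both against your checkout):
# kubectl apply -f toolbox.yaml
# kubectl -n rook-ceph exec -it rook-ceph-tools -- ceph status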
# On the node, the sdb disk has been automatically partitioned and formatted by the OSD provisioning
# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sdb 8:16 0 100G 0 disk
├─sdb2 8:18 0 1G 0 part
├─sdb3 8:19 0 98.4G 0 part
└─sdb1 8:17 0 576M 0 part
sr0 11:0 1 906M 0 rom
sda 8:0 0 20G 0 disk
├─sda2 8:2 0 800M 0 part /boot
├─sda3 8:3 0 19.2G 0 part /
└─sda1 8:1 0 1M 0 part
Attached cluster.yaml:
apiVersion: v1
kind: Namespace
metadata:
  name: rook-ceph
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: rook-ceph-cluster
  namespace: rook-ceph
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: rook-ceph-cluster
  namespace: rook-ceph
rules:
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: [ "get", "list", "watch", "create", "update", "delete" ]
---
# Allow the operator to create resources in this cluster's namespace
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: rook-ceph-cluster-mgmt
  namespace: rook-ceph
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: rook-ceph-cluster-mgmt
subjects:
- kind: ServiceAccount
  name: rook-ceph-system
  namespace: rook-ceph-system
---
# Allow the pods in this namespace to work with configmaps
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: rook-ceph-cluster
  namespace: rook-ceph
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: rook-ceph-cluster
subjects:
- kind: ServiceAccount
  name: rook-ceph-cluster
  namespace: rook-ceph
---
apiVersion: ceph.rook.io/v1beta1
kind: Cluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  cephVersion:
    # The container image used to launch the Ceph daemon pods (mon, mgr, osd, mds, rgw).
    # v12 is luminous, v13 is mimic, and v14 is nautilus.
    # RECOMMENDATION: In production, use a specific version tag instead of the general v13 flag, which pulls the latest release and could result in different
    # versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
    image: ceph/ceph:v13
    # Whether to allow unsupported versions of Ceph. Currently only luminous and mimic are supported.
    # After nautilus is released, Rook will be updated to support nautilus.
    # Do not set to true in production.
    allowUnsupported: false
  # The path on the host where configuration files will be persisted. If not specified, a kubernetes emptyDir will be created (not recommended).
  # Important: if you reinstall the cluster, make sure you delete this directory from each host or else the mons will fail to start on the new cluster.
  # In Minikube, the '/data' directory is configured to persist across reboots. Use "/data/rook" in Minikube environment.
  dataDirHostPath: /var/lib/rook
  # The service account under which to run the daemon pods in this cluster if the default account is not sufficient (OSDs)
  serviceAccount: rook-ceph-cluster
  # set the amount of mons to be started
  # count sets how many ceph-mon daemons run; the default of three is fine here
  mon:
    count: 3
    allowMultiplePerNode: true
  # enable the ceph dashboard for viewing cluster status
  dashboard:
    enabled: true
    # serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy)
    # urlPrefix: /ceph-dashboard
  network:
    # toggle to use hostNetwork
    # Using the host network supposedly lets hosts outside the cluster mount Ceph,
    # but I have not tried it; feel free to set this to true and experiment.
    # This cluster is only used internally, so I leave it as false.
    hostNetwork: false
  # To control where various services will be scheduled by kubernetes, use the placement configuration sections below.
  # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node' and
  # tolerate taints with a key of 'storage-node'.
  placement:
  # all:
  #   nodeAffinity:
  #     requiredDuringSchedulingIgnoredDuringExecution:
  #       nodeSelectorTerms:
  #       - matchExpressions:
  #         - key: role
  #           operator: In
  #           values:
  #           - storage-node
  #   podAffinity:
  #   podAntiAffinity:
  #   tolerations:
  #   - key: storage-node
  #     operator: Exists
  # The above placement information can also be specified for mon, osd, and mgr components
  # mon:
  # osd:
  # mgr:
  # nodeAffinity restricts pods to nodes carrying a matching label.
  # Recommended, so these pods do not wander onto arbitrary nodes.
    mon:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: ceph-mon
              operator: In
              values:
              - enabled
    osd:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: ceph-osd
              operator: In
              values:
              - enabled
    mgr:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: ceph-mgr
              operator: In
              values:
              - enabled
  resources:
  # The requests and limits set here, allow the mgr pod to use half of one CPU core and 1 gigabyte of memory
  # mgr:
  #   limits:
  #     cpu: "500m"
  #     memory: "1024Mi"
  #   requests:
  #     cpu: "500m"
  #     memory: "1024Mi"
  # The above example requests/limits can also be added to the mon and osd components
  # mon:
  # osd:
  storage: # cluster level storage configuration and selection
    useAllNodes: false
    useAllDevices: false
    deviceFilter:
    location:
    config:
      # The default and recommended storeType is dynamically set to bluestore for devices and filestore for directories.
      # Set the storeType explicitly only if it is required not to use the default.
      # storeType: bluestore
      # databaseSizeMB: "1024" # this value can be removed for environments with normal sized disks (100 GB or larger)
      # journalSizeMB: "1024" # this value can be removed for environments with normal sized disks (20 GB or larger)
    # Cluster level list of directories to use for storage. These values will be set for all nodes that have no `directories` set.
    # directories:
    # - path: /rook/storage-dir
    # Individual nodes and their config can be specified as well, but 'useAllNodes' above must be set to false. Then, only the named
    # nodes below will be used as storage resources. Each node's 'name' field should match their 'kubernetes.io/hostname' label.
    # Recommended disk configuration:
    # name: a node to use; the name is the node's kubernetes.io/hostname label, i.e. what kubectl get nodes shows
    # devices: disks to turn into OSDs
    # - name: "sdb" turns /dev/sdb into an OSD
    nodes:
    - name: "kube-node1"
      devices:
      - name: "sdb"
    - name: "kube-node2"
      devices:
      - name: "sdb"
    - name: "kube-node3"
      devices:
      - name: "sdb"
    # directories: # specific directories to use for storage can be specified for each node
    # - path: "/rook/storage-dir"
    # resources:
    #   limits:
    #     cpu: "500m"
    #     memory: "1024Mi"
    #   requests:
    #     cpu: "500m"
    #     memory: "1024Mi"
    # - name: "172.17.4.201"
    #   devices: # specific devices to use for storage can be specified for each node
    #   - name: "sdb"
    #   - name: "sdc"
    #   config: # configuration can be specified at the node level which overrides the cluster level config
    #     storeType: filestore
    # - name: "172.17.4.301"
    #   deviceFilter: "^sd."
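Note that the node names in the nodes: list above (kube-node1..3) come from the upstream example and must be replaced with the names shown by kubectl get nodes. The placement rules also only schedule onto nodes carrying the ceph-mon/ceph-osd/ceph-mgr labels, so label the nodes before applying cluster.yaml. A sketch for this tutorial's two nodes (the label assignments are my assumption; adjust to your cluster):
# kubectl label node wh-01 ceph-mon=enabled ceph-mgr=enabled
# kubectl label node localhost.localdomain ceph-mon=enabled ceph-osd=enabled
# kubectl get nodes --show-labels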
# kubectl get svc -n rook-ceph -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
rook-ceph-mgr ClusterIP 10.109.123.196 <none> 9283/TCP 97s app=rook-ceph-mgr,rook_cluster=rook-ceph
rook-ceph-mgr-dashboard NodePort 10.99.166.224 <none> 7000:32488/TCP 96s app=rook-ceph-mgr,rook_cluster=rook-ceph
rook-ceph-mon0 ClusterIP 10.105.134.182 <none> 6790/TCP 2m18s app=rook-ceph-mon,mon=rook-ceph-mon0,mon_cluster=rook-ceph
rook-ceph-mon1 ClusterIP 10.101.218.139 <none> 6790/TCP 2m7s app=rook-ceph-mon,mon=rook-ceph-mon1,mon_cluster=rook-ceph
rook-ceph-mon2 ClusterIP 10.96.142.209 <none> 6790/TCP 116s app=rook-ceph-mon,mon=rook-ceph-mon2,mon_cluster=rook-ceph
# Change the Dashboard service's type to NodePort
# kubectl edit svc -n rook-ceph rook-ceph-mgr-dashboard
type: NodePort
# Then open http://node-ip:32488 in a browser
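If you prefer a non-interactive change, a one-line patch does the same (service name as in the listing above):
# kubectl -n rook-ceph patch svc rook-ceph-mgr-dashboard -p '{"spec":{"type":"NodePort"}}'
The assigned NodePort (32488 in this run) will differ per cluster; re-check it with kubectl get svc -n rook-ceph.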
Attached storageclass.yaml (a Pool plus a StorageClass):
apiVersion: ceph.rook.io/v1beta1
kind: Pool
metadata:
  # this name becomes the pool name once the Ceph pool is created
  name: replicapool
  namespace: rook-ceph
spec:
  replicated:
    size: 1
    # size is the number of data replicas in the pool; 1 keeps a single copy with no redundancy
  failureDomain: osd
  # failureDomain is the failure domain for data placement:
  # host places each replica on a different host
  # osd places each replica on a different OSD
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph
  # the StorageClass name, referenced by PVCs
provisioner: ceph.rook.io/block
parameters:
  pool: replicapool
  # Specify the namespace of the rook cluster from which to create volumes.
  # If not specified, it will use `rook` as the default namespace of the cluster.
  # This is also the namespace where the cluster will be
  clusterNamespace: rook-ceph
  # Specify the filesystem type of the volume. If not specified, it will use `ext4`.
  fstype: xfs
# Set the reclaim policy to Retain (the StorageClass default is Delete)
reclaimPolicy: Retain
# kubectl apply -f storageclass.yaml
# kubectl get storageclasses.storage.k8s.io -n rook-ceph
NAME   PROVISIONER          AGE
ceph   ceph.rook.io/block   53s
Create an Nginx application Pod to test mounting a volume:
cat << EOF > nginx.yaml
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nginx-pvc
spec:
  accessModes:
  # Note: Ceph RBD block volumes normally support only ReadWriteOnce;
  # ReadWriteMany may not actually be honored by block storage.
  - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: ceph
---
apiVersion: v1
kind: Service
metadata:
  name: nginx
spec:
  selector:
    app: nginx
  ports:
  - port: 80
    name: nginx-port
    targetPort: 80
    protocol: TCP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      name: nginx
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
        - mountPath: /html
          name: http-file
      volumes:
      - name: http-file
        persistentVolumeClaim:
          claimName: nginx-pvc
EOF
kubectl apply -f nginx.yaml
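To confirm the volume was provisioned and mounted:
# kubectl get pvc nginx-pvc
# kubectl get pod -l app=nginx -o wide
The PVC should report STATUS Bound against the ceph StorageClass, and a matching PV is created automatically.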
# Workaround:
# Drain the affected node (substitute the node name)
kubectl drain <node-name> --ignore-daemonsets --delete-local-data
# Then bring it back
kubectl uncordon <node-name>