The ceph-ansible project's infrastructure-playbooks directory provides add-osd, add-mon, shrink-osd, shrink-mon, shrink-rgw, shrink-mds and shrink-mgr, but has no add-mds or add-rgw playbook; deploying these daemons by hand is quick anyway.
Environment: Ceph Luminous, CentOS 7, Kubernetes 1.11.5
1. Configure the Ceph yum repository
[root@uat-mtr01 ~]# cat /etc/yum.repos.d/ceph_stable.repo
[ceph_stable]
baseurl = http://mirror.ceph-yum.com/ceph/rpm-luminous/el7/$basearch
gpgcheck = 1
gpgkey = http://mirror.ceph-yum.com/ceph/keys/release.asc
name = Ceph Stable repo
2. Install Ceph
yum install ceph
3. Copy /etc/ceph from a mon node to the mds node
scp -r /etc/ceph mds:/etc/ceph
4. Create the mds data directory
mkdir /var/lib/ceph/mds/ceph-uat-mtr01
5. Create the authentication key
ceph auth get-or-create mds.uat-mtr01 mon 'profile mds' mgr 'profile mds' mds 'allow *' osd 'allow *' > /var/lib/ceph/mds/ceph-uat-mtr01/keyring
6. Start the ceph-mds service
systemctl start ceph-mds@uat-mtr01
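The keyring above is written by root, while ceph-mds on Luminous runs as the ceph user, so it is usually also worth fixing ownership and enabling the unit so the daemon survives a reboot (a minimal sketch for this node):
chown -R ceph:ceph /var/lib/ceph/mds/ceph-uat-mtr01   # let the ceph user read the keyring
systemctl enable ceph-mds@uat-mtr01                   # start automatically at boot
systemctl status ceph-mds@uat-mtr01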
At this point the mds is not yet active:
[root@rgw01-backup ~]# ceph fs ls
No filesystems enabled
[root@rgw01-backup ~]# ceph mds stat
, 1 up:standby
[root@rgw01-backup ~]# ceph -s
  cluster:
    id:     0e38e7c6-a704-4132-b0e3-76b87f18d8fa
    health: HEALTH_WARN
            application not enabled on 1 pool(s)
            too few PGs per OSD (2 < min 30)
            clock skew detected on mon.rgw02-backup

  services:
    mon: 3 daemons, quorum rgw01-backup,rgw02-backup,rgw03-backup
    mgr: rgw01-backup(active), standbys: rgw03-backup, rgw02-backup
    osd: 60 osds: 60 up, 60 in
    rgw: 3 daemons active

  data:
    pools:   7 pools, 56 pgs
    objects: 244 objects, 14.3MiB
    usage:   63.8GiB used, 98.2TiB / 98.2TiB avail
    pgs:     56 active+clean
7. A Ceph filesystem needs at least two RADOS pools, one for data and one for metadata. Note: the metadata pool must have enough replicas, because any data loss there makes the filesystem unusable. Placing the metadata pool on low-latency storage such as SSDs reduces the latency seen by client operations.
ceph osd pool create cephfs_data 32 32
ceph osd pool create cephfs_metadata 32 32
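If the cluster's SSD OSDs carry the ssd device class, one way to realize the low-latency metadata pool mentioned above is a dedicated CRUSH rule (a sketch assuming Luminous device classes are in use; the rule name ssd-rule is illustrative):
ceph osd crush rule create-replicated ssd-rule default host ssd   # replicate across hosts, ssd class only
ceph osd pool set cephfs_metadata crush_rule ssd-rule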
8. Enable the filesystem with the fs new command
[root@rgw01-backup ~]# ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 9 and data pool 8
Once the filesystem is created, the mds becomes active:
[root@rgw01-backup ~]# ceph mds stat
cephfs-1/1/1 up {0=uat-mtr01=up:active}
[root@rgw01-backup ~]# ceph -s
  cluster:
    id:     0e38e7c6-a704-4132-b0e3-76b87f18d8fa
    health: HEALTH_WARN
            application not enabled on 1 pool(s)
            too few PGs per OSD (6 < min 30)
            clock skew detected on mon.rgw02-backup

  services:
    mon: 3 daemons, quorum rgw01-backup,rgw02-backup,rgw03-backup
    mgr: rgw01-backup(active), standbys: rgw03-backup, rgw02-backup
    mds: cephfs-1/1/1 up {0=uat-mtr01=up:active}
    osd: 60 osds: 60 up, 60 in
    rgw: 3 daemons active

  data:
    pools:   9 pools, 120 pgs
    objects: 265 objects, 14.3MiB
    usage:   64.0GiB used, 98.2TiB / 98.2TiB avail
    pgs:     120 active+clean
9. Test CephFS
mkdir /mnt/mycephfs
Mount the filesystem (kernel client)
mount -t ceph 192.168.1.201:6789:/ /mnt/mycephfs -o name=admin,secret=AQDmk9deMSmBFRAAH+ReLam+0rNLXYx42EYjsQ==
Unmount
umount /mnt/mycephfs
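Passing the secret on the command line leaks it into shell history; the kernel client also accepts a secretfile option (a sketch, the file path is illustrative):
ceph auth get-key client.admin > /etc/ceph/admin.secret
mount -t ceph 192.168.1.201:6789:/ /mnt/mycephfs -o name=admin,secretfile=/etc/ceph/admin.secret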
10. Ceph client capability management
(1) Grant capabilities directly when creating the client with ceph auth get-or-create
ceph auth get-or-create client.bruce mon 'allow r' mds 'allow r, allow rw path=/bruce' osd 'allow rw pool=cephfs_data'
For further isolation, so that different users' data lives in different pools, additional data pools can be added to the filesystem and assigned per user, as sketched below.
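For example, a separate data pool can be created, added to the filesystem, and pinned to the /bruce directory through a file layout, with the client's osd cap narrowed to that pool (a sketch; the pool name cephfs_data_bruce is illustrative, and the setfattr runs on an admin mount where /bruce already exists):
ceph osd pool create cephfs_data_bruce 32 32
ceph fs add_data_pool cephfs cephfs_data_bruce
setfattr -n ceph.dir.layout.pool -v cephfs_data_bruce /mnt/mycephfs/bruce   # new files under /bruce go to this pool
ceph auth caps client.bruce mon 'allow r' mds 'allow r, allow rw path=/bruce' osd 'allow rw pool=cephfs_data_bruce'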
Capabilities can be modified later with ceph auth caps:
ceph auth caps client.bruce mon 'allow r' mds 'allow r, allow rw path=/bruce' osd 'allow rw pool=cephfs_data'
With these caps the client can mount both / and /bruce, but / is read-only. To mount /bruce, the bruce directory must already exist in the filesystem; it can be created through a mount made with client.admin, for example:
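A minimal sketch (the /mnt/admin mount point is illustrative):
mkdir -p /mnt/admin
mount -t ceph 192.168.1.201:6789:/ /mnt/admin -o name=admin,secret=AQDmk9deMSmBFRAAH+ReLam+0rNLXYx42EYjsQ==
mkdir /mnt/admin/bruce
umount /mnt/admin
Then mount /bruce as client.bruce: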
mount -t ceph 192.168.1.201:6789:/bruce /mnt/mycephfs -o name=bruce,secret=AQAR091ekK3tAhAAvvMH5JWmiklSorTs1wOCjQ==
(2) Grant capabilities with ceph fs authorize
Grant client.uat read-only access to / and rw access to /uat on the cephfs filesystem:
ceph fs authorize cephfs client.uat / r /uat rw
To restrict the client to /uat entirely, so that the only possible mount point is 192.168.1.201:6789:/uat:
ceph fs authorize cephfs client.uat /uat rw
Mount:
mount -t ceph 192.168.1.201:6789:/ /mnt/mycephfs -o name=uat,secret=AQC27d1eylQ9DxAAamiH6F8qJHAngDpFMrk84g==
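The caps that fs authorize generated can be inspected afterwards with:
ceph auth get client.uat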
CephFS Client Capabilities
User Management
11. Configure a CephFS StorageClass in Kubernetes
Create the RBAC objects: ServiceAccount, ClusterRole, ClusterRoleBinding, Role and RoleBinding
[root@k8s01 cephfs-storageclass]# cat rolebinding.yml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cephfs-provisioner
  namespace: cephfs
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
  namespace: cephfs
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
  - apiGroups: [""]
    resources: ["services"]
    resourceNames: ["kube-dns","coredns"]
    verbs: ["list", "get"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
    namespace: cephfs
roleRef:
  kind: ClusterRole
  name: cephfs-provisioner
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cephfs-provisioner
  namespace: cephfs
rules:
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "get", "delete"]
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cephfs-provisioner
  namespace: cephfs
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cephfs-provisioner
subjects:
  - kind: ServiceAccount
    name: cephfs-provisioner
Create the external-storage cephfs-provisioner deployment
[root@k8s01 cephfs-storageclass]# cat deployment.yml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cephfs-provisioner
  namespace: cephfs
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cephfs-provisioner
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: cephfs-provisioner
    spec:
      containers:
      - name: cephfs-provisioner
        image: "external_storage/cephfs-provisioner:latest"
        env:
        - name: PROVISIONER_NAME
          value: ceph.com/cephfs
        - name: PROVISIONER_SECRET_NAMESPACE
          value: cephfs
        command:
        - "/usr/local/bin/cephfs-provisioner"
        args:
        - "-id=cephfs-provisioner-1"
      serviceAccount: cephfs-provisioner
Create the secret (dump the client.admin key to a file; the Kubernetes Secret itself is created in the deploy step)
[root@uat-mtr01 ~]# ceph auth get-key client.admin > secret
[root@uat-mtr01 ~]# cat secret
AQDmk9deMSmBFRAAH+ReLam+0rNLXYx42EYjsQ==
Create the StorageClass
[root@k8s01 cephfs-storageclass]# cat sc.yml
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: cephfs
provisioner: ceph.com/cephfs
parameters:
  monitors: 192.168.1.201:6789
  adminId: admin
  adminSecretName: ceph-secret-admin
  adminSecretNamespace: "cephfs"
  claimRoot: /pvc-volumes
Create the PVC
[root@k8s01 cephfs-storageclass]# cat pvc.yml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: claim1
spec:
  storageClassName: cephfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
Create a test pod
[root@k8s01 cephfs-storageclass]# cat test-pod.yml
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
  - name: test-pod
    image: google_containers/busybox:1.24
    command:
    - "/bin/sh"
    args:
    - "-c"
    - "touch /mnt/SUCCESS && exit 0 || exit 1"
    volumeMounts:
    - name: pvc
      mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
  - name: pvc
    persistentVolumeClaim:
      claimName: claim1
Deploy (kb below is a shell alias for kubectl):
kb create secret generic ceph-secret-admin --from-file=secret --namespace=cephfs
kb apply -f rolebinding.yml
kb apply -f deployment.yml
kb apply -f sc.yml
kb apply -f pvc.yml
kb apply -f test-pod.yml
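The steps above assume the cephfs namespace already exists; if it does not, create it before applying. Afterwards the PVC binding and the test pod can be checked (plain kubectl shown; claim1 and test-pod were created in the default namespace here):
kubectl create namespace cephfs
kubectl get pvc claim1      # should become Bound
kubectl get pod test-pod    # should reach Completed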
cephfs-provisioner does not yet enforce PVC capacity limits: even with a 1Gi request, the container still sees the full capacity of the filesystem.
CephFS quotas only take effect on the kernel client with kernel >= 4.17 and Ceph >= Mimic.
The CephFS StorageClass must use the Ceph client.admin key.
kubernetes笔记: Cephfs
external-storage
12. Mount the filesystem with ceph-fuse and set quotas with setfattr
ceph-fuse needs the Ceph config file and a keyring with CAPS for the Ceph metadata server; copy these files from a monitor node:
sudo mkdir -p /etc/ceph
sudo scp {user}@{server-machine}:/etc/ceph/ceph.conf /etc/ceph/ceph.conf
sudo scp {user}@{server-machine}:/etc/ceph/ceph.keyring /etc/ceph/ceph.keyring
Mount the filesystem with ceph-fuse
yum install -y ceph-fuse
[root@uat-mtr01 ~]# ceph-fuse -m 192.168.1.201:6789 /mnt/mycephfs
ceph-fuse[11668]: starting ceph client
2020-06-09 13:32:57.462328 7f85b28b7240 -1 init, newargv = 0x55f3a011f740 newargc=9
ceph-fuse[11668]: starting fuse
Set quotas with setfattr. The directory being limited must be a subdirectory under the mount point, e.g. /mnt/mycephfs/uat.
yum install -y attr
[root@uat-mtr01 ~]# setfattr -n ceph.quota.max_bytes -v 100000000 /mnt/mycephfs/uat
[root@uat-mtr01 ~]# getfattr -n ceph.quota.max_bytes /mnt/mycephfs/uat
getfattr: Removing leading '/' from absolute path names
# file: mnt/mycephfs/uat
ceph.quota.max_bytes="100000000"
[root@uat-mtr01 ~]# setfattr -n ceph.quota.max_files -v 10000 /mnt/mycephfs/uat
Test
[root@uat-mtr01 mycephfs]# dd if=/dev/zero of=/mnt/mycephfs/uat/bb bs=1M count=100
dd: error writing ‘/mnt/mycephfs/uat/bb’: Disk quota exceeded
96+0 records in
95+0 records out
100007936 bytes (100 MB) copied, 0.438391 s, 228 MB/s
[root@uat-mtr01 mycephfs]# du -sh /mnt/mycephfs/uat/
96M /mnt/mycephfs/uat/
[root@uat-mtr01 mycephfs]# dd if=/dev/zero of=/mnt/mycephfs/uat/bb2 bs=1M count=100
dd: error writing ‘/mnt/mycephfs/uat/bb2’: Disk quota exceeded
1+0 records in
0+0 records out
0 bytes (0 B) copied, 0.0021256 s, 0.0 kB/s
[root@uat-mtr01 mycephfs]# ls -lh /mnt/mycephfs/uat/
total 96M
-rw-r--r-- 1 root root 96M Jun 9 13:52 bb
-rw-r--r-- 1 root root 0 Jun 9 13:53 bb2
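When testing is done, the ceph-fuse mount is released like any other FUSE filesystem:
fusermount -u /mnt/mycephfs   # or: umount /mnt/mycephfs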
Mount CephFS using FUSE
Quotas
References:
Deploying Metadata Servers
Create a Ceph filesystem
Mount CephFS with the Kernel Driver
CephFS Client Capabilities
User Management
kubernetes笔记: Cephfs
external-storage
Mount CephFS using FUSE
Quotas