Ceph 基础组件

  • Monitors(监视器,ceph-mon):Ceph Monitor 维护集群状态映射,包括监视器映射、OSD 映射、MDS 映射和 CRUSH 映射;通过保存集群状态的映射来跟踪整个集群的健康状况。除此之外 Monitor 还负责管理守护进程和客户端之间的身份验证。通常至少需要部署三个 Ceph Monitor 才能实现冗余和高可用
  • Managers(管理器,ceph-mgr):Ceph Manager Daemon 负责跟踪运行时指标和 Ceph 集群当前状态,包括存储利用率,当前性能指标和系统负载。此外 Ceph Manager Daemon 还托管基于 Python 的模块以管理和展示 Ceph 集群信息,包括 Ceph 仪表盘和 REST API。通常至少需要部署两个 Ceph Manager Daemon 才能实现冗余和高可用
  • Ceph OSDs(Ceph 对象存储守护进程,ceph-osd):一旦应用程序向 Ceph 集群发起写操作,数据将以对象的形式存储在 OSD 中。这是 Ceph 集群中存储用户实际数据的唯一组件;其负责存储数据,处理数据复制,恢复,重新平衡,并通过检查其他 Ceph OSD Daemon 的心跳来向 ceph-mon 和 ceph-mgr 提供一些监视信息。通常至少需要部署三个 Ceph OSD 才能实现冗余和高可用性
  • MDSs(Ceph 元数据服务器,ceph-mds):Ceph Metadata Server 负责跟踪文件层次结构,仅为 CephFS 文件系统存储元数据,Ceph 块设备和 Ceph 对象存储不使用 MDS。Ceph 的元数据服务器允许 POSIX 文件系统用户执行基本命令(如 ls、find 等)

Ceph 将数据作为对象存储在逻辑存储池中。使用 CRUSH 算法,Ceph 计算哪个放置组应包含该对象,并进一步计算哪个 Ceph OSD 守护程序应存储该放置组。CRUSH 算法使 Ceph 存储集群能够动态扩展,重新平衡和恢复

Ceph 其他组件

  • RBD:提供持久块存储,其是瘦配置的、可调整大小的,并在多个 osd 上存储数据

  • RGW:提供对象存储服务。它使用 librgw 和 librados,允许应用程序与 Ceph 对象存储建立连接;RGW 提供了与 Amazon S3 和 OpenStack Swift 兼容的 RESTful API 接口

  • CephFS:CephFS 提供了一个符合 POSIX 标准的文件系统,它使用 Ceph 存储集群在文件系统上存储用户数据。与 RBD、RGW 一样,CephFS 服务也作为 librados 的本机接口实现

使用 cephadm 安装 Ceph 集群

  • Ceph octopus 15.2.3
  • Python 3.6
  • Docker 18.09.6
  • 注意:OSD 硬盘需要大于 5G
系统版本 主机名 PublicIP 硬盘 角色
Centos 7,kernel 4.4.225 ceph01 192.168.1.71 OSD盘:/dev/sdb cephadm,ceph-mon,ceph-mgr,ceph-osd,ceph-mds,rgw
Centos 7,kernel 4.4.225 ceph02 192.168.1.72 OSD盘:/dev/sdb ceph-mon,ceph-mgr,ceph-osd,ceph-mds,rgw
Centos 7,kernel 4.4.225 ceph03 192.168.1.73 OSD盘:/dev/sdb ceph-mon,ceph-mgr,ceph-osd,ceph-mds,rgw

配置系统环境

# 关闭防火墙和SElinux
[root@ceph01 ~]# systemctl disable --now firewalld
[root@ceph01 ~]# setenforce 0
[root@ceph01 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
# 配置时间同步
[root@ceph01 ~]# yum install -y chrony
[root@ceph01 ~]# systemctl enable --now chronyd

安装依赖

[root@ceph01 ~]# wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
[root@ceph01 ~]# yum -y install python3 yum-utils
[root@ceph01 ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@ceph01 ~]# yum -y install docker-ce-18.09.6 docker-ce-cli-18.09.6
[root@ceph01 ~]# systemctl enable --now docker
[root@ceph01 ~]# curl --silent --remote-name --location https://github.com/ceph/ceph/raw/octopus/src/cephadm/cephadm
[root@ceph01 ~]# chmod +x cephadm

初始化集群

# 此命令将生成 ceph yum 源
[root@ceph01 ~]# ./cephadm add-repo --release octopus

# 备份 ceph yum 源, 并将其替换为阿里云 yum 源
[root@ceph01 ~]# cp /etc/yum.repos.d/ceph.repo{,.back}
[root@ceph01 ~]# sed -i 's#download.ceph.com#mirrors.aliyun.com/ceph#' /etc/yum.repos.d/ceph.repo
[root@ceph01 ~]# yum list | grep ceph

# 安装 cephadm 到当前节点(其实就是将 cephadm 安装到 PATH 环境变量所包含的目录中)
[root@ceph01 ~]# ./cephadm install
[root@ceph01 ~]# which cephadm
/usr/sbin/cephadm

# 引导 Ceph 集群(注意: 如果觉得速度慢可以手动执行 docker pull docker.io/ceph/ceph:v15 拉取 docker 启动 ceph 集群所需镜像)
[root@ceph01 ~]# mkdir -p /etc/ceph
[root@ceph01 ~]# cephadm bootstrap --mon-ip 192.168.1.71
......
        URL: https://ceph01:8443/
        User: admin
        Password: soricu721m

INFO:cephadm:You can access the Ceph CLI with:

        sudo /usr/sbin/cephadm shell --fsid ff2b5380-b5eb-11ea-85a5-000c29177e91 -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring

INFO:cephadm:Please consider enabling telemetry to help improve Ceph:

        ceph telemetry on

For more information see:

        https://docs.ceph.com/docs/master/mgr/telemetry/

INFO:cephadm:Bootstrap complete.

[root@ceph01 ~]# /usr/sbin/cephadm shell --fsid ff2b5380-b5eb-11ea-85a5-000c29177e91 -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring
INFO:cephadm:Using recent ceph image ceph/ceph:v15

[ceph: root@ceph01 /]# exit

# 在浏览器中访问 https://ceph01:8443/ 打开 ceph ui, 第一次登录要求更改默认密码

# 安装 ceph 工具包, 其中包括 ceph, rbd, mount.ceph 等命令
[root@ceph01 ~]# cephadm install ceph-common

基础使用

# 查看 ceph 集群所有组件运行状态
[root@ceph01 ~]# ceph orch ps 
NAME               HOST    STATUS         REFRESHED  AGE  VERSION  IMAGE NAME               IMAGE ID      CONTAINER ID  
crash.ceph01       ceph01  running (83m)  3m ago     83m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  6eeecc944c89  
mgr.ceph01.zqqyym  ceph01  running (84m)  3m ago     84m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  4bb1b13e5c3b  
mon.ceph01         ceph01  running (84m)  3m ago     84m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  ddf141776199

# 查看指定组件运行状态
[root@ceph01 ~]# ceph orch ps --daemon-type mon
NAME        HOST    STATUS         REFRESHED  AGE  VERSION  IMAGE NAME               IMAGE ID      CONTAINER ID  
mon.ceph01  ceph01  running (86m)  5m ago     86m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  ddf141776199

# 查看集群当前状态
[root@ceph01 ~]# ceph status
  cluster:
    id:     44c6209a-b5d4-11ea-a1b5-000c29177e91
    health: HEALTH_WARN
            Reduced data availability: 1 pg inactive
            OSD count 0 < osd_pool_default_size 3

  services:
    mon: 1 daemons, quorum ceph01 (age 90m)
    mgr: ceph01.zqqyym(active, since 89m)
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:     100.000% pgs unknown
             1 unknown

将主机添加到集群中

[root@ceph01 ~]# ssh-copy-id -f -i /etc/ceph/ceph.pub root@ceph02
[root@ceph01 ~]# ssh-copy-id -f -i /etc/ceph/ceph.pub root@ceph03

# 将主机添加到集群中, 注意:目标主机必须安装了 python3 和 docker
[root@ceph01 ~]# ceph orch host add ceph02
Added host 'ceph02'
[root@ceph01 ~]# ceph orch host add ceph03
Added host 'ceph03'

# 查看当前节点
[root@ceph01 ~]# ceph orch host ls
HOST    ADDR    LABELS  STATUS  
ceph01  ceph01                  
ceph02  ceph02                  
ceph03  ceph03 

# 查看集群是否已经扩展完成(3个crash,3个mon,2个mgr)
[root@ceph01 ~]# ceph orch ps 
NAME               HOST    STATUS          REFRESHED  AGE   VERSION  IMAGE NAME               IMAGE ID      CONTAINER ID  
crash.ceph01       ceph01  running (110m)  2m ago     110m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  6eeecc944c89  
crash.ceph02       ceph02  running (4m)    2m ago     4m    15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  53c83cfc2470  
crash.ceph03       ceph03  running (3m)    2m ago     3m    15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  91f41bd39008  
mgr.ceph01.zqqyym  ceph01  running (111m)  2m ago     111m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  4bb1b13e5c3b  
mgr.ceph03.znmizf  ceph03  running (3m)    2m ago     3m    15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  9d65b425cbff  
mon.ceph01         ceph01  running (111m)  2m ago     111m  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  ddf141776199  
mon.ceph02         ceph02  running (3m)    2m ago     3m    15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  c3a004194faa  
mon.ceph03         ceph03  running (2m)    2m ago     2m    15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  7a6239a28215  

部署 OSD

[root@ceph01 ~]# ceph orch daemon add osd ceph01:/dev/sdb
Created osd(s) 0 on host 'ceph01'

[root@ceph01 ~]# ceph orch daemon add osd ceph02:/dev/sdb
Created osd(s) 1 on host 'ceph02'

[root@ceph01 ~]# ceph orch daemon add osd ceph03:/dev/sdb
Created osd(s) 2 on host 'ceph03'

[root@ceph01 ~]# ceph orch device ls
HOST    PATH      TYPE   SIZE  DEVICE  AVAIL  REJECT REASONS                                          
ceph01  /dev/sda  hdd   40.0G          False  locked, Insufficient space (<5GB) on vgs, LVM detected  
ceph01  /dev/sdb  hdd   10.0G          False  locked, Insufficient space (<5GB) on vgs, LVM detected  
ceph02  /dev/sda  hdd   40.0G          False  Insufficient space (<5GB) on vgs, locked, LVM detected  
ceph02  /dev/sdb  hdd   10.0G          False  Insufficient space (<5GB) on vgs, locked, LVM detected  
ceph03  /dev/sda  hdd   40.0G          False  LVM detected, Insufficient space (<5GB) on vgs, locked  
ceph03  /dev/sdb  hdd   10.0G          False  LVM detected, Insufficient space (<5GB) on vgs, locked 

部署 MDS 提供 CephFs 功能

# 创建一个 pool 用于存储 cephfs 数据
[root@ceph01 ~]# ceph osd pool create cephfs_data 64 64
pool 'cephfs_data' created

# 创建一个 pool 用于存储 cephfs 元数据
[root@ceph01 ~]# ceph osd pool create cephfs_metadata 32 32
pool 'cephfs_metadata' created

# 创建 cephfs, 指定 cephfs_metadata 存储元数据, 指定 cephfs_data 存储实际数据
[root@ceph01 ~]# ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 3 and data pool 2

# 查看 cephfs
[root@ceph01 ~]# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

# 在 ceph01, ceph02, ceph03 部署 mds
[root@ceph01 ~]# ceph orch apply mds cephfs --placement="3 ceph01 ceph02 ceph03"
Scheduled mds.cephfs update...

# 查看 mds 是否启动
[root@ceph01 ~]# ceph orch ps --daemon-type mds
NAME                      HOST    STATUS        REFRESHED  AGE  VERSION  IMAGE NAME               IMAGE ID      CONTAINER ID  
mds.cephfs.ceph01.lmrpri  ceph01  running (4m)  4m ago     4m   15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  bcbc19c43089  
mds.cephfs.ceph02.ggapal  ceph02  running (4m)  4m ago     4m   15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  987aa0d80bd9  
mds.cephfs.ceph03.eelzdg  ceph03  running (4m)  4m ago     4m   15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  f3e2de2d5817  

# 查看当前集群的所有 pool
[root@ceph01 ~]# ceph osd lspools
1 device_health_metrics
2 cephfs_data
3 cephfs_metadata

部署 RGW

# 创建领域
[root@ceph01 ~]# radosgw-admin realm create --rgw-realm=rgw01 --default 
{
    "id": "e8f17788-0adf-47fd-a920-4d3be7faf604",
    "name": "rgw01",
    "current_period": "436ef150-e9ef-4748-83e9-b572a8812465",
    "epoch": 1
}

# 创建区域组
[root@ceph01 ~]# radosgw-admin zonegroup create --rgw-zonegroup=default --master --default
{
    "id": "aa0c224d-5894-40f9-88ce-6d65fef851d1",
    "name": "default",
    "api_name": "default",
    "is_master": "true",
    "endpoints": [],
    "hostnames": [],
    "hostnames_s3website": [],
    "master_zone": "",
    "zones": [],
    "placement_targets": [],
    "default_placement": "",
    "realm_id": "e8f17788-0adf-47fd-a920-4d3be7faf604",
    "sync_policy": {
        "groups": []
    }
}

# 创建区域
[root@ceph01 ~]# radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=cn-hangzho --master --default
{
    "id": "e0cd3501-b5bc-4015-832e-6b6b02708c02",
    "name": "cn-hangzho",
    "domain_root": "cn-hangzho.rgw.meta:root",
    "control_pool": "cn-hangzho.rgw.control",
    "gc_pool": "cn-hangzho.rgw.log:gc",
    "lc_pool": "cn-hangzho.rgw.log:lc",
    "log_pool": "cn-hangzho.rgw.log",
    "intent_log_pool": "cn-hangzho.rgw.log:intent",
    "usage_log_pool": "cn-hangzho.rgw.log:usage",
    "roles_pool": "cn-hangzho.rgw.meta:roles",
    "reshard_pool": "cn-hangzho.rgw.log:reshard",
    "user_keys_pool": "cn-hangzho.rgw.meta:users.keys",
    "user_email_pool": "cn-hangzho.rgw.meta:users.email",
    "user_swift_pool": "cn-hangzho.rgw.meta:users.swift",
    "user_uid_pool": "cn-hangzho.rgw.meta:users.uid",
    "otp_pool": "cn-hangzho.rgw.otp",
    "system_key": {
        "access_key": "",
        "secret_key": ""
    },
    "placement_pools": [
        {
            "key": "default-placement",
            "val": {
                "index_pool": "cn-hangzho.rgw.buckets.index",
                "storage_classes": {
                    "STANDARD": {
                        "data_pool": "cn-hangzho.rgw.buckets.data"
                    }
                },
                "data_extra_pool": "cn-hangzho.rgw.buckets.non-ec",
                "index_type": 0
            }
        }
    ],
    "realm_id": "e8f17788-0adf-47fd-a920-4d3be7faf604"
}

# 在 ceph01, ceph02, ceph03 上部署 rgw
[root@ceph01 ~]# ceph orch apply rgw rgw01 cn-hangzho --placement="3 ceph01 ceph02 ceph03"
Scheduled rgw.rgw01.cn-hangzho update...

# 查看各节点 rgw 是否启动
[root@ceph01 ~]# ceph orch ps --daemon-type rgw
NAME                                HOST    STATUS         REFRESHED  AGE  VERSION  IMAGE NAME               IMAGE ID      CONTAINER ID  
rgw.rgw01.cn-hangzho.ceph01.uaglvv  ceph01  running (25s)  23s ago    25s  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  d7e90e8d4d1b  
rgw.rgw01.cn-hangzho.ceph02.szkeqj  ceph02  running (27s)  21s ago    27s  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  95171c8f0e4b  
rgw.rgw01.cn-hangzho.ceph03.zphone  ceph03  running (26s)  20s ago    26s  15.2.3   docker.io/ceph/ceph:v15  d72755c420bc  19804d391100  

测试 RBD 使用

# 创建 RBD 使用的存储池(pool 名称为 rbd)
[root@ceph01 ~]# ceph osd pool create rbd 16

# 为 rbd 存储池启用 rbd 应用类型
[root@ceph01 ~]# ceph osd pool application enable rbd rbd

# 创建 rbd 存储, 指定大小为 10GB
[root@ceph01 ~]# rbd create rbd1 --size 10240

# 查看 rbd 信息
[root@ceph01 ~]# rbd --image rbd1 info
rbd image 'rbd1':
        size 10 GiB in 2560 objects
        order 22 (4 MiB objects)
        snapshot_count: 0
        id: 39e7c066a17a
        block_name_prefix: rbd_data.39e7c066a17a
        format: 2
        features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
        op_features: 
        flags: 
        create_timestamp: Wed Jun 24 16:52:53 2020
        access_timestamp: Wed Jun 24 16:52:53 2020
        modify_timestamp: Wed Jun 24 16:52:53 2020

[root@ceph01 ~]# ceph osd crush tunables hammer
adjusted tunables profile to hammer

[root@ceph01 ~]# ceph osd crush reweight-all
reweighted crush hierarchy

# 关闭一些内核默认不支持的特性
[root@ceph01 ~]# rbd feature disable rbd1 exclusive-lock object-map fast-diff deep-flatten

# 查看特性是否已禁用
[root@ceph01 ~]# rbd --image rbd1 info | grep features
        features: layering
        op_features: 

# 映射到客户端(在需要挂载的客户端运行)
[root@ceph01 ~]# rbd map --image rbd1
/dev/rbd0

# 查看映射情况
[root@ceph01 ~]# rbd showmapped 
id  pool  namespace  image  snap  device   
0   rbd              rbd1   -     /dev/rbd0

# 格式化
[root@ceph01 ~]# mkfs.xfs /dev/rbd0

# 创建挂载目录, 并将 rbd 挂载到指定目录
[root@ceph01 ~]# mkdir /mnt/rbd
[root@ceph01 ~]# mount /dev/rbd0 /mnt/rbd/

# 查看挂载情况
[root@ceph01 ~]# df -hl | grep rbd
/dev/rbd0                 10G   33M   10G   1% /mnt/rbd

测试对象存储

# 安装 S3 命令行客户端 s3cmd
[root@ceph01 ~]# yum -y install s3cmd

# 创建用户
[root@ceph01 ~]# radosgw-admin user create --uid=s3 --display-name="objcet storage" --system

# 获取用户 access_key 和 secret_key
[root@ceph01 ~]# radosgw-admin user info --uid=s3 | grep -E "access_key|secret_key"
            "access_key": "RPRUFOWDK0T4MI4GL27C",
            "secret_key": "32efWJ7O5CGeKJbRdsDuyderNwwLLNOp4cnt13ZS"

# 生成 S3 客户端配置(设置以下参数, 其余默认即可)
[root@ceph01 ~]# s3cmd --configure
Access Key: RPRUFOWDK0T4MI4GL27C
Secret Key: 32efWJ7O5CGeKJbRdsDuyderNwwLLNOp4cnt13ZS
S3 Endpoint [s3.amazonaws.com]: ceph01
DNS-style bucket+hostname:port template for accessing a bucket [%(bucket)s.s3.amazonaws.com]: %(bucket)s.ceph01
Use HTTPS protocol [Yes]: no
Test access with supplied credentials? [Y/n] y
Save settings? [y/N] y
Configuration saved to '/root/.s3cfg'

# 创建桶
[root@ceph01 ~]# s3cmd mb s3://bucket
Bucket 's3://bucket/' created

# 查看当前所有桶
[root@ceph01 ~]# s3cmd ls
2020-06-28 03:02  s3://bucket

测试 CephFs 使用

# 创建用户用于客户端访问 CephFs
[root@ceph01 ~]# ceph auth get-or-create client.cephfs mon 'allow r' mds 'allow r, allow rw path=/' osd 'allow rw pool=cephfs_data' -o ceph.client.cephfs.keyring

# 获取用户 token(密钥)
[root@ceph01 ~]# ceph auth get-key client.cephfs
AQAyGfNeTXKzDhAAkQPKLA72/Rriy9qpczzp8A==

# 创建挂载目录, 并将 Ceph 挂载到指定目录, 此种挂载方式被称为内核驱动的挂载方式, 也可以将其通过 NFS Ganesha 输出为 NFS 服务器格式
[root@ceph01 ~]# mkdir /mnt/cephfs/
[root@ceph01 ~]# mount -t ceph ceph01:/ /mnt/cephfs/ -o name=cephfs,secret=AQAyGfNeTXKzDhAAkQPKLA72/Rriy9qpczzp8A==

# 查看挂载
[root@ceph01 ~]# mount | grep cephfs
192.168.1.71:/ on /mnt/cephfs type ceph (rw,relatime,name=cephfs,secret=,acl)