node1 10.3.45.112
node2 10.3.45.113
node3 10.3.45.115
Hardware: 1 vCPU and 1 GB RAM per node; in addition, each machine needs at least two extra disks attached.
(1) Disable the firewall:
systemctl stop firewalld
systemctl disable firewalld
(2) Disable SELinux:
sed -i 's/enforcing/disabled/' /etc/selinux/config
setenforce 0
(3) Disable NetworkManager:
systemctl disable NetworkManager && systemctl stop NetworkManager
(4) Add hostname-to-IP mappings:
vim /etc/hosts
10.3.45.112 node1
10.3.45.113 node2
10.3.45.115 node3
(5) Set the hostname (run the matching command on each node):
hostnamectl set-hostname node1
hostnamectl set-hostname node2
hostnamectl set-hostname node3
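A quick, optional check from any node that the names resolve as intended:
for n in node1 node2 node3; do getent hosts $n; done # each line should show the IP recorded in /etc/hosts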
(6) Sync time over the network and set the time zone:
echo '*/2 * * * * /usr/sbin/ntpdate cn.pool.ntp.org &>/dev/null' >> /var/spool/cron/root
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
(7) Raise the file descriptor limits:
ulimit -n 65535
cat >> /etc/security/limits.conf << EOF
* soft nofile 65535
* hard nofile 65535
EOF
# Note: the limits.conf entries take effect for new login sessions; sysctl -p only reloads /etc/sysctl.conf and is not needed for this step
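After logging in again, a quick check that the new limits are in place:
ulimit -Sn # soft limit, should print 65535
ulimit -Hn # hard limit, should print 65535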
(8) On node1, generate an SSH key and set up password-less login to node1, node2 and node3:
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
ssh-copy-id root@node1
ssh-copy-id root@node2
ssh-copy-id root@node3
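Optionally confirm that password-less login works before running ceph-deploy; a minimal check from node1:
for n in node1 node2 node3; do ssh root@$n hostname; done # should print node1 node2 node3 with no password prompts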
[root@node1 ~]# yum install epel-release -y
[root@node1 ~]# cat /etc/yum.repos.d/ceph.repo
[ceph]
name=Ceph packages for $basearch
baseurl=http://mirrors.163.com/ceph/rpm-luminous/el7/$basearch
enabled=1
priority=2
gpgcheck=1
gpgkey=https://download.ceph.com/keys/release.asc
[ceph-noarch]
name=Ceph noarch packages
baseurl=http://mirrors.163.com/ceph/rpm-luminous/el7/noarch
enabled=1
priority=2
gpgcheck=1
gpgkey=https://download.ceph.com/keys/release.asc
[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.163.com/ceph/rpm-luminous/el7/SRPMS
enabled=0
priority=2
gpgcheck=1
gpgkey=https://download.ceph.com/keys/release.asc
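ceph-deploy is later run with --no-adjust-repos, so node2 and node3 need the same repositories. A minimal sketch for copying them over (relies on the password-less SSH from step (8)):
for n in node2 node3; do
  scp /etc/yum.repos.d/ceph.repo root@$n:/etc/yum.repos.d/
  ssh root@$n "yum install -y epel-release && yum makecache"
done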
1. Create the Ceph cluster and generate the initial monitor configuration:
yum install ceph-deploy -y
mkdir /etc/ceph
cd /etc/ceph
ceph-deploy new node1 node2 node3 # generate the initial mon configuration
If ceph-deploy fails at this point with a Python setuptools/distribute error, install the distribute package as shown below (it can also be downloaded from this Baidu netdisk link: https://pan.baidu.com/s/1ShQK4lGke0m6hcDvXP1jWA, extraction code: 5elt):
wget https://pypi.python.org/packages/source/d/distribute/distribute-0.7.3.zip --no-check-certificate
unzip distribute-0.7.3.zip
cd distribute-0.7.3
python setup.py install
2. Install the Ceph packages:
ceph-deploy install --no-adjust-repos node1 node2 node3
3. Create the initial monitors and gather the keys used by the cluster:
ceph-deploy mon create-initial
4. Edit the ceph.conf configuration:
vim /etc/ceph/ceph.conf
[global]
fsid = b697e78a-2687-4291-93bf-42739e967bec
mon_initial_members = node1, node2, node3
mon_host = 10.3.45.112,10.3.45.113,10.3.45.115
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
# Note: leave at least one blank line at the very bottom of this file
mon clock drift allowed = 2
mon clock drift warn backoff = 30
# Separate public and cluster networks (optional; useful when the nodes have more than one NIC)
public_network = 10.3.45.0/24
#cluster_network = 10.4.41.0/24
# Depends on your number of PGs
# Maximum number of PGs allowed per OSD
mon_max_pg_per_osd = 2000
# Default is 2, try to set at least 5; the hard PG limit is mon_max_pg_per_osd * osd_max_pg_per_osd_hard_ratio
osd_max_pg_per_osd_hard_ratio = 10
#without it you can't remove a pool
mon_allow_pool_delete = true
# How many seconds an OSD daemon may be unresponsive before Ceph marks it down/out
mon_osd_down_out_interval = 86400
# Number of replicas per pool (with only 2 OSDs set this to 2, otherwise HEALTH_WARN)
osd_pool_default_size = 2
# Allow I/O with only one replica while the pool is degraded
osd_pool_default_min_size = 1
# To throttle PG splitting, Ceph provides mon_osd_max_split_count: the maximum number of PGs per OSD allowed to split at one time
mon_osd_max_split_count = 100
# Fewer than 5 OSDs: set pg_num to 128.
# 5-10 OSDs: set pg_num to 512.
# 10-50 OSDs: set pg_num to 4096.
# More than 50 OSDs: use pgcalc to work it out.
# PGs per pool: Total PGs = ((Total_number_of_OSD * 100) / max_replication_count) / pool_count, rounded up to the nearest power of 2
# With 3 nodes of 2 OSDs each (6 OSDs) and 2 replicas the formula gives 300, rounded up to 512
osd_pool_default_pg_num = 512
# pgp_num is calculated the same way, so also 512
osd_pool_default_pgp_num = 512
# Bucket type used when a CRUSH rule calls chooseleaf (default 1 = host)
osd_crush_chooseleaf_type = 1
# Disable dynamic bucket index resharding
rgw_dynamic_resharding = false
# Number of index shards per bucket (can later be changed per bucket as needed)
rgw_override_bucket_index_max_shards = 100
rgw_max_chunk_size = 1048576
rgw_cache_lru_size = 1000000
rgw_bucket_default_quota_max_objects = -1
# With large disks the warning threshold can be raised from the default 0.85 to 0.9
osd_failsafe_full_ratio = 0.98
mon_osd_full_ratio = 0.95
mon_osd_backfillfull_ratio = 0.93
mon_osd_nearfull_ratio = 0.9
[osd]
# Interval in seconds between the heartbeats an OSD sends to other OSDs
osd_heartbeat_interval = 15
# Seconds without a heartbeat before the cluster considers an OSD down
osd_heartbeat_grace = 60
# Number of threads handling peering and similar requests
osd_op_threads = 8
# Number of threads for snap trim, replica trim and scrub
osd_disk_threads = 4
# If set, Ceph raises the OS-level limit on open file descriptors at cluster start, which keeps OSD processes from running out of file descriptors. The default is 0; any 64-bit integer may be used.
max_open_files = 10485760
# Set this to true for the initial deployment and restart all OSDs, otherwise newly created pools report '100.000% pgs unknown'. Once all OSDs have joined and the cluster is healthy, it can be switched back to false and the OSDs restarted.
osd_crush_update_on_start = true
# BlueStore tuning for small-object workloads
#bluestore_cache_autotune = 0
bluestore_cache_size_hdd = 3221225472 #3G
bluestore_cache_kv_ratio = 0.6
bluestore_cache_meta_ratio = 0.4
bluestore_cache_kv_max = 1073741824 #1G
bluestore_csum_type = none
bluestore extent map shard max size = 200
bluestore extent map shard min size = 50
bluestore extent map shard target size = 100
bluestore rocksdb options = compression=kNoCompression,max_write_buffer_number=32,min_write_buffer_number_to_merge=2,recycle_log_file_num=32,compaction_style=kCompactionStyleLevel,write_buffer_size=67108864,target_file_size_base=67108864,max_background_compactions=31,level0_file_num_compaction_trigger=8,level0_slowdown_writes_trigger=32,level0_stop_writes_trigger=64,max_bytes_for_level_base=536870912,compaction_threads=32,max_bytes_for_level_multiplier=8,flusher_threads=8,compaction_readahead_size=2MB
osd map share max epochs = 100
osd max backfills = 5
osd memory target = 4294967296
osd op num shards = 8
osd op num threads per shard = 2
osd min pg log entries = 10
osd max pg log entries = 10
osd pg log dups tracked = 10
osd pg log trim min = 10
# Scrub tuning
osd scrub begin hour = 0
osd scrub end hour = 7
osd scrub chunk min = 1
osd scrub chunk max = 1
osd scrub sleep = 3
osd deep scrub interval = 241920
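The pg_num formula from the comments above can be sanity-checked with a small shell calculation; a rough sketch for this guide's layout (6 OSDs, 2 replicas, 1 pool):
osds=6; replicas=2; pools=1
pgs=$(( osds * 100 / replicas / pools )) # 300
pow2=1; while [ $pow2 -lt $pgs ]; do pow2=$(( pow2 * 2 )); done
echo "raw=$pgs rounded_up_to_power_of_2=$pow2" # prints 512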
5. Fix the keyring permissions and push ceph.conf together with the cluster keys to all nodes:
[root@node1 ceph]# chmod +r /etc/ceph/ceph.client.admin.keyring
[root@node1 ceph]# ceph-deploy --overwrite-conf admin node1 node2 node3
6. Deploy mgr daemons to manage the cluster:
[root@node1 ceph]# ceph-deploy mgr create node1 node2 node3
7. Enable the dashboard (on any mon node) and push the updated ceph.conf:
[root@node1 ceph]# echo -e "\n[mgr]\nmgr modules = dashboard\n" >> /etc/ceph/ceph.conf
[root@node1 ceph]# ceph mgr dump # dump the current mgrmap (latest by default)
[root@node1 ceph]# ceph mgr module enable dashboard
[root@node1 ceph]# ceph mgr dump # dump the current mgrmap (latest by default)
[root@node1 ceph]# ss -anpt|grep 7000
LISTEN 0 5 [::]:7000 [::]:* users:(("ceph-mgr",pid=2154,fd=26))
[root@node1 ceph]# ceph-deploy --overwrite-conf config push node1 node2 node3
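If nothing is listening on port 7000 after enabling the module, restarting the active mgr usually makes it pick up the new [mgr] section; a hedged sketch, assuming the active mgr runs on node1:
systemctl restart ceph-mgr@node1
ceph mgr dump | grep -E 'active_name|dashboard' # confirm the active mgr and the dashboard service URL
curl -sI http://node1:7000 | head -n 1 # should return an HTTP status line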
8. Prepare the disks (on node1, node2 and node3):
# Label the disks with GPT
parted -s /dev/sdb mklabel gpt
parted -s /dev/sdc mklabel gpt
# Zap (wipe) the disks so ceph-volume can use them
ceph-volume lvm zap /dev/sdb
ceph-volume lvm zap /dev/sdc
9. Add the OSDs:
ceph-deploy osd create --data /dev/sdb node1
ceph-deploy osd create --data /dev/sdb node2
ceph-deploy osd create --data /dev/sdb node3
ceph-deploy osd create --data /dev/sdc node1
ceph-deploy osd create --data /dev/sdc node2
ceph-deploy osd create --data /dev/sdc node3
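Once all six OSDs are created, confirm they joined the cluster:
ceph -s # overall health plus mon/mgr/osd counts
ceph osd tree # all 6 OSDs should be up and placed under their hosts
ceph osd df # per-OSD usage and PG counts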
1. Create a block device pool and initialize it with the rbd tool so it can be used for RBD:
[root@bd-server-2 ~]# ceph osd pool create ceph_rbd 128
pool 'ceph_rbd' created
[root@bd-server-2 ~]# rbd pool init ceph_rbd
[root@bd-server-2 ~]# ceph osd pool application enable ceph_rbd rbd # enable the rbd application on the ceph_rbd pool
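A quick check that the pool exists and carries the rbd application tag:
ceph osd pool ls detail # shows pg_num, size and 'application rbd' for ceph_rbd
ceph df # capacity available to the new pool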
2. Create a block device image of the desired size (size is in MB). In all of the commands below, if no pool is specified, the default pool named rbd is used.
rbd create --size {megabytes} {pool-name}/{image-name}
[root@bd-server-2 ~]# rbd create --size 1024 ceph_rbd/docker_image
3. List the created images:
[root@bd-server-2 ~]# rbd ls ceph_rbd
docker_image
4. Retrieve the image's information:
[root@bd-server-2 ~]# rbd info ceph_rbd/docker_image
rbd image 'docker_image':
size 1024 MB in 256 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.ae356b8b4567
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
flags:
create_timestamp: Wed Mar 2 09:39:01 2022
5. If needed, the image can be grown or shrunk:
# Grow
[root@bd-server-2 ~]# rbd resize --size 200G ceph_rbd/docker_image # grow at the RBD layer
Resizing image: 100% complete...done.
[root@bd-server-2 ~]# resize2fs /dev/rbd0 # grow the filesystem; must be run after the device is mapped (see the XFS note after this step)
# Shrink
[root@bd-server-2 ~]# rbd resize --size 1024 ceph_rbd/docker_image --allow-shrink
Resizing image: 100% complete...done.
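Note that resize2fs only handles ext2/3/4. If the image had been formatted with XFS, grow it while mounted with xfs_growfs (XFS cannot be shrunk); a sketch, assuming the mount point used later in this guide:
xfs_growfs /lijia # grow an XFS filesystem mounted at /lijia after rbd resize
When shrinking, shrink the filesystem first (e.g. resize2fs to a smaller size on an unmounted ext4) and only then run rbd resize --allow-shrink, otherwise data beyond the new size is lost.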
6. Disable unsupported features and map the block device on the client (the client needs the Ceph packages installed):
[root@bd-server-2 ~]# rbd feature disable ceph_rbd/docker_image object-map fast-diff deep-flatten
[root@bd-server-2 ~]# rbd map ceph_rbd/docker_image
/dev/rbd0
object-map: keeps a bitmap recording which of the image's data objects actually exist; looking it up speeds operations such as import, export, clone flatten and used-capacity statistics, and helps reduce the COW latency of cloned images. Depends on exclusive-lock.
fast-diff: speeds up calculating incremental data between snapshots and similar operations. Depends on object-map.
deep-flatten: controls whether, when a clone is flattened, snapshots taken of the clone are also detached from the parent image.
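If the client kernel gains support later, the disabled features can be re-enabled (deep-flatten is the exception: it can only be set when the image is created); an example:
rbd feature enable ceph_rbd/docker_image object-map fast-diff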
7. Check the mapped block device:
[root@bd-server-2 ~]# lsblk -l|grep rbd0
rbd0 252:0 0 1G 0 disk
8. Use the RBD block device:
[root@bd-server-2 ~]# mkfs.ext4 /dev/rbd0
[root@bd-server-2 ~]# mkdir /lijia
[root@bd-server-2 ~]# mount /dev/rbd0 /lijia
Once mounted, the device is ready to use.
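To bring the mapping back automatically after a reboot, the rbdmap unit shipped with the Ceph packages can be used; a minimal sketch, assuming the default admin keyring path:
echo 'ceph_rbd/docker_image id=admin,keyring=/etc/ceph/ceph.client.admin.keyring' >> /etc/ceph/rbdmap
systemctl enable rbdmap
echo '/dev/rbd/ceph_rbd/docker_image /lijia ext4 noauto,_netdev 0 0' >> /etc/fstab # optional: with this entry the device can be mounted with just 'mount /lijia'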
9. To unmount and unmap the device:
[root@local-node-1 ~]# umount /lijia
[root@bd-server-2 ~]# rbd unmap /dev/rbd0 # remove the block device mapping
10. To remove the block device from the pool and then delete the pool itself:
[root@bd-server-2 ~]# rbd rm ceph_rbd/docker_image # delete the block device image
ceph osd pool rm ceph_rbd ceph_rbd --yes-i-really-really-mean-it # delete the pool